
Source Code for Module ganeti.cmdlib.cluster

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30   
  31  """Logical units dealing with the cluster.""" 
  32   
  33  import copy 
  34  import itertools 
  35  import logging 
  36  import operator 
  37  import os 
  38  import re 
  39  import time 
  40   
  41  from ganeti import compat 
  42  from ganeti import constants 
  43  from ganeti import errors 
  44  from ganeti import hypervisor 
  45  from ganeti import locking 
  46  from ganeti import masterd 
  47  from ganeti import netutils 
  48  from ganeti import objects 
  49  from ganeti import opcodes 
  50  from ganeti import pathutils 
  51  from ganeti import query 
  52  import ganeti.rpc.node as rpc 
  53  from ganeti import runtime 
  54  from ganeti import ssh 
  55  from ganeti import uidpool 
  56  from ganeti import utils 
  57  from ganeti import vcluster 
  58   
  59  from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \ 
  60    ResultWithJobs 
  61  from ganeti.cmdlib.common import ShareAll, RunPostHook, \ 
  62    ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \ 
  63    GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \ 
  64    GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \ 
  65    CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \ 
  66    ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \ 
  67    CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \ 
  68    CheckDiskAccessModeConsistency, CreateNewClientCert, EnsureKvmdOnNodes 
  69   
  70  import ganeti.masterd.instance 
71 72 73 -def _UpdateMasterClientCert( 74 lu, master_uuid, cluster, feedback_fn, 75 client_cert=pathutils.NODED_CLIENT_CERT_FILE, 76 client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP):
77 """Renews the master's client certificate and propagates the config. 78 79 @type lu: C{LogicalUnit} 80 @param lu: the logical unit holding the config 81 @type master_uuid: string 82 @param master_uuid: the master node's UUID 83 @type cluster: C{objects.Cluster} 84 @param cluster: the cluster's configuration 85 @type feedback_fn: function 86 @param feedback_fn: feedback functions for config updates 87 @type client_cert: string 88 @param client_cert: the path of the client certificate 89 @type client_cert_tmp: string 90 @param client_cert_tmp: the temporary path of the client certificate 91 @rtype: string 92 @return: the digest of the newly created client certificate 93 94 """ 95 client_digest = CreateNewClientCert(lu, master_uuid, filename=client_cert_tmp) 96 utils.AddNodeToCandidateCerts(master_uuid, client_digest, 97 cluster.candidate_certs) 98 # This triggers an update of the config and distribution of it with the old 99 # SSL certificate 100 lu.cfg.Update(cluster, feedback_fn) 101 102 utils.RemoveFile(client_cert) 103 utils.RenameFile(client_cert_tmp, client_cert) 104 return client_digest
105
106 107 -class LUClusterRenewCrypto(NoHooksLU):
108 """Renew the cluster's crypto tokens. 109 110 Note that most of this operation is done in gnt_cluster.py, this LU only 111 takes care of the renewal of the client SSL certificates. 112 113 """ 114 _MAX_NUM_RETRIES = 3 115
116 - def Exec(self, feedback_fn):
117 master_uuid = self.cfg.GetMasterNode() 118 logging.debug("Renewing the master's SSL node certificate." 119 " Master's UUID: %s.", master_uuid) 120 cluster = self.cfg.GetClusterInfo() 121 122 server_digest = utils.GetCertificateDigest( 123 cert_filename=pathutils.NODED_CERT_FILE) 124 logging.debug("SSL digest of the node certificate: %s.", server_digest) 125 utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid, 126 server_digest, 127 cluster.candidate_certs) 128 logging.debug("Added master's digest as *-SERVER entry to configuration." 129 " Current list of candidate certificates: %s.", 130 str(cluster.candidate_certs)) 131 132 try: 133 old_master_digest = utils.GetCertificateDigest( 134 cert_filename=pathutils.NODED_CLIENT_CERT_FILE) 135 logging.debug("SSL digest of old master's SSL node certificate: %s.", 136 old_master_digest) 137 utils.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid, 138 old_master_digest, 139 cluster.candidate_certs) 140 logging.debug("Added old master's node certificate digest to config" 141 " as *-OLDMASTER. Current list of candidate certificates:" 142 " %s.", str(cluster.candidate_certs)) 143 144 except IOError: 145 logging.info("No old master certificate available.") 146 147 last_exception = None 148 for i in range(self._MAX_NUM_RETRIES): 149 try: 150 # Technically it should not be necessary to set the cert 151 # paths. However, due to a bug in the mock library, we 152 # have to do this to be able to test the function properly. 153 _UpdateMasterClientCert( 154 self, master_uuid, cluster, feedback_fn, 155 client_cert=pathutils.NODED_CLIENT_CERT_FILE, 156 client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP) 157 logging.debug("Successfully renewed the master's node certificate.") 158 break 159 except errors.OpExecError as e: 160 logging.error("Renewing the master's SSL node certificate failed" 161 " at attempt no. %s with error '%s'", str(i), e) 162 last_exception = e 163 else: 164 if last_exception: 165 feedback_fn("Could not renew the master's client SSL certificate." 166 " Cleaning up. Error: %s." % last_exception) 167 # Cleaning up temporary certificates 168 utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid, 169 cluster.candidate_certs) 170 utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid, 171 cluster.candidate_certs) 172 logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from" 173 " master candidate cert list. Current state of the" 174 " list: %s.", str(cluster.candidate_certs)) 175 try: 176 utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP) 177 except IOError as e: 178 logging.debug("Could not clean up temporary node certificate of the" 179 " master node. (Possibly because it was already removed" 180 " properly.) 
Error: %s.", e) 181 return 182 183 node_errors = {} 184 nodes = self.cfg.GetAllNodesInfo() 185 logging.debug("Renewing non-master nodes' node certificates.") 186 for (node_uuid, node_info) in nodes.items(): 187 if node_info.offline: 188 feedback_fn("* Skipping offline node %s" % node_info.name) 189 logging.debug("Skipping offline node %s (UUID: %s).", 190 node_info.name, node_uuid) 191 continue 192 if node_uuid != master_uuid: 193 logging.debug("Renewing node certificate of node '%s'.", node_uuid) 194 last_exception = None 195 for i in range(self._MAX_NUM_RETRIES): 196 try: 197 new_digest = CreateNewClientCert(self, node_uuid) 198 if node_info.master_candidate: 199 utils.AddNodeToCandidateCerts(node_uuid, 200 new_digest, 201 cluster.candidate_certs) 202 logging.debug("Added the node's certificate to candidate" 203 " certificate list. Current list: %s.", 204 str(cluster.candidate_certs)) 205 break 206 except errors.OpExecError as e: 207 last_exception = e 208 logging.error("Could not renew a non-master node's SSL node" 209 " certificate at attempt no. %s. The node's UUID" 210 " is %s, and the error was: %s.", 211 str(i), node_uuid, e) 212 else: 213 if last_exception: 214 node_errors[node_uuid] = last_exception 215 216 if node_errors: 217 msg = ("Some nodes' SSL client certificates could not be renewed." 218 " Please make sure those nodes are reachable and rerun" 219 " the operation. The affected nodes and their errors are:\n") 220 for uuid, e in node_errors.items(): 221 msg += "Node %s: %s\n" % (uuid, e) 222 feedback_fn(msg) 223 224 utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid, 225 cluster.candidate_certs) 226 utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid, 227 cluster.candidate_certs) 228 logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from" 229 " master candidate cert list. Current state of the" 230 " list: %s.", cluster.candidate_certs) 231 232 # Trigger another update of the config now with the new master cert 233 logging.debug("Trigger an update of the configuration on all nodes.") 234 self.cfg.Update(cluster, feedback_fn)
235
236 237 -class LUClusterActivateMasterIp(NoHooksLU):
238 """Activate the master IP on the master node. 239 240 """
241 - def Exec(self, feedback_fn):
242 """Activate the master IP. 243 244 """ 245 master_params = self.cfg.GetMasterNetworkParameters() 246 ems = self.cfg.GetUseExternalMipScript() 247 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 248 master_params, ems) 249 result.Raise("Could not activate the master IP")
250
251 252 -class LUClusterDeactivateMasterIp(NoHooksLU):
253 """Deactivate the master IP on the master node. 254 255 """
256 - def Exec(self, feedback_fn):
257 """Deactivate the master IP. 258 259 """ 260 master_params = self.cfg.GetMasterNetworkParameters() 261 ems = self.cfg.GetUseExternalMipScript() 262 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 263 master_params, ems) 264 result.Raise("Could not deactivate the master IP")
265
266 267 -class LUClusterConfigQuery(NoHooksLU):
268 """Return configuration values. 269 270 """ 271 REQ_BGL = False 272
273 - def CheckArguments(self):
274 self.cq = ClusterQuery(None, self.op.output_fields, False)
275
276 - def ExpandNames(self):
277 self.cq.ExpandNames(self)
278
279 - def DeclareLocks(self, level):
280 self.cq.DeclareLocks(self, level)
281
282 - def Exec(self, feedback_fn):
283 result = self.cq.OldStyleQuery(self) 284 285 assert len(result) == 1 286 287 return result[0]
288
289 290 -class LUClusterDestroy(LogicalUnit):
291 """Logical unit for destroying the cluster. 292 293 """ 294 HPATH = "cluster-destroy" 295 HTYPE = constants.HTYPE_CLUSTER 296
297 - def BuildHooksEnv(self):
298 """Build hooks env. 299 300 """ 301 return { 302 "OP_TARGET": self.cfg.GetClusterName(), 303 }
304
305 - def BuildHooksNodes(self):
306 """Build hooks nodes. 307 308 """ 309 return ([], [])
310
311 - def CheckPrereq(self):
312 """Check prerequisites. 313 314 This checks whether the cluster is empty. 315 316 Any errors are signaled by raising errors.OpPrereqError. 317 318 """ 319 master = self.cfg.GetMasterNode() 320 321 nodelist = self.cfg.GetNodeList() 322 if len(nodelist) != 1 or nodelist[0] != master: 323 raise errors.OpPrereqError("There are still %d node(s) in" 324 " this cluster." % (len(nodelist) - 1), 325 errors.ECODE_INVAL) 326 instancelist = self.cfg.GetInstanceList() 327 if instancelist: 328 raise errors.OpPrereqError("There are still %d instance(s) in" 329 " this cluster." % len(instancelist), 330 errors.ECODE_INVAL)
331
332 - def Exec(self, feedback_fn):
333 """Destroys the cluster. 334 335 """ 336 master_params = self.cfg.GetMasterNetworkParameters() 337 338 # Run post hooks on master node before it's removed 339 RunPostHook(self, self.cfg.GetNodeName(master_params.uuid)) 340 341 ems = self.cfg.GetUseExternalMipScript() 342 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 343 master_params, ems) 344 result.Warn("Error disabling the master IP address", self.LogWarning) 345 return master_params.uuid
346
347 348 -class LUClusterPostInit(LogicalUnit):
349 """Logical unit for running hooks after cluster initialization. 350 351 """ 352 HPATH = "cluster-init" 353 HTYPE = constants.HTYPE_CLUSTER 354
355 - def CheckArguments(self):
356 self.master_uuid = self.cfg.GetMasterNode() 357 self.master_ndparams = self.cfg.GetNdParams(self.cfg.GetMasterNodeInfo()) 358 359 # TODO: When Issue 584 is solved, and None is properly parsed when used 360 # as a default value, ndparams.get(.., None) can be changed to 361 # ndparams[..] to access the values directly 362 363 # OpenvSwitch: Warn user if link is missing 364 if (self.master_ndparams[constants.ND_OVS] and not 365 self.master_ndparams.get(constants.ND_OVS_LINK, None)): 366 self.LogInfo("No physical interface for OpenvSwitch was given." 367 " OpenvSwitch will not have an outside connection. This" 368 " might not be what you want.")
369
370 - def BuildHooksEnv(self):
371 """Build hooks env. 372 373 """ 374 return { 375 "OP_TARGET": self.cfg.GetClusterName(), 376 }
377
378 - def BuildHooksNodes(self):
379 """Build hooks nodes. 380 381 """ 382 return ([], [self.cfg.GetMasterNode()])
383
384 - def Exec(self, feedback_fn):
385 """Create and configure Open vSwitch 386 387 """ 388 if self.master_ndparams[constants.ND_OVS]: 389 result = self.rpc.call_node_configure_ovs( 390 self.master_uuid, 391 self.master_ndparams[constants.ND_OVS_NAME], 392 self.master_ndparams.get(constants.ND_OVS_LINK, None)) 393 result.Raise("Could not successully configure Open vSwitch") 394 395 cluster = self.cfg.GetClusterInfo() 396 _UpdateMasterClientCert(self, self.master_uuid, cluster, feedback_fn) 397 398 return True
399
400 401 -class ClusterQuery(QueryBase):
402 FIELDS = query.CLUSTER_FIELDS 403 404 #: Do not sort (there is only one item) 405 SORT_FIELD = None 406
407 - def ExpandNames(self, lu):
408 lu.needed_locks = {} 409 410 # The following variables interact with _QueryBase._GetNames 411 self.wanted = locking.ALL_SET 412 self.do_locking = self.use_locking 413 414 if self.do_locking: 415 raise errors.OpPrereqError("Can not use locking for cluster queries", 416 errors.ECODE_INVAL)
417
418 - def DeclareLocks(self, lu, level):
419 pass
420
421 - def _GetQueryData(self, lu):
422 """Computes the list of nodes and their attributes. 423 424 """ 425 # Locking is not used 426 assert not (compat.any(lu.glm.is_owned(level) 427 for level in locking.LEVELS 428 if level != locking.LEVEL_CLUSTER) or 429 self.do_locking or self.use_locking) 430 431 if query.CQ_CONFIG in self.requested_data: 432 cluster = lu.cfg.GetClusterInfo() 433 nodes = lu.cfg.GetAllNodesInfo() 434 else: 435 cluster = NotImplemented 436 nodes = NotImplemented 437 438 if query.CQ_QUEUE_DRAINED in self.requested_data: 439 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) 440 else: 441 drain_flag = NotImplemented 442 443 if query.CQ_WATCHER_PAUSE in self.requested_data: 444 master_node_uuid = lu.cfg.GetMasterNode() 445 446 result = lu.rpc.call_get_watcher_pause(master_node_uuid) 447 result.Raise("Can't retrieve watcher pause from master node '%s'" % 448 lu.cfg.GetMasterNodeName()) 449 450 watcher_pause = result.payload 451 else: 452 watcher_pause = NotImplemented 453 454 return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)
455
456 457 -class LUClusterQuery(NoHooksLU):
458 """Query cluster configuration. 459 460 """ 461 REQ_BGL = False 462
463 - def ExpandNames(self):
464 self.needed_locks = {}
465
466 - def Exec(self, feedback_fn):
467 """Return cluster config. 468 469 """ 470 cluster = self.cfg.GetClusterInfo() 471 os_hvp = {} 472 473 # Filter just for enabled hypervisors 474 for os_name, hv_dict in cluster.os_hvp.items(): 475 os_hvp[os_name] = {} 476 for hv_name, hv_params in hv_dict.items(): 477 if hv_name in cluster.enabled_hypervisors: 478 os_hvp[os_name][hv_name] = hv_params 479 480 # Convert ip_family to ip_version 481 primary_ip_version = constants.IP4_VERSION 482 if cluster.primary_ip_family == netutils.IP6Address.family: 483 primary_ip_version = constants.IP6_VERSION 484 485 result = { 486 "software_version": constants.RELEASE_VERSION, 487 "protocol_version": constants.PROTOCOL_VERSION, 488 "config_version": constants.CONFIG_VERSION, 489 "os_api_version": max(constants.OS_API_VERSIONS), 490 "export_version": constants.EXPORT_VERSION, 491 "vcs_version": constants.VCS_VERSION, 492 "architecture": runtime.GetArchInfo(), 493 "name": cluster.cluster_name, 494 "master": self.cfg.GetMasterNodeName(), 495 "default_hypervisor": cluster.primary_hypervisor, 496 "enabled_hypervisors": cluster.enabled_hypervisors, 497 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 498 for hypervisor_name in cluster.enabled_hypervisors]), 499 "os_hvp": os_hvp, 500 "beparams": cluster.beparams, 501 "osparams": cluster.osparams, 502 "ipolicy": cluster.ipolicy, 503 "nicparams": cluster.nicparams, 504 "ndparams": cluster.ndparams, 505 "diskparams": cluster.diskparams, 506 "candidate_pool_size": cluster.candidate_pool_size, 507 "max_running_jobs": cluster.max_running_jobs, 508 "master_netdev": cluster.master_netdev, 509 "master_netmask": cluster.master_netmask, 510 "use_external_mip_script": cluster.use_external_mip_script, 511 "volume_group_name": cluster.volume_group_name, 512 "drbd_usermode_helper": cluster.drbd_usermode_helper, 513 "file_storage_dir": cluster.file_storage_dir, 514 "shared_file_storage_dir": cluster.shared_file_storage_dir, 515 "maintain_node_health": cluster.maintain_node_health, 516 "ctime": cluster.ctime, 517 "mtime": cluster.mtime, 518 "uuid": cluster.uuid, 519 "tags": list(cluster.GetTags()), 520 "uid_pool": cluster.uid_pool, 521 "default_iallocator": cluster.default_iallocator, 522 "default_iallocator_params": cluster.default_iallocator_params, 523 "reserved_lvs": cluster.reserved_lvs, 524 "primary_ip_version": primary_ip_version, 525 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 526 "hidden_os": cluster.hidden_os, 527 "blacklisted_os": cluster.blacklisted_os, 528 "enabled_disk_templates": cluster.enabled_disk_templates, 529 "enabled_user_shutdown": cluster.enabled_user_shutdown, 530 } 531 532 return result
533
534 535 -class LUClusterRedistConf(NoHooksLU):
536 """Force the redistribution of cluster configuration. 537 538 This is a very simple LU. 539 540 """ 541 REQ_BGL = False 542
543 - def ExpandNames(self):
544 self.needed_locks = { 545 locking.LEVEL_NODE: locking.ALL_SET, 546 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 547 } 548 self.share_locks = ShareAll()
549
550 - def Exec(self, feedback_fn):
551 """Redistribute the configuration. 552 553 """ 554 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 555 RedistributeAncillaryFiles(self)
556
557 558 -class LUClusterRename(LogicalUnit):
559 """Rename the cluster. 560 561 """ 562 HPATH = "cluster-rename" 563 HTYPE = constants.HTYPE_CLUSTER 564
565 - def BuildHooksEnv(self):
566 """Build hooks env. 567 568 """ 569 return { 570 "OP_TARGET": self.cfg.GetClusterName(), 571 "NEW_NAME": self.op.name, 572 }
573
574 - def BuildHooksNodes(self):
575 """Build hooks nodes. 576 577 """ 578 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
579
580 - def CheckPrereq(self):
581 """Verify that the passed name is a valid one. 582 583 """ 584 hostname = netutils.GetHostname(name=self.op.name, 585 family=self.cfg.GetPrimaryIPFamily()) 586 587 new_name = hostname.name 588 self.ip = new_ip = hostname.ip 589 old_name = self.cfg.GetClusterName() 590 old_ip = self.cfg.GetMasterIP() 591 if new_name == old_name and new_ip == old_ip: 592 raise errors.OpPrereqError("Neither the name nor the IP address of the" 593 " cluster has changed", 594 errors.ECODE_INVAL) 595 if new_ip != old_ip: 596 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 597 raise errors.OpPrereqError("The given cluster IP address (%s) is" 598 " reachable on the network" % 599 new_ip, errors.ECODE_NOTUNIQUE) 600 601 self.op.name = new_name
602
603 - def Exec(self, feedback_fn):
604 """Rename the cluster. 605 606 """ 607 clustername = self.op.name 608 new_ip = self.ip 609 610 # shutdown the master IP 611 master_params = self.cfg.GetMasterNetworkParameters() 612 ems = self.cfg.GetUseExternalMipScript() 613 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 614 master_params, ems) 615 result.Raise("Could not disable the master role") 616 617 try: 618 cluster = self.cfg.GetClusterInfo() 619 cluster.cluster_name = clustername 620 cluster.master_ip = new_ip 621 self.cfg.Update(cluster, feedback_fn) 622 623 # update the known hosts file 624 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) 625 node_list = self.cfg.GetOnlineNodeList() 626 try: 627 node_list.remove(master_params.uuid) 628 except ValueError: 629 pass 630 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) 631 finally: 632 master_params.ip = new_ip 633 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 634 master_params, ems) 635 result.Warn("Could not re-enable the master role on the master," 636 " please restart manually", self.LogWarning) 637 638 return clustername
639
640 641 -class LUClusterRepairDiskSizes(NoHooksLU):
642 """Verifies the cluster disks sizes. 643 644 """ 645 REQ_BGL = False 646
647 - def ExpandNames(self):
648 if self.op.instances: 649 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances) 650 # Not getting the node allocation lock as only a specific set of 651 # instances (and their nodes) is going to be acquired 652 self.needed_locks = { 653 locking.LEVEL_NODE_RES: [], 654 locking.LEVEL_INSTANCE: self.wanted_names, 655 } 656 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 657 else: 658 self.wanted_names = None 659 self.needed_locks = { 660 locking.LEVEL_NODE_RES: locking.ALL_SET, 661 locking.LEVEL_INSTANCE: locking.ALL_SET, 662 663 # This opcode acquires the node locks for all instances 664 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 665 } 666 667 self.share_locks = { 668 locking.LEVEL_NODE_RES: 1, 669 locking.LEVEL_INSTANCE: 0, 670 locking.LEVEL_NODE_ALLOC: 1, 671 }
672
673 - def DeclareLocks(self, level):
674 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: 675 self._LockInstancesNodes(primary_only=True, level=level)
676
677 - def CheckPrereq(self):
678 """Check prerequisites. 679 680 This only checks the optional instance list against the existing names. 681 682 """ 683 if self.wanted_names is None: 684 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) 685 686 self.wanted_instances = \ 687 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
688
689 - def _EnsureChildSizes(self, disk):
690 """Ensure children of the disk have the needed disk size. 691 692 This is valid mainly for DRBD8 and fixes an issue where the 693 children have smaller disk size. 694 695 @param disk: an L{ganeti.objects.Disk} object 696 697 """ 698 if disk.dev_type == constants.DT_DRBD8: 699 assert disk.children, "Empty children for DRBD8?" 700 fchild = disk.children[0] 701 mismatch = fchild.size < disk.size 702 if mismatch: 703 self.LogInfo("Child disk has size %d, parent %d, fixing", 704 fchild.size, disk.size) 705 fchild.size = disk.size 706 707 # and we recurse on this child only, not on the metadev 708 return self._EnsureChildSizes(fchild) or mismatch 709 else: 710 return False
711
712 - def Exec(self, feedback_fn):
713 """Verify the size of cluster disks. 714 715 """ 716 # TODO: check child disks too 717 # TODO: check differences in size between primary/secondary nodes 718 per_node_disks = {} 719 for instance in self.wanted_instances: 720 pnode = instance.primary_node 721 if pnode not in per_node_disks: 722 per_node_disks[pnode] = [] 723 for idx, disk in enumerate(instance.disks): 724 per_node_disks[pnode].append((instance, idx, disk)) 725 726 assert not (frozenset(per_node_disks.keys()) - 727 self.owned_locks(locking.LEVEL_NODE_RES)), \ 728 "Not owning correct locks" 729 assert not self.owned_locks(locking.LEVEL_NODE) 730 731 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, 732 per_node_disks.keys()) 733 734 changed = [] 735 for node_uuid, dskl in per_node_disks.items(): 736 if not dskl: 737 # no disks on the node 738 continue 739 740 newl = [([v[2].Copy()], v[0]) for v in dskl] 741 node_name = self.cfg.GetNodeName(node_uuid) 742 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl) 743 if result.fail_msg: 744 self.LogWarning("Failure in blockdev_getdimensions call to node" 745 " %s, ignoring", node_name) 746 continue 747 if len(result.payload) != len(dskl): 748 logging.warning("Invalid result from node %s: len(dksl)=%d," 749 " result.payload=%s", node_name, len(dskl), 750 result.payload) 751 self.LogWarning("Invalid result from node %s, ignoring node results", 752 node_name) 753 continue 754 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload): 755 if dimensions is None: 756 self.LogWarning("Disk %d of instance %s did not return size" 757 " information, ignoring", idx, instance.name) 758 continue 759 if not isinstance(dimensions, (tuple, list)): 760 self.LogWarning("Disk %d of instance %s did not return valid" 761 " dimension information, ignoring", idx, 762 instance.name) 763 continue 764 (size, spindles) = dimensions 765 if not isinstance(size, (int, long)): 766 self.LogWarning("Disk %d of instance %s did not return valid" 767 " size information, ignoring", idx, instance.name) 768 continue 769 size = size >> 20 770 if size != disk.size: 771 self.LogInfo("Disk %d of instance %s has mismatched size," 772 " correcting: recorded %d, actual %d", idx, 773 instance.name, disk.size, size) 774 disk.size = size 775 self.cfg.Update(instance, feedback_fn) 776 changed.append((instance.name, idx, "size", size)) 777 if es_flags[node_uuid]: 778 if spindles is None: 779 self.LogWarning("Disk %d of instance %s did not return valid" 780 " spindles information, ignoring", idx, 781 instance.name) 782 elif disk.spindles is None or disk.spindles != spindles: 783 self.LogInfo("Disk %d of instance %s has mismatched spindles," 784 " correcting: recorded %s, actual %s", 785 idx, instance.name, disk.spindles, spindles) 786 disk.spindles = spindles 787 self.cfg.Update(instance, feedback_fn) 788 changed.append((instance.name, idx, "spindles", disk.spindles)) 789 if self._EnsureChildSizes(disk): 790 self.cfg.Update(instance, feedback_fn) 791 changed.append((instance.name, idx, "size", disk.size)) 792 return changed
793
794 795 -def _ValidateNetmask(cfg, netmask):
796 """Checks if a netmask is valid. 797 798 @type cfg: L{config.ConfigWriter} 799 @param cfg: The cluster configuration 800 @type netmask: int 801 @param netmask: the netmask to be verified 802 @raise errors.OpPrereqError: if the validation fails 803 804 """ 805 ip_family = cfg.GetPrimaryIPFamily() 806 try: 807 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family) 808 except errors.ProgrammerError: 809 raise errors.OpPrereqError("Invalid primary ip family: %s." % 810 ip_family, errors.ECODE_INVAL) 811 if not ipcls.ValidateNetmask(netmask): 812 raise errors.OpPrereqError("CIDR netmask (%s) not valid" % 813 (netmask), errors.ECODE_INVAL)
814
815 816 -def CheckFileBasedStoragePathVsEnabledDiskTemplates( 817 logging_warn_fn, file_storage_dir, enabled_disk_templates, 818 file_disk_template):
819 """Checks whether the given file-based storage directory is acceptable. 820 821 Note: This function is public, because it is also used in bootstrap.py. 822 823 @type logging_warn_fn: function 824 @param logging_warn_fn: function which accepts a string and logs it 825 @type file_storage_dir: string 826 @param file_storage_dir: the directory to be used for file-based instances 827 @type enabled_disk_templates: list of string 828 @param enabled_disk_templates: the list of enabled disk templates 829 @type file_disk_template: string 830 @param file_disk_template: the file-based disk template for which the 831 path should be checked 832 833 """ 834 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes( 835 constants.ST_FILE, constants.ST_SHARED_FILE 836 )) 837 file_storage_enabled = file_disk_template in enabled_disk_templates 838 if file_storage_dir is not None: 839 if file_storage_dir == "": 840 if file_storage_enabled: 841 raise errors.OpPrereqError( 842 "Unsetting the '%s' storage directory while having '%s' storage" 843 " enabled is not permitted." % 844 (file_disk_template, file_disk_template)) 845 else: 846 if not file_storage_enabled: 847 logging_warn_fn( 848 "Specified a %s storage directory, although %s storage is not" 849 " enabled." % (file_disk_template, file_disk_template)) 850 else: 851 raise errors.ProgrammerError("Received %s storage dir with value" 852 " 'None'." % file_disk_template)
853
854 855 -def CheckFileStoragePathVsEnabledDiskTemplates( 856 logging_warn_fn, file_storage_dir, enabled_disk_templates):
857 """Checks whether the given file storage directory is acceptable. 858 859 @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates} 860 861 """ 862 CheckFileBasedStoragePathVsEnabledDiskTemplates( 863 logging_warn_fn, file_storage_dir, enabled_disk_templates, 864 constants.DT_FILE)
865
866 867 -def CheckSharedFileStoragePathVsEnabledDiskTemplates( 868 logging_warn_fn, file_storage_dir, enabled_disk_templates):
869 """Checks whether the given shared file storage directory is acceptable. 870 871 @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates} 872 873 """ 874 CheckFileBasedStoragePathVsEnabledDiskTemplates( 875 logging_warn_fn, file_storage_dir, enabled_disk_templates, 876 constants.DT_SHARED_FILE)
877
878 879 -class LUClusterSetParams(LogicalUnit):
880 """Change the parameters of the cluster. 881 882 """ 883 HPATH = "cluster-modify" 884 HTYPE = constants.HTYPE_CLUSTER 885 REQ_BGL = False 886
887 - def CheckArguments(self):
888 """Check parameters 889 890 """ 891 if self.op.uid_pool: 892 uidpool.CheckUidPool(self.op.uid_pool) 893 894 if self.op.add_uids: 895 uidpool.CheckUidPool(self.op.add_uids) 896 897 if self.op.remove_uids: 898 uidpool.CheckUidPool(self.op.remove_uids) 899 900 if self.op.master_netmask is not None: 901 _ValidateNetmask(self.cfg, self.op.master_netmask) 902 903 if self.op.diskparams: 904 for dt_params in self.op.diskparams.values(): 905 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 906 try: 907 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) 908 CheckDiskAccessModeValidity(self.op.diskparams) 909 except errors.OpPrereqError, err: 910 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 911 errors.ECODE_INVAL)
912
913 - def ExpandNames(self):
914 # FIXME: in the future maybe other cluster params won't require checking on 915 # all nodes to be modified. 916 # FIXME: This opcode changes cluster-wide settings. Is acquiring all 917 # resource locks the right thing, shouldn't it be the BGL instead? 918 self.needed_locks = { 919 locking.LEVEL_NODE: locking.ALL_SET, 920 locking.LEVEL_INSTANCE: locking.ALL_SET, 921 locking.LEVEL_NODEGROUP: locking.ALL_SET, 922 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 923 } 924 self.share_locks = ShareAll()
925
926 - def BuildHooksEnv(self):
927 """Build hooks env. 928 929 """ 930 return { 931 "OP_TARGET": self.cfg.GetClusterName(), 932 "NEW_VG_NAME": self.op.vg_name, 933 }
934
935 - def BuildHooksNodes(self):
936 """Build hooks nodes. 937 938 """ 939 mn = self.cfg.GetMasterNode() 940 return ([mn], [mn])
941
942 - def _CheckVgName(self, node_uuids, enabled_disk_templates, 943 new_enabled_disk_templates):
944 """Check the consistency of the vg name on all nodes and in case it gets 945 unset whether there are instances still using it. 946 947 """ 948 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates) 949 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates, 950 new_enabled_disk_templates) 951 current_vg_name = self.cfg.GetVGName() 952 953 if self.op.vg_name == '': 954 if lvm_is_enabled: 955 raise errors.OpPrereqError("Cannot unset volume group if lvm-based" 956 " disk templates are or get enabled.") 957 958 if self.op.vg_name is None: 959 if current_vg_name is None and lvm_is_enabled: 960 raise errors.OpPrereqError("Please specify a volume group when" 961 " enabling lvm-based disk-templates.") 962 963 if self.op.vg_name is not None and not self.op.vg_name: 964 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN): 965 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 966 " instances exist", errors.ECODE_INVAL) 967 968 if (self.op.vg_name is not None and lvm_is_enabled) or \ 969 (self.cfg.GetVGName() is not None and lvm_gets_enabled): 970 self._CheckVgNameOnNodes(node_uuids)
971
972 - def _CheckVgNameOnNodes(self, node_uuids):
973 """Check the status of the volume group on each node. 974 975 """ 976 vglist = self.rpc.call_vg_list(node_uuids) 977 for node_uuid in node_uuids: 978 msg = vglist[node_uuid].fail_msg 979 if msg: 980 # ignoring down node 981 self.LogWarning("Error while gathering data on node %s" 982 " (ignoring node): %s", 983 self.cfg.GetNodeName(node_uuid), msg) 984 continue 985 vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload, 986 self.op.vg_name, 987 constants.MIN_VG_SIZE) 988 if vgstatus: 989 raise errors.OpPrereqError("Error on node '%s': %s" % 990 (self.cfg.GetNodeName(node_uuid), vgstatus), 991 errors.ECODE_ENVIRON)
992 993 @staticmethod
994 - def _GetDiskTemplateSetsInner(op_enabled_disk_templates, 995 old_enabled_disk_templates):
996 """Computes three sets of disk templates. 997 998 @see: C{_GetDiskTemplateSets} for more details. 999 1000 """ 1001 enabled_disk_templates = None 1002 new_enabled_disk_templates = [] 1003 disabled_disk_templates = [] 1004 if op_enabled_disk_templates: 1005 enabled_disk_templates = op_enabled_disk_templates 1006 new_enabled_disk_templates = \ 1007 list(set(enabled_disk_templates) 1008 - set(old_enabled_disk_templates)) 1009 disabled_disk_templates = \ 1010 list(set(old_enabled_disk_templates) 1011 - set(enabled_disk_templates)) 1012 else: 1013 enabled_disk_templates = old_enabled_disk_templates 1014 return (enabled_disk_templates, new_enabled_disk_templates, 1015 disabled_disk_templates)
1016
1017 - def _GetDiskTemplateSets(self, cluster):
1018 """Computes three sets of disk templates. 1019 1020 The three sets are: 1021 - disk templates that will be enabled after this operation (no matter if 1022 they were enabled before or not) 1023 - disk templates that get enabled by this operation (thus haven't been 1024 enabled before.) 1025 - disk templates that get disabled by this operation 1026 1027 """ 1028 return self._GetDiskTemplateSetsInner(self.op.enabled_disk_templates, 1029 cluster.enabled_disk_templates)
1030
1031 - def _CheckIpolicy(self, cluster, enabled_disk_templates):
1032 """Checks the ipolicy. 1033 1034 @type cluster: C{objects.Cluster} 1035 @param cluster: the cluster's configuration 1036 @type enabled_disk_templates: list of string 1037 @param enabled_disk_templates: list of (possibly newly) enabled disk 1038 templates 1039 1040 """ 1041 # FIXME: write unit tests for this 1042 if self.op.ipolicy: 1043 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, 1044 group_policy=False) 1045 1046 CheckIpolicyVsDiskTemplates(self.new_ipolicy, 1047 enabled_disk_templates) 1048 1049 all_instances = self.cfg.GetAllInstancesInfo().values() 1050 violations = set() 1051 for group in self.cfg.GetAllNodeGroupsInfo().values(): 1052 instances = frozenset([inst for inst in all_instances 1053 if compat.any(nuuid in group.members 1054 for nuuid in inst.all_nodes)]) 1055 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) 1056 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group) 1057 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances, 1058 self.cfg) 1059 if new: 1060 violations.update(new) 1061 1062 if violations: 1063 self.LogWarning("After the ipolicy change the following instances" 1064 " violate them: %s", 1065 utils.CommaJoin(utils.NiceSort(violations))) 1066 else: 1067 CheckIpolicyVsDiskTemplates(cluster.ipolicy, 1068 enabled_disk_templates)
1069
1070 - def _CheckDrbdHelperOnNodes(self, drbd_helper, node_uuids):
1071 """Checks whether the set DRBD helper actually exists on the nodes. 1072 1073 @type drbd_helper: string 1074 @param drbd_helper: path of the drbd usermode helper binary 1075 @type node_uuids: list of strings 1076 @param node_uuids: list of node UUIDs to check for the helper 1077 1078 """ 1079 # checks given drbd helper on all nodes 1080 helpers = self.rpc.call_drbd_helper(node_uuids) 1081 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids): 1082 if ninfo.offline: 1083 self.LogInfo("Not checking drbd helper on offline node %s", 1084 ninfo.name) 1085 continue 1086 msg = helpers[ninfo.uuid].fail_msg 1087 if msg: 1088 raise errors.OpPrereqError("Error checking drbd helper on node" 1089 " '%s': %s" % (ninfo.name, msg), 1090 errors.ECODE_ENVIRON) 1091 node_helper = helpers[ninfo.uuid].payload 1092 if node_helper != drbd_helper: 1093 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 1094 (ninfo.name, node_helper), 1095 errors.ECODE_ENVIRON)
1096
1097 - def _CheckDrbdHelper(self, node_uuids, drbd_enabled, drbd_gets_enabled):
1098 """Check the DRBD usermode helper. 1099 1100 @type node_uuids: list of strings 1101 @param node_uuids: a list of nodes' UUIDs 1102 @type drbd_enabled: boolean 1103 @param drbd_enabled: whether DRBD will be enabled after this operation 1104 (no matter if it was disabled before or not) 1105 @type drbd_gets_enabled: boolen 1106 @param drbd_gets_enabled: true if DRBD was disabled before this 1107 operation, but will be enabled afterwards 1108 1109 """ 1110 if self.op.drbd_helper == '': 1111 if drbd_enabled: 1112 raise errors.OpPrereqError("Cannot disable drbd helper while" 1113 " DRBD is enabled.") 1114 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8): 1115 raise errors.OpPrereqError("Cannot disable drbd helper while" 1116 " drbd-based instances exist", 1117 errors.ECODE_INVAL) 1118 1119 else: 1120 if self.op.drbd_helper is not None and drbd_enabled: 1121 self._CheckDrbdHelperOnNodes(self.op.drbd_helper, node_uuids) 1122 else: 1123 if drbd_gets_enabled: 1124 current_drbd_helper = self.cfg.GetClusterInfo().drbd_usermode_helper 1125 if current_drbd_helper is not None: 1126 self._CheckDrbdHelperOnNodes(current_drbd_helper, node_uuids) 1127 else: 1128 raise errors.OpPrereqError("Cannot enable DRBD without a" 1129 " DRBD usermode helper set.")
1130
1131 - def _CheckInstancesOfDisabledDiskTemplates( 1132 self, disabled_disk_templates):
1133 """Check whether we try to disable a disk template that is in use. 1134 1135 @type disabled_disk_templates: list of string 1136 @param disabled_disk_templates: list of disk templates that are going to 1137 be disabled by this operation 1138 1139 """ 1140 for disk_template in disabled_disk_templates: 1141 if self.cfg.HasAnyDiskOfType(disk_template): 1142 raise errors.OpPrereqError( 1143 "Cannot disable disk template '%s', because there is at least one" 1144 " instance using it." % disk_template)
1145
1146 - def CheckPrereq(self):
1147 """Check prerequisites. 1148 1149 This checks whether the given params don't conflict and 1150 if the given volume group is valid. 1151 1152 """ 1153 node_uuids = self.owned_locks(locking.LEVEL_NODE) 1154 self.cluster = cluster = self.cfg.GetClusterInfo() 1155 1156 vm_capable_node_uuids = [node.uuid 1157 for node in self.cfg.GetAllNodesInfo().values() 1158 if node.uuid in node_uuids and node.vm_capable] 1159 1160 (enabled_disk_templates, new_enabled_disk_templates, 1161 disabled_disk_templates) = self._GetDiskTemplateSets(cluster) 1162 self._CheckInstancesOfDisabledDiskTemplates(disabled_disk_templates) 1163 1164 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates, 1165 new_enabled_disk_templates) 1166 1167 if self.op.file_storage_dir is not None: 1168 CheckFileStoragePathVsEnabledDiskTemplates( 1169 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates) 1170 1171 if self.op.shared_file_storage_dir is not None: 1172 CheckSharedFileStoragePathVsEnabledDiskTemplates( 1173 self.LogWarning, self.op.shared_file_storage_dir, 1174 enabled_disk_templates) 1175 1176 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates 1177 drbd_gets_enabled = constants.DT_DRBD8 in new_enabled_disk_templates 1178 self._CheckDrbdHelper(vm_capable_node_uuids, 1179 drbd_enabled, drbd_gets_enabled) 1180 1181 # validate params changes 1182 if self.op.beparams: 1183 objects.UpgradeBeParams(self.op.beparams) 1184 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 1185 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 1186 1187 if self.op.ndparams: 1188 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 1189 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 1190 1191 # TODO: we need a more general way to handle resetting 1192 # cluster-level parameters to default values 1193 if self.new_ndparams["oob_program"] == "": 1194 self.new_ndparams["oob_program"] = \ 1195 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 1196 1197 if self.op.hv_state: 1198 new_hv_state = MergeAndVerifyHvState(self.op.hv_state, 1199 self.cluster.hv_state_static) 1200 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) 1201 for hv, values in new_hv_state.items()) 1202 1203 if self.op.disk_state: 1204 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, 1205 self.cluster.disk_state_static) 1206 self.new_disk_state = \ 1207 dict((storage, dict((name, cluster.SimpleFillDiskState(values)) 1208 for name, values in svalues.items())) 1209 for storage, svalues in new_disk_state.items()) 1210 1211 self._CheckIpolicy(cluster, enabled_disk_templates) 1212 1213 if self.op.nicparams: 1214 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 1215 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 1216 objects.NIC.CheckParameterSyntax(self.new_nicparams) 1217 nic_errors = [] 1218 1219 # check all instances for consistency 1220 for instance in self.cfg.GetAllInstancesInfo().values(): 1221 for nic_idx, nic in enumerate(instance.nics): 1222 params_copy = copy.deepcopy(nic.nicparams) 1223 params_filled = objects.FillDict(self.new_nicparams, params_copy) 1224 1225 # check parameter syntax 1226 try: 1227 objects.NIC.CheckParameterSyntax(params_filled) 1228 except errors.ConfigurationError, err: 1229 nic_errors.append("Instance %s, nic/%d: %s" % 1230 (instance.name, nic_idx, err)) 1231 1232 # if we're moving instances to routed, check that they have an ip 1233 target_mode = params_filled[constants.NIC_MODE] 1234 if target_mode == 
constants.NIC_MODE_ROUTED and not nic.ip: 1235 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 1236 " address" % (instance.name, nic_idx)) 1237 if nic_errors: 1238 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 1239 "\n".join(nic_errors), errors.ECODE_INVAL) 1240 1241 # hypervisor list/parameters 1242 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 1243 if self.op.hvparams: 1244 for hv_name, hv_dict in self.op.hvparams.items(): 1245 if hv_name not in self.new_hvparams: 1246 self.new_hvparams[hv_name] = hv_dict 1247 else: 1248 self.new_hvparams[hv_name].update(hv_dict) 1249 1250 # disk template parameters 1251 self.new_diskparams = objects.FillDict(cluster.diskparams, {}) 1252 if self.op.diskparams: 1253 for dt_name, dt_params in self.op.diskparams.items(): 1254 if dt_name not in self.new_diskparams: 1255 self.new_diskparams[dt_name] = dt_params 1256 else: 1257 self.new_diskparams[dt_name].update(dt_params) 1258 CheckDiskAccessModeConsistency(self.op.diskparams, self.cfg) 1259 1260 # os hypervisor parameters 1261 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 1262 if self.op.os_hvp: 1263 for os_name, hvs in self.op.os_hvp.items(): 1264 if os_name not in self.new_os_hvp: 1265 self.new_os_hvp[os_name] = hvs 1266 else: 1267 for hv_name, hv_dict in hvs.items(): 1268 if hv_dict is None: 1269 # Delete if it exists 1270 self.new_os_hvp[os_name].pop(hv_name, None) 1271 elif hv_name not in self.new_os_hvp[os_name]: 1272 self.new_os_hvp[os_name][hv_name] = hv_dict 1273 else: 1274 self.new_os_hvp[os_name][hv_name].update(hv_dict) 1275 1276 # os parameters 1277 self.new_osp = objects.FillDict(cluster.osparams, {}) 1278 if self.op.osparams: 1279 for os_name, osp in self.op.osparams.items(): 1280 if os_name not in self.new_osp: 1281 self.new_osp[os_name] = {} 1282 1283 self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp, 1284 use_none=True) 1285 1286 if not self.new_osp[os_name]: 1287 # we removed all parameters 1288 del self.new_osp[os_name] 1289 else: 1290 # check the parameter validity (remote check) 1291 CheckOSParams(self, False, [self.cfg.GetMasterNode()], 1292 os_name, self.new_osp[os_name]) 1293 1294 # changes to the hypervisor list 1295 if self.op.enabled_hypervisors is not None: 1296 for hv in self.op.enabled_hypervisors: 1297 # if the hypervisor doesn't already exist in the cluster 1298 # hvparams, we initialize it to empty, and then (in both 1299 # cases) we make sure to fill the defaults, as we might not 1300 # have a complete defaults list if the hypervisor wasn't 1301 # enabled before 1302 if hv not in new_hvp: 1303 new_hvp[hv] = {} 1304 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 1305 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 1306 1307 if self.op.hvparams or self.op.enabled_hypervisors is not None: 1308 # either the enabled list has changed, or the parameters have, validate 1309 for hv_name, hv_params in self.new_hvparams.items(): 1310 if ((self.op.hvparams and hv_name in self.op.hvparams) or 1311 (self.op.enabled_hypervisors and 1312 hv_name in self.op.enabled_hypervisors)): 1313 # either this is a new hypervisor, or its parameters have changed 1314 hv_class = hypervisor.GetHypervisorClass(hv_name) 1315 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1316 hv_class.CheckParameterSyntax(hv_params) 1317 CheckHVParams(self, node_uuids, hv_name, hv_params) 1318 1319 self._CheckDiskTemplateConsistency() 1320 1321 if self.op.os_hvp: 1322 # no need to 
check any newly-enabled hypervisors, since the 1323 # defaults have already been checked in the above code-block 1324 for os_name, os_hvp in self.new_os_hvp.items(): 1325 for hv_name, hv_params in os_hvp.items(): 1326 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1327 # we need to fill in the new os_hvp on top of the actual hv_p 1328 cluster_defaults = self.new_hvparams.get(hv_name, {}) 1329 new_osp = objects.FillDict(cluster_defaults, hv_params) 1330 hv_class = hypervisor.GetHypervisorClass(hv_name) 1331 hv_class.CheckParameterSyntax(new_osp) 1332 CheckHVParams(self, node_uuids, hv_name, new_osp) 1333 1334 if self.op.default_iallocator: 1335 alloc_script = utils.FindFile(self.op.default_iallocator, 1336 constants.IALLOCATOR_SEARCH_PATH, 1337 os.path.isfile) 1338 if alloc_script is None: 1339 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 1340 " specified" % self.op.default_iallocator, 1341 errors.ECODE_INVAL)
1342
1344 """Check whether the disk templates that are going to be disabled 1345 are still in use by some instances. 1346 1347 """ 1348 if self.op.enabled_disk_templates: 1349 cluster = self.cfg.GetClusterInfo() 1350 instances = self.cfg.GetAllInstancesInfo() 1351 1352 disk_templates_to_remove = set(cluster.enabled_disk_templates) \ 1353 - set(self.op.enabled_disk_templates) 1354 for instance in instances.itervalues(): 1355 if instance.disk_template in disk_templates_to_remove: 1356 raise errors.OpPrereqError("Cannot disable disk template '%s'," 1357 " because instance '%s' is using it." % 1358 (instance.disk_template, instance.name))
1359
1360 - def _SetVgName(self, feedback_fn):
1361 """Determines and sets the new volume group name. 1362 1363 """ 1364 if self.op.vg_name is not None: 1365 new_volume = self.op.vg_name 1366 if not new_volume: 1367 new_volume = None 1368 if new_volume != self.cfg.GetVGName(): 1369 self.cfg.SetVGName(new_volume) 1370 else: 1371 feedback_fn("Cluster LVM configuration already in desired" 1372 " state, not changing")
1373
1374 - def _SetFileStorageDir(self, feedback_fn):
1375 """Set the file storage directory. 1376 1377 """ 1378 if self.op.file_storage_dir is not None: 1379 if self.cluster.file_storage_dir == self.op.file_storage_dir: 1380 feedback_fn("Global file storage dir already set to value '%s'" 1381 % self.cluster.file_storage_dir) 1382 else: 1383 self.cluster.file_storage_dir = self.op.file_storage_dir
1384
1385 - def _SetSharedFileStorageDir(self, feedback_fn):
1386 """Set the shared file storage directory. 1387 1388 """ 1389 if self.op.shared_file_storage_dir is not None: 1390 if self.cluster.shared_file_storage_dir == \ 1391 self.op.shared_file_storage_dir: 1392 feedback_fn("Global shared file storage dir already set to value '%s'" 1393 % self.cluster.shared_file_storage_dir) 1394 else: 1395 self.cluster.shared_file_storage_dir = self.op.shared_file_storage_dir
1396
1397 - def _SetDrbdHelper(self, feedback_fn):
1398 """Set the DRBD usermode helper. 1399 1400 """ 1401 if self.op.drbd_helper is not None: 1402 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates: 1403 feedback_fn("Note that you specified a drbd user helper, but did not" 1404 " enable the drbd disk template.") 1405 new_helper = self.op.drbd_helper 1406 if not new_helper: 1407 new_helper = None 1408 if new_helper != self.cfg.GetDRBDHelper(): 1409 self.cfg.SetDRBDHelper(new_helper) 1410 else: 1411 feedback_fn("Cluster DRBD helper already in desired state," 1412 " not changing")
1413
1414 - def Exec(self, feedback_fn):
1415 """Change the parameters of the cluster. 1416 1417 """ 1418 if self.op.enabled_disk_templates: 1419 self.cluster.enabled_disk_templates = \ 1420 list(self.op.enabled_disk_templates) 1421 1422 self._SetVgName(feedback_fn) 1423 self._SetFileStorageDir(feedback_fn) 1424 self._SetSharedFileStorageDir(feedback_fn) 1425 self._SetDrbdHelper(feedback_fn) 1426 1427 ensure_kvmd = False 1428 1429 if self.op.hvparams: 1430 self.cluster.hvparams = self.new_hvparams 1431 if self.op.os_hvp: 1432 self.cluster.os_hvp = self.new_os_hvp 1433 if self.op.enabled_hypervisors is not None: 1434 self.cluster.hvparams = self.new_hvparams 1435 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 1436 ensure_kvmd = True 1437 if self.op.beparams: 1438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 1439 if self.op.nicparams: 1440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 1441 if self.op.ipolicy: 1442 self.cluster.ipolicy = self.new_ipolicy 1443 if self.op.osparams: 1444 self.cluster.osparams = self.new_osp 1445 if self.op.ndparams: 1446 self.cluster.ndparams = self.new_ndparams 1447 if self.op.diskparams: 1448 self.cluster.diskparams = self.new_diskparams 1449 if self.op.hv_state: 1450 self.cluster.hv_state_static = self.new_hv_state 1451 if self.op.disk_state: 1452 self.cluster.disk_state_static = self.new_disk_state 1453 1454 if self.op.candidate_pool_size is not None: 1455 self.cluster.candidate_pool_size = self.op.candidate_pool_size 1456 # we need to update the pool size here, otherwise the save will fail 1457 AdjustCandidatePool(self, [], feedback_fn) 1458 1459 if self.op.max_running_jobs is not None: 1460 self.cluster.max_running_jobs = self.op.max_running_jobs 1461 1462 if self.op.maintain_node_health is not None: 1463 if self.op.maintain_node_health and not constants.ENABLE_CONFD: 1464 feedback_fn("Note: CONFD was disabled at build time, node health" 1465 " maintenance is not useful (still enabling it)") 1466 self.cluster.maintain_node_health = self.op.maintain_node_health 1467 1468 if self.op.modify_etc_hosts is not None: 1469 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts 1470 1471 if self.op.prealloc_wipe_disks is not None: 1472 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 1473 1474 if self.op.add_uids is not None: 1475 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 1476 1477 if self.op.remove_uids is not None: 1478 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 1479 1480 if self.op.uid_pool is not None: 1481 self.cluster.uid_pool = self.op.uid_pool 1482 1483 if self.op.default_iallocator is not None: 1484 self.cluster.default_iallocator = self.op.default_iallocator 1485 1486 if self.op.default_iallocator_params is not None: 1487 self.cluster.default_iallocator_params = self.op.default_iallocator_params 1488 1489 if self.op.reserved_lvs is not None: 1490 self.cluster.reserved_lvs = self.op.reserved_lvs 1491 1492 if self.op.use_external_mip_script is not None: 1493 self.cluster.use_external_mip_script = self.op.use_external_mip_script 1494 1495 if self.op.enabled_user_shutdown is not None and \ 1496 self.cluster.enabled_user_shutdown != self.op.enabled_user_shutdown: 1497 self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown 1498 ensure_kvmd = True 1499 1500 def helper_os(aname, mods, desc): 1501 desc += " OS list" 1502 lst = getattr(self.cluster, aname) 1503 for key, val in mods: 1504 if key == constants.DDM_ADD: 1505 if val in lst: 1506 feedback_fn("OS %s already in %s, 
ignoring" % (val, desc)) 1507 else: 1508 lst.append(val) 1509 elif key == constants.DDM_REMOVE: 1510 if val in lst: 1511 lst.remove(val) 1512 else: 1513 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 1514 else: 1515 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1516 1517 if self.op.hidden_os: 1518 helper_os("hidden_os", self.op.hidden_os, "hidden") 1519 1520 if self.op.blacklisted_os: 1521 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 1522 1523 if self.op.master_netdev: 1524 master_params = self.cfg.GetMasterNetworkParameters() 1525 ems = self.cfg.GetUseExternalMipScript() 1526 feedback_fn("Shutting down master ip on the current netdev (%s)" % 1527 self.cluster.master_netdev) 1528 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 1529 master_params, ems) 1530 if not self.op.force: 1531 result.Raise("Could not disable the master ip") 1532 else: 1533 if result.fail_msg: 1534 msg = ("Could not disable the master ip (continuing anyway): %s" % 1535 result.fail_msg) 1536 feedback_fn(msg) 1537 feedback_fn("Changing master_netdev from %s to %s" % 1538 (master_params.netdev, self.op.master_netdev)) 1539 self.cluster.master_netdev = self.op.master_netdev 1540 1541 if self.op.master_netmask: 1542 master_params = self.cfg.GetMasterNetworkParameters() 1543 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) 1544 result = self.rpc.call_node_change_master_netmask( 1545 master_params.uuid, master_params.netmask, 1546 self.op.master_netmask, master_params.ip, 1547 master_params.netdev) 1548 result.Warn("Could not change the master IP netmask", feedback_fn) 1549 self.cluster.master_netmask = self.op.master_netmask 1550 1551 self.cfg.Update(self.cluster, feedback_fn) 1552 1553 if self.op.master_netdev: 1554 master_params = self.cfg.GetMasterNetworkParameters() 1555 feedback_fn("Starting the master ip on the new master netdev (%s)" % 1556 self.op.master_netdev) 1557 ems = self.cfg.GetUseExternalMipScript() 1558 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 1559 master_params, ems) 1560 result.Warn("Could not re-enable the master ip on the master," 1561 " please restart manually", self.LogWarning) 1562 1563 # Even though 'self.op.enabled_user_shutdown' is being tested 1564 # above, the RPCs can only be done after 'self.cfg.Update' because 1565 # this will update the cluster object and sync 'Ssconf', and kvmd 1566 # uses 'Ssconf'. 1567 if ensure_kvmd: 1568 EnsureKvmdOnNodes(self, feedback_fn)
1569
1570 1571 -class LUClusterVerify(NoHooksLU):
1572 """Submits all jobs necessary to verify the cluster. 1573 1574 """ 1575 REQ_BGL = False 1576
1577 - def ExpandNames(self):
1578 self.needed_locks = {}
1579
1580 - def Exec(self, feedback_fn):
1581 jobs = [] 1582 1583 if self.op.group_name: 1584 groups = [self.op.group_name] 1585 depends_fn = lambda: None 1586 else: 1587 groups = self.cfg.GetNodeGroupList() 1588 1589 # Verify global configuration 1590 jobs.append([ 1591 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), 1592 ]) 1593 1594 # Always depend on global verification 1595 depends_fn = lambda: [(-len(jobs), [])] 1596 1597 jobs.extend( 1598 [opcodes.OpClusterVerifyGroup(group_name=group, 1599 ignore_errors=self.op.ignore_errors, 1600 depends=depends_fn())] 1601 for group in groups) 1602 1603 # Fix up all parameters 1604 for op in itertools.chain(*jobs): # pylint: disable=W0142 1605 op.debug_simulate_errors = self.op.debug_simulate_errors 1606 op.verbose = self.op.verbose 1607 op.error_codes = self.op.error_codes 1608 try: 1609 op.skip_checks = self.op.skip_checks 1610 except AttributeError: 1611 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 1612 1613 return ResultWithJobs(jobs)
1614
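The Exec method above queues one job verifying the global configuration, followed by one job per node group whose depends entry is a negative, relative job index pointing back at that config job, so group verification only runs after the config check. A rough, hypothetical sketch of the job list this could produce for a cluster with two groups (group names invented, structure inferred from the code above):

jobs = [
    [opcodes.OpClusterVerifyConfig(ignore_errors=[])],          # job 0: global config
    [opcodes.OpClusterVerifyGroup(group_name="default",
                                  ignore_errors=[],
                                  depends=[(-1, [])])],         # one job back -> job 0
    [opcodes.OpClusterVerifyGroup(group_name="storage",
                                  ignore_errors=[],
                                  depends=[(-2, [])])],         # two jobs back -> job 0
]
result = ResultWithJobs(jobs)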
1615 1616 -class _VerifyErrors(object):
1617 """Mix-in for cluster/group verify LUs. 1618 1619 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 1620 self.op and self._feedback_fn to be available.) 1621 1622 """ 1623 1624 ETYPE_FIELD = "code" 1625 ETYPE_ERROR = constants.CV_ERROR 1626 ETYPE_WARNING = constants.CV_WARNING 1627
1628 - def _Error(self, ecode, item, msg, *args, **kwargs):
1629 """Format an error message. 1630 1631 Based on the opcode's error_codes parameter, either format a 1632 parseable error code, or a simpler error string. 1633 1634 This must be called only from Exec and functions called from Exec. 1635 1636 """ 1637 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1638 itype, etxt, _ = ecode 1639 # If the error code is in the list of ignored errors, demote the error to a 1640 # warning 1641 if etxt in self.op.ignore_errors: # pylint: disable=E1101 1642 ltype = self.ETYPE_WARNING 1643 # first complete the msg 1644 if args: 1645 msg = msg % args 1646 # then format the whole message 1647 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 1648 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1649 else: 1650 if item: 1651 item = " " + item 1652 else: 1653 item = "" 1654 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1655 # and finally report it via the feedback_fn 1656 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101 1657 # do not mark the operation as failed for WARN cases only 1658 if ltype == self.ETYPE_ERROR: 1659 self.bad = True
1660
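Depending on self.op.error_codes, _Error renders the same finding either as a colon-separated, machine-parseable record or as a plain human-oriented line. A hedged example with an invented message (in real runs the field values come from the constants.CV_* tuples):

ltype, etxt, itype, item, msg = "ERROR", "ECLUSTERCFG", "cluster", "", "rogue node found"
# with error codes requested: level:code:item-type:item:message
parseable = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
# -> 'ERROR:ECLUSTERCFG:cluster::rogue node found'
# without error codes: plain form
plain = "%s: %s%s: %s" % (ltype, itype, (" " + item) if item else "", msg)
# -> 'ERROR: cluster: rogue node found'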
1661 - def _ErrorIf(self, cond, *args, **kwargs):
1662 """Log an error message if the passed condition is True. 1663 1664 """ 1665 if (bool(cond) 1666 or self.op.debug_simulate_errors): # pylint: disable=E1101 1667 self._Error(*args, **kwargs)
1668
1669 1670 -def _GetAllHypervisorParameters(cluster, instances):
1671    """Compute the set of all hypervisor parameters.
1672
1673    @type cluster: L{objects.Cluster}
1674    @param cluster: the cluster object
1675    @type instances: list of L{objects.Instance}
1676    @param instances: additional instances from which to obtain parameters
1677    @rtype: list of (origin, hypervisor, parameters)
1678    @return: a list with all parameters found, indicating the hypervisor they
1679        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1680
1681    """
1682    hvp_data = []
1683
1684    for hv_name in cluster.enabled_hypervisors:
1685      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1686
1687    for os_name, os_hvp in cluster.os_hvp.items():
1688      for hv_name, hv_params in os_hvp.items():
1689        if hv_params:
1690          full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1691          hvp_data.append(("os %s" % os_name, hv_name, full_params))
1692
1693    # TODO: collapse identical parameter values into a single one
1694    for instance in instances:
1695      if instance.hvparams:
1696        hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1697                         cluster.FillHV(instance)))
1698
1699    return hvp_data
1700
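The helper returns a flat list of (origin, hypervisor, parameters) triples collected from the cluster defaults, the per-OS overrides and the per-instance overrides. A hedged example of what the result might look like on a small KVM-only cluster (all names and values invented):

hvp_data = [
    ("cluster", "kvm",
     {"kernel_path": "/boot/vmlinuz-kvmU", "root_path": "/dev/vda1"}),
    ("os debian-image", "kvm",
     {"kernel_path": "", "root_path": "/dev/vda1"}),
    ("instance web1.example.com", "kvm",
     {"kernel_path": "", "root_path": "/dev/vda2"}),
]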
1701 1702 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1703 """Verifies the cluster config. 1704 1705 """ 1706 REQ_BGL = False 1707
1708 - def _VerifyHVP(self, hvp_data):
1709 """Verifies locally the syntax of the hypervisor parameters. 1710 1711 """ 1712 for item, hv_name, hv_params in hvp_data: 1713 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 1714 (item, hv_name)) 1715 try: 1716 hv_class = hypervisor.GetHypervisorClass(hv_name) 1717 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1718 hv_class.CheckParameterSyntax(hv_params) 1719 except errors.GenericError, err: 1720 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1721
1722 - def ExpandNames(self):
1723 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 1724 self.share_locks = ShareAll()
1725
1726 - def CheckPrereq(self):
1727 """Check prerequisites. 1728 1729 """ 1730 # Retrieve all information 1731 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 1732 self.all_node_info = self.cfg.GetAllNodesInfo() 1733 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1734
1735 - def Exec(self, feedback_fn):
1736 """Verify integrity of cluster, performing various test on nodes. 1737 1738 """ 1739 self.bad = False 1740 self._feedback_fn = feedback_fn 1741 1742 feedback_fn("* Verifying cluster config") 1743 1744 for msg in self.cfg.VerifyConfig(): 1745 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 1746 1747 feedback_fn("* Verifying cluster certificate files") 1748 1749 for cert_filename in pathutils.ALL_CERT_FILES: 1750 (errcode, msg) = utils.VerifyCertificate(cert_filename) 1751 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 1752 1753 self._ErrorIf(not utils.CanRead(constants.LUXID_USER, 1754 pathutils.NODED_CERT_FILE), 1755 constants.CV_ECLUSTERCERT, 1756 None, 1757 pathutils.NODED_CERT_FILE + " must be accessible by the " + 1758 constants.LUXID_USER + " user") 1759 1760 feedback_fn("* Verifying hypervisor parameters") 1761 1762 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 1763 self.all_inst_info.values())) 1764 1765 feedback_fn("* Verifying all nodes belong to an existing group") 1766 1767 # We do this verification here because, should this bogus circumstance 1768 # occur, it would never be caught by VerifyGroup, which only acts on 1769 # nodes/instances reachable from existing node groups. 1770 1771 dangling_nodes = set(node for node in self.all_node_info.values() 1772 if node.group not in self.all_group_info) 1773 1774 dangling_instances = {} 1775 no_node_instances = [] 1776 1777 for inst in self.all_inst_info.values(): 1778 if inst.primary_node in [node.uuid for node in dangling_nodes]: 1779 dangling_instances.setdefault(inst.primary_node, []).append(inst) 1780 elif inst.primary_node not in self.all_node_info: 1781 no_node_instances.append(inst) 1782 1783 pretty_dangling = [ 1784 "%s (%s)" % 1785 (node.name, 1786 utils.CommaJoin(inst.name for 1787 inst in dangling_instances.get(node.uuid, []))) 1788 for node in dangling_nodes] 1789 1790 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 1791 None, 1792 "the following nodes (and their instances) belong to a non" 1793 " existing group: %s", utils.CommaJoin(pretty_dangling)) 1794 1795 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 1796 None, 1797 "the following instances have a non-existing primary-node:" 1798 " %s", utils.CommaJoin(inst.name for 1799 inst in no_node_instances)) 1800 1801 return not self.bad
1802
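The dangling-node logic above needs nothing but the configuration: a node is dangling when its group UUID is unknown, and an instance is flagged when its primary node is either dangling or missing entirely. A minimal standalone sketch of the same classification (function name and plain-dict inputs are hypothetical):

def find_dangling(all_groups, all_nodes, all_insts):
    # nodes whose group is not present in the configuration
    dangling_nodes = set(node for node in all_nodes.values()
                         if node.group not in all_groups)
    dangling_uuids = set(node.uuid for node in dangling_nodes)

    dangling_insts = {}   # dangling primary node -> [instances]
    no_node_insts = []    # instances whose primary node does not exist at all
    for inst in all_insts.values():
        if inst.primary_node in dangling_uuids:
            dangling_insts.setdefault(inst.primary_node, []).append(inst)
        elif inst.primary_node not in all_nodes:
            no_node_insts.append(inst)
    return dangling_nodes, dangling_insts, no_node_insts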
1803 1804 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1805 """Verifies the status of a node group. 1806 1807 """ 1808 HPATH = "cluster-verify" 1809 HTYPE = constants.HTYPE_CLUSTER 1810 REQ_BGL = False 1811 1812 _HOOKS_INDENT_RE = re.compile("^", re.M) 1813
1814 - class NodeImage(object):
1815      """A class representing the logical and physical status of a node.
1816
1817      @type uuid: string
1818      @ivar uuid: the node UUID to which this object refers
1819      @ivar volumes: a structure as returned from
1820          L{ganeti.backend.GetVolumeList} (runtime)
1821      @ivar instances: a list of running instances (runtime)
1822      @ivar pinst: list of configured primary instances (config)
1823      @ivar sinst: list of configured secondary instances (config)
1824      @ivar sbp: dictionary of {primary-node: list of instances} for all
1825          instances for which this node is secondary (config)
1826      @ivar mfree: free memory, as reported by hypervisor (runtime)
1827      @ivar dfree: free disk, as reported by the node (runtime)
1828      @ivar offline: the offline status (config)
1829      @type rpc_fail: boolean
1830      @ivar rpc_fail: whether the RPC verify call was successful (overall,
1831          not whether the individual keys were correct) (runtime)
1832      @type lvm_fail: boolean
1833      @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1834      @type hyp_fail: boolean
1835      @ivar hyp_fail: whether the RPC call didn't return the instance list
1836      @type ghost: boolean
1837      @ivar ghost: whether this is a known node or not (config)
1838      @type os_fail: boolean
1839      @ivar os_fail: whether the RPC call didn't return valid OS data
1840      @type oslist: list
1841      @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1842      @type vm_capable: boolean
1843      @ivar vm_capable: whether the node can host instances
1844      @type pv_min: float
1845      @ivar pv_min: size in MiB of the smallest PVs
1846      @type pv_max: float
1847      @ivar pv_max: size in MiB of the biggest PVs
1848
1849      """
1850 - def __init__(self, offline=False, uuid=None, vm_capable=True):
1851 self.uuid = uuid 1852 self.volumes = {} 1853 self.instances = [] 1854 self.pinst = [] 1855 self.sinst = [] 1856 self.sbp = {} 1857 self.mfree = 0 1858 self.dfree = 0 1859 self.offline = offline 1860 self.vm_capable = vm_capable 1861 self.rpc_fail = False 1862 self.lvm_fail = False 1863 self.hyp_fail = False 1864 self.ghost = False 1865 self.os_fail = False 1866 self.oslist = {} 1867 self.pv_min = None 1868 self.pv_max = None
1869
1870 - def ExpandNames(self):
1871 # This raises errors.OpPrereqError on its own: 1872 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 1873 1874 # Get instances in node group; this is unsafe and needs verification later 1875 inst_uuids = \ 1876 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 1877 1878 self.needed_locks = { 1879 locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids), 1880 locking.LEVEL_NODEGROUP: [self.group_uuid], 1881 locking.LEVEL_NODE: [], 1882 1883 # This opcode is run by watcher every five minutes and acquires all nodes 1884 # for a group. It doesn't run for a long time, so it's better to acquire 1885 # the node allocation lock as well. 1886 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 1887 } 1888 1889 self.share_locks = ShareAll()
1890
1891 - def DeclareLocks(self, level):
1892 if level == locking.LEVEL_NODE: 1893 # Get members of node group; this is unsafe and needs verification later 1894 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 1895 1896 # In Exec(), we warn about mirrored instances that have primary and 1897 # secondary living in separate node groups. To fully verify that 1898 # volumes for these instances are healthy, we will need to do an 1899 # extra call to their secondaries. We ensure here those nodes will 1900 # be locked. 1901 for inst_name in self.owned_locks(locking.LEVEL_INSTANCE): 1902 # Important: access only the instances whose lock is owned 1903 instance = self.cfg.GetInstanceInfoByName(inst_name) 1904 if instance.disk_template in constants.DTS_INT_MIRROR: 1905 nodes.update(instance.secondary_nodes) 1906 1907 self.needed_locks[locking.LEVEL_NODE] = nodes
1908
1909 - def CheckPrereq(self):
1910 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 1911 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 1912 1913 group_node_uuids = set(self.group_info.members) 1914 group_inst_uuids = \ 1915 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 1916 1917 unlocked_node_uuids = \ 1918 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE)) 1919 1920 unlocked_inst_uuids = \ 1921 group_inst_uuids.difference( 1922 [self.cfg.GetInstanceInfoByName(name).uuid 1923 for name in self.owned_locks(locking.LEVEL_INSTANCE)]) 1924 1925 if unlocked_node_uuids: 1926 raise errors.OpPrereqError( 1927 "Missing lock for nodes: %s" % 1928 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)), 1929 errors.ECODE_STATE) 1930 1931 if unlocked_inst_uuids: 1932 raise errors.OpPrereqError( 1933 "Missing lock for instances: %s" % 1934 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)), 1935 errors.ECODE_STATE) 1936 1937 self.all_node_info = self.cfg.GetAllNodesInfo() 1938 self.all_inst_info = self.cfg.GetAllInstancesInfo() 1939 1940 self.my_node_uuids = group_node_uuids 1941 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid]) 1942 for node_uuid in group_node_uuids) 1943 1944 self.my_inst_uuids = group_inst_uuids 1945 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid]) 1946 for inst_uuid in group_inst_uuids) 1947 1948 # We detect here the nodes that will need the extra RPC calls for verifying 1949 # split LV volumes; they should be locked. 1950 extra_lv_nodes = set() 1951 1952 for inst in self.my_inst_info.values(): 1953 if inst.disk_template in constants.DTS_INT_MIRROR: 1954 for nuuid in inst.all_nodes: 1955 if self.all_node_info[nuuid].group != self.group_uuid: 1956 extra_lv_nodes.add(nuuid) 1957 1958 unlocked_lv_nodes = \ 1959 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 1960 1961 if unlocked_lv_nodes: 1962 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 1963 utils.CommaJoin(unlocked_lv_nodes), 1964 errors.ECODE_STATE) 1965 self.extra_lv_nodes = list(extra_lv_nodes)
1966
1967 - def _VerifyNode(self, ninfo, nresult):
1968 """Perform some basic validation on data returned from a node. 1969 1970 - check the result data structure is well formed and has all the 1971 mandatory fields 1972 - check ganeti version 1973 1974 @type ninfo: L{objects.Node} 1975 @param ninfo: the node to check 1976 @param nresult: the results from the node 1977 @rtype: boolean 1978 @return: whether overall this call was successful (and we can expect 1979 reasonable values in the respose) 1980 1981 """ 1982 # main result, nresult should be a non-empty dict 1983 test = not nresult or not isinstance(nresult, dict) 1984 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, 1985 "unable to verify node: no data returned") 1986 if test: 1987 return False 1988 1989 # compares ganeti version 1990 local_version = constants.PROTOCOL_VERSION 1991 remote_version = nresult.get("version", None) 1992 test = not (remote_version and 1993 isinstance(remote_version, (list, tuple)) and 1994 len(remote_version) == 2) 1995 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, 1996 "connection to node returned invalid data") 1997 if test: 1998 return False 1999 2000 test = local_version != remote_version[0] 2001 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name, 2002 "incompatible protocol versions: master %s," 2003 " node %s", local_version, remote_version[0]) 2004 if test: 2005 return False 2006 2007 # node seems compatible, we can actually try to look into its results 2008 2009 # full package version 2010 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2011 constants.CV_ENODEVERSION, ninfo.name, 2012 "software version mismatch: master %s, node %s", 2013 constants.RELEASE_VERSION, remote_version[1], 2014 code=self.ETYPE_WARNING) 2015 2016 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2017 if ninfo.vm_capable and isinstance(hyp_result, dict): 2018 for hv_name, hv_result in hyp_result.iteritems(): 2019 test = hv_result is not None 2020 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2021 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2022 2023 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2024 if ninfo.vm_capable and isinstance(hvp_result, list): 2025 for item, hv_name, hv_result in hvp_result: 2026 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name, 2027 "hypervisor %s parameter verify failure (source %s): %s", 2028 hv_name, item, hv_result) 2029 2030 test = nresult.get(constants.NV_NODESETUP, 2031 ["Missing NODESETUP results"]) 2032 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name, 2033 "node setup error: %s", "; ".join(test)) 2034 2035 return True
2036
2037 - def _VerifyNodeTime(self, ninfo, nresult, 2038 nvinfo_starttime, nvinfo_endtime):
2039 """Check the node time. 2040 2041 @type ninfo: L{objects.Node} 2042 @param ninfo: the node to check 2043 @param nresult: the remote results for the node 2044 @param nvinfo_starttime: the start time of the RPC call 2045 @param nvinfo_endtime: the end time of the RPC call 2046 2047 """ 2048 ntime = nresult.get(constants.NV_TIME, None) 2049 try: 2050 ntime_merged = utils.MergeTime(ntime) 2051 except (ValueError, TypeError): 2052 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name, 2053 "Node returned invalid time") 2054 return 2055 2056 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2057 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2058 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2059 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2060 else: 2061 ntime_diff = None 2062 2063 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name, 2064 "Node time diverges by at least %s from master node time", 2065 ntime_diff)
2066
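The time check only flags a node when its merged clock value lies outside the RPC call window widened by constants.NODE_MAX_CLOCK_SKEW on both sides. A self-contained sketch of that window test (the skew value here is an assumption, not necessarily the constant's real value):

MAX_CLOCK_SKEW = 150.0  # assumed stand-in for constants.NODE_MAX_CLOCK_SKEW

def clock_skew_excess(node_time, rpc_start, rpc_end, max_skew=MAX_CLOCK_SKEW):
    """Return the skew in seconds beyond the tolerated window, or None if OK."""
    if node_time < rpc_start - max_skew:
        return rpc_start - node_time
    if node_time > rpc_end + max_skew:
        return node_time - rpc_end
    return None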
2067 - def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2068 """Check the node LVM results and update info for cross-node checks. 2069 2070 @type ninfo: L{objects.Node} 2071 @param ninfo: the node to check 2072 @param nresult: the remote results for the node 2073 @param vg_name: the configured VG name 2074 @type nimg: L{NodeImage} 2075 @param nimg: node image 2076 2077 """ 2078 if vg_name is None: 2079 return 2080 2081 # checks vg existence and size > 20G 2082 vglist = nresult.get(constants.NV_VGLIST, None) 2083 test = not vglist 2084 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, 2085 "unable to check volume groups") 2086 if not test: 2087 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2088 constants.MIN_VG_SIZE) 2089 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus) 2090 2091 # Check PVs 2092 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage) 2093 for em in errmsgs: 2094 self._Error(constants.CV_ENODELVM, ninfo.name, em) 2095 if pvminmax is not None: 2096 (nimg.pv_min, nimg.pv_max) = pvminmax
2097
2098 - def _VerifyGroupDRBDVersion(self, node_verify_infos):
2099 """Check cross-node DRBD version consistency. 2100 2101 @type node_verify_infos: dict 2102 @param node_verify_infos: infos about nodes as returned from the 2103 node_verify call. 2104 2105 """ 2106 node_versions = {} 2107 for node_uuid, ndata in node_verify_infos.items(): 2108 nresult = ndata.payload 2109 if nresult: 2110 version = nresult.get(constants.NV_DRBDVERSION, None) 2111 if version: 2112 node_versions[node_uuid] = version 2113 2114 if len(set(node_versions.values())) > 1: 2115 for node_uuid, version in sorted(node_versions.items()): 2116 msg = "DRBD version mismatch: %s" % version 2117 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg, 2118 code=self.ETYPE_WARNING)
2119
2120 - def _VerifyGroupLVM(self, node_image, vg_name):
2121 """Check cross-node consistency in LVM. 2122 2123 @type node_image: dict 2124 @param node_image: info about nodes, mapping from node to names to 2125 L{NodeImage} objects 2126 @param vg_name: the configured VG name 2127 2128 """ 2129 if vg_name is None: 2130 return 2131 2132 # Only exclusive storage needs this kind of checks 2133 if not self._exclusive_storage: 2134 return 2135 2136 # exclusive_storage wants all PVs to have the same size (approximately), 2137 # if the smallest and the biggest ones are okay, everything is fine. 2138 # pv_min is None iff pv_max is None 2139 vals = filter((lambda ni: ni.pv_min is not None), node_image.values()) 2140 if not vals: 2141 return 2142 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals) 2143 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals) 2144 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax) 2145 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name, 2146 "PV sizes differ too much in the group; smallest (%s MB) is" 2147 " on %s, biggest (%s MB) is on %s", 2148 pvmin, self.cfg.GetNodeName(minnode_uuid), 2149 pvmax, self.cfg.GetNodeName(maxnode_uuid))
2150
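Under exclusive storage, the group-wide LVM check reduces to comparing the smallest PV with the biggest one across all nodes: if those two are close enough, every pair is. A sketch of that reduction, with an assumed relative tolerance standing in for utils.LvmExclusiveTestBadPvSizes:

def group_pv_spread(node_images, rel_tolerance=0.1):
    """Return (bad, pvmin, minnode, pvmax, maxnode), or None if nothing to check."""
    vals = [ni for ni in node_images.values() if ni.pv_min is not None]
    if not vals:
        return None
    pvmin, minnode = min((ni.pv_min, ni.uuid) for ni in vals)
    pvmax, maxnode = max((ni.pv_max, ni.uuid) for ni in vals)
    bad = (pvmax - pvmin) > pvmin * rel_tolerance   # tolerance rule is assumed
    return (bad, pvmin, minnode, pvmax, maxnode)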
2151 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2152 """Check the node bridges. 2153 2154 @type ninfo: L{objects.Node} 2155 @param ninfo: the node to check 2156 @param nresult: the remote results for the node 2157 @param bridges: the expected list of bridges 2158 2159 """ 2160 if not bridges: 2161 return 2162 2163 missing = nresult.get(constants.NV_BRIDGES, None) 2164 test = not isinstance(missing, list) 2165 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2166 "did not return valid bridge information") 2167 if not test: 2168 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name, 2169 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2170
2171 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2172 """Check the results of user scripts presence and executability on the node 2173 2174 @type ninfo: L{objects.Node} 2175 @param ninfo: the node to check 2176 @param nresult: the remote results for the node 2177 2178 """ 2179 test = not constants.NV_USERSCRIPTS in nresult 2180 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name, 2181 "did not return user scripts information") 2182 2183 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) 2184 if not test: 2185 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name, 2186 "user scripts not present or not executable: %s" % 2187 utils.CommaJoin(sorted(broken_scripts)))
2188
2189 - def _VerifyNodeNetwork(self, ninfo, nresult):
2190 """Check the node network connectivity results. 2191 2192 @type ninfo: L{objects.Node} 2193 @param ninfo: the node to check 2194 @param nresult: the remote results for the node 2195 2196 """ 2197 test = constants.NV_NODELIST not in nresult 2198 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name, 2199 "node hasn't returned node ssh connectivity data") 2200 if not test: 2201 if nresult[constants.NV_NODELIST]: 2202 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2203 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name, 2204 "ssh communication with node '%s': %s", a_node, a_msg) 2205 2206 test = constants.NV_NODENETTEST not in nresult 2207 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2208 "node hasn't returned node tcp connectivity data") 2209 if not test: 2210 if nresult[constants.NV_NODENETTEST]: 2211 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2212 for anode in nlist: 2213 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, 2214 "tcp communication with node '%s': %s", 2215 anode, nresult[constants.NV_NODENETTEST][anode]) 2216 2217 test = constants.NV_MASTERIP not in nresult 2218 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2219 "node hasn't returned node master IP reachability data") 2220 if not test: 2221 if not nresult[constants.NV_MASTERIP]: 2222 if ninfo.uuid == self.master_node: 2223 msg = "the master node cannot reach the master IP (not configured?)" 2224 else: 2225 msg = "cannot reach the master IP" 2226 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
2227
2228 - def _VerifyInstance(self, instance, node_image, diskstatus):
2229 """Verify an instance. 2230 2231 This function checks to see if the required block devices are 2232 available on the instance's node, and that the nodes are in the correct 2233 state. 2234 2235 """ 2236 pnode_uuid = instance.primary_node 2237 pnode_img = node_image[pnode_uuid] 2238 groupinfo = self.cfg.GetAllNodeGroupsInfo() 2239 2240 node_vol_should = {} 2241 instance.MapLVsByNode(node_vol_should) 2242 2243 cluster = self.cfg.GetClusterInfo() 2244 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 2245 self.group_info) 2246 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg) 2247 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name, 2248 utils.CommaJoin(err), code=self.ETYPE_WARNING) 2249 2250 for node_uuid in node_vol_should: 2251 n_img = node_image[node_uuid] 2252 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2253 # ignore missing volumes on offline or broken nodes 2254 continue 2255 for volume in node_vol_should[node_uuid]: 2256 test = volume not in n_img.volumes 2257 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name, 2258 "volume %s missing on node %s", volume, 2259 self.cfg.GetNodeName(node_uuid)) 2260 2261 if instance.admin_state == constants.ADMINST_UP: 2262 test = instance.uuid not in pnode_img.instances and not pnode_img.offline 2263 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name, 2264 "instance not running on its primary node %s", 2265 self.cfg.GetNodeName(pnode_uuid)) 2266 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, 2267 instance.name, "instance is marked as running and lives on" 2268 " offline node %s", self.cfg.GetNodeName(pnode_uuid)) 2269 2270 diskdata = [(nname, success, status, idx) 2271 for (nname, disks) in diskstatus.items() 2272 for idx, (success, status) in enumerate(disks)] 2273 2274 for nname, success, bdev_status, idx in diskdata: 2275 # the 'ghost node' construction in Exec() ensures that we have a 2276 # node here 2277 snode = node_image[nname] 2278 bad_snode = snode.ghost or snode.offline 2279 self._ErrorIf(instance.disks_active and 2280 not success and not bad_snode, 2281 constants.CV_EINSTANCEFAULTYDISK, instance.name, 2282 "couldn't retrieve status for disk/%s on %s: %s", 2283 idx, self.cfg.GetNodeName(nname), bdev_status) 2284 2285 if instance.disks_active and success and \ 2286 (bdev_status.is_degraded or 2287 bdev_status.ldisk_status != constants.LDS_OKAY): 2288 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname)) 2289 if bdev_status.is_degraded: 2290 msg += " is degraded" 2291 if bdev_status.ldisk_status != constants.LDS_OKAY: 2292 msg += "; state is '%s'" % \ 2293 constants.LDS_NAMES[bdev_status.ldisk_status] 2294 2295 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg) 2296 2297 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2298 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid), 2299 "instance %s, connection to primary node failed", 2300 instance.name) 2301 2302 self._ErrorIf(len(instance.secondary_nodes) > 1, 2303 constants.CV_EINSTANCELAYOUT, instance.name, 2304 "instance has multiple secondary nodes: %s", 2305 utils.CommaJoin(instance.secondary_nodes), 2306 code=self.ETYPE_WARNING) 2307 2308 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes) 2309 if any(es_flags.values()): 2310 if instance.disk_template not in constants.DTS_EXCL_STORAGE: 2311 # Disk template not compatible with exclusive_storage: no instance 2312 # node should have the flag set 2313 es_nodes = [n 2314 for (n, es) in 
es_flags.items() 2315 if es] 2316 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name, 2317 "instance has template %s, which is not supported on nodes" 2318 " that have exclusive storage set: %s", 2319 instance.disk_template, 2320 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes))) 2321 for (idx, disk) in enumerate(instance.disks): 2322 self._ErrorIf(disk.spindles is None, 2323 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name, 2324 "number of spindles not configured for disk %s while" 2325 " exclusive storage is enabled, try running" 2326 " gnt-cluster repair-disk-sizes", idx) 2327 2328 if instance.disk_template in constants.DTS_INT_MIRROR: 2329 instance_nodes = utils.NiceSort(instance.all_nodes) 2330 instance_groups = {} 2331 2332 for node_uuid in instance_nodes: 2333 instance_groups.setdefault(self.all_node_info[node_uuid].group, 2334 []).append(node_uuid) 2335 2336 pretty_list = [ 2337 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)), 2338 groupinfo[group].name) 2339 # Sort so that we always list the primary node first. 2340 for group, nodes in sorted(instance_groups.items(), 2341 key=lambda (_, nodes): pnode_uuid in nodes, 2342 reverse=True)] 2343 2344 self._ErrorIf(len(instance_groups) > 1, 2345 constants.CV_EINSTANCESPLITGROUPS, 2346 instance.name, "instance has primary and secondary nodes in" 2347 " different groups: %s", utils.CommaJoin(pretty_list), 2348 code=self.ETYPE_WARNING) 2349 2350 inst_nodes_offline = [] 2351 for snode in instance.secondary_nodes: 2352 s_img = node_image[snode] 2353 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 2354 self.cfg.GetNodeName(snode), 2355 "instance %s, connection to secondary node failed", 2356 instance.name) 2357 2358 if s_img.offline: 2359 inst_nodes_offline.append(snode) 2360 2361 # warn that the instance lives on offline nodes 2362 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, 2363 instance.name, "instance has offline secondary node(s) %s", 2364 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline))) 2365 # ... or ghost/non-vm_capable nodes 2366 for node_uuid in instance.all_nodes: 2367 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE, 2368 instance.name, "instance lives on ghost node %s", 2369 self.cfg.GetNodeName(node_uuid)) 2370 self._ErrorIf(not node_image[node_uuid].vm_capable, 2371 constants.CV_EINSTANCEBADNODE, instance.name, 2372 "instance lives on non-vm_capable node %s", 2373 self.cfg.GetNodeName(node_uuid))
2374
2375 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2376 """Verify if there are any unknown volumes in the cluster. 2377 2378 The .os, .swap and backup volumes are ignored. All other volumes are 2379 reported as unknown. 2380 2381 @type reserved: L{ganeti.utils.FieldSet} 2382 @param reserved: a FieldSet of reserved volume names 2383 2384 """ 2385 for node_uuid, n_img in node_image.items(): 2386 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2387 self.all_node_info[node_uuid].group != self.group_uuid): 2388 # skip non-healthy nodes 2389 continue 2390 for volume in n_img.volumes: 2391 test = ((node_uuid not in node_vol_should or 2392 volume not in node_vol_should[node_uuid]) and 2393 not reserved.Matches(volume)) 2394 self._ErrorIf(test, constants.CV_ENODEORPHANLV, 2395 self.cfg.GetNodeName(node_uuid), 2396 "volume %s is unknown", volume, 2397 code=_VerifyErrors.ETYPE_WARNING)
2398
2399 - def _VerifyNPlusOneMemory(self, node_image, all_insts):
2400      """Verify N+1 Memory Resilience.
2401
2402      Check that if one single node dies we can still start all the
2403      instances it was primary for.
2404
2405      """
2406      cluster_info = self.cfg.GetClusterInfo()
2407      for node_uuid, n_img in node_image.items():
2408        # This code checks that every node which is now listed as
2409        # secondary has enough memory to host all instances it is
2410        # supposed to, should a single other node in the cluster fail.
2411        # FIXME: not ready for failover to an arbitrary node
2412        # FIXME: does not support file-backed instances
2413        # WARNING: we currently take into account down instances as well
2414        # as up ones, considering that even if they're down someone
2415        # might want to start them even in the event of a node failure.
2416        if n_img.offline or \
2417           self.all_node_info[node_uuid].group != self.group_uuid:
2418          # we're skipping nodes marked offline and nodes in other groups from
2419          # the N+1 warning, since most likely we don't have good memory
2420          # information from them; we already list instances living on such
2421          # nodes, and that's enough warning
2422          continue
2423        #TODO(dynmem): also consider ballooning out other instances
2424        for prinode, inst_uuids in n_img.sbp.items():
2425          needed_mem = 0
2426          for inst_uuid in inst_uuids:
2427            bep = cluster_info.FillBE(all_insts[inst_uuid])
2428            if bep[constants.BE_AUTO_BALANCE]:
2429              needed_mem += bep[constants.BE_MINMEM]
2430          test = n_img.mfree < needed_mem
2431          self._ErrorIf(test, constants.CV_ENODEN1,
2432                        self.cfg.GetNodeName(node_uuid),
2433                        "not enough memory to accommodate instance failovers"
2434                        " should node %s fail (%dMiB needed, %dMiB available)",
2435                        self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2436
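Per node, the N+1 check sums the minimum memory of all auto-balanced instances each primary would fail over to it and compares the sum with the node's reported free memory. A compact sketch of that computation; the string keys stand in for the constants.BE_* names and fill_be for cluster_info.FillBE:

def n_plus_one_shortfalls(node_images, all_insts, fill_be):
    """Yield (node, primary, needed_mib, free_mib) where a failover would not fit."""
    for node_uuid, nimg in node_images.items():
        for prinode, inst_uuids in nimg.sbp.items():
            needed = 0
            for inst_uuid in inst_uuids:
                bep = fill_be(all_insts[inst_uuid])   # filled backend parameters
                if bep["auto_balance"]:               # cf. constants.BE_AUTO_BALANCE
                    needed += bep["minmem"]           # cf. constants.BE_MINMEM
            if nimg.mfree < needed:
                yield (node_uuid, prinode, needed, nimg.mfree)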
2437 - def _VerifyClientCertificates(self, nodes, all_nvinfo):
2438 """Verifies the consistency of the client certificates. 2439 2440 This includes several aspects: 2441 - the individual validation of all nodes' certificates 2442 - the consistency of the master candidate certificate map 2443 - the consistency of the master candidate certificate map with the 2444 certificates that the master candidates are actually using. 2445 2446 @param nodes: the list of nodes to consider in this verification 2447 @param all_nvinfo: the map of results of the verify_node call to 2448 all nodes 2449 2450 """ 2451 candidate_certs = self.cfg.GetClusterInfo().candidate_certs 2452 if candidate_certs is None or len(candidate_certs) == 0: 2453 self._ErrorIf( 2454 True, constants.CV_ECLUSTERCLIENTCERT, None, 2455 "The cluster's list of master candidate certificates is empty." 2456 " If you just updated the cluster, please run" 2457 " 'gnt-cluster renew-crypto --new-node-certificates'.") 2458 return 2459 2460 self._ErrorIf( 2461 len(candidate_certs) != len(set(candidate_certs.values())), 2462 constants.CV_ECLUSTERCLIENTCERT, None, 2463 "There are at least two master candidates configured to use the same" 2464 " certificate.") 2465 2466 # collect the client certificate 2467 for node in nodes: 2468 if node.offline: 2469 continue 2470 2471 nresult = all_nvinfo[node.uuid] 2472 if nresult.fail_msg or not nresult.payload: 2473 continue 2474 2475 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None) 2476 2477 self._ErrorIf( 2478 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None, 2479 "Client certificate of node '%s' failed validation: %s (code '%s')", 2480 node.uuid, msg, errcode) 2481 2482 if not errcode: 2483 digest = msg 2484 if node.master_candidate: 2485 if node.uuid in candidate_certs: 2486 self._ErrorIf( 2487 digest != candidate_certs[node.uuid], 2488 constants.CV_ECLUSTERCLIENTCERT, None, 2489 "Client certificate digest of master candidate '%s' does not" 2490 " match its entry in the cluster's map of master candidate" 2491 " certificates. Expected: %s Got: %s", node.uuid, 2492 digest, candidate_certs[node.uuid]) 2493 else: 2494 self._ErrorIf( 2495 True, constants.CV_ECLUSTERCLIENTCERT, None, 2496 "The master candidate '%s' does not have an entry in the" 2497 " map of candidate certificates.", node.uuid) 2498 self._ErrorIf( 2499 digest in candidate_certs.values(), 2500 constants.CV_ECLUSTERCLIENTCERT, None, 2501 "Master candidate '%s' is using a certificate of another node.", 2502 node.uuid) 2503 else: 2504 self._ErrorIf( 2505 node.uuid in candidate_certs, 2506 constants.CV_ECLUSTERCLIENTCERT, None, 2507 "Node '%s' is not a master candidate, but still listed in the" 2508 " map of master candidate certificates.", node.uuid) 2509 self._ErrorIf( 2510 (node.uuid not in candidate_certs) and 2511 (digest in candidate_certs.values()), 2512 constants.CV_ECLUSTERCLIENTCERT, None, 2513 "Node '%s' is not a master candidate and is incorrectly using a" 2514 " certificate of another node which is master candidate.", 2515 node.uuid)
2516
2517 - def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, 2518 (files_all, files_opt, files_mc, files_vm)):
2519 """Verifies file checksums collected from all nodes. 2520 2521 @param nodes: List of L{objects.Node} objects 2522 @param master_node_uuid: UUID of master node 2523 @param all_nvinfo: RPC results 2524 2525 """ 2526 # Define functions determining which nodes to consider for a file 2527 files2nodefn = [ 2528 (files_all, None), 2529 (files_mc, lambda node: (node.master_candidate or 2530 node.uuid == master_node_uuid)), 2531 (files_vm, lambda node: node.vm_capable), 2532 ] 2533 2534 # Build mapping from filename to list of nodes which should have the file 2535 nodefiles = {} 2536 for (files, fn) in files2nodefn: 2537 if fn is None: 2538 filenodes = nodes 2539 else: 2540 filenodes = filter(fn, nodes) 2541 nodefiles.update((filename, 2542 frozenset(map(operator.attrgetter("uuid"), filenodes))) 2543 for filename in files) 2544 2545 assert set(nodefiles) == (files_all | files_mc | files_vm) 2546 2547 fileinfo = dict((filename, {}) for filename in nodefiles) 2548 ignore_nodes = set() 2549 2550 for node in nodes: 2551 if node.offline: 2552 ignore_nodes.add(node.uuid) 2553 continue 2554 2555 nresult = all_nvinfo[node.uuid] 2556 2557 if nresult.fail_msg or not nresult.payload: 2558 node_files = None 2559 else: 2560 fingerprints = nresult.payload.get(constants.NV_FILELIST, {}) 2561 node_files = dict((vcluster.LocalizeVirtualPath(key), value) 2562 for (key, value) in fingerprints.items()) 2563 del fingerprints 2564 2565 test = not (node_files and isinstance(node_files, dict)) 2566 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name, 2567 "Node did not return file checksum data") 2568 if test: 2569 ignore_nodes.add(node.uuid) 2570 continue 2571 2572 # Build per-checksum mapping from filename to nodes having it 2573 for (filename, checksum) in node_files.items(): 2574 assert filename in nodefiles 2575 fileinfo[filename].setdefault(checksum, set()).add(node.uuid) 2576 2577 for (filename, checksums) in fileinfo.items(): 2578 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2579 2580 # Nodes having the file 2581 with_file = frozenset(node_uuid 2582 for node_uuids in fileinfo[filename].values() 2583 for node_uuid in node_uuids) - ignore_nodes 2584 2585 expected_nodes = nodefiles[filename] - ignore_nodes 2586 2587 # Nodes missing file 2588 missing_file = expected_nodes - with_file 2589 2590 if filename in files_opt: 2591 # All or no nodes 2592 self._ErrorIf(missing_file and missing_file != expected_nodes, 2593 constants.CV_ECLUSTERFILECHECK, None, 2594 "File %s is optional, but it must exist on all or no" 2595 " nodes (not found on %s)", 2596 filename, 2597 utils.CommaJoin( 2598 utils.NiceSort( 2599 map(self.cfg.GetNodeName, missing_file)))) 2600 else: 2601 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2602 "File %s is missing from node(s) %s", filename, 2603 utils.CommaJoin( 2604 utils.NiceSort( 2605 map(self.cfg.GetNodeName, missing_file)))) 2606 2607 # Warn if a node has a file it shouldn't 2608 unexpected = with_file - expected_nodes 2609 self._ErrorIf(unexpected, 2610 constants.CV_ECLUSTERFILECHECK, None, 2611 "File %s should not exist on node(s) %s", 2612 filename, utils.CommaJoin( 2613 utils.NiceSort(map(self.cfg.GetNodeName, unexpected)))) 2614 2615 # See if there are multiple versions of the file 2616 test = len(checksums) > 1 2617 if test: 2618 variants = ["variant %s on %s" % 2619 (idx + 1, 2620 utils.CommaJoin(utils.NiceSort( 2621 map(self.cfg.GetNodeName, node_uuids)))) 2622 for (idx, (checksum, node_uuids)) in 2623 
enumerate(sorted(checksums.items()))] 2624 else: 2625 variants = [] 2626 2627 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None, 2628 "File %s found with %s different checksums (%s)", 2629 filename, len(checksums), "; ".join(variants))
2630
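_VerifyFiles first builds, per file, a map from checksum to the set of node UUIDs reporting that checksum, and then compares the union of those sets against the nodes expected to hold the file. A hedged example of the intermediate fileinfo structure (paths, UUIDs and digests are invented):

fileinfo = {
    "/var/lib/ganeti/config.data": {
        "1f2e3d4c5b6a...": set(["uuid-node1", "uuid-node2", "uuid-node3"]),  # all agree
    },
    "/var/lib/ganeti/hmac.key": {
        "9a8b7c6d5e4f...": set(["uuid-node1", "uuid-node2"]),
        "0d0e0f101112...": set(["uuid-node3"]),   # second variant -> checksum mismatch
    },
}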
2631 - def _VerifyNodeDrbdHelper(self, ninfo, nresult, drbd_helper):
2632 """Verify the drbd helper. 2633 2634 """ 2635 if drbd_helper: 2636 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 2637 test = (helper_result is None) 2638 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, 2639 "no drbd usermode helper returned") 2640 if helper_result: 2641 status, payload = helper_result 2642 test = not status 2643 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, 2644 "drbd usermode helper check unsuccessful: %s", payload) 2645 test = status and (payload != drbd_helper) 2646 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name, 2647 "wrong drbd usermode helper: %s", payload)
2648
2649 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2650 drbd_map):
2651      """Verifies the node DRBD status.
2652
2653      @type ninfo: L{objects.Node}
2654      @param ninfo: the node to check
2655      @param nresult: the remote results for the node
2656      @param instanceinfo: the dict of instances
2657      @param drbd_helper: the configured DRBD usermode helper
2658      @param drbd_map: the DRBD map as returned by
2659          L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2660
2661      """
2662      self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
2663
2664      # compute the DRBD minors
2665      node_drbd = {}
2666      for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2667        test = inst_uuid not in instanceinfo
2668        self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2669                      "ghost instance '%s' in temporary DRBD map", inst_uuid)
2670        # ghost instance should not be running, but otherwise we
2671        # don't give double warnings (both ghost instance and
2672        # unallocated minor in use)
2673        if test:
2674          node_drbd[minor] = (inst_uuid, False)
2675        else:
2676          instance = instanceinfo[inst_uuid]
2677          node_drbd[minor] = (inst_uuid, instance.disks_active)
2678
2679      # and now check them
2680      used_minors = nresult.get(constants.NV_DRBDLIST, [])
2681      test = not isinstance(used_minors, (tuple, list))
2682      self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2683                    "cannot parse drbd status file: %s", str(used_minors))
2684      if test:
2685        # we cannot check drbd status
2686        return
2687
2688      for minor, (inst_uuid, must_exist) in node_drbd.items():
2689        test = minor not in used_minors and must_exist
2690        self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2691                      "drbd minor %d of instance %s is not active", minor,
2692                      self.cfg.GetInstanceName(inst_uuid))
2693      for minor in used_minors:
2694        test = minor not in node_drbd
2695        self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2696                      "unallocated drbd minor %d is in use", minor)
2697
2698 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2699 """Builds the node OS structures. 2700 2701 @type ninfo: L{objects.Node} 2702 @param ninfo: the node to check 2703 @param nresult: the remote results for the node 2704 @param nimg: the node image object 2705 2706 """ 2707 remote_os = nresult.get(constants.NV_OSLIST, None) 2708 test = (not isinstance(remote_os, list) or 2709 not compat.all(isinstance(v, list) and len(v) == 7 2710 for v in remote_os)) 2711 2712 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, 2713 "node hasn't returned valid OS data") 2714 2715 nimg.os_fail = test 2716 2717 if test: 2718 return 2719 2720 os_dict = {} 2721 2722 for (name, os_path, status, diagnose, 2723 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 2724 2725 if name not in os_dict: 2726 os_dict[name] = [] 2727 2728 # parameters is a list of lists instead of list of tuples due to 2729 # JSON lacking a real tuple type, fix it: 2730 parameters = [tuple(v) for v in parameters] 2731 os_dict[name].append((os_path, status, diagnose, 2732 set(variants), set(parameters), set(api_ver))) 2733 2734 nimg.oslist = os_dict
2735
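The per-node OS data is regrouped by OS name; each entry keeps the install path, the validity flag, the diagnose message and the variant/parameter/API-version sets that later get compared against the reference node. A hedged example of the resulting structure (all values invented, and the exact shape of the parameter tuples is simplified):

oslist_example = {
    "debian-image": [
        ("/srv/ganeti/os/debian-image",          # os_path
         True, "",                               # status, diagnose message
         set(["default", "minimal"]),            # variants
         set([("dhcp", "Whether to use DHCP")]), # parameters
         set([20])),                             # supported OS API versions
    ],
}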
2736 - def _VerifyNodeOS(self, ninfo, nimg, base):
2737 """Verifies the node OS list. 2738 2739 @type ninfo: L{objects.Node} 2740 @param ninfo: the node to check 2741 @param nimg: the node image object 2742 @param base: the 'template' node we match against (e.g. from the master) 2743 2744 """ 2745 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 2746 2747 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 2748 for os_name, os_data in nimg.oslist.items(): 2749 assert os_data, "Empty OS status for OS %s?!" % os_name 2750 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 2751 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name, 2752 "Invalid OS %s (located at %s): %s", 2753 os_name, f_path, f_diag) 2754 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name, 2755 "OS '%s' has multiple entries" 2756 " (first one shadows the rest): %s", 2757 os_name, utils.CommaJoin([v[0] for v in os_data])) 2758 # comparisons with the 'base' image 2759 test = os_name not in base.oslist 2760 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, 2761 "Extra OS %s not present on reference node (%s)", 2762 os_name, self.cfg.GetNodeName(base.uuid)) 2763 if test: 2764 continue 2765 assert base.oslist[os_name], "Base node has empty OS status?" 2766 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 2767 if not b_status: 2768 # base OS is invalid, skipping 2769 continue 2770 for kind, a, b in [("API version", f_api, b_api), 2771 ("variants list", f_var, b_var), 2772 ("parameters", beautify_params(f_param), 2773 beautify_params(b_param))]: 2774 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name, 2775 "OS %s for %s differs from reference node %s:" 2776 " [%s] vs. [%s]", kind, os_name, 2777 self.cfg.GetNodeName(base.uuid), 2778 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 2779 2780 # check any missing OSes 2781 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 2782 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name, 2783 "OSes present on reference node %s" 2784 " but missing on this node: %s", 2785 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2786
2787 - def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
2788 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. 2789 2790 @type ninfo: L{objects.Node} 2791 @param ninfo: the node to check 2792 @param nresult: the remote results for the node 2793 @type is_master: bool 2794 @param is_master: Whether node is the master node 2795 2796 """ 2797 cluster = self.cfg.GetClusterInfo() 2798 if (is_master and 2799 (cluster.IsFileStorageEnabled() or 2800 cluster.IsSharedFileStorageEnabled())): 2801 try: 2802 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS] 2803 except KeyError: 2804 # This should never happen 2805 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2806 "Node did not return forbidden file storage paths") 2807 else: 2808 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2809 "Found forbidden file storage paths: %s", 2810 utils.CommaJoin(fspaths)) 2811 else: 2812 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult, 2813 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2814 "Node should not have returned forbidden file storage" 2815 " paths")
2816
2817 - def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template, 2818 verify_key, error_key):
2819 """Verifies (file) storage paths. 2820 2821 @type ninfo: L{objects.Node} 2822 @param ninfo: the node to check 2823 @param nresult: the remote results for the node 2824 @type file_disk_template: string 2825 @param file_disk_template: file-based disk template, whose directory 2826 is supposed to be verified 2827 @type verify_key: string 2828 @param verify_key: key for the verification map of this file 2829 verification step 2830 @param error_key: error key to be added to the verification results 2831 in case something goes wrong in this verification step 2832 2833 """ 2834 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes( 2835 constants.ST_FILE, constants.ST_SHARED_FILE 2836 )) 2837 2838 cluster = self.cfg.GetClusterInfo() 2839 if cluster.IsDiskTemplateEnabled(file_disk_template): 2840 self._ErrorIf( 2841 verify_key in nresult, 2842 error_key, ninfo.name, 2843 "The configured %s storage path is unusable: %s" % 2844 (file_disk_template, nresult.get(verify_key)))
2845
2846 - def _VerifyFileStoragePaths(self, ninfo, nresult):
2847 """Verifies (file) storage paths. 2848 2849 @see: C{_VerifyStoragePaths} 2850 2851 """ 2852 self._VerifyStoragePaths( 2853 ninfo, nresult, constants.DT_FILE, 2854 constants.NV_FILE_STORAGE_PATH, 2855 constants.CV_ENODEFILESTORAGEPATHUNUSABLE)
2856
2857 - def _VerifySharedFileStoragePaths(self, ninfo, nresult):
2858 """Verifies (file) storage paths. 2859 2860 @see: C{_VerifyStoragePaths} 2861 2862 """ 2863 self._VerifyStoragePaths( 2864 ninfo, nresult, constants.DT_SHARED_FILE, 2865 constants.NV_SHARED_FILE_STORAGE_PATH, 2866 constants.CV_ENODESHAREDFILESTORAGEPATHUNUSABLE)
2867
2868 - def _VerifyOob(self, ninfo, nresult):
2869 """Verifies out of band functionality of a node. 2870 2871 @type ninfo: L{objects.Node} 2872 @param ninfo: the node to check 2873 @param nresult: the remote results for the node 2874 2875 """ 2876 # We just have to verify the paths on master and/or master candidates 2877 # as the oob helper is invoked on the master 2878 if ((ninfo.master_candidate or ninfo.master_capable) and 2879 constants.NV_OOB_PATHS in nresult): 2880 for path_result in nresult[constants.NV_OOB_PATHS]: 2881 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, 2882 ninfo.name, path_result)
2883
2884 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2885 """Verifies and updates the node volume data. 2886 2887 This function will update a L{NodeImage}'s internal structures 2888 with data from the remote call. 2889 2890 @type ninfo: L{objects.Node} 2891 @param ninfo: the node to check 2892 @param nresult: the remote results for the node 2893 @param nimg: the node image object 2894 @param vg_name: the configured VG name 2895 2896 """ 2897 nimg.lvm_fail = True 2898 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 2899 if vg_name is None: 2900 pass 2901 elif isinstance(lvdata, basestring): 2902 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, 2903 "LVM problem on node: %s", utils.SafeEncode(lvdata)) 2904 elif not isinstance(lvdata, dict): 2905 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, 2906 "rpc call to node failed (lvlist)") 2907 else: 2908 nimg.volumes = lvdata 2909 nimg.lvm_fail = False
2910
2911 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2912 """Verifies and updates the node instance list. 2913 2914 If the listing was successful, then updates this node's instance 2915 list. Otherwise, it marks the RPC call as failed for the instance 2916 list key. 2917 2918 @type ninfo: L{objects.Node} 2919 @param ninfo: the node to check 2920 @param nresult: the remote results for the node 2921 @param nimg: the node image object 2922 2923 """ 2924 idata = nresult.get(constants.NV_INSTANCELIST, None) 2925 test = not isinstance(idata, list) 2926 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2927 "rpc call to node failed (instancelist): %s", 2928 utils.SafeEncode(str(idata))) 2929 if test: 2930 nimg.hyp_fail = True 2931 else: 2932 nimg.instances = [uuid for (uuid, _) in 2933 self.cfg.GetMultiInstanceInfoByName(idata)]
2934
2935 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2936 """Verifies and computes a node information map 2937 2938 @type ninfo: L{objects.Node} 2939 @param ninfo: the node to check 2940 @param nresult: the remote results for the node 2941 @param nimg: the node image object 2942 @param vg_name: the configured VG name 2943 2944 """ 2945 # try to read free memory (from the hypervisor) 2946 hv_info = nresult.get(constants.NV_HVINFO, None) 2947 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 2948 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2949 "rpc call to node failed (hvinfo)") 2950 if not test: 2951 try: 2952 nimg.mfree = int(hv_info["memory_free"]) 2953 except (ValueError, TypeError): 2954 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, 2955 "node returned invalid nodeinfo, check hypervisor") 2956 2957 # FIXME: devise a free space model for file based instances as well 2958 if vg_name is not None: 2959 test = (constants.NV_VGLIST not in nresult or 2960 vg_name not in nresult[constants.NV_VGLIST]) 2961 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, 2962 "node didn't return data for the volume group '%s'" 2963 " - it is either missing or broken", vg_name) 2964 if not test: 2965 try: 2966 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 2967 except (ValueError, TypeError): 2968 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, 2969 "node returned invalid LVM info, check LVM status")
2970
2971 - def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
2972 """Gets per-disk status information for all instances. 2973 2974 @type node_uuids: list of strings 2975 @param node_uuids: Node UUIDs 2976 @type node_image: dict of (UUID, L{objects.Node}) 2977 @param node_image: Node objects 2978 @type instanceinfo: dict of (UUID, L{objects.Instance}) 2979 @param instanceinfo: Instance objects 2980 @rtype: {instance: {node: [(succes, payload)]}} 2981 @return: a dictionary of per-instance dictionaries with nodes as 2982 keys and disk information as values; the disk information is a 2983 list of tuples (success, payload) 2984 2985 """ 2986 node_disks = {} 2987 node_disks_dev_inst_only = {} 2988 diskless_instances = set() 2989 nodisk_instances = set() 2990 diskless = constants.DT_DISKLESS 2991 2992 for nuuid in node_uuids: 2993 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst, 2994 node_image[nuuid].sinst)) 2995 diskless_instances.update(uuid for uuid in node_inst_uuids 2996 if instanceinfo[uuid].disk_template == diskless) 2997 disks = [(inst_uuid, disk) 2998 for inst_uuid in node_inst_uuids 2999 for disk in instanceinfo[inst_uuid].disks] 3000 3001 if not disks: 3002 nodisk_instances.update(uuid for uuid in node_inst_uuids 3003 if instanceinfo[uuid].disk_template != diskless) 3004 # No need to collect data 3005 continue 3006 3007 node_disks[nuuid] = disks 3008 3009 # _AnnotateDiskParams makes already copies of the disks 3010 dev_inst_only = [] 3011 for (inst_uuid, dev) in disks: 3012 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev], 3013 self.cfg) 3014 dev_inst_only.append((anno_disk, instanceinfo[inst_uuid])) 3015 3016 node_disks_dev_inst_only[nuuid] = dev_inst_only 3017 3018 assert len(node_disks) == len(node_disks_dev_inst_only) 3019 3020 # Collect data from all nodes with disks 3021 result = self.rpc.call_blockdev_getmirrorstatus_multi( 3022 node_disks.keys(), node_disks_dev_inst_only) 3023 3024 assert len(result) == len(node_disks) 3025 3026 instdisk = {} 3027 3028 for (nuuid, nres) in result.items(): 3029 node = self.cfg.GetNodeInfo(nuuid) 3030 disks = node_disks[node.uuid] 3031 3032 if nres.offline: 3033 # No data from this node 3034 data = len(disks) * [(False, "node offline")] 3035 else: 3036 msg = nres.fail_msg 3037 self._ErrorIf(msg, constants.CV_ENODERPC, node.name, 3038 "while getting disk information: %s", msg) 3039 if msg: 3040 # No data from this node 3041 data = len(disks) * [(False, msg)] 3042 else: 3043 data = [] 3044 for idx, i in enumerate(nres.payload): 3045 if isinstance(i, (tuple, list)) and len(i) == 2: 3046 data.append(i) 3047 else: 3048 logging.warning("Invalid result from node %s, entry %d: %s", 3049 node.name, idx, i) 3050 data.append((False, "Invalid result from the remote node")) 3051 3052 for ((inst_uuid, _), status) in zip(disks, data): 3053 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \ 3054 .append(status) 3055 3056 # Add empty entries for diskless instances. 
3057 for inst_uuid in diskless_instances: 3058 assert inst_uuid not in instdisk 3059 instdisk[inst_uuid] = {} 3060 # ...and disk-full instances that happen to have no disks 3061 for inst_uuid in nodisk_instances: 3062 assert inst_uuid not in instdisk 3063 instdisk[inst_uuid] = {} 3064 3065 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 3066 len(nuuids) <= len(instanceinfo[inst].all_nodes) and 3067 compat.all(isinstance(s, (tuple, list)) and 3068 len(s) == 2 for s in statuses) 3069 for inst, nuuids in instdisk.items() 3070 for nuuid, statuses in nuuids.items()) 3071 if __debug__: 3072 instdisk_keys = set(instdisk) 3073 instanceinfo_keys = set(instanceinfo) 3074 assert instdisk_keys == instanceinfo_keys, \ 3075 ("instdisk keys (%s) do not match instanceinfo keys (%s)" % 3076 (instdisk_keys, instanceinfo_keys)) 3077 3078 return instdisk
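The return value maps every instance UUID to a per-node list of (success, payload) pairs, one pair per disk, with diskless instances mapped to empty dicts. A hedged example of the shape for one DRBD instance and one diskless instance (UUIDs invented; the real payloads are the objects returned by the mirror-status RPC):

disk0_status = object()   # placeholder for a block device status payload
instdisk = {
    "uuid-inst-drbd": {
        "uuid-node-primary":   [(True, disk0_status)],
        "uuid-node-secondary": [(False, "node offline")],
    },
    "uuid-inst-diskless": {},
}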
3079 3080 @staticmethod
3081 - def _SshNodeSelector(group_uuid, all_nodes):
3082 """Create endless iterators for all potential SSH check hosts. 3083 3084 """ 3085 nodes = [node for node in all_nodes 3086 if (node.group != group_uuid and 3087 not node.offline)] 3088 keyfunc = operator.attrgetter("group") 3089 3090 return map(itertools.cycle, 3091 [sorted(map(operator.attrgetter("name"), names)) 3092 for _, names in itertools.groupby(sorted(nodes, key=keyfunc), 3093 keyfunc)])
3094 3095 @classmethod
3096 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3097 """Choose which nodes should talk to which other nodes. 3098 3099 We will make nodes contact all nodes in their group, and one node from 3100 every other group. 3101 3102 @warning: This algorithm has a known issue if one node group is much 3103 smaller than others (e.g. just one node). In such a case all other 3104 nodes will talk to the single node. 3105 3106 """ 3107 online_nodes = sorted(node.name for node in group_nodes if not node.offline) 3108 sel = cls._SshNodeSelector(group_uuid, all_nodes) 3109 3110 return (online_nodes, 3111 dict((name, sorted([i.next() for i in sel])) 3112 for name in online_nodes))
3113
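The selector returns this group's online node names plus, for every one of them, one node name cycled from each foreign group, so cross-group SSH checks are spread round-robin instead of all hitting the same host. A hedged example of the (online nodes, per-node targets) pair for a two-node group and a single foreign three-node group (names invented):

online_nodes = ["node1.example.com", "node2.example.com"]
ssh_targets = {
    "node1.example.com": ["other1.example.com"],   # first name from the foreign cycle
    "node2.example.com": ["other2.example.com"],   # next name from the same cycle
}
# Per the docstring above, every node also contacts all nodes of its own group.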
3114 - def BuildHooksEnv(self):
3115 """Build hooks env. 3116 3117 Cluster-Verify hooks just ran in the post phase and their failure makes 3118 the output be logged in the verify output and the verification to fail. 3119 3120 """ 3121 env = { 3122 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()), 3123 } 3124 3125 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags())) 3126 for node in self.my_node_info.values()) 3127 3128 return env
3129
3130 - def BuildHooksNodes(self):
3131 """Build hooks nodes. 3132 3133 """ 3134 return ([], list(self.my_node_info.keys()))
3135
3136 - def Exec(self, feedback_fn):
3137 """Verify integrity of the node group, performing various test on nodes. 3138 3139 """ 3140 # This method has too many local variables. pylint: disable=R0914 3141 feedback_fn("* Verifying group '%s'" % self.group_info.name) 3142 3143 if not self.my_node_uuids: 3144 # empty node group 3145 feedback_fn("* Empty node group, skipping verification") 3146 return True 3147 3148 self.bad = False 3149 verbose = self.op.verbose 3150 self._feedback_fn = feedback_fn 3151 3152 vg_name = self.cfg.GetVGName() 3153 drbd_helper = self.cfg.GetDRBDHelper() 3154 cluster = self.cfg.GetClusterInfo() 3155 hypervisors = cluster.enabled_hypervisors 3156 node_data_list = self.my_node_info.values() 3157 3158 i_non_redundant = [] # Non redundant instances 3159 i_non_a_balanced = [] # Non auto-balanced instances 3160 i_offline = 0 # Count of offline instances 3161 n_offline = 0 # Count of offline nodes 3162 n_drained = 0 # Count of nodes being drained 3163 node_vol_should = {} 3164 3165 # FIXME: verify OS list 3166 3167 # File verification 3168 filemap = ComputeAncillaryFiles(cluster, False) 3169 3170 # do local checksums 3171 master_node_uuid = self.master_node = self.cfg.GetMasterNode() 3172 master_ip = self.cfg.GetMasterIP() 3173 3174 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids)) 3175 3176 user_scripts = [] 3177 if self.cfg.GetUseExternalMipScript(): 3178 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) 3179 3180 node_verify_param = { 3181 constants.NV_FILELIST: 3182 map(vcluster.MakeVirtualPath, 3183 utils.UniqueSequence(filename 3184 for files in filemap 3185 for filename in files)), 3186 constants.NV_NODELIST: 3187 self._SelectSshCheckNodes(node_data_list, self.group_uuid, 3188 self.all_node_info.values()), 3189 constants.NV_HYPERVISOR: hypervisors, 3190 constants.NV_HVPARAMS: 3191 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), 3192 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) 3193 for node in node_data_list 3194 if not node.offline], 3195 constants.NV_INSTANCELIST: hypervisors, 3196 constants.NV_VERSION: None, 3197 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 3198 constants.NV_NODESETUP: None, 3199 constants.NV_TIME: None, 3200 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip), 3201 constants.NV_OSLIST: None, 3202 constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(), 3203 constants.NV_USERSCRIPTS: user_scripts, 3204 constants.NV_CLIENT_CERT: None, 3205 } 3206 3207 if vg_name is not None: 3208 node_verify_param[constants.NV_VGLIST] = None 3209 node_verify_param[constants.NV_LVLIST] = vg_name 3210 node_verify_param[constants.NV_PVLIST] = [vg_name] 3211 3212 if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8): 3213 if drbd_helper: 3214 node_verify_param[constants.NV_DRBDVERSION] = None 3215 node_verify_param[constants.NV_DRBDLIST] = None 3216 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 3217 3218 if cluster.IsFileStorageEnabled() or \ 3219 cluster.IsSharedFileStorageEnabled(): 3220 # Load file storage paths only from master node 3221 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 3222 self.cfg.GetMasterNodeName() 3223 if cluster.IsFileStorageEnabled(): 3224 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \ 3225 cluster.file_storage_dir 3226 if cluster.IsSharedFileStorageEnabled(): 3227 node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \ 3228 cluster.shared_file_storage_dir 3229 3230 # bridge checks 3231 # FIXME: this needs to be changed per node-group, not 
cluster-wide 3232 bridges = set() 3233 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 3234 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3235 bridges.add(default_nicpp[constants.NIC_LINK]) 3236 for inst_uuid in self.my_inst_info.values(): 3237 for nic in inst_uuid.nics: 3238 full_nic = cluster.SimpleFillNIC(nic.nicparams) 3239 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3240 bridges.add(full_nic[constants.NIC_LINK]) 3241 3242 if bridges: 3243 node_verify_param[constants.NV_BRIDGES] = list(bridges) 3244 3245 # Build our expected cluster state 3246 node_image = dict((node.uuid, self.NodeImage(offline=node.offline, 3247 uuid=node.uuid, 3248 vm_capable=node.vm_capable)) 3249 for node in node_data_list) 3250 3251 # Gather OOB paths 3252 oob_paths = [] 3253 for node in self.all_node_info.values(): 3254 path = SupportsOob(self.cfg, node) 3255 if path and path not in oob_paths: 3256 oob_paths.append(path) 3257 3258 if oob_paths: 3259 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 3260 3261 for inst_uuid in self.my_inst_uuids: 3262 instance = self.my_inst_info[inst_uuid] 3263 if instance.admin_state == constants.ADMINST_OFFLINE: 3264 i_offline += 1 3265 3266 for nuuid in instance.all_nodes: 3267 if nuuid not in node_image: 3268 gnode = self.NodeImage(uuid=nuuid) 3269 gnode.ghost = (nuuid not in self.all_node_info) 3270 node_image[nuuid] = gnode 3271 3272 instance.MapLVsByNode(node_vol_should) 3273 3274 pnode = instance.primary_node 3275 node_image[pnode].pinst.append(instance.uuid) 3276 3277 for snode in instance.secondary_nodes: 3278 nimg = node_image[snode] 3279 nimg.sinst.append(instance.uuid) 3280 if pnode not in nimg.sbp: 3281 nimg.sbp[pnode] = [] 3282 nimg.sbp[pnode].append(instance.uuid) 3283 3284 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, 3285 self.my_node_info.keys()) 3286 # The value of exclusive_storage should be the same across the group, so if 3287 # it's True for at least a node, we act as if it were set for all the nodes 3288 self._exclusive_storage = compat.any(es_flags.values()) 3289 if self._exclusive_storage: 3290 node_verify_param[constants.NV_EXCLUSIVEPVS] = True 3291 3292 node_group_uuids = dict(map(lambda n: (n.name, n.group), 3293 self.cfg.GetAllNodesInfo().values())) 3294 groups_config = self.cfg.GetAllNodeGroupsInfoDict() 3295 3296 # At this point, we have the in-memory data structures complete, 3297 # except for the runtime information, which we'll gather next 3298 3299 # Due to the way our RPC system works, exact response times cannot be 3300 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 3301 # time before and after executing the request, we can at least have a time 3302 # window. 
3303 nvinfo_starttime = time.time() 3304 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids, 3305 node_verify_param, 3306 self.cfg.GetClusterName(), 3307 self.cfg.GetClusterInfo().hvparams, 3308 node_group_uuids, 3309 groups_config) 3310 nvinfo_endtime = time.time() 3311 3312 if self.extra_lv_nodes and vg_name is not None: 3313 extra_lv_nvinfo = \ 3314 self.rpc.call_node_verify(self.extra_lv_nodes, 3315 {constants.NV_LVLIST: vg_name}, 3316 self.cfg.GetClusterName(), 3317 self.cfg.GetClusterInfo().hvparams, 3318 node_group_uuids, 3319 groups_config) 3320 else: 3321 extra_lv_nvinfo = {} 3322 3323 all_drbd_map = self.cfg.ComputeDRBDMap() 3324 3325 feedback_fn("* Gathering disk information (%s nodes)" % 3326 len(self.my_node_uuids)) 3327 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image, 3328 self.my_inst_info) 3329 3330 feedback_fn("* Verifying configuration file consistency") 3331 3332 self._VerifyClientCertificates(self.my_node_info.values(), all_nvinfo) 3333 # If not all nodes are being checked, we need to make sure the master node 3334 # and a non-checked vm_capable node are in the list. 3335 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info) 3336 if absent_node_uuids: 3337 vf_nvinfo = all_nvinfo.copy() 3338 vf_node_info = list(self.my_node_info.values()) 3339 additional_node_uuids = [] 3340 if master_node_uuid not in self.my_node_info: 3341 additional_node_uuids.append(master_node_uuid) 3342 vf_node_info.append(self.all_node_info[master_node_uuid]) 3343 # Add the first vm_capable node we find which is not included, 3344 # excluding the master node (which we already have) 3345 for node_uuid in absent_node_uuids: 3346 nodeinfo = self.all_node_info[node_uuid] 3347 if (nodeinfo.vm_capable and not nodeinfo.offline and 3348 node_uuid != master_node_uuid): 3349 additional_node_uuids.append(node_uuid) 3350 vf_node_info.append(self.all_node_info[node_uuid]) 3351 break 3352 key = constants.NV_FILELIST 3353 vf_nvinfo.update(self.rpc.call_node_verify( 3354 additional_node_uuids, {key: node_verify_param[key]}, 3355 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams, 3356 node_group_uuids, 3357 groups_config)) 3358 else: 3359 vf_nvinfo = all_nvinfo 3360 vf_node_info = self.my_node_info.values() 3361 3362 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap) 3363 3364 feedback_fn("* Verifying node status") 3365 3366 refos_img = None 3367 3368 for node_i in node_data_list: 3369 nimg = node_image[node_i.uuid] 3370 3371 if node_i.offline: 3372 if verbose: 3373 feedback_fn("* Skipping offline node %s" % (node_i.name,)) 3374 n_offline += 1 3375 continue 3376 3377 if node_i.uuid == master_node_uuid: 3378 ntype = "master" 3379 elif node_i.master_candidate: 3380 ntype = "master candidate" 3381 elif node_i.drained: 3382 ntype = "drained" 3383 n_drained += 1 3384 else: 3385 ntype = "regular" 3386 if verbose: 3387 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype)) 3388 3389 msg = all_nvinfo[node_i.uuid].fail_msg 3390 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name, 3391 "while contacting node: %s", msg) 3392 if msg: 3393 nimg.rpc_fail = True 3394 continue 3395 3396 nresult = all_nvinfo[node_i.uuid].payload 3397 3398 nimg.call_ok = self._VerifyNode(node_i, nresult) 3399 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 3400 self._VerifyNodeNetwork(node_i, nresult) 3401 self._VerifyNodeUserScripts(node_i, nresult) 3402 self._VerifyOob(node_i, nresult) 3403 self._VerifyAcceptedFileStoragePaths(node_i, 
nresult, 3404 node_i.uuid == master_node_uuid) 3405 self._VerifyFileStoragePaths(node_i, nresult) 3406 self._VerifySharedFileStoragePaths(node_i, nresult) 3407 3408 if nimg.vm_capable: 3409 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg) 3410 if constants.DT_DRBD8 in cluster.enabled_disk_templates: 3411 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper, 3412 all_drbd_map) 3413 3414 if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \ 3415 (constants.DT_DRBD8 in cluster.enabled_disk_templates): 3416 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 3417 self._UpdateNodeInstances(node_i, nresult, nimg) 3418 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 3419 self._UpdateNodeOS(node_i, nresult, nimg) 3420 3421 if not nimg.os_fail: 3422 if refos_img is None: 3423 refos_img = nimg 3424 self._VerifyNodeOS(node_i, nimg, refos_img) 3425 self._VerifyNodeBridges(node_i, nresult, bridges) 3426 3427 # Check whether all running instances are primary for the node. (This 3428 # can no longer be done from _VerifyInstance below, since some of the 3429 # wrong instances could be from other node groups.) 3430 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst) 3431 3432 for inst_uuid in non_primary_inst_uuids: 3433 test = inst_uuid in self.all_inst_info 3434 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, 3435 self.cfg.GetInstanceName(inst_uuid), 3436 "instance should not run on node %s", node_i.name) 3437 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name, 3438 "node is running unknown instance %s", inst_uuid) 3439 3440 self._VerifyGroupDRBDVersion(all_nvinfo) 3441 self._VerifyGroupLVM(node_image, vg_name) 3442 3443 for node_uuid, result in extra_lv_nvinfo.items(): 3444 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload, 3445 node_image[node_uuid], vg_name) 3446 3447 feedback_fn("* Verifying instance status") 3448 for inst_uuid in self.my_inst_uuids: 3449 instance = self.my_inst_info[inst_uuid] 3450 if verbose: 3451 feedback_fn("* Verifying instance %s" % instance.name) 3452 self._VerifyInstance(instance, node_image, instdisk[inst_uuid]) 3453 3454 # If the instance is non-redundant we cannot survive losing its primary 3455 # node, so we are not N+1 compliant. 3456 if instance.disk_template not in constants.DTS_MIRRORED: 3457 i_non_redundant.append(instance) 3458 3459 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]: 3460 i_non_a_balanced.append(instance) 3461 3462 feedback_fn("* Verifying orphan volumes") 3463 reserved = utils.FieldSet(*cluster.reserved_lvs) 3464 3465 # We will get spurious "unknown volume" warnings if any node of this group 3466 # is secondary for an instance whose primary is in another group. To avoid 3467 # them, we find these instances and add their volumes to node_vol_should. 3468 for instance in self.all_inst_info.values(): 3469 for secondary in instance.secondary_nodes: 3470 if (secondary in self.my_node_info 3471 and instance.name not in self.my_inst_info): 3472 instance.MapLVsByNode(node_vol_should) 3473 break 3474 3475 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 3476 3477 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 3478 feedback_fn("* Verifying N+1 Memory redundancy") 3479 self._VerifyNPlusOneMemory(node_image, self.my_inst_info) 3480 3481 feedback_fn("* Other Notes") 3482 if i_non_redundant: 3483 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 
3484 % len(i_non_redundant)) 3485 3486 if i_non_a_balanced: 3487 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 3488 % len(i_non_a_balanced)) 3489 3490 if i_offline: 3491 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline) 3492 3493 if n_offline: 3494 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 3495 3496 if n_drained: 3497 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 3498 3499 return not self.bad
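The Exec method above funnels most checks through a single node_verify RPC and brackets that call with timestamps so the time-skew check can tolerate slow responses. The standalone sketch below imitates the result-handling pattern (offline node, RPC failure, or usable payload) with a stand-in result class and made-up node UUIDs and check names; it is not the real Ganeti RPC interface:

# Stand-in classes and data only; the real code uses the Ganeti RPC layer
# and the NV_* constants instead.
import time


class FakeVerifyResult(object):
  def __init__(self, offline=False, fail_msg=None, payload=None):
    self.offline = offline
    self.fail_msg = fail_msg
    self.payload = payload or {}


node_verify_param = {"version": None, "time": None}  # requested checks
print("requested checks: %s" % sorted(node_verify_param))

start = time.time()
all_nvinfo = {
  "node-uuid-1": FakeVerifyResult(payload={"version": (2, 16),
                                           "time": time.time()}),
  "node-uuid-2": FakeVerifyResult(fail_msg="connection timed out"),
  "node-uuid-3": FakeVerifyResult(offline=True),
}
end = time.time()

for uuid, res in sorted(all_nvinfo.items()):
  if res.offline:
    print("%s: skipped, node is offline" % uuid)
  elif res.fail_msg:
    print("%s: RPC failed: %s" % (uuid, res.fail_msg))
  else:
    # Bracketing the call with timestamps gives a [start, end] window
    # against which the remote clock reading can be judged, even when the
    # RPC itself was slow.
    remote_time = res.payload["time"]
    print("%s: clock inside window: %s"
          % (uuid, start <= remote_time <= end))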
3500
3501 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3502 """Analyze the post-hooks' result 3503 3504 This method analyses the hook result, handles it, and sends some 3505 nicely-formatted feedback back to the user. 3506 3507 @param phase: one of L{constants.HOOKS_PHASE_POST} or 3508 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 3509 @param hooks_results: the results of the multi-node hooks rpc call 3510 @param feedback_fn: function used send feedback back to the caller 3511 @param lu_result: previous Exec result 3512 @return: the new Exec result, based on the previous result 3513 and hook results 3514 3515 """ 3516 # We only really run POST phase hooks, only for non-empty groups, 3517 # and are only interested in their results 3518 if not self.my_node_uuids: 3519 # empty node group 3520 pass 3521 elif phase == constants.HOOKS_PHASE_POST: 3522 # Used to change hooks' output to proper indentation 3523 feedback_fn("* Hooks Results") 3524 assert hooks_results, "invalid result from hooks" 3525 3526 for node_name in hooks_results: 3527 res = hooks_results[node_name] 3528 msg = res.fail_msg 3529 test = msg and not res.offline 3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3531 "Communication failure in hooks execution: %s", msg) 3532 if test: 3533 lu_result = False 3534 continue 3535 if res.offline: 3536 # No need to investigate payload if node is offline 3537 continue 3538 for script, hkr, output in res.payload: 3539 test = hkr == constants.HKR_FAIL 3540 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3541 "Script %s failed, output:", script) 3542 if test: 3543 output = self._HOOKS_INDENT_RE.sub(" ", output) 3544 feedback_fn("%s" % output) 3545 lu_result = False 3546 3547 return lu_result
3548
3549 3550 -class LUClusterVerifyDisks(NoHooksLU):
3551 """Verifies the cluster disks status. 3552 3553 """ 3554 REQ_BGL = False 3555
3556 - def ExpandNames(self):
3557 self.share_locks = ShareAll() 3558 self.needed_locks = { 3559 locking.LEVEL_NODEGROUP: locking.ALL_SET, 3560 }
3561
3562 - def Exec(self, feedback_fn):
3563 group_names = self.owned_locks(locking.LEVEL_NODEGROUP) 3564 3565 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group 3566 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)] 3567 for group in group_names])
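ResultWithJobs lets the LU hand further work back to the job queue; here one single-opcode job is created per node group. A minimal illustration of the resulting job list, with a stand-in opcode class and made-up group names:

# Stand-in opcode class; the real code wraps opcodes.OpGroupVerifyDisks in
# ResultWithJobs instead of printing the list.
class FakeOpGroupVerifyDisks(object):
  def __init__(self, group_name):
    self.group_name = group_name

  def __repr__(self):
    return "OpGroupVerifyDisks(group_name=%r)" % self.group_name


group_names = ["default", "rack1", "rack2"]
jobs = [[FakeOpGroupVerifyDisks(group_name=name)] for name in group_names]
print(jobs)  # three jobs, each consisting of exactly one opcode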
3568