
Source Code for Module ganeti.cmdlib.cluster

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
   5  # 
   6  # This program is free software; you can redistribute it and/or modify 
   7  # it under the terms of the GNU General Public License as published by 
   8  # the Free Software Foundation; either version 2 of the License, or 
   9  # (at your option) any later version. 
  10  # 
  11  # This program is distributed in the hope that it will be useful, but 
  12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  14  # General Public License for more details. 
  15  # 
  16  # You should have received a copy of the GNU General Public License 
  17  # along with this program; if not, write to the Free Software 
  18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
  19  # 02110-1301, USA. 
  20   
  21   
  22  """Logical units dealing with the cluster.""" 
  23   
  24  import OpenSSL 
  25   
  26  import copy 
  27  import itertools 
  28  import logging 
  29  import operator 
  30  import os 
  31  import re 
  32  import time 
  33   
  34  from ganeti import compat 
  35  from ganeti import constants 
  36  from ganeti import errors 
  37  from ganeti import hypervisor 
  38  from ganeti import locking 
  39  from ganeti import masterd 
  40  from ganeti import netutils 
  41  from ganeti import objects 
  42  from ganeti import opcodes 
  43  from ganeti import pathutils 
  44  from ganeti import query 
  45  from ganeti import rpc 
  46  from ganeti import runtime 
  47  from ganeti import ssh 
  48  from ganeti import uidpool 
  49  from ganeti import utils 
  50  from ganeti import vcluster 
  51   
  52  from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \ 
  53    ResultWithJobs 
  54  from ganeti.cmdlib.common import ShareAll, RunPostHook, \ 
  55    ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \ 
  56    GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \ 
  57    GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \ 
  58    CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \ 
  59    ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \ 
  60    CheckIpolicyVsDiskTemplates 
  61   
  62  import ganeti.masterd.instance 

class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")

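
# A minimal illustrative sketch (not part of the module): the two master-IP
# logical units above rely on the RPC result convention where Raise() aborts
# the operation when the remote call failed, while Warn() (used for instance
# by LUClusterDestroy below) only reports the problem and carries on.  The
# stub class is hypothetical and merely mimics that behaviour.
class _StubRpcResult(object):
  def __init__(self, fail_msg=None):
    self.fail_msg = fail_msg

  def Raise(self, msg):
    if self.fail_msg:
      raise RuntimeError("%s: %s" % (msg, self.fail_msg))

  def Warn(self, msg, warn_fn):
    if self.fail_msg:
      warn_fn("%s: %s" % (msg, self.fail_msg))

_StubRpcResult().Raise("Could not activate the master IP")  # no-op on success
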

class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.cq = ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    assert len(result) == 1

    return result[0]


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    RunPostHook(self, self.cfg.GetNodeName(master_params.uuid))

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                     master_params, ems)
    result.Warn("Error disabling the master IP address", self.LogWarning)
    return master_params.uuid


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class ClusterQuery(QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
      nodes = lu.cfg.GetAllNodesInfo()
    else:
      cluster = NotImplemented
      nodes = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      master_node_uuid = lu.cfg.GetMasterNode()

      result = lu.rpc.call_get_watcher_pause(master_node_uuid)
      result.Raise("Can't retrieve watcher pause from master node '%s'" %
                   lu.cfg.GetMasterNodeName())

      watcher_pause = result.payload
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "vcs_version": constants.VCS_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": self.cfg.GetMasterNodeName(),
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      "enabled_disk_templates": cluster.enabled_disk_templates,
      }

    return result

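
# A standalone sketch of the os_hvp filtering done in LUClusterQuery.Exec
# above: per-OS hypervisor overrides are reported only for hypervisors that
# are currently enabled.  OS name and parameter values are hypothetical.
_enabled_hvs = ["kvm"]
_cluster_os_hvp = {
  "debian-image": {"kvm": {"kernel_path": ""},
                   "xen-pvm": {"use_bootloader": True}},
}
_os_hvp = {}
for _os_name, _hv_dict in _cluster_os_hvp.items():
  _os_hvp[_os_name] = dict((_hv, _params) for _hv, _params in _hv_dict.items()
                           if _hv in _enabled_hvs)
assert _os_hvp == {"debian-image": {"kvm": {"kernel_path": ""}}}
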

class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
    }
    self.share_locks = ShareAll()

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    RedistributeAncillaryFiles(self)

357 358 -class LUClusterRename(LogicalUnit):
359 """Rename the cluster. 360 361 """ 362 HPATH = "cluster-rename" 363 HTYPE = constants.HTYPE_CLUSTER 364
365 - def BuildHooksEnv(self):
366 """Build hooks env. 367 368 """ 369 return { 370 "OP_TARGET": self.cfg.GetClusterName(), 371 "NEW_NAME": self.op.name, 372 }
373
374 - def BuildHooksNodes(self):
375 """Build hooks nodes. 376 377 """ 378 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
379
380 - def CheckPrereq(self):
381 """Verify that the passed name is a valid one. 382 383 """ 384 hostname = netutils.GetHostname(name=self.op.name, 385 family=self.cfg.GetPrimaryIPFamily()) 386 387 new_name = hostname.name 388 self.ip = new_ip = hostname.ip 389 old_name = self.cfg.GetClusterName() 390 old_ip = self.cfg.GetMasterIP() 391 if new_name == old_name and new_ip == old_ip: 392 raise errors.OpPrereqError("Neither the name nor the IP address of the" 393 " cluster has changed", 394 errors.ECODE_INVAL) 395 if new_ip != old_ip: 396 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 397 raise errors.OpPrereqError("The given cluster IP address (%s) is" 398 " reachable on the network" % 399 new_ip, errors.ECODE_NOTUNIQUE) 400 401 self.op.name = new_name
402
403 - def Exec(self, feedback_fn):
404 """Rename the cluster. 405 406 """ 407 clustername = self.op.name 408 new_ip = self.ip 409 410 # shutdown the master IP 411 master_params = self.cfg.GetMasterNetworkParameters() 412 ems = self.cfg.GetUseExternalMipScript() 413 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 414 master_params, ems) 415 result.Raise("Could not disable the master role") 416 417 try: 418 cluster = self.cfg.GetClusterInfo() 419 cluster.cluster_name = clustername 420 cluster.master_ip = new_ip 421 self.cfg.Update(cluster, feedback_fn) 422 423 # update the known hosts file 424 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) 425 node_list = self.cfg.GetOnlineNodeList() 426 try: 427 node_list.remove(master_params.uuid) 428 except ValueError: 429 pass 430 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) 431 finally: 432 master_params.ip = new_ip 433 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 434 master_params, ems) 435 result.Warn("Could not re-enable the master role on the master," 436 " please restart manually", self.LogWarning) 437 438 return clustername
439
440 441 -class LUClusterRepairDiskSizes(NoHooksLU):
442 """Verifies the cluster disks sizes. 443 444 """ 445 REQ_BGL = False 446
447 - def ExpandNames(self):
448 if self.op.instances: 449 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances) 450 # Not getting the node allocation lock as only a specific set of 451 # instances (and their nodes) is going to be acquired 452 self.needed_locks = { 453 locking.LEVEL_NODE_RES: [], 454 locking.LEVEL_INSTANCE: self.wanted_names, 455 } 456 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 457 else: 458 self.wanted_names = None 459 self.needed_locks = { 460 locking.LEVEL_NODE_RES: locking.ALL_SET, 461 locking.LEVEL_INSTANCE: locking.ALL_SET, 462 463 # This opcode is acquires the node locks for all instances 464 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 465 } 466 467 self.share_locks = { 468 locking.LEVEL_NODE_RES: 1, 469 locking.LEVEL_INSTANCE: 0, 470 locking.LEVEL_NODE_ALLOC: 1, 471 }
472
473 - def DeclareLocks(self, level):
474 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: 475 self._LockInstancesNodes(primary_only=True, level=level)
476
477 - def CheckPrereq(self):
478 """Check prerequisites. 479 480 This only checks the optional instance list against the existing names. 481 482 """ 483 if self.wanted_names is None: 484 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) 485 486 self.wanted_instances = \ 487 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
488
489 - def _EnsureChildSizes(self, disk):
490 """Ensure children of the disk have the needed disk size. 491 492 This is valid mainly for DRBD8 and fixes an issue where the 493 children have smaller disk size. 494 495 @param disk: an L{ganeti.objects.Disk} object 496 497 """ 498 if disk.dev_type == constants.DT_DRBD8: 499 assert disk.children, "Empty children for DRBD8?" 500 fchild = disk.children[0] 501 mismatch = fchild.size < disk.size 502 if mismatch: 503 self.LogInfo("Child disk has size %d, parent %d, fixing", 504 fchild.size, disk.size) 505 fchild.size = disk.size 506 507 # and we recurse on this child only, not on the metadev 508 return self._EnsureChildSizes(fchild) or mismatch 509 else: 510 return False
511
512 - def Exec(self, feedback_fn):
513 """Verify the size of cluster disks. 514 515 """ 516 # TODO: check child disks too 517 # TODO: check differences in size between primary/secondary nodes 518 per_node_disks = {} 519 for instance in self.wanted_instances: 520 pnode = instance.primary_node 521 if pnode not in per_node_disks: 522 per_node_disks[pnode] = [] 523 for idx, disk in enumerate(instance.disks): 524 per_node_disks[pnode].append((instance, idx, disk)) 525 526 assert not (frozenset(per_node_disks.keys()) - 527 self.owned_locks(locking.LEVEL_NODE_RES)), \ 528 "Not owning correct locks" 529 assert not self.owned_locks(locking.LEVEL_NODE) 530 531 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, 532 per_node_disks.keys()) 533 534 changed = [] 535 for node_uuid, dskl in per_node_disks.items(): 536 newl = [v[2].Copy() for v in dskl] 537 for dsk in newl: 538 self.cfg.SetDiskID(dsk, node_uuid) 539 node_name = self.cfg.GetNodeName(node_uuid) 540 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl) 541 if result.fail_msg: 542 self.LogWarning("Failure in blockdev_getdimensions call to node" 543 " %s, ignoring", node_name) 544 continue 545 if len(result.payload) != len(dskl): 546 logging.warning("Invalid result from node %s: len(dksl)=%d," 547 " result.payload=%s", node_name, len(dskl), 548 result.payload) 549 self.LogWarning("Invalid result from node %s, ignoring node results", 550 node_name) 551 continue 552 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload): 553 if dimensions is None: 554 self.LogWarning("Disk %d of instance %s did not return size" 555 " information, ignoring", idx, instance.name) 556 continue 557 if not isinstance(dimensions, (tuple, list)): 558 self.LogWarning("Disk %d of instance %s did not return valid" 559 " dimension information, ignoring", idx, 560 instance.name) 561 continue 562 (size, spindles) = dimensions 563 if not isinstance(size, (int, long)): 564 self.LogWarning("Disk %d of instance %s did not return valid" 565 " size information, ignoring", idx, instance.name) 566 continue 567 size = size >> 20 568 if size != disk.size: 569 self.LogInfo("Disk %d of instance %s has mismatched size," 570 " correcting: recorded %d, actual %d", idx, 571 instance.name, disk.size, size) 572 disk.size = size 573 self.cfg.Update(instance, feedback_fn) 574 changed.append((instance.name, idx, "size", size)) 575 if es_flags[node_uuid]: 576 if spindles is None: 577 self.LogWarning("Disk %d of instance %s did not return valid" 578 " spindles information, ignoring", idx, 579 instance.name) 580 elif disk.spindles is None or disk.spindles != spindles: 581 self.LogInfo("Disk %d of instance %s has mismatched spindles," 582 " correcting: recorded %s, actual %s", 583 idx, instance.name, disk.spindles, spindles) 584 disk.spindles = spindles 585 self.cfg.Update(instance, feedback_fn) 586 changed.append((instance.name, idx, "spindles", disk.spindles)) 587 if self._EnsureChildSizes(disk): 588 self.cfg.Update(instance, feedback_fn) 589 changed.append((instance.name, idx, "size", disk.size)) 590 return changed
591
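
# A standalone sketch (hypothetical _StubDisk class, slightly simplified) of
# the size fix performed by LUClusterRepairDiskSizes._EnsureChildSizes above:
# for a DRBD8 disk the first child (the data device) is grown to the parent
# size if it is smaller, and the caller is told whether anything changed so
# the configuration can be rewritten.
class _StubDisk(object):
  def __init__(self, dev_type, size, children=()):
    self.dev_type = dev_type
    self.size = size
    self.children = list(children)

def _ensure_child_sizes(disk):
  if disk.dev_type != "drbd8" or not disk.children:
    return False
  fchild = disk.children[0]
  mismatch = fchild.size < disk.size
  if mismatch:
    fchild.size = disk.size
  # recurse on the data child only, not on the metadev
  return _ensure_child_sizes(fchild) or mismatch

_drbd = _StubDisk("drbd8", 1024,
                  [_StubDisk("plain", 1000), _StubDisk("plain", 128)])
assert _ensure_child_sizes(_drbd) and _drbd.children[0].size == 1024
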

def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)

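
# Hedged usage note for _ValidateNetmask above: the helper resolves the
# address class for the cluster's primary IP family and asks it whether the
# CIDR prefix length is acceptable; out-of-range values make it raise
# errors.OpPrereqError.  The direct lookup below mirrors the internal call for
# an IPv4 cluster and assumes a /24 prefix is accepted.
_ipcls = netutils.IPAddress.GetClassFromIpFamily(netutils.IP4Address.family)
assert _ipcls.ValidateNetmask(24)
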

def CheckFileBasedStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates,
    file_disk_template):
  """Checks whether the given file-based storage directory is acceptable.

  Note: This function is public, because it is also used in bootstrap.py.

  @type logging_warn_fn: function
  @param logging_warn_fn: function which accepts a string and logs it
  @type file_storage_dir: string
  @param file_storage_dir: the directory to be used for file-based instances
  @type enabled_disk_templates: list of string
  @param enabled_disk_templates: the list of enabled disk templates
  @type file_disk_template: string
  @param file_disk_template: the file-based disk template for which the
      path should be checked

  """
  assert (file_disk_template in
          utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
  file_storage_enabled = file_disk_template in enabled_disk_templates
  if file_storage_dir is not None:
    if file_storage_dir == "":
      if file_storage_enabled:
        raise errors.OpPrereqError(
            "Unsetting the '%s' storage directory while having '%s' storage"
            " enabled is not permitted." %
            (file_disk_template, file_disk_template))
    else:
      if not file_storage_enabled:
        logging_warn_fn(
            "Specified a %s storage directory, although %s storage is not"
            " enabled." % (file_disk_template, file_disk_template))
  else:
    raise errors.ProgrammerError("Received %s storage dir with value"
                                 " 'None'." % file_disk_template)


def CheckFileStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates):
  """Checks whether the given file storage directory is acceptable.

  @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}

  """
  CheckFileBasedStoragePathVsEnabledDiskTemplates(
      logging_warn_fn, file_storage_dir, enabled_disk_templates,
      constants.DT_FILE)


def CheckSharedFileStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates):
  """Checks whether the given shared file storage directory is acceptable.

  @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}

  """
  CheckFileBasedStoragePathVsEnabledDiskTemplates(
      logging_warn_fn, file_storage_dir, enabled_disk_templates,
      constants.DT_SHARED_FILE)

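
# A hedged usage sketch for the checks above: naming a file storage directory
# while the 'file' disk template is not enabled only produces a warning,
# whereas clearing the directory of an enabled template raises
# errors.OpPrereqError.  The directory path is hypothetical.
_path_warnings = []
CheckFileStoragePathVsEnabledDiskTemplates(
  _path_warnings.append, "/srv/ganeti/file-storage", [constants.DT_DRBD8])
assert len(_path_warnings) == 1
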
675 676 -class LUClusterSetParams(LogicalUnit):
677 """Change the parameters of the cluster. 678 679 """ 680 HPATH = "cluster-modify" 681 HTYPE = constants.HTYPE_CLUSTER 682 REQ_BGL = False 683
684 - def CheckArguments(self):
685 """Check parameters 686 687 """ 688 if self.op.uid_pool: 689 uidpool.CheckUidPool(self.op.uid_pool) 690 691 if self.op.add_uids: 692 uidpool.CheckUidPool(self.op.add_uids) 693 694 if self.op.remove_uids: 695 uidpool.CheckUidPool(self.op.remove_uids) 696 697 if self.op.master_netmask is not None: 698 _ValidateNetmask(self.cfg, self.op.master_netmask) 699 700 if self.op.diskparams: 701 for dt_params in self.op.diskparams.values(): 702 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 703 try: 704 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) 705 except errors.OpPrereqError, err: 706 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 707 errors.ECODE_INVAL)
708
709 - def ExpandNames(self):
710 # FIXME: in the future maybe other cluster params won't require checking on 711 # all nodes to be modified. 712 # FIXME: This opcode changes cluster-wide settings. Is acquiring all 713 # resource locks the right thing, shouldn't it be the BGL instead? 714 self.needed_locks = { 715 locking.LEVEL_NODE: locking.ALL_SET, 716 locking.LEVEL_INSTANCE: locking.ALL_SET, 717 locking.LEVEL_NODEGROUP: locking.ALL_SET, 718 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 719 } 720 self.share_locks = ShareAll()
721
722 - def BuildHooksEnv(self):
723 """Build hooks env. 724 725 """ 726 return { 727 "OP_TARGET": self.cfg.GetClusterName(), 728 "NEW_VG_NAME": self.op.vg_name, 729 }
730
731 - def BuildHooksNodes(self):
732 """Build hooks nodes. 733 734 """ 735 mn = self.cfg.GetMasterNode() 736 return ([mn], [mn])
737
738 - def _CheckVgName(self, node_uuids, enabled_disk_templates, 739 new_enabled_disk_templates):
740 """Check the consistency of the vg name on all nodes and in case it gets 741 unset whether there are instances still using it. 742 743 """ 744 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates) 745 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates, 746 new_enabled_disk_templates) 747 current_vg_name = self.cfg.GetVGName() 748 749 if self.op.vg_name == '': 750 if lvm_is_enabled: 751 raise errors.OpPrereqError("Cannot unset volume group if lvm-based" 752 " disk templates are or get enabled.") 753 754 if self.op.vg_name is None: 755 if current_vg_name is None and lvm_is_enabled: 756 raise errors.OpPrereqError("Please specify a volume group when" 757 " enabling lvm-based disk-templates.") 758 759 if self.op.vg_name is not None and not self.op.vg_name: 760 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN): 761 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 762 " instances exist", errors.ECODE_INVAL) 763 764 if (self.op.vg_name is not None and lvm_is_enabled) or \ 765 (self.cfg.GetVGName() is not None and lvm_gets_enabled): 766 self._CheckVgNameOnNodes(node_uuids)
767
768 - def _CheckVgNameOnNodes(self, node_uuids):
769 """Check the status of the volume group on each node. 770 771 """ 772 vglist = self.rpc.call_vg_list(node_uuids) 773 for node_uuid in node_uuids: 774 msg = vglist[node_uuid].fail_msg 775 if msg: 776 # ignoring down node 777 self.LogWarning("Error while gathering data on node %s" 778 " (ignoring node): %s", 779 self.cfg.GetNodeName(node_uuid), msg) 780 continue 781 vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload, 782 self.op.vg_name, 783 constants.MIN_VG_SIZE) 784 if vgstatus: 785 raise errors.OpPrereqError("Error on node '%s': %s" % 786 (self.cfg.GetNodeName(node_uuid), vgstatus), 787 errors.ECODE_ENVIRON)
788 789 @staticmethod
790 - def _GetEnabledDiskTemplatesInner(op_enabled_disk_templates, 791 old_enabled_disk_templates):
792 """Determines the enabled disk templates and the subset of disk templates 793 that are newly enabled by this operation. 794 795 """ 796 enabled_disk_templates = None 797 new_enabled_disk_templates = [] 798 if op_enabled_disk_templates: 799 enabled_disk_templates = op_enabled_disk_templates 800 new_enabled_disk_templates = \ 801 list(set(enabled_disk_templates) 802 - set(old_enabled_disk_templates)) 803 else: 804 enabled_disk_templates = old_enabled_disk_templates 805 return (enabled_disk_templates, new_enabled_disk_templates)
806
807 - def _GetEnabledDiskTemplates(self, cluster):
808 """Determines the enabled disk templates and the subset of disk templates 809 that are newly enabled by this operation. 810 811 """ 812 return self._GetEnabledDiskTemplatesInner(self.op.enabled_disk_templates, 813 cluster.enabled_disk_templates)
814
815 - def _CheckIpolicy(self, cluster, enabled_disk_templates):
816 """Checks the ipolicy. 817 818 @type cluster: C{objects.Cluster} 819 @param cluster: the cluster's configuration 820 @type enabled_disk_templates: list of string 821 @param enabled_disk_templates: list of (possibly newly) enabled disk 822 templates 823 824 """ 825 # FIXME: write unit tests for this 826 if self.op.ipolicy: 827 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, 828 group_policy=False) 829 830 CheckIpolicyVsDiskTemplates(self.new_ipolicy, 831 enabled_disk_templates) 832 833 all_instances = self.cfg.GetAllInstancesInfo().values() 834 violations = set() 835 for group in self.cfg.GetAllNodeGroupsInfo().values(): 836 instances = frozenset([inst for inst in all_instances 837 if compat.any(nuuid in group.members 838 for nuuid in inst.all_nodes)]) 839 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) 840 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group) 841 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances, 842 self.cfg) 843 if new: 844 violations.update(new) 845 846 if violations: 847 self.LogWarning("After the ipolicy change the following instances" 848 " violate them: %s", 849 utils.CommaJoin(utils.NiceSort(violations))) 850 else: 851 CheckIpolicyVsDiskTemplates(cluster.ipolicy, 852 enabled_disk_templates)
853
854 - def CheckPrereq(self):
855 """Check prerequisites. 856 857 This checks whether the given params don't conflict and 858 if the given volume group is valid. 859 860 """ 861 if self.op.drbd_helper is not None and not self.op.drbd_helper: 862 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8): 863 raise errors.OpPrereqError("Cannot disable drbd helper while" 864 " drbd-based instances exist", 865 errors.ECODE_INVAL) 866 867 node_uuids = self.owned_locks(locking.LEVEL_NODE) 868 self.cluster = cluster = self.cfg.GetClusterInfo() 869 870 vm_capable_node_uuids = [node.uuid 871 for node in self.cfg.GetAllNodesInfo().values() 872 if node.uuid in node_uuids and node.vm_capable] 873 874 (enabled_disk_templates, new_enabled_disk_templates) = \ 875 self._GetEnabledDiskTemplates(cluster) 876 877 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates, 878 new_enabled_disk_templates) 879 880 if self.op.file_storage_dir is not None: 881 CheckFileStoragePathVsEnabledDiskTemplates( 882 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates) 883 884 if self.op.shared_file_storage_dir is not None: 885 CheckSharedFileStoragePathVsEnabledDiskTemplates( 886 self.LogWarning, self.op.shared_file_storage_dir, 887 enabled_disk_templates) 888 889 if self.op.drbd_helper: 890 # checks given drbd helper on all nodes 891 helpers = self.rpc.call_drbd_helper(node_uuids) 892 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids): 893 if ninfo.offline: 894 self.LogInfo("Not checking drbd helper on offline node %s", 895 ninfo.name) 896 continue 897 msg = helpers[ninfo.uuid].fail_msg 898 if msg: 899 raise errors.OpPrereqError("Error checking drbd helper on node" 900 " '%s': %s" % (ninfo.name, msg), 901 errors.ECODE_ENVIRON) 902 node_helper = helpers[ninfo.uuid].payload 903 if node_helper != self.op.drbd_helper: 904 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 905 (ninfo.name, node_helper), 906 errors.ECODE_ENVIRON) 907 908 # validate params changes 909 if self.op.beparams: 910 objects.UpgradeBeParams(self.op.beparams) 911 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 912 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 913 914 if self.op.ndparams: 915 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 916 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 917 918 # TODO: we need a more general way to handle resetting 919 # cluster-level parameters to default values 920 if self.new_ndparams["oob_program"] == "": 921 self.new_ndparams["oob_program"] = \ 922 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 923 924 if self.op.hv_state: 925 new_hv_state = MergeAndVerifyHvState(self.op.hv_state, 926 self.cluster.hv_state_static) 927 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) 928 for hv, values in new_hv_state.items()) 929 930 if self.op.disk_state: 931 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, 932 self.cluster.disk_state_static) 933 self.new_disk_state = \ 934 dict((storage, dict((name, cluster.SimpleFillDiskState(values)) 935 for name, values in svalues.items())) 936 for storage, svalues in new_disk_state.items()) 937 938 self._CheckIpolicy(cluster, enabled_disk_templates) 939 940 if self.op.nicparams: 941 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 942 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 943 objects.NIC.CheckParameterSyntax(self.new_nicparams) 944 nic_errors = [] 945 946 # check all instances for consistency 947 for instance in 
self.cfg.GetAllInstancesInfo().values(): 948 for nic_idx, nic in enumerate(instance.nics): 949 params_copy = copy.deepcopy(nic.nicparams) 950 params_filled = objects.FillDict(self.new_nicparams, params_copy) 951 952 # check parameter syntax 953 try: 954 objects.NIC.CheckParameterSyntax(params_filled) 955 except errors.ConfigurationError, err: 956 nic_errors.append("Instance %s, nic/%d: %s" % 957 (instance.name, nic_idx, err)) 958 959 # if we're moving instances to routed, check that they have an ip 960 target_mode = params_filled[constants.NIC_MODE] 961 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 962 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 963 " address" % (instance.name, nic_idx)) 964 if nic_errors: 965 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 966 "\n".join(nic_errors), errors.ECODE_INVAL) 967 968 # hypervisor list/parameters 969 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 970 if self.op.hvparams: 971 for hv_name, hv_dict in self.op.hvparams.items(): 972 if hv_name not in self.new_hvparams: 973 self.new_hvparams[hv_name] = hv_dict 974 else: 975 self.new_hvparams[hv_name].update(hv_dict) 976 977 # disk template parameters 978 self.new_diskparams = objects.FillDict(cluster.diskparams, {}) 979 if self.op.diskparams: 980 for dt_name, dt_params in self.op.diskparams.items(): 981 if dt_name not in self.new_diskparams: 982 self.new_diskparams[dt_name] = dt_params 983 else: 984 self.new_diskparams[dt_name].update(dt_params) 985 986 # os hypervisor parameters 987 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 988 if self.op.os_hvp: 989 for os_name, hvs in self.op.os_hvp.items(): 990 if os_name not in self.new_os_hvp: 991 self.new_os_hvp[os_name] = hvs 992 else: 993 for hv_name, hv_dict in hvs.items(): 994 if hv_dict is None: 995 # Delete if it exists 996 self.new_os_hvp[os_name].pop(hv_name, None) 997 elif hv_name not in self.new_os_hvp[os_name]: 998 self.new_os_hvp[os_name][hv_name] = hv_dict 999 else: 1000 self.new_os_hvp[os_name][hv_name].update(hv_dict) 1001 1002 # os parameters 1003 self.new_osp = objects.FillDict(cluster.osparams, {}) 1004 if self.op.osparams: 1005 for os_name, osp in self.op.osparams.items(): 1006 if os_name not in self.new_osp: 1007 self.new_osp[os_name] = {} 1008 1009 self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp, 1010 use_none=True) 1011 1012 if not self.new_osp[os_name]: 1013 # we removed all parameters 1014 del self.new_osp[os_name] 1015 else: 1016 # check the parameter validity (remote check) 1017 CheckOSParams(self, False, [self.cfg.GetMasterNode()], 1018 os_name, self.new_osp[os_name]) 1019 1020 # changes to the hypervisor list 1021 if self.op.enabled_hypervisors is not None: 1022 self.hv_list = self.op.enabled_hypervisors 1023 for hv in self.hv_list: 1024 # if the hypervisor doesn't already exist in the cluster 1025 # hvparams, we initialize it to empty, and then (in both 1026 # cases) we make sure to fill the defaults, as we might not 1027 # have a complete defaults list if the hypervisor wasn't 1028 # enabled before 1029 if hv not in new_hvp: 1030 new_hvp[hv] = {} 1031 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 1032 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 1033 else: 1034 self.hv_list = cluster.enabled_hypervisors 1035 1036 if self.op.hvparams or self.op.enabled_hypervisors is not None: 1037 # either the enabled list has changed, or the parameters have, validate 1038 for hv_name, hv_params in 
self.new_hvparams.items(): 1039 if ((self.op.hvparams and hv_name in self.op.hvparams) or 1040 (self.op.enabled_hypervisors and 1041 hv_name in self.op.enabled_hypervisors)): 1042 # either this is a new hypervisor, or its parameters have changed 1043 hv_class = hypervisor.GetHypervisorClass(hv_name) 1044 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1045 hv_class.CheckParameterSyntax(hv_params) 1046 CheckHVParams(self, node_uuids, hv_name, hv_params) 1047 1048 self._CheckDiskTemplateConsistency() 1049 1050 if self.op.os_hvp: 1051 # no need to check any newly-enabled hypervisors, since the 1052 # defaults have already been checked in the above code-block 1053 for os_name, os_hvp in self.new_os_hvp.items(): 1054 for hv_name, hv_params in os_hvp.items(): 1055 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1056 # we need to fill in the new os_hvp on top of the actual hv_p 1057 cluster_defaults = self.new_hvparams.get(hv_name, {}) 1058 new_osp = objects.FillDict(cluster_defaults, hv_params) 1059 hv_class = hypervisor.GetHypervisorClass(hv_name) 1060 hv_class.CheckParameterSyntax(new_osp) 1061 CheckHVParams(self, node_uuids, hv_name, new_osp) 1062 1063 if self.op.default_iallocator: 1064 alloc_script = utils.FindFile(self.op.default_iallocator, 1065 constants.IALLOCATOR_SEARCH_PATH, 1066 os.path.isfile) 1067 if alloc_script is None: 1068 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 1069 " specified" % self.op.default_iallocator, 1070 errors.ECODE_INVAL)
1071
1073 """Check whether the disk templates that are going to be disabled 1074 are still in use by some instances. 1075 1076 """ 1077 if self.op.enabled_disk_templates: 1078 cluster = self.cfg.GetClusterInfo() 1079 instances = self.cfg.GetAllInstancesInfo() 1080 1081 disk_templates_to_remove = set(cluster.enabled_disk_templates) \ 1082 - set(self.op.enabled_disk_templates) 1083 for instance in instances.itervalues(): 1084 if instance.disk_template in disk_templates_to_remove: 1085 raise errors.OpPrereqError("Cannot disable disk template '%s'," 1086 " because instance '%s' is using it." % 1087 (instance.disk_template, instance.name))
1088
1089 - def _SetVgName(self, feedback_fn):
1090 """Determines and sets the new volume group name. 1091 1092 """ 1093 if self.op.vg_name is not None: 1094 new_volume = self.op.vg_name 1095 if not new_volume: 1096 new_volume = None 1097 if new_volume != self.cfg.GetVGName(): 1098 self.cfg.SetVGName(new_volume) 1099 else: 1100 feedback_fn("Cluster LVM configuration already in desired" 1101 " state, not changing")
1102
1103 - def _SetFileStorageDir(self, feedback_fn):
1104 """Set the file storage directory. 1105 1106 """ 1107 if self.op.file_storage_dir is not None: 1108 if self.cluster.file_storage_dir == self.op.file_storage_dir: 1109 feedback_fn("Global file storage dir already set to value '%s'" 1110 % self.cluster.file_storage_dir) 1111 else: 1112 self.cluster.file_storage_dir = self.op.file_storage_dir
1113
1114 - def Exec(self, feedback_fn):
1115 """Change the parameters of the cluster. 1116 1117 """ 1118 if self.op.enabled_disk_templates: 1119 self.cluster.enabled_disk_templates = \ 1120 list(set(self.op.enabled_disk_templates)) 1121 1122 self._SetVgName(feedback_fn) 1123 self._SetFileStorageDir(feedback_fn) 1124 1125 if self.op.drbd_helper is not None: 1126 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates: 1127 feedback_fn("Note that you specified a drbd user helper, but did" 1128 " enabled the drbd disk template.") 1129 new_helper = self.op.drbd_helper 1130 if not new_helper: 1131 new_helper = None 1132 if new_helper != self.cfg.GetDRBDHelper(): 1133 self.cfg.SetDRBDHelper(new_helper) 1134 else: 1135 feedback_fn("Cluster DRBD helper already in desired state," 1136 " not changing") 1137 if self.op.hvparams: 1138 self.cluster.hvparams = self.new_hvparams 1139 if self.op.os_hvp: 1140 self.cluster.os_hvp = self.new_os_hvp 1141 if self.op.enabled_hypervisors is not None: 1142 self.cluster.hvparams = self.new_hvparams 1143 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 1144 if self.op.beparams: 1145 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 1146 if self.op.nicparams: 1147 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 1148 if self.op.ipolicy: 1149 self.cluster.ipolicy = self.new_ipolicy 1150 if self.op.osparams: 1151 self.cluster.osparams = self.new_osp 1152 if self.op.ndparams: 1153 self.cluster.ndparams = self.new_ndparams 1154 if self.op.diskparams: 1155 self.cluster.diskparams = self.new_diskparams 1156 if self.op.hv_state: 1157 self.cluster.hv_state_static = self.new_hv_state 1158 if self.op.disk_state: 1159 self.cluster.disk_state_static = self.new_disk_state 1160 1161 if self.op.candidate_pool_size is not None: 1162 self.cluster.candidate_pool_size = self.op.candidate_pool_size 1163 # we need to update the pool size here, otherwise the save will fail 1164 AdjustCandidatePool(self, []) 1165 1166 if self.op.maintain_node_health is not None: 1167 if self.op.maintain_node_health and not constants.ENABLE_CONFD: 1168 feedback_fn("Note: CONFD was disabled at build time, node health" 1169 " maintenance is not useful (still enabling it)") 1170 self.cluster.maintain_node_health = self.op.maintain_node_health 1171 1172 if self.op.modify_etc_hosts is not None: 1173 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts 1174 1175 if self.op.prealloc_wipe_disks is not None: 1176 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 1177 1178 if self.op.add_uids is not None: 1179 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 1180 1181 if self.op.remove_uids is not None: 1182 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 1183 1184 if self.op.uid_pool is not None: 1185 self.cluster.uid_pool = self.op.uid_pool 1186 1187 if self.op.default_iallocator is not None: 1188 self.cluster.default_iallocator = self.op.default_iallocator 1189 1190 if self.op.reserved_lvs is not None: 1191 self.cluster.reserved_lvs = self.op.reserved_lvs 1192 1193 if self.op.use_external_mip_script is not None: 1194 self.cluster.use_external_mip_script = self.op.use_external_mip_script 1195 1196 def helper_os(aname, mods, desc): 1197 desc += " OS list" 1198 lst = getattr(self.cluster, aname) 1199 for key, val in mods: 1200 if key == constants.DDM_ADD: 1201 if val in lst: 1202 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 1203 else: 1204 lst.append(val) 1205 elif key == constants.DDM_REMOVE: 1206 if val in lst: 1207 lst.remove(val) 
1208 else: 1209 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 1210 else: 1211 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1212 1213 if self.op.hidden_os: 1214 helper_os("hidden_os", self.op.hidden_os, "hidden") 1215 1216 if self.op.blacklisted_os: 1217 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 1218 1219 if self.op.master_netdev: 1220 master_params = self.cfg.GetMasterNetworkParameters() 1221 ems = self.cfg.GetUseExternalMipScript() 1222 feedback_fn("Shutting down master ip on the current netdev (%s)" % 1223 self.cluster.master_netdev) 1224 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 1225 master_params, ems) 1226 if not self.op.force: 1227 result.Raise("Could not disable the master ip") 1228 else: 1229 if result.fail_msg: 1230 msg = ("Could not disable the master ip (continuing anyway): %s" % 1231 result.fail_msg) 1232 feedback_fn(msg) 1233 feedback_fn("Changing master_netdev from %s to %s" % 1234 (master_params.netdev, self.op.master_netdev)) 1235 self.cluster.master_netdev = self.op.master_netdev 1236 1237 if self.op.master_netmask: 1238 master_params = self.cfg.GetMasterNetworkParameters() 1239 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) 1240 result = self.rpc.call_node_change_master_netmask( 1241 master_params.uuid, master_params.netmask, 1242 self.op.master_netmask, master_params.ip, 1243 master_params.netdev) 1244 result.Warn("Could not change the master IP netmask", feedback_fn) 1245 self.cluster.master_netmask = self.op.master_netmask 1246 1247 self.cfg.Update(self.cluster, feedback_fn) 1248 1249 if self.op.master_netdev: 1250 master_params = self.cfg.GetMasterNetworkParameters() 1251 feedback_fn("Starting the master ip on the new master netdev (%s)" % 1252 self.op.master_netdev) 1253 ems = self.cfg.GetUseExternalMipScript() 1254 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 1255 master_params, ems) 1256 result.Warn("Could not re-enable the master ip on the master," 1257 " please restart manually", self.LogWarning)
1258
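
# A worked example for LUClusterSetParams._GetEnabledDiskTemplatesInner above,
# using plain template names for illustration: when the opcode requests
# ["drbd", "plain"] while only ["drbd"] was enabled before, the full new list
# is returned together with the subset that this operation newly enables.
assert LUClusterSetParams._GetEnabledDiskTemplatesInner(
  ["drbd", "plain"], ["drbd"]) == (["drbd", "plain"], ["plain"])
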
1259 1260 -class LUClusterVerify(NoHooksLU):
1261 """Submits all jobs necessary to verify the cluster. 1262 1263 """ 1264 REQ_BGL = False 1265
1266 - def ExpandNames(self):
1267 self.needed_locks = {}
1268
1269 - def Exec(self, feedback_fn):
1270 jobs = [] 1271 1272 if self.op.group_name: 1273 groups = [self.op.group_name] 1274 depends_fn = lambda: None 1275 else: 1276 groups = self.cfg.GetNodeGroupList() 1277 1278 # Verify global configuration 1279 jobs.append([ 1280 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), 1281 ]) 1282 1283 # Always depend on global verification 1284 depends_fn = lambda: [(-len(jobs), [])] 1285 1286 jobs.extend( 1287 [opcodes.OpClusterVerifyGroup(group_name=group, 1288 ignore_errors=self.op.ignore_errors, 1289 depends=depends_fn())] 1290 for group in groups) 1291 1292 # Fix up all parameters 1293 for op in itertools.chain(*jobs): # pylint: disable=W0142 1294 op.debug_simulate_errors = self.op.debug_simulate_errors 1295 op.verbose = self.op.verbose 1296 op.error_codes = self.op.error_codes 1297 try: 1298 op.skip_checks = self.op.skip_checks 1299 except AttributeError: 1300 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 1301 1302 return ResultWithJobs(jobs)
1303

class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    if etxt in self.op.ignore_errors:  # pylint: disable=E1101
      ltype = self.ETYPE_WARNING
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:  # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)  # Mix-in. pylint: disable=E1101
    # do not mark the operation as failed for WARN cases only
    if ltype == self.ETYPE_ERROR:
      self.bad = True

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    if (bool(cond)
        or self.op.debug_simulate_errors):  # pylint: disable=E1101
      self._Error(*args, **kwargs)

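
# A worked example of the two layouts produced by _VerifyErrors._Error above
# for a hypothetical finding: the parseable form is used when the opcode sets
# error_codes, otherwise the human-readable form is sent to feedback_fn.
_ltype, _itype, _etxt, _item, _msg = ("ERROR", "node", "ENODEVERSION",
                                      "node1", "software version mismatch")
assert ("%s:%s:%s:%s:%s" % (_ltype, _etxt, _itype, _item, _msg) ==
        "ERROR:ENODEVERSION:node:node1:software version mismatch")
assert ("%s: %s%s: %s" % (_ltype, _itype, " " + _item, _msg) ==
        "ERROR: node node1: software version mismatch")
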
1358 1359 -def _VerifyCertificate(filename):
1360 """Verifies a certificate for L{LUClusterVerifyConfig}. 1361 1362 @type filename: string 1363 @param filename: Path to PEM file 1364 1365 """ 1366 try: 1367 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1368 utils.ReadFile(filename)) 1369 except Exception, err: # pylint: disable=W0703 1370 return (LUClusterVerifyConfig.ETYPE_ERROR, 1371 "Failed to load X509 certificate %s: %s" % (filename, err)) 1372 1373 (errcode, msg) = \ 1374 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1375 constants.SSL_CERT_EXPIRATION_ERROR) 1376 1377 if msg: 1378 fnamemsg = "While verifying %s: %s" % (filename, msg) 1379 else: 1380 fnamemsg = None 1381 1382 if errcode is None: 1383 return (None, fnamemsg) 1384 elif errcode == utils.CERT_WARNING: 1385 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) 1386 elif errcode == utils.CERT_ERROR: 1387 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) 1388 1389 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1390
1391 1392 -def _GetAllHypervisorParameters(cluster, instances):
1393 """Compute the set of all hypervisor parameters. 1394 1395 @type cluster: L{objects.Cluster} 1396 @param cluster: the cluster object 1397 @param instances: list of L{objects.Instance} 1398 @param instances: additional instances from which to obtain parameters 1399 @rtype: list of (origin, hypervisor, parameters) 1400 @return: a list with all parameters found, indicating the hypervisor they 1401 apply to, and the origin (can be "cluster", "os X", or "instance Y") 1402 1403 """ 1404 hvp_data = [] 1405 1406 for hv_name in cluster.enabled_hypervisors: 1407 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 1408 1409 for os_name, os_hvp in cluster.os_hvp.items(): 1410 for hv_name, hv_params in os_hvp.items(): 1411 if hv_params: 1412 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 1413 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 1414 1415 # TODO: collapse identical parameter values in a single one 1416 for instance in instances: 1417 if instance.hvparams: 1418 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 1419 cluster.FillHV(instance))) 1420 1421 return hvp_data
1422
1423 1424 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1425 """Verifies the cluster config. 1426 1427 """ 1428 REQ_BGL = False 1429
1430 - def _VerifyHVP(self, hvp_data):
1431 """Verifies locally the syntax of the hypervisor parameters. 1432 1433 """ 1434 for item, hv_name, hv_params in hvp_data: 1435 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 1436 (item, hv_name)) 1437 try: 1438 hv_class = hypervisor.GetHypervisorClass(hv_name) 1439 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1440 hv_class.CheckParameterSyntax(hv_params) 1441 except errors.GenericError, err: 1442 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1443
1444 - def ExpandNames(self):
1445 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 1446 self.share_locks = ShareAll()
1447
1448 - def CheckPrereq(self):
1449 """Check prerequisites. 1450 1451 """ 1452 # Retrieve all information 1453 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 1454 self.all_node_info = self.cfg.GetAllNodesInfo() 1455 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1456
1457 - def Exec(self, feedback_fn):
1458 """Verify integrity of cluster, performing various test on nodes. 1459 1460 """ 1461 self.bad = False 1462 self._feedback_fn = feedback_fn 1463 1464 feedback_fn("* Verifying cluster config") 1465 1466 for msg in self.cfg.VerifyConfig(): 1467 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 1468 1469 feedback_fn("* Verifying cluster certificate files") 1470 1471 for cert_filename in pathutils.ALL_CERT_FILES: 1472 (errcode, msg) = _VerifyCertificate(cert_filename) 1473 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 1474 1475 self._ErrorIf(not utils.CanRead(constants.LUXID_USER, 1476 pathutils.NODED_CERT_FILE), 1477 constants.CV_ECLUSTERCERT, 1478 None, 1479 pathutils.NODED_CERT_FILE + " must be accessible by the " + 1480 constants.LUXID_USER + " user") 1481 1482 feedback_fn("* Verifying hypervisor parameters") 1483 1484 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 1485 self.all_inst_info.values())) 1486 1487 feedback_fn("* Verifying all nodes belong to an existing group") 1488 1489 # We do this verification here because, should this bogus circumstance 1490 # occur, it would never be caught by VerifyGroup, which only acts on 1491 # nodes/instances reachable from existing node groups. 1492 1493 dangling_nodes = set(node for node in self.all_node_info.values() 1494 if node.group not in self.all_group_info) 1495 1496 dangling_instances = {} 1497 no_node_instances = [] 1498 1499 for inst in self.all_inst_info.values(): 1500 if inst.primary_node in [node.uuid for node in dangling_nodes]: 1501 dangling_instances.setdefault(inst.primary_node, []).append(inst) 1502 elif inst.primary_node not in self.all_node_info: 1503 no_node_instances.append(inst) 1504 1505 pretty_dangling = [ 1506 "%s (%s)" % 1507 (node.name, 1508 utils.CommaJoin(inst.name for 1509 inst in dangling_instances.get(node.uuid, []))) 1510 for node in dangling_nodes] 1511 1512 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 1513 None, 1514 "the following nodes (and their instances) belong to a non" 1515 " existing group: %s", utils.CommaJoin(pretty_dangling)) 1516 1517 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 1518 None, 1519 "the following instances have a non-existing primary-node:" 1520 " %s", utils.CommaJoin(inst.name for 1521 inst in no_node_instances)) 1522 1523 return not self.bad
1524
1525 1526 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1527 """Verifies the status of a node group. 1528 1529 """ 1530 HPATH = "cluster-verify" 1531 HTYPE = constants.HTYPE_CLUSTER 1532 REQ_BGL = False 1533 1534 _HOOKS_INDENT_RE = re.compile("^", re.M) 1535
1536 - class NodeImage(object):
1537 """A class representing the logical and physical status of a node. 1538 1539 @type uuid: string 1540 @ivar uuid: the node UUID to which this object refers 1541 @ivar volumes: a structure as returned from 1542 L{ganeti.backend.GetVolumeList} (runtime) 1543 @ivar instances: a list of running instances (runtime) 1544 @ivar pinst: list of configured primary instances (config) 1545 @ivar sinst: list of configured secondary instances (config) 1546 @ivar sbp: dictionary of {primary-node: list of instances} for all 1547 instances for which this node is secondary (config) 1548 @ivar mfree: free memory, as reported by hypervisor (runtime) 1549 @ivar dfree: free disk, as reported by the node (runtime) 1550 @ivar offline: the offline status (config) 1551 @type rpc_fail: boolean 1552 @ivar rpc_fail: whether the RPC verify call was successfull (overall, 1553 not whether the individual keys were correct) (runtime) 1554 @type lvm_fail: boolean 1555 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 1556 @type hyp_fail: boolean 1557 @ivar hyp_fail: whether the RPC call didn't return the instance list 1558 @type ghost: boolean 1559 @ivar ghost: whether this is a known node or not (config) 1560 @type os_fail: boolean 1561 @ivar os_fail: whether the RPC call didn't return valid OS data 1562 @type oslist: list 1563 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 1564 @type vm_capable: boolean 1565 @ivar vm_capable: whether the node can host instances 1566 @type pv_min: float 1567 @ivar pv_min: size in MiB of the smallest PVs 1568 @type pv_max: float 1569 @ivar pv_max: size in MiB of the biggest PVs 1570 1571 """
1572 - def __init__(self, offline=False, uuid=None, vm_capable=True):
1573 self.uuid = uuid 1574 self.volumes = {} 1575 self.instances = [] 1576 self.pinst = [] 1577 self.sinst = [] 1578 self.sbp = {} 1579 self.mfree = 0 1580 self.dfree = 0 1581 self.offline = offline 1582 self.vm_capable = vm_capable 1583 self.rpc_fail = False 1584 self.lvm_fail = False 1585 self.hyp_fail = False 1586 self.ghost = False 1587 self.os_fail = False 1588 self.oslist = {} 1589 self.pv_min = None 1590 self.pv_max = None
1591
1592 - def ExpandNames(self):
1593 # This raises errors.OpPrereqError on its own: 1594 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 1595 1596 # Get instances in node group; this is unsafe and needs verification later 1597 inst_uuids = \ 1598 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 1599 1600 self.needed_locks = { 1601 locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids), 1602 locking.LEVEL_NODEGROUP: [self.group_uuid], 1603 locking.LEVEL_NODE: [], 1604 1605 # This opcode is run by watcher every five minutes and acquires all nodes 1606 # for a group. It doesn't run for a long time, so it's better to acquire 1607 # the node allocation lock as well. 1608 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 1609 } 1610 1611 self.share_locks = ShareAll()
1612
1613 - def DeclareLocks(self, level):
1614 if level == locking.LEVEL_NODE: 1615 # Get members of node group; this is unsafe and needs verification later 1616 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 1617 1618 # In Exec(), we warn about mirrored instances that have primary and 1619 # secondary living in separate node groups. To fully verify that 1620 # volumes for these instances are healthy, we will need to do an 1621 # extra call to their secondaries. We ensure here those nodes will 1622 # be locked. 1623 for inst_name in self.owned_locks(locking.LEVEL_INSTANCE): 1624 # Important: access only the instances whose lock is owned 1625 instance = self.cfg.GetInstanceInfoByName(inst_name) 1626 if instance.disk_template in constants.DTS_INT_MIRROR: 1627 nodes.update(instance.secondary_nodes) 1628 1629 self.needed_locks[locking.LEVEL_NODE] = nodes
1630
1631 - def CheckPrereq(self):
1632 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 1633 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 1634 1635 group_node_uuids = set(self.group_info.members) 1636 group_inst_uuids = \ 1637 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 1638 1639 unlocked_node_uuids = \ 1640 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE)) 1641 1642 unlocked_inst_uuids = \ 1643 group_inst_uuids.difference( 1644 [self.cfg.GetInstanceInfoByName(name).uuid 1645 for name in self.owned_locks(locking.LEVEL_INSTANCE)]) 1646 1647 if unlocked_node_uuids: 1648 raise errors.OpPrereqError( 1649 "Missing lock for nodes: %s" % 1650 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)), 1651 errors.ECODE_STATE) 1652 1653 if unlocked_inst_uuids: 1654 raise errors.OpPrereqError( 1655 "Missing lock for instances: %s" % 1656 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)), 1657 errors.ECODE_STATE) 1658 1659 self.all_node_info = self.cfg.GetAllNodesInfo() 1660 self.all_inst_info = self.cfg.GetAllInstancesInfo() 1661 1662 self.my_node_uuids = group_node_uuids 1663 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid]) 1664 for node_uuid in group_node_uuids) 1665 1666 self.my_inst_uuids = group_inst_uuids 1667 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid]) 1668 for inst_uuid in group_inst_uuids) 1669 1670 # We detect here the nodes that will need the extra RPC calls for verifying 1671 # split LV volumes; they should be locked. 1672 extra_lv_nodes = set() 1673 1674 for inst in self.my_inst_info.values(): 1675 if inst.disk_template in constants.DTS_INT_MIRROR: 1676 for nuuid in inst.all_nodes: 1677 if self.all_node_info[nuuid].group != self.group_uuid: 1678 extra_lv_nodes.add(nuuid) 1679 1680 unlocked_lv_nodes = \ 1681 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 1682 1683 if unlocked_lv_nodes: 1684 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 1685 utils.CommaJoin(unlocked_lv_nodes), 1686 errors.ECODE_STATE) 1687 self.extra_lv_nodes = list(extra_lv_nodes)
1688
1689 - def _VerifyNode(self, ninfo, nresult):
1690    """Perform some basic validation on data returned from a node.
1691 
1692    - check the result data structure is well formed and has all the
1693      mandatory fields
1694    - check ganeti version
1695 
1696    @type ninfo: L{objects.Node}
1697    @param ninfo: the node to check
1698    @param nresult: the results from the node
1699    @rtype: boolean
1700    @return: whether overall this call was successful (and we can expect
1701        reasonable values in the response)
1702 
1703    """
1704    # main result, nresult should be a non-empty dict
1705    test = not nresult or not isinstance(nresult, dict)
1706    self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1707                  "unable to verify node: no data returned")
1708    if test:
1709      return False
1710 
1711    # compares ganeti version
1712    local_version = constants.PROTOCOL_VERSION
1713    remote_version = nresult.get("version", None)
1714    test = not (remote_version and
1715                isinstance(remote_version, (list, tuple)) and
1716                len(remote_version) == 2)
1717    self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1718                  "connection to node returned invalid data")
1719    if test:
1720      return False
1721 
1722    test = local_version != remote_version[0]
1723    self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
1724                  "incompatible protocol versions: master %s,"
1725                  " node %s", local_version, remote_version[0])
1726    if test:
1727      return False
1728 
1729    # node seems compatible, we can actually try to look into its results
1730 
1731    # full package version
1732    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1733                  constants.CV_ENODEVERSION, ninfo.name,
1734                  "software version mismatch: master %s, node %s",
1735                  constants.RELEASE_VERSION, remote_version[1],
1736                  code=self.ETYPE_WARNING)
1737 
1738    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1739    if ninfo.vm_capable and isinstance(hyp_result, dict):
1740      for hv_name, hv_result in hyp_result.iteritems():
1741        test = hv_result is not None
1742        self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1743                      "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1744 
1745    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1746    if ninfo.vm_capable and isinstance(hvp_result, list):
1747      for item, hv_name, hv_result in hvp_result:
1748        self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
1749                      "hypervisor %s parameter verify failure (source %s): %s",
1750                      hv_name, item, hv_result)
1751 
1752    test = nresult.get(constants.NV_NODESETUP,
1753                       ["Missing NODESETUP results"])
1754    self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
1755                  "node setup error: %s", "; ".join(test))
1756 
1757    return True
1758
1759 - def _VerifyNodeTime(self, ninfo, nresult, 1760 nvinfo_starttime, nvinfo_endtime):
1761 """Check the node time. 1762 1763 @type ninfo: L{objects.Node} 1764 @param ninfo: the node to check 1765 @param nresult: the remote results for the node 1766 @param nvinfo_starttime: the start time of the RPC call 1767 @param nvinfo_endtime: the end time of the RPC call 1768 1769 """ 1770 ntime = nresult.get(constants.NV_TIME, None) 1771 try: 1772 ntime_merged = utils.MergeTime(ntime) 1773 except (ValueError, TypeError): 1774 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name, 1775 "Node returned invalid time") 1776 return 1777 1778 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 1779 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 1780 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 1781 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 1782 else: 1783 ntime_diff = None 1784 1785 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name, 1786 "Node time diverges by at least %s from master node time", 1787 ntime_diff)
1788
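The clock-skew test above can be read in isolation: the node's reported time is merged into a single float and flagged only when it falls outside the RPC start/end window by more than the allowed skew. A minimal standalone sketch, assuming utils.MergeTime simply combines a (seconds, microseconds) pair, and using an illustrative skew limit in place of constants.NODE_MAX_CLOCK_SKEW:

MAX_CLOCK_SKEW = 150.0  # seconds, illustrative value only


def merge_time(seconds, microseconds):
  """Collapse a (seconds, microseconds) pair into a float timestamp."""
  return seconds + microseconds * 1e-6


def clock_skew_error(node_time, rpc_start, rpc_end, max_skew=MAX_CLOCK_SKEW):
  """Return a human-readable skew, or None if the node time is acceptable.

  The RPC start/end timestamps define a window; only a node clock outside
  that window by more than max_skew is reported.
  """
  if node_time < rpc_start - max_skew:
    return "%.01fs" % abs(rpc_start - node_time)
  if node_time > rpc_end + max_skew:
    return "%.01fs" % abs(node_time - rpc_end)
  return None


print(clock_skew_error(merge_time(1000, 0), 1200.0, 1201.0))  # "200.0s"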
1789 - def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
1790 """Check the node LVM results and update info for cross-node checks. 1791 1792 @type ninfo: L{objects.Node} 1793 @param ninfo: the node to check 1794 @param nresult: the remote results for the node 1795 @param vg_name: the configured VG name 1796 @type nimg: L{NodeImage} 1797 @param nimg: node image 1798 1799 """ 1800 if vg_name is None: 1801 return 1802 1803 # checks vg existence and size > 20G 1804 vglist = nresult.get(constants.NV_VGLIST, None) 1805 test = not vglist 1806 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, 1807 "unable to check volume groups") 1808 if not test: 1809 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 1810 constants.MIN_VG_SIZE) 1811 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus) 1812 1813 # Check PVs 1814 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage) 1815 for em in errmsgs: 1816 self._Error(constants.CV_ENODELVM, ninfo.name, em) 1817 if pvminmax is not None: 1818 (nimg.pv_min, nimg.pv_max) = pvminmax
1819
1820 - def _VerifyGroupDRBDVersion(self, node_verify_infos):
1821 """Check cross-node DRBD version consistency. 1822 1823 @type node_verify_infos: dict 1824 @param node_verify_infos: infos about nodes as returned from the 1825 node_verify call. 1826 1827 """ 1828 node_versions = {} 1829 for node_uuid, ndata in node_verify_infos.items(): 1830 nresult = ndata.payload 1831 if nresult: 1832 version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version") 1833 node_versions[node_uuid] = version 1834 1835 if len(set(node_versions.values())) > 1: 1836 for node_uuid, version in sorted(node_versions.items()): 1837 msg = "DRBD version mismatch: %s" % version 1838 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg, 1839 code=self.ETYPE_WARNING)
1840
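The cross-node comparison boils down to collecting one version string per node and warning as soon as more than one distinct value shows up; a small sketch with plain dictionaries standing in for the RPC payloads:

def drbd_version_mismatches(node_versions):
  """Return sorted (node, version) pairs if the versions are inconsistent.

  node_versions: dict mapping node name to its reported DRBD version string.
  """
  if len(set(node_versions.values())) <= 1:
    return []
  return sorted(node_versions.items())


print(drbd_version_mismatches({"node1": "8.4.11", "node2": "8.4.7"}))
# [('node1', '8.4.11'), ('node2', '8.4.7')]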
1841 - def _VerifyGroupLVM(self, node_image, vg_name):
1842    """Check cross-node consistency in LVM.
1843 
1844    @type node_image: dict
1845    @param node_image: info about nodes, mapping from node names to
1846      L{NodeImage} objects
1847    @param vg_name: the configured VG name
1848 
1849    """
1850    if vg_name is None:
1851      return
1852 
1853    # Only exclusive storage needs this kind of check
1854    if not self._exclusive_storage:
1855      return
1856 
1857    # exclusive_storage wants all PVs to have the same size (approximately);
1858    # if the smallest and the biggest ones are okay, everything is fine.
1859    # pv_min is None iff pv_max is None
1860    vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
1861    if not vals:
1862      return
1863    (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
1864    (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
1865    bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
1866    self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
1867                  "PV sizes differ too much in the group; smallest (%s MB) is"
1868                  " on %s, biggest (%s MB) is on %s",
1869                  pvmin, self.cfg.GetNodeName(minnode_uuid),
1870                  pvmax, self.cfg.GetNodeName(maxnode_uuid))
1871
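Because only the extremes matter, the group-wide check reduces to comparing the smallest pv_min against the largest pv_max. A sketch under that assumption, with a simple ratio threshold standing in for utils.LvmExclusiveTestBadPvSizes:

def pv_sizes_too_different(node_pv_ranges, max_ratio=1.2):
  """node_pv_ranges: dict node name -> (pv_min, pv_max) in MiB, or None."""
  vals = [(rng, node) for node, rng in node_pv_ranges.items()
          if rng is not None]
  if not vals:
    return None
  ((pvmin, _), minnode) = min(vals)
  ((_, pvmax), maxnode) = max(vals, key=lambda v: v[0][1])
  if pvmax > pvmin * max_ratio:
    return (pvmin, minnode, pvmax, maxnode)
  return None


print(pv_sizes_too_different({"n1": (100, 110), "n2": (200, 240)}))
# (100, 'n1', 240, 'n2')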
1872 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1873 """Check the node bridges. 1874 1875 @type ninfo: L{objects.Node} 1876 @param ninfo: the node to check 1877 @param nresult: the remote results for the node 1878 @param bridges: the expected list of bridges 1879 1880 """ 1881 if not bridges: 1882 return 1883 1884 missing = nresult.get(constants.NV_BRIDGES, None) 1885 test = not isinstance(missing, list) 1886 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 1887 "did not return valid bridge information") 1888 if not test: 1889 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name, 1890 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1891
1892 - def _VerifyNodeUserScripts(self, ninfo, nresult):
1893    """Check the presence and executability of user scripts on the node.
1894 
1895    @type ninfo: L{objects.Node}
1896    @param ninfo: the node to check
1897    @param nresult: the remote results for the node
1898 
1899    """
1900    test = constants.NV_USERSCRIPTS not in nresult
1901    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
1902                  "did not return user scripts information")
1903 
1904    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1905    if not test:
1906      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
1907                    "user scripts not present or not executable: %s" %
1908                    utils.CommaJoin(sorted(broken_scripts)))
1909
1910 - def _VerifyNodeNetwork(self, ninfo, nresult):
1911 """Check the node network connectivity results. 1912 1913 @type ninfo: L{objects.Node} 1914 @param ninfo: the node to check 1915 @param nresult: the remote results for the node 1916 1917 """ 1918 test = constants.NV_NODELIST not in nresult 1919 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name, 1920 "node hasn't returned node ssh connectivity data") 1921 if not test: 1922 if nresult[constants.NV_NODELIST]: 1923 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 1924 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name, 1925 "ssh communication with node '%s': %s", a_node, a_msg) 1926 1927 test = constants.NV_NODENETTEST not in nresult 1928 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 1929 "node hasn't returned node tcp connectivity data") 1930 if not test: 1931 if nresult[constants.NV_NODENETTEST]: 1932 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 1933 for anode in nlist: 1934 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, 1935 "tcp communication with node '%s': %s", 1936 anode, nresult[constants.NV_NODENETTEST][anode]) 1937 1938 test = constants.NV_MASTERIP not in nresult 1939 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 1940 "node hasn't returned node master IP reachability data") 1941 if not test: 1942 if not nresult[constants.NV_MASTERIP]: 1943 if ninfo.uuid == self.master_node: 1944 msg = "the master node cannot reach the master IP (not configured?)" 1945 else: 1946 msg = "cannot reach the master IP" 1947 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
1948
1949 - def _VerifyInstance(self, instance, node_image, diskstatus):
1950 """Verify an instance. 1951 1952 This function checks to see if the required block devices are 1953 available on the instance's node, and that the nodes are in the correct 1954 state. 1955 1956 """ 1957 pnode_uuid = instance.primary_node 1958 pnode_img = node_image[pnode_uuid] 1959 groupinfo = self.cfg.GetAllNodeGroupsInfo() 1960 1961 node_vol_should = {} 1962 instance.MapLVsByNode(node_vol_should) 1963 1964 cluster = self.cfg.GetClusterInfo() 1965 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 1966 self.group_info) 1967 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg) 1968 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name, 1969 utils.CommaJoin(err), code=self.ETYPE_WARNING) 1970 1971 for node_uuid in node_vol_should: 1972 n_img = node_image[node_uuid] 1973 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1974 # ignore missing volumes on offline or broken nodes 1975 continue 1976 for volume in node_vol_should[node_uuid]: 1977 test = volume not in n_img.volumes 1978 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name, 1979 "volume %s missing on node %s", volume, 1980 self.cfg.GetNodeName(node_uuid)) 1981 1982 if instance.admin_state == constants.ADMINST_UP: 1983 test = instance.uuid not in pnode_img.instances and not pnode_img.offline 1984 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name, 1985 "instance not running on its primary node %s", 1986 self.cfg.GetNodeName(pnode_uuid)) 1987 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, 1988 instance.name, "instance is marked as running and lives on" 1989 " offline node %s", self.cfg.GetNodeName(pnode_uuid)) 1990 1991 diskdata = [(nname, success, status, idx) 1992 for (nname, disks) in diskstatus.items() 1993 for idx, (success, status) in enumerate(disks)] 1994 1995 for nname, success, bdev_status, idx in diskdata: 1996 # the 'ghost node' construction in Exec() ensures that we have a 1997 # node here 1998 snode = node_image[nname] 1999 bad_snode = snode.ghost or snode.offline 2000 self._ErrorIf(instance.disks_active and 2001 not success and not bad_snode, 2002 constants.CV_EINSTANCEFAULTYDISK, instance.name, 2003 "couldn't retrieve status for disk/%s on %s: %s", 2004 idx, self.cfg.GetNodeName(nname), bdev_status) 2005 2006 if instance.disks_active and success and \ 2007 (bdev_status.is_degraded or 2008 bdev_status.ldisk_status != constants.LDS_OKAY): 2009 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname)) 2010 if bdev_status.is_degraded: 2011 msg += " is degraded" 2012 if bdev_status.ldisk_status != constants.LDS_OKAY: 2013 msg += "; state is '%s'" % \ 2014 constants.LDS_NAMES[bdev_status.ldisk_status] 2015 2016 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg) 2017 2018 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2019 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid), 2020 "instance %s, connection to primary node failed", 2021 instance.name) 2022 2023 self._ErrorIf(len(instance.secondary_nodes) > 1, 2024 constants.CV_EINSTANCELAYOUT, instance.name, 2025 "instance has multiple secondary nodes: %s", 2026 utils.CommaJoin(instance.secondary_nodes), 2027 code=self.ETYPE_WARNING) 2028 2029 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes) 2030 if any(es_flags.values()): 2031 if instance.disk_template not in constants.DTS_EXCL_STORAGE: 2032 # Disk template not compatible with exclusive_storage: no instance 2033 # node should have the flag set 2034 es_nodes = [n 2035 for (n, es) in 
es_flags.items() 2036 if es] 2037 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name, 2038 "instance has template %s, which is not supported on nodes" 2039 " that have exclusive storage set: %s", 2040 instance.disk_template, 2041 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes))) 2042 for (idx, disk) in enumerate(instance.disks): 2043 self._ErrorIf(disk.spindles is None, 2044 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name, 2045 "number of spindles not configured for disk %s while" 2046 " exclusive storage is enabled, try running" 2047 " gnt-cluster repair-disk-sizes", idx) 2048 2049 if instance.disk_template in constants.DTS_INT_MIRROR: 2050 instance_nodes = utils.NiceSort(instance.all_nodes) 2051 instance_groups = {} 2052 2053 for node_uuid in instance_nodes: 2054 instance_groups.setdefault(self.all_node_info[node_uuid].group, 2055 []).append(node_uuid) 2056 2057 pretty_list = [ 2058 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)), 2059 groupinfo[group].name) 2060 # Sort so that we always list the primary node first. 2061 for group, nodes in sorted(instance_groups.items(), 2062 key=lambda (_, nodes): pnode_uuid in nodes, 2063 reverse=True)] 2064 2065 self._ErrorIf(len(instance_groups) > 1, 2066 constants.CV_EINSTANCESPLITGROUPS, 2067 instance.name, "instance has primary and secondary nodes in" 2068 " different groups: %s", utils.CommaJoin(pretty_list), 2069 code=self.ETYPE_WARNING) 2070 2071 inst_nodes_offline = [] 2072 for snode in instance.secondary_nodes: 2073 s_img = node_image[snode] 2074 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 2075 self.cfg.GetNodeName(snode), 2076 "instance %s, connection to secondary node failed", 2077 instance.name) 2078 2079 if s_img.offline: 2080 inst_nodes_offline.append(snode) 2081 2082 # warn that the instance lives on offline nodes 2083 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, 2084 instance.name, "instance has offline secondary node(s) %s", 2085 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline))) 2086 # ... or ghost/non-vm_capable nodes 2087 for node_uuid in instance.all_nodes: 2088 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE, 2089 instance.name, "instance lives on ghost node %s", 2090 self.cfg.GetNodeName(node_uuid)) 2091 self._ErrorIf(not node_image[node_uuid].vm_capable, 2092 constants.CV_EINSTANCEBADNODE, instance.name, 2093 "instance lives on non-vm_capable node %s", 2094 self.cfg.GetNodeName(node_uuid))
2095
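The split-group warning near the end of the method is essentially a grouping of an instance's nodes by node group, with the primary's group listed first. A self-contained sketch, with the node-to-group mapping passed in explicitly instead of coming from the cluster configuration:

def split_groups(primary, secondaries, node_group):
  """Return the per-group node lists, primary group first, or None."""
  groups = {}
  for node in [primary] + list(secondaries):
    groups.setdefault(node_group[node], []).append(node)
  if len(groups) <= 1:
    return None
  # Sort so that the group containing the primary node comes first.
  return sorted(groups.values(), key=lambda nodes: primary in nodes,
                reverse=True)


node_group = {"nodeA": "group1", "nodeB": "group2"}
print(split_groups("nodeA", ["nodeB"], node_group))
# [['nodeA'], ['nodeB']]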
2096 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2097 """Verify if there are any unknown volumes in the cluster. 2098 2099 The .os, .swap and backup volumes are ignored. All other volumes are 2100 reported as unknown. 2101 2102 @type reserved: L{ganeti.utils.FieldSet} 2103 @param reserved: a FieldSet of reserved volume names 2104 2105 """ 2106 for node_uuid, n_img in node_image.items(): 2107 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2108 self.all_node_info[node_uuid].group != self.group_uuid): 2109 # skip non-healthy nodes 2110 continue 2111 for volume in n_img.volumes: 2112 test = ((node_uuid not in node_vol_should or 2113 volume not in node_vol_should[node_uuid]) and 2114 not reserved.Matches(volume)) 2115 self._ErrorIf(test, constants.CV_ENODEORPHANLV, 2116 self.cfg.GetNodeName(node_uuid), 2117 "volume %s is unknown", volume)
2118
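The orphan test is set arithmetic per node: a reported volume is unknown when the configuration does not expect it there and it does not match a reserved name. A sketch using plain name prefixes in place of a utils.FieldSet of reserved patterns:

def orphan_volumes(reported, expected, reserved_prefixes=("vg/.os",)):
  """reported/expected: dict node -> iterable of volume names."""
  orphans = {}
  for node, volumes in reported.items():
    should = set(expected.get(node, []))
    for vol in volumes:
      if vol in should:
        continue
      if any(vol.startswith(p) for p in reserved_prefixes):
        continue
      orphans.setdefault(node, []).append(vol)
  return orphans


print(orphan_volumes({"n1": ["vg/disk0", "vg/stale"]}, {"n1": ["vg/disk0"]}))
# {'n1': ['vg/stale']}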
2119 - def _VerifyNPlusOneMemory(self, node_image, all_insts):
2120    """Verify N+1 Memory Resilience.
2121 
2122    Check that if one single node dies we can still start all the
2123    instances it was primary for.
2124 
2125    """
2126    cluster_info = self.cfg.GetClusterInfo()
2127    for node_uuid, n_img in node_image.items():
2128      # This code checks that every node which is now listed as
2129      # secondary has enough memory to host all instances it is
2130      # supposed to, should a single other node in the cluster fail.
2131      # FIXME: not ready for failover to an arbitrary node
2132      # FIXME: does not support file-backed instances
2133      # WARNING: we currently take into account down instances as well
2134      # as up ones, considering that even if they're down someone
2135      # might want to start them even in the event of a node failure.
2136      if n_img.offline or \
2137         self.all_node_info[node_uuid].group != self.group_uuid:
2138        # we're skipping nodes marked offline and nodes in other groups from
2139        # the N+1 warning, since most likely we don't have good memory
2140        # information from them; we already list instances living on such
2141        # nodes, and that's enough warning
2142        continue
2143      # TODO(dynmem): also consider ballooning out other instances
2144      for prinode, inst_uuids in n_img.sbp.items():
2145        needed_mem = 0
2146        for inst_uuid in inst_uuids:
2147          bep = cluster_info.FillBE(all_insts[inst_uuid])
2148          if bep[constants.BE_AUTO_BALANCE]:
2149            needed_mem += bep[constants.BE_MINMEM]
2150        test = n_img.mfree < needed_mem
2151        self._ErrorIf(test, constants.CV_ENODEN1,
2152                      self.cfg.GetNodeName(node_uuid),
2153                      "not enough memory to accommodate instance failovers"
2154                      " should node %s fail (%dMiB needed, %dMiB available)",
2155                      self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2156
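Stripped of the configuration lookups, the N+1 check sums the minimum memory of the auto-balanced instances each secondary would have to absorb per primary and compares that against the secondary's free memory; a standalone sketch with hypothetical node and instance names:

def n_plus_one_violations(free_mem, secondaries_by_primary, inst_min_mem):
  """Return [(secondary, primary, needed, free)] for nodes that cannot cope.

  free_mem: dict secondary node -> free memory (MiB)
  secondaries_by_primary: dict secondary -> {primary: [instance names]}
  inst_min_mem: dict instance name -> minimum memory (MiB), auto-balanced only
  """
  problems = []
  for snode, per_primary in secondaries_by_primary.items():
    for primary, instances in per_primary.items():
      needed = sum(inst_min_mem.get(name, 0) for name in instances)
      if free_mem[snode] < needed:
        problems.append((snode, primary, needed, free_mem[snode]))
  return problems


print(n_plus_one_violations({"n2": 1024},
                            {"n2": {"n1": ["inst1", "inst2"]}},
                            {"inst1": 512, "inst2": 768}))
# [('n2', 'n1', 1280, 1024)]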
2157 - def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, 2158 (files_all, files_opt, files_mc, files_vm)):
2159 """Verifies file checksums collected from all nodes. 2160 2161 @param nodes: List of L{objects.Node} objects 2162 @param master_node_uuid: UUID of master node 2163 @param all_nvinfo: RPC results 2164 2165 """ 2166 # Define functions determining which nodes to consider for a file 2167 files2nodefn = [ 2168 (files_all, None), 2169 (files_mc, lambda node: (node.master_candidate or 2170 node.uuid == master_node_uuid)), 2171 (files_vm, lambda node: node.vm_capable), 2172 ] 2173 2174 # Build mapping from filename to list of nodes which should have the file 2175 nodefiles = {} 2176 for (files, fn) in files2nodefn: 2177 if fn is None: 2178 filenodes = nodes 2179 else: 2180 filenodes = filter(fn, nodes) 2181 nodefiles.update((filename, 2182 frozenset(map(operator.attrgetter("uuid"), filenodes))) 2183 for filename in files) 2184 2185 assert set(nodefiles) == (files_all | files_mc | files_vm) 2186 2187 fileinfo = dict((filename, {}) for filename in nodefiles) 2188 ignore_nodes = set() 2189 2190 for node in nodes: 2191 if node.offline: 2192 ignore_nodes.add(node.uuid) 2193 continue 2194 2195 nresult = all_nvinfo[node.uuid] 2196 2197 if nresult.fail_msg or not nresult.payload: 2198 node_files = None 2199 else: 2200 fingerprints = nresult.payload.get(constants.NV_FILELIST, None) 2201 node_files = dict((vcluster.LocalizeVirtualPath(key), value) 2202 for (key, value) in fingerprints.items()) 2203 del fingerprints 2204 2205 test = not (node_files and isinstance(node_files, dict)) 2206 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name, 2207 "Node did not return file checksum data") 2208 if test: 2209 ignore_nodes.add(node.uuid) 2210 continue 2211 2212 # Build per-checksum mapping from filename to nodes having it 2213 for (filename, checksum) in node_files.items(): 2214 assert filename in nodefiles 2215 fileinfo[filename].setdefault(checksum, set()).add(node.uuid) 2216 2217 for (filename, checksums) in fileinfo.items(): 2218 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2219 2220 # Nodes having the file 2221 with_file = frozenset(node_uuid 2222 for node_uuids in fileinfo[filename].values() 2223 for node_uuid in node_uuids) - ignore_nodes 2224 2225 expected_nodes = nodefiles[filename] - ignore_nodes 2226 2227 # Nodes missing file 2228 missing_file = expected_nodes - with_file 2229 2230 if filename in files_opt: 2231 # All or no nodes 2232 self._ErrorIf(missing_file and missing_file != expected_nodes, 2233 constants.CV_ECLUSTERFILECHECK, None, 2234 "File %s is optional, but it must exist on all or no" 2235 " nodes (not found on %s)", 2236 filename, 2237 utils.CommaJoin( 2238 utils.NiceSort( 2239 map(self.cfg.GetNodeName, missing_file)))) 2240 else: 2241 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2242 "File %s is missing from node(s) %s", filename, 2243 utils.CommaJoin( 2244 utils.NiceSort( 2245 map(self.cfg.GetNodeName, missing_file)))) 2246 2247 # Warn if a node has a file it shouldn't 2248 unexpected = with_file - expected_nodes 2249 self._ErrorIf(unexpected, 2250 constants.CV_ECLUSTERFILECHECK, None, 2251 "File %s should not exist on node(s) %s", 2252 filename, utils.CommaJoin( 2253 utils.NiceSort(map(self.cfg.GetNodeName, unexpected)))) 2254 2255 # See if there are multiple versions of the file 2256 test = len(checksums) > 1 2257 if test: 2258 variants = ["variant %s on %s" % 2259 (idx + 1, 2260 utils.CommaJoin(utils.NiceSort( 2261 map(self.cfg.GetNodeName, node_uuids)))) 2262 for (idx, (checksum, node_uuids)) in 2263 
enumerate(sorted(checksums.items()))] 2264 else: 2265 variants = [] 2266 2267 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None, 2268 "File %s found with %s different checksums (%s)", 2269 filename, len(checksums), "; ".join(variants))
2270
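Per file, the verification reduces to a checksum-to-nodes mapping plus set arithmetic for missing and unexpected copies. A sketch for a single file, with hypothetical node names:

def verify_file(expected_nodes, node_checksums):
  """expected_nodes: set of nodes that should have the file;
  node_checksums: dict node -> checksum, for the nodes that reported it."""
  by_checksum = {}
  for node, csum in node_checksums.items():
    by_checksum.setdefault(csum, set()).add(node)

  with_file = set(node_checksums)
  return {
    "missing": expected_nodes - with_file,
    "unexpected": with_file - expected_nodes,
    "variants": len(by_checksum),
  }


print(verify_file({"n1", "n2", "n3"}, {"n1": "abc", "n2": "def"}))
# e.g. {'missing': {'n3'}, 'unexpected': set(), 'variants': 2}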
2271 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2272 drbd_map):
2273    """Verifies the node DRBD status.
2274 
2275    @type ninfo: L{objects.Node}
2276    @param ninfo: the node to check
2277    @param nresult: the remote results for the node
2278    @param instanceinfo: the dict of instances
2279    @param drbd_helper: the configured DRBD usermode helper
2280    @param drbd_map: the DRBD map as returned by
2281        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2282 
2283    """
2284    if drbd_helper:
2285      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2286      test = (helper_result is None)
2287      self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2288                    "no drbd usermode helper returned")
2289      if helper_result:
2290        status, payload = helper_result
2291        test = not status
2292        self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2293                      "drbd usermode helper check unsuccessful: %s", payload)
2294        test = status and (payload != drbd_helper)
2295        self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2296                      "wrong drbd usermode helper: %s", payload)
2297 
2298    # compute the DRBD minors
2299    node_drbd = {}
2300    for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2301      test = inst_uuid not in instanceinfo
2302      self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2303                    "ghost instance '%s' in temporary DRBD map", inst_uuid)
2304      # ghost instance should not be running, but otherwise we
2305      # don't give double warnings (both ghost instance and
2306      # unallocated minor in use)
2307      if test:
2308        node_drbd[minor] = (inst_uuid, False)
2309      else:
2310        instance = instanceinfo[inst_uuid]
2311        node_drbd[minor] = (inst_uuid, instance.disks_active)
2312 
2313    # and now check them
2314    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2315    test = not isinstance(used_minors, (tuple, list))
2316    self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2317                  "cannot parse drbd status file: %s", str(used_minors))
2318    if test:
2319      # we cannot check drbd status
2320      return
2321 
2322    for minor, (inst_uuid, must_exist) in node_drbd.items():
2323      test = minor not in used_minors and must_exist
2324      self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2325                    "drbd minor %d of instance %s is not active", minor,
2326                    self.cfg.GetInstanceName(inst_uuid))
2327    for minor in used_minors:
2328      test = minor not in node_drbd
2329      self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2330                    "unallocated drbd minor %d is in use", minor)
2331
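The two loops at the end compare the configured view (which minors must be active) against the node's report (which minors are in use), in both directions; a compact sketch with made-up minors and instance names:

def compare_drbd_minors(expected, used):
  """expected: dict minor -> (instance, must_be_active); used: iterable."""
  used = set(used)
  # Minors the configuration requires but the node does not report as in use.
  inactive = sorted((minor, inst) for minor, (inst, must) in expected.items()
                    if must and minor not in used)
  # Minors the node uses although the configuration knows nothing about them.
  unallocated = sorted(used - set(expected))
  return inactive, unallocated


print(compare_drbd_minors({0: ("inst1", True), 1: ("inst2", False)}, [1, 2]))
# ([(0, 'inst1')], [2])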
2332 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2333 """Builds the node OS structures. 2334 2335 @type ninfo: L{objects.Node} 2336 @param ninfo: the node to check 2337 @param nresult: the remote results for the node 2338 @param nimg: the node image object 2339 2340 """ 2341 remote_os = nresult.get(constants.NV_OSLIST, None) 2342 test = (not isinstance(remote_os, list) or 2343 not compat.all(isinstance(v, list) and len(v) == 7 2344 for v in remote_os)) 2345 2346 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, 2347 "node hasn't returned valid OS data") 2348 2349 nimg.os_fail = test 2350 2351 if test: 2352 return 2353 2354 os_dict = {} 2355 2356 for (name, os_path, status, diagnose, 2357 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 2358 2359 if name not in os_dict: 2360 os_dict[name] = [] 2361 2362 # parameters is a list of lists instead of list of tuples due to 2363 # JSON lacking a real tuple type, fix it: 2364 parameters = [tuple(v) for v in parameters] 2365 os_dict[name].append((os_path, status, diagnose, 2366 set(variants), set(parameters), set(api_ver))) 2367 2368 nimg.oslist = os_dict
2369
2370 - def _VerifyNodeOS(self, ninfo, nimg, base):
2371 """Verifies the node OS list. 2372 2373 @type ninfo: L{objects.Node} 2374 @param ninfo: the node to check 2375 @param nimg: the node image object 2376 @param base: the 'template' node we match against (e.g. from the master) 2377 2378 """ 2379 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 2380 2381 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 2382 for os_name, os_data in nimg.oslist.items(): 2383 assert os_data, "Empty OS status for OS %s?!" % os_name 2384 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 2385 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name, 2386 "Invalid OS %s (located at %s): %s", 2387 os_name, f_path, f_diag) 2388 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name, 2389 "OS '%s' has multiple entries" 2390 " (first one shadows the rest): %s", 2391 os_name, utils.CommaJoin([v[0] for v in os_data])) 2392 # comparisons with the 'base' image 2393 test = os_name not in base.oslist 2394 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name, 2395 "Extra OS %s not present on reference node (%s)", 2396 os_name, self.cfg.GetNodeName(base.uuid)) 2397 if test: 2398 continue 2399 assert base.oslist[os_name], "Base node has empty OS status?" 2400 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 2401 if not b_status: 2402 # base OS is invalid, skipping 2403 continue 2404 for kind, a, b in [("API version", f_api, b_api), 2405 ("variants list", f_var, b_var), 2406 ("parameters", beautify_params(f_param), 2407 beautify_params(b_param))]: 2408 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name, 2409 "OS %s for %s differs from reference node %s:" 2410 " [%s] vs. [%s]", kind, os_name, 2411 self.cfg.GetNodeName(base.uuid), 2412 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 2413 2414 # check any missing OSes 2415 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 2416 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name, 2417 "OSes present on reference node %s" 2418 " but missing on this node: %s", 2419 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2420
2421 - def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
2422 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. 2423 2424 @type ninfo: L{objects.Node} 2425 @param ninfo: the node to check 2426 @param nresult: the remote results for the node 2427 @type is_master: bool 2428 @param is_master: Whether node is the master node 2429 2430 """ 2431 cluster = self.cfg.GetClusterInfo() 2432 if (is_master and 2433 (cluster.IsFileStorageEnabled() or 2434 cluster.IsSharedFileStorageEnabled())): 2435 try: 2436 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS] 2437 except KeyError: 2438 # This should never happen 2439 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2440 "Node did not return forbidden file storage paths") 2441 else: 2442 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2443 "Found forbidden file storage paths: %s", 2444 utils.CommaJoin(fspaths)) 2445 else: 2446 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult, 2447 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name, 2448 "Node should not have returned forbidden file storage" 2449 " paths")
2450
2451 - def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template, 2452 verify_key, error_key):
2453 """Verifies (file) storage paths. 2454 2455 @type ninfo: L{objects.Node} 2456 @param ninfo: the node to check 2457 @param nresult: the remote results for the node 2458 @type file_disk_template: string 2459 @param file_disk_template: file-based disk template, whose directory 2460 is supposed to be verified 2461 @type verify_key: string 2462 @param verify_key: key for the verification map of this file 2463 verification step 2464 @param error_key: error key to be added to the verification results 2465 in case something goes wrong in this verification step 2466 2467 """ 2468 assert (file_disk_template in 2469 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE)) 2470 cluster = self.cfg.GetClusterInfo() 2471 if cluster.IsDiskTemplateEnabled(file_disk_template): 2472 self._ErrorIf( 2473 verify_key in nresult, 2474 error_key, ninfo.name, 2475 "The configured %s storage path is unusable: %s" % 2476 (file_disk_template, nresult.get(verify_key)))
2477
2478 - def _VerifyFileStoragePaths(self, ninfo, nresult):
2479 """Verifies (file) storage paths. 2480 2481 @see: C{_VerifyStoragePaths} 2482 2483 """ 2484 self._VerifyStoragePaths( 2485 ninfo, nresult, constants.DT_FILE, 2486 constants.NV_FILE_STORAGE_PATH, 2487 constants.CV_ENODEFILESTORAGEPATHUNUSABLE)
2488
2489 - def _VerifySharedFileStoragePaths(self, ninfo, nresult):
2490 """Verifies (file) storage paths. 2491 2492 @see: C{_VerifyStoragePaths} 2493 2494 """ 2495 self._VerifyStoragePaths( 2496 ninfo, nresult, constants.DT_SHARED_FILE, 2497 constants.NV_SHARED_FILE_STORAGE_PATH, 2498 constants.CV_ENODESHAREDFILESTORAGEPATHUNUSABLE)
2499
2500 - def _VerifyOob(self, ninfo, nresult):
2501 """Verifies out of band functionality of a node. 2502 2503 @type ninfo: L{objects.Node} 2504 @param ninfo: the node to check 2505 @param nresult: the remote results for the node 2506 2507 """ 2508 # We just have to verify the paths on master and/or master candidates 2509 # as the oob helper is invoked on the master 2510 if ((ninfo.master_candidate or ninfo.master_capable) and 2511 constants.NV_OOB_PATHS in nresult): 2512 for path_result in nresult[constants.NV_OOB_PATHS]: 2513 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, 2514 ninfo.name, path_result)
2515
2516 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2517 """Verifies and updates the node volume data. 2518 2519 This function will update a L{NodeImage}'s internal structures 2520 with data from the remote call. 2521 2522 @type ninfo: L{objects.Node} 2523 @param ninfo: the node to check 2524 @param nresult: the remote results for the node 2525 @param nimg: the node image object 2526 @param vg_name: the configured VG name 2527 2528 """ 2529 nimg.lvm_fail = True 2530 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 2531 if vg_name is None: 2532 pass 2533 elif isinstance(lvdata, basestring): 2534 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, 2535 "LVM problem on node: %s", utils.SafeEncode(lvdata)) 2536 elif not isinstance(lvdata, dict): 2537 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name, 2538 "rpc call to node failed (lvlist)") 2539 else: 2540 nimg.volumes = lvdata 2541 nimg.lvm_fail = False
2542
2543 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2544 """Verifies and updates the node instance list. 2545 2546 If the listing was successful, then updates this node's instance 2547 list. Otherwise, it marks the RPC call as failed for the instance 2548 list key. 2549 2550 @type ninfo: L{objects.Node} 2551 @param ninfo: the node to check 2552 @param nresult: the remote results for the node 2553 @param nimg: the node image object 2554 2555 """ 2556 idata = nresult.get(constants.NV_INSTANCELIST, None) 2557 test = not isinstance(idata, list) 2558 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2559 "rpc call to node failed (instancelist): %s", 2560 utils.SafeEncode(str(idata))) 2561 if test: 2562 nimg.hyp_fail = True 2563 else: 2564 nimg.instances = [inst.uuid for (_, inst) in 2565 self.cfg.GetMultiInstanceInfoByName(idata)]
2566
2567 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2568 """Verifies and computes a node information map 2569 2570 @type ninfo: L{objects.Node} 2571 @param ninfo: the node to check 2572 @param nresult: the remote results for the node 2573 @param nimg: the node image object 2574 @param vg_name: the configured VG name 2575 2576 """ 2577 # try to read free memory (from the hypervisor) 2578 hv_info = nresult.get(constants.NV_HVINFO, None) 2579 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 2580 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2581 "rpc call to node failed (hvinfo)") 2582 if not test: 2583 try: 2584 nimg.mfree = int(hv_info["memory_free"]) 2585 except (ValueError, TypeError): 2586 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, 2587 "node returned invalid nodeinfo, check hypervisor") 2588 2589 # FIXME: devise a free space model for file based instances as well 2590 if vg_name is not None: 2591 test = (constants.NV_VGLIST not in nresult or 2592 vg_name not in nresult[constants.NV_VGLIST]) 2593 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, 2594 "node didn't return data for the volume group '%s'" 2595 " - it is either missing or broken", vg_name) 2596 if not test: 2597 try: 2598 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 2599 except (ValueError, TypeError): 2600 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name, 2601 "node returned invalid LVM info, check LVM status")
2602
2603 - def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
2604 """Gets per-disk status information for all instances. 2605 2606 @type node_uuids: list of strings 2607 @param node_uuids: Node UUIDs 2608 @type node_image: dict of (UUID, L{objects.Node}) 2609 @param node_image: Node objects 2610 @type instanceinfo: dict of (UUID, L{objects.Instance}) 2611 @param instanceinfo: Instance objects 2612 @rtype: {instance: {node: [(succes, payload)]}} 2613 @return: a dictionary of per-instance dictionaries with nodes as 2614 keys and disk information as values; the disk information is a 2615 list of tuples (success, payload) 2616 2617 """ 2618 node_disks = {} 2619 node_disks_devonly = {} 2620 diskless_instances = set() 2621 nodisk_instances = set() 2622 diskless = constants.DT_DISKLESS 2623 2624 for nuuid in node_uuids: 2625 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst, 2626 node_image[nuuid].sinst)) 2627 diskless_instances.update(uuid for uuid in node_inst_uuids 2628 if instanceinfo[uuid].disk_template == diskless) 2629 disks = [(inst_uuid, disk) 2630 for inst_uuid in node_inst_uuids 2631 for disk in instanceinfo[inst_uuid].disks] 2632 2633 if not disks: 2634 nodisk_instances.update(uuid for uuid in node_inst_uuids 2635 if instanceinfo[uuid].disk_template != diskless) 2636 # No need to collect data 2637 continue 2638 2639 node_disks[nuuid] = disks 2640 2641 # _AnnotateDiskParams makes already copies of the disks 2642 devonly = [] 2643 for (inst_uuid, dev) in disks: 2644 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev], 2645 self.cfg) 2646 self.cfg.SetDiskID(anno_disk, nuuid) 2647 devonly.append(anno_disk) 2648 2649 node_disks_devonly[nuuid] = devonly 2650 2651 assert len(node_disks) == len(node_disks_devonly) 2652 2653 # Collect data from all nodes with disks 2654 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 2655 node_disks_devonly) 2656 2657 assert len(result) == len(node_disks) 2658 2659 instdisk = {} 2660 2661 for (nuuid, nres) in result.items(): 2662 node = self.cfg.GetNodeInfo(nuuid) 2663 disks = node_disks[node.uuid] 2664 2665 if nres.offline: 2666 # No data from this node 2667 data = len(disks) * [(False, "node offline")] 2668 else: 2669 msg = nres.fail_msg 2670 self._ErrorIf(msg, constants.CV_ENODERPC, node.name, 2671 "while getting disk information: %s", msg) 2672 if msg: 2673 # No data from this node 2674 data = len(disks) * [(False, msg)] 2675 else: 2676 data = [] 2677 for idx, i in enumerate(nres.payload): 2678 if isinstance(i, (tuple, list)) and len(i) == 2: 2679 data.append(i) 2680 else: 2681 logging.warning("Invalid result from node %s, entry %d: %s", 2682 node.name, idx, i) 2683 data.append((False, "Invalid result from the remote node")) 2684 2685 for ((inst_uuid, _), status) in zip(disks, data): 2686 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \ 2687 .append(status) 2688 2689 # Add empty entries for diskless instances. 
2690 for inst_uuid in diskless_instances: 2691 assert inst_uuid not in instdisk 2692 instdisk[inst_uuid] = {} 2693 # ...and disk-full instances that happen to have no disks 2694 for inst_uuid in nodisk_instances: 2695 assert inst_uuid not in instdisk 2696 instdisk[inst_uuid] = {} 2697 2698 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 2699 len(nuuids) <= len(instanceinfo[inst].all_nodes) and 2700 compat.all(isinstance(s, (tuple, list)) and 2701 len(s) == 2 for s in statuses) 2702 for inst, nuuids in instdisk.items() 2703 for nuuid, statuses in nuuids.items()) 2704 if __debug__: 2705 instdisk_keys = set(instdisk) 2706 instanceinfo_keys = set(instanceinfo) 2707 assert instdisk_keys == instanceinfo_keys, \ 2708 ("instdisk keys (%s) do not match instanceinfo keys (%s)" % 2709 (instdisk_keys, instanceinfo_keys)) 2710 2711 return instdisk
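Folding the answers back relies on the per-node request and reply lists being parallel, so zip() pairs each (instance, disk) with its (success, payload) entry. A sketch of that bookkeeping with hypothetical instance and node names:

def fold_disk_status(node_disks, node_results):
  """node_disks: dict node -> [(instance, disk_name)];
  node_results: dict node -> [(success, payload)] in the same order."""
  instdisk = {}
  for node, disks in node_disks.items():
    statuses = node_results.get(node, [(False, "no data")] * len(disks))
    for (instance, _), status in zip(disks, statuses):
      instdisk.setdefault(instance, {}).setdefault(node, []).append(status)
  return instdisk


print(fold_disk_status({"n1": [("inst1", "disk0"), ("inst1", "disk1")]},
                       {"n1": [(True, "ok"), (False, "degraded")]}))
# {'inst1': {'n1': [(True, 'ok'), (False, 'degraded')]}}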
2712 2713 @staticmethod
2714 - def _SshNodeSelector(group_uuid, all_nodes):
2715 """Create endless iterators for all potential SSH check hosts. 2716 2717 """ 2718 nodes = [node for node in all_nodes 2719 if (node.group != group_uuid and 2720 not node.offline)] 2721 keyfunc = operator.attrgetter("group") 2722 2723 return map(itertools.cycle, 2724 [sorted(map(operator.attrgetter("name"), names)) 2725 for _, names in itertools.groupby(sorted(nodes, key=keyfunc), 2726 keyfunc)])
2727 2728 @classmethod
2729 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2730 """Choose which nodes should talk to which other nodes. 2731 2732 We will make nodes contact all nodes in their group, and one node from 2733 every other group. 2734 2735 @warning: This algorithm has a known issue if one node group is much 2736 smaller than others (e.g. just one node). In such a case all other 2737 nodes will talk to the single node. 2738 2739 """ 2740 online_nodes = sorted(node.name for node in group_nodes if not node.offline) 2741 sel = cls._SshNodeSelector(group_uuid, all_nodes) 2742 2743 return (online_nodes, 2744 dict((name, sorted([i.next() for i in sel])) 2745 for name in online_nodes))
2746
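The selection logic sorts foreign nodes by group, turns each group into an endless iterator with itertools.cycle, and then draws one name per foreign group for every local node. A standalone sketch of the selector part, with a simplified node representation:

import itertools
import operator


def ssh_check_targets(own_group, all_nodes):
  """Return one cycling iterator per foreign group.

  all_nodes: list of (name, group, offline) tuples.
  """
  others = sorted((group, name) for name, group, offline in all_nodes
                  if group != own_group and not offline)
  return [itertools.cycle(sorted(name for _, name in members))
          for _, members in itertools.groupby(others,
                                              operator.itemgetter(0))]


nodes = [("a1", "g1", False), ("b1", "g2", False), ("b2", "g2", False)]
sel = ssh_check_targets("g0", nodes)
print([next(i) for i in sel])  # one node from each other group: ['a1', 'b1']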
2747 - def BuildHooksEnv(self):
2748    """Build hooks env.
2749 
2750    Cluster-Verify hooks are run only in the post phase; if they fail, their
2751    output is logged in the verify output and the verification fails.
2752 
2753    """
2754    env = {
2755      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
2756      }
2757 
2758    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2759               for node in self.my_node_info.values())
2760 
2761    return env
2762
2763 - def BuildHooksNodes(self):
2764 """Build hooks nodes. 2765 2766 """ 2767 return ([], list(self.my_node_info.keys()))
2768
2769 - def Exec(self, feedback_fn):
2770 """Verify integrity of the node group, performing various test on nodes. 2771 2772 """ 2773 # This method has too many local variables. pylint: disable=R0914 2774 feedback_fn("* Verifying group '%s'" % self.group_info.name) 2775 2776 if not self.my_node_uuids: 2777 # empty node group 2778 feedback_fn("* Empty node group, skipping verification") 2779 return True 2780 2781 self.bad = False 2782 verbose = self.op.verbose 2783 self._feedback_fn = feedback_fn 2784 2785 vg_name = self.cfg.GetVGName() 2786 drbd_helper = self.cfg.GetDRBDHelper() 2787 cluster = self.cfg.GetClusterInfo() 2788 hypervisors = cluster.enabled_hypervisors 2789 node_data_list = self.my_node_info.values() 2790 2791 i_non_redundant = [] # Non redundant instances 2792 i_non_a_balanced = [] # Non auto-balanced instances 2793 i_offline = 0 # Count of offline instances 2794 n_offline = 0 # Count of offline nodes 2795 n_drained = 0 # Count of nodes being drained 2796 node_vol_should = {} 2797 2798 # FIXME: verify OS list 2799 2800 # File verification 2801 filemap = ComputeAncillaryFiles(cluster, False) 2802 2803 # do local checksums 2804 master_node_uuid = self.master_node = self.cfg.GetMasterNode() 2805 master_ip = self.cfg.GetMasterIP() 2806 2807 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids)) 2808 2809 user_scripts = [] 2810 if self.cfg.GetUseExternalMipScript(): 2811 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) 2812 2813 node_verify_param = { 2814 constants.NV_FILELIST: 2815 map(vcluster.MakeVirtualPath, 2816 utils.UniqueSequence(filename 2817 for files in filemap 2818 for filename in files)), 2819 constants.NV_NODELIST: 2820 self._SelectSshCheckNodes(node_data_list, self.group_uuid, 2821 self.all_node_info.values()), 2822 constants.NV_HYPERVISOR: hypervisors, 2823 constants.NV_HVPARAMS: 2824 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), 2825 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) 2826 for node in node_data_list 2827 if not node.offline], 2828 constants.NV_INSTANCELIST: hypervisors, 2829 constants.NV_VERSION: None, 2830 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 2831 constants.NV_NODESETUP: None, 2832 constants.NV_TIME: None, 2833 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip), 2834 constants.NV_OSLIST: None, 2835 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 2836 constants.NV_USERSCRIPTS: user_scripts, 2837 } 2838 2839 if vg_name is not None: 2840 node_verify_param[constants.NV_VGLIST] = None 2841 node_verify_param[constants.NV_LVLIST] = vg_name 2842 node_verify_param[constants.NV_PVLIST] = [vg_name] 2843 2844 if drbd_helper: 2845 node_verify_param[constants.NV_DRBDVERSION] = None 2846 node_verify_param[constants.NV_DRBDLIST] = None 2847 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 2848 2849 if cluster.IsFileStorageEnabled() or \ 2850 cluster.IsSharedFileStorageEnabled(): 2851 # Load file storage paths only from master node 2852 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 2853 self.cfg.GetMasterNodeName() 2854 if cluster.IsFileStorageEnabled(): 2855 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \ 2856 cluster.file_storage_dir 2857 2858 # bridge checks 2859 # FIXME: this needs to be changed per node-group, not cluster-wide 2860 bridges = set() 2861 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 2862 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 2863 bridges.add(default_nicpp[constants.NIC_LINK]) 2864 for inst_uuid in 
self.my_inst_info.values(): 2865 for nic in inst_uuid.nics: 2866 full_nic = cluster.SimpleFillNIC(nic.nicparams) 2867 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 2868 bridges.add(full_nic[constants.NIC_LINK]) 2869 2870 if bridges: 2871 node_verify_param[constants.NV_BRIDGES] = list(bridges) 2872 2873 # Build our expected cluster state 2874 node_image = dict((node.uuid, self.NodeImage(offline=node.offline, 2875 uuid=node.uuid, 2876 vm_capable=node.vm_capable)) 2877 for node in node_data_list) 2878 2879 # Gather OOB paths 2880 oob_paths = [] 2881 for node in self.all_node_info.values(): 2882 path = SupportsOob(self.cfg, node) 2883 if path and path not in oob_paths: 2884 oob_paths.append(path) 2885 2886 if oob_paths: 2887 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 2888 2889 for inst_uuid in self.my_inst_uuids: 2890 instance = self.my_inst_info[inst_uuid] 2891 if instance.admin_state == constants.ADMINST_OFFLINE: 2892 i_offline += 1 2893 2894 for nuuid in instance.all_nodes: 2895 if nuuid not in node_image: 2896 gnode = self.NodeImage(uuid=nuuid) 2897 gnode.ghost = (nuuid not in self.all_node_info) 2898 node_image[nuuid] = gnode 2899 2900 instance.MapLVsByNode(node_vol_should) 2901 2902 pnode = instance.primary_node 2903 node_image[pnode].pinst.append(instance.uuid) 2904 2905 for snode in instance.secondary_nodes: 2906 nimg = node_image[snode] 2907 nimg.sinst.append(instance.uuid) 2908 if pnode not in nimg.sbp: 2909 nimg.sbp[pnode] = [] 2910 nimg.sbp[pnode].append(instance.uuid) 2911 2912 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, 2913 self.my_node_info.keys()) 2914 # The value of exclusive_storage should be the same across the group, so if 2915 # it's True for at least a node, we act as if it were set for all the nodes 2916 self._exclusive_storage = compat.any(es_flags.values()) 2917 if self._exclusive_storage: 2918 node_verify_param[constants.NV_EXCLUSIVEPVS] = True 2919 2920 # At this point, we have the in-memory data structures complete, 2921 # except for the runtime information, which we'll gather next 2922 2923 # Due to the way our RPC system works, exact response times cannot be 2924 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 2925 # time before and after executing the request, we can at least have a time 2926 # window. 2927 nvinfo_starttime = time.time() 2928 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids, 2929 node_verify_param, 2930 self.cfg.GetClusterName(), 2931 self.cfg.GetClusterInfo().hvparams) 2932 nvinfo_endtime = time.time() 2933 2934 if self.extra_lv_nodes and vg_name is not None: 2935 extra_lv_nvinfo = \ 2936 self.rpc.call_node_verify(self.extra_lv_nodes, 2937 {constants.NV_LVLIST: vg_name}, 2938 self.cfg.GetClusterName(), 2939 self.cfg.GetClusterInfo().hvparams) 2940 else: 2941 extra_lv_nvinfo = {} 2942 2943 all_drbd_map = self.cfg.ComputeDRBDMap() 2944 2945 feedback_fn("* Gathering disk information (%s nodes)" % 2946 len(self.my_node_uuids)) 2947 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image, 2948 self.my_inst_info) 2949 2950 feedback_fn("* Verifying configuration file consistency") 2951 2952 # If not all nodes are being checked, we need to make sure the master node 2953 # and a non-checked vm_capable node are in the list. 
2954 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info) 2955 if absent_node_uuids: 2956 vf_nvinfo = all_nvinfo.copy() 2957 vf_node_info = list(self.my_node_info.values()) 2958 additional_node_uuids = [] 2959 if master_node_uuid not in self.my_node_info: 2960 additional_node_uuids.append(master_node_uuid) 2961 vf_node_info.append(self.all_node_info[master_node_uuid]) 2962 # Add the first vm_capable node we find which is not included, 2963 # excluding the master node (which we already have) 2964 for node_uuid in absent_node_uuids: 2965 nodeinfo = self.all_node_info[node_uuid] 2966 if (nodeinfo.vm_capable and not nodeinfo.offline and 2967 node_uuid != master_node_uuid): 2968 additional_node_uuids.append(node_uuid) 2969 vf_node_info.append(self.all_node_info[node_uuid]) 2970 break 2971 key = constants.NV_FILELIST 2972 vf_nvinfo.update(self.rpc.call_node_verify( 2973 additional_node_uuids, {key: node_verify_param[key]}, 2974 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams)) 2975 else: 2976 vf_nvinfo = all_nvinfo 2977 vf_node_info = self.my_node_info.values() 2978 2979 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap) 2980 2981 feedback_fn("* Verifying node status") 2982 2983 refos_img = None 2984 2985 for node_i in node_data_list: 2986 nimg = node_image[node_i.uuid] 2987 2988 if node_i.offline: 2989 if verbose: 2990 feedback_fn("* Skipping offline node %s" % (node_i.name,)) 2991 n_offline += 1 2992 continue 2993 2994 if node_i.uuid == master_node_uuid: 2995 ntype = "master" 2996 elif node_i.master_candidate: 2997 ntype = "master candidate" 2998 elif node_i.drained: 2999 ntype = "drained" 3000 n_drained += 1 3001 else: 3002 ntype = "regular" 3003 if verbose: 3004 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype)) 3005 3006 msg = all_nvinfo[node_i.uuid].fail_msg 3007 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name, 3008 "while contacting node: %s", msg) 3009 if msg: 3010 nimg.rpc_fail = True 3011 continue 3012 3013 nresult = all_nvinfo[node_i.uuid].payload 3014 3015 nimg.call_ok = self._VerifyNode(node_i, nresult) 3016 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 3017 self._VerifyNodeNetwork(node_i, nresult) 3018 self._VerifyNodeUserScripts(node_i, nresult) 3019 self._VerifyOob(node_i, nresult) 3020 self._VerifyAcceptedFileStoragePaths(node_i, nresult, 3021 node_i.uuid == master_node_uuid) 3022 self._VerifyFileStoragePaths(node_i, nresult) 3023 self._VerifySharedFileStoragePaths(node_i, nresult) 3024 3025 if nimg.vm_capable: 3026 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg) 3027 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper, 3028 all_drbd_map) 3029 3030 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 3031 self._UpdateNodeInstances(node_i, nresult, nimg) 3032 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 3033 self._UpdateNodeOS(node_i, nresult, nimg) 3034 3035 if not nimg.os_fail: 3036 if refos_img is None: 3037 refos_img = nimg 3038 self._VerifyNodeOS(node_i, nimg, refos_img) 3039 self._VerifyNodeBridges(node_i, nresult, bridges) 3040 3041 # Check whether all running instances are primary for the node. (This 3042 # can no longer be done from _VerifyInstance below, since some of the 3043 # wrong instances could be from other node groups.) 
3044 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst) 3045 3046 for inst_uuid in non_primary_inst_uuids: 3047 test = inst_uuid in self.all_inst_info 3048 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, 3049 self.cfg.GetInstanceName(inst_uuid), 3050 "instance should not run on node %s", node_i.name) 3051 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name, 3052 "node is running unknown instance %s", inst_uuid) 3053 3054 self._VerifyGroupDRBDVersion(all_nvinfo) 3055 self._VerifyGroupLVM(node_image, vg_name) 3056 3057 for node_uuid, result in extra_lv_nvinfo.items(): 3058 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload, 3059 node_image[node_uuid], vg_name) 3060 3061 feedback_fn("* Verifying instance status") 3062 for inst_uuid in self.my_inst_uuids: 3063 instance = self.my_inst_info[inst_uuid] 3064 if verbose: 3065 feedback_fn("* Verifying instance %s" % instance.name) 3066 self._VerifyInstance(instance, node_image, instdisk[inst_uuid]) 3067 3068 # If the instance is non-redundant we cannot survive losing its primary 3069 # node, so we are not N+1 compliant. 3070 if instance.disk_template not in constants.DTS_MIRRORED: 3071 i_non_redundant.append(instance) 3072 3073 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]: 3074 i_non_a_balanced.append(instance) 3075 3076 feedback_fn("* Verifying orphan volumes") 3077 reserved = utils.FieldSet(*cluster.reserved_lvs) 3078 3079 # We will get spurious "unknown volume" warnings if any node of this group 3080 # is secondary for an instance whose primary is in another group. To avoid 3081 # them, we find these instances and add their volumes to node_vol_should. 3082 for instance in self.all_inst_info.values(): 3083 for secondary in instance.secondary_nodes: 3084 if (secondary in self.my_node_info 3085 and instance.name not in self.my_inst_info): 3086 instance.MapLVsByNode(node_vol_should) 3087 break 3088 3089 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 3090 3091 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 3092 feedback_fn("* Verifying N+1 Memory redundancy") 3093 self._VerifyNPlusOneMemory(node_image, self.my_inst_info) 3094 3095 feedback_fn("* Other Notes") 3096 if i_non_redundant: 3097 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 3098 % len(i_non_redundant)) 3099 3100 if i_non_a_balanced: 3101 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 3102 % len(i_non_a_balanced)) 3103 3104 if i_offline: 3105 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline) 3106 3107 if n_offline: 3108 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 3109 3110 if n_drained: 3111 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 3112 3113 return not self.bad
3114
3115 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3116    """Analyze the post-hooks' result.
3117 
3118    This method analyses the hook result, handles it, and sends some
3119    nicely-formatted feedback back to the user.
3120 
3121    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3122        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3123    @param hooks_results: the results of the multi-node hooks rpc call
3124    @param feedback_fn: function used to send feedback back to the caller
3125    @param lu_result: previous Exec result
3126    @return: the new Exec result, based on the previous result
3127        and hook results
3128 
3129    """
3130    # We only really run POST phase hooks, only for non-empty groups,
3131    # and are only interested in their results
3132    if not self.my_node_uuids:
3133      # empty node group
3134      pass
3135    elif phase == constants.HOOKS_PHASE_POST:
3136      # Used to change hooks' output to proper indentation
3137      feedback_fn("* Hooks Results")
3138      assert hooks_results, "invalid result from hooks"
3139 
3140      for node_name in hooks_results:
3141        res = hooks_results[node_name]
3142        msg = res.fail_msg
3143        test = msg and not res.offline
3144        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3145                      "Communication failure in hooks execution: %s", msg)
3146        if res.offline or msg:
3147          # No need to investigate payload if node is offline or gave
3148          # an error.
3149          continue
3150        for script, hkr, output in res.payload:
3151          test = hkr == constants.HKR_FAIL
3152          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3153                        "Script %s failed, output:", script)
3154          if test:
3155            output = self._HOOKS_INDENT_RE.sub(" ", output)
3156            feedback_fn("%s" % output)
3157            lu_result = False
3158 
3159    return lu_result
3160
3161 3162 -class LUClusterVerifyDisks(NoHooksLU):
3163 """Verifies the cluster disks status. 3164 3165 """ 3166 REQ_BGL = False 3167
3168 - def ExpandNames(self):
3169 self.share_locks = ShareAll() 3170 self.needed_locks = { 3171 locking.LEVEL_NODEGROUP: locking.ALL_SET, 3172 }
3173
3174 - def Exec(self, feedback_fn):
3175 group_names = self.owned_locks(locking.LEVEL_NODEGROUP) 3176 3177 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group 3178 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)] 3179 for group in group_names])
3180