
Source Code for Module ganeti.cmdlib.cluster

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30   
  31  """Logical units dealing with the cluster.""" 
  32   
  33  import copy 
  34  import itertools 
  35  import logging 
  36  import operator 
  37  import os 
  38  import re 
  39  import time 
  40   
  41  from ganeti import compat 
  42  from ganeti import constants 
  43  from ganeti import errors 
  44  from ganeti import hypervisor 
  45  from ganeti import locking 
  46  from ganeti import masterd 
  47  from ganeti import netutils 
  48  from ganeti import objects 
  49  from ganeti import opcodes 
  50  from ganeti import pathutils 
  51  from ganeti import query 
  52  import ganeti.rpc.node as rpc 
  53  from ganeti import runtime 
  54  from ganeti import ssh 
  55  from ganeti import uidpool 
  56  from ganeti import utils 
  57  from ganeti import vcluster 
  58   
  59  from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \ 
  60    ResultWithJobs 
  61  from ganeti.cmdlib.common import ShareAll, RunPostHook, \ 
  62    ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \ 
  63    GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \ 
  64    GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \ 
  65    CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \ 
  66    ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \ 
  67    CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \ 
  68    CheckDiskAccessModeConsistency, GetClientCertDigest, \ 
  69    AddInstanceCommunicationNetworkOp, ConnectInstanceCommunicationNetworkOp, \ 
  70    CheckImageValidity, EnsureKvmdOnNodes
  71   
  72  import ganeti.masterd.instance 
73 74 75 -class LUClusterRenewCrypto(NoHooksLU):
76 """Renew the cluster's crypto tokens. 77 78 Note that most of this operation is done in gnt_cluster.py, this LU only 79 takes care of the renewal of the client SSL certificates. 80 81 """ 82 _MAX_NUM_RETRIES = 3 83
84 - def Exec(self, feedback_fn):
85 master_uuid = self.cfg.GetMasterNode() 86 cluster = self.cfg.GetClusterInfo() 87 88 logging.debug("Renewing the master's SSL node certificate." 89 " Master's UUID: %s.", master_uuid) 90 91 # mapping node UUIDs to client certificate digests 92 digest_map = {} 93 master_digest = utils.GetCertificateDigest( 94 cert_filename=pathutils.NODED_CLIENT_CERT_FILE) 95 digest_map[master_uuid] = master_digest 96 logging.debug("Adding the master's SSL node certificate digest to the" 97 " configuration. Master's UUID: %s, Digest: %s", 98 master_uuid, master_digest) 99 100 node_errors = {} 101 nodes = self.cfg.GetAllNodesInfo() 102 logging.debug("Renewing non-master nodes' node certificates.") 103 for (node_uuid, node_info) in nodes.items(): 104 if node_info.offline: 105 feedback_fn("* Skipping offline node %s" % node_info.name) 106 logging.debug("Skipping offline node %s (UUID: %s).", 107 node_info.name, node_uuid) 108 continue 109 if node_uuid != master_uuid: 110 logging.debug("Adding certificate digest of node '%s'.", node_uuid) 111 last_exception = None 112 for i in range(self._MAX_NUM_RETRIES): 113 try: 114 if node_info.master_candidate: 115 node_digest = GetClientCertDigest(self, node_uuid) 116 digest_map[node_uuid] = node_digest 117 logging.debug("Added the node's certificate to candidate" 118 " certificate list. Current list: %s.", 119 str(cluster.candidate_certs)) 120 break 121 except errors.OpExecError as e: 122 last_exception = e 123 logging.error("Could not fetch a non-master node's SSL node" 124 " certificate at attempt no. %s. The node's UUID" 125 " is %s, and the error was: %s.", 126 str(i), node_uuid, e) 127 else: 128 if last_exception: 129 node_errors[node_uuid] = last_exception 130 131 if node_errors: 132 msg = ("Some nodes' SSL client certificates could not be fetched." 133 " Please make sure those nodes are reachable and rerun" 134 " the operation. The affected nodes and their errors are:\n") 135 for uuid, e in node_errors.items(): 136 msg += "Node %s: %s\n" % (uuid, e) 137 feedback_fn(msg) 138 139 self.cfg.SetCandidateCerts(digest_map)
140
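
# Illustrative sketch, not part of the original module: LUClusterRenewCrypto
# above uses Python's for/else idiom to retry fetching a node's client
# certificate digest up to _MAX_NUM_RETRIES times, keeping only the last
# failure.  The same pattern in isolation, with fetch_fn standing in for the
# GetClientCertDigest call (a hypothetical stand-in, not a real helper):
def _RetryFetchDigest(fetch_fn, node_uuid, retries=3):
  last_exception = None
  for _ in range(retries):
    try:
      return (fetch_fn(node_uuid), None)  # success: stop retrying
    except errors.OpExecError as err:
      last_exception = err
  # only reached when every attempt raised
  return (None, last_exception)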

class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.cq = ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    assert len(result) == 1

    return result[0]

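
# Illustrative usage sketch, not part of the original module: this LU serves
# the OP_CLUSTER_CONFIG_QUERY opcode, so a client would submit something like
# the following (assumes a reachable master daemon via ganeti.cli):
def _ExampleClusterConfigQuery():
  from ganeti import cli  # not imported by this module; illustration only
  op = opcodes.OpClusterConfigQuery(output_fields=["cluster_name",
                                                   "master_node"])
  # Returns one row with one value per requested field.
  return cli.SubmitOpCode(op, cl=cli.GetClient())
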
194 195 -class LUClusterDestroy(LogicalUnit):
196 """Logical unit for destroying the cluster. 197 198 """ 199 HPATH = "cluster-destroy" 200 HTYPE = constants.HTYPE_CLUSTER 201 202 # Read by the job queue to detect when the cluster is gone and job files will 203 # never be available. 204 # FIXME: This variable should be removed together with the Python job queue. 205 clusterHasBeenDestroyed = False 206
207 - def BuildHooksEnv(self):
208 """Build hooks env. 209 210 """ 211 return { 212 "OP_TARGET": self.cfg.GetClusterName(), 213 }
214
215 - def BuildHooksNodes(self):
216 """Build hooks nodes. 217 218 """ 219 return ([], [])
220
221 - def CheckPrereq(self):
222 """Check prerequisites. 223 224 This checks whether the cluster is empty. 225 226 Any errors are signaled by raising errors.OpPrereqError. 227 228 """ 229 master = self.cfg.GetMasterNode() 230 231 nodelist = self.cfg.GetNodeList() 232 if len(nodelist) != 1 or nodelist[0] != master: 233 raise errors.OpPrereqError("There are still %d node(s) in" 234 " this cluster." % (len(nodelist) - 1), 235 errors.ECODE_INVAL) 236 instancelist = self.cfg.GetInstanceList() 237 if instancelist: 238 raise errors.OpPrereqError("There are still %d instance(s) in" 239 " this cluster." % len(instancelist), 240 errors.ECODE_INVAL)
241
242 - def Exec(self, feedback_fn):
243 """Destroys the cluster. 244 245 """ 246 master_params = self.cfg.GetMasterNetworkParameters() 247 248 # Run post hooks on master node before it's removed 249 RunPostHook(self, self.cfg.GetNodeName(master_params.uuid)) 250 251 ems = self.cfg.GetUseExternalMipScript() 252 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 253 master_params, ems) 254 result.Warn("Error disabling the master IP address", self.LogWarning) 255 256 self.wconfd.Client().PrepareClusterDestruction(self.wconfdcontext) 257 258 # signal to the job queue that the cluster is gone 259 LUClusterDestroy.clusterHasBeenDestroyed = True 260 261 return master_params.uuid
262

class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def CheckArguments(self):
    self.master_uuid = self.cfg.GetMasterNode()
    self.master_ndparams = self.cfg.GetNdParams(self.cfg.GetMasterNodeInfo())

    # TODO: When Issue 584 is solved, and None is properly parsed when used
    # as a default value, ndparams.get(.., None) can be changed to
    # ndparams[..] to access the values directly

    # OpenvSwitch: Warn user if link is missing
    if (self.master_ndparams[constants.ND_OVS] and not
        self.master_ndparams.get(constants.ND_OVS_LINK, None)):
      self.LogInfo("No physical interface for OpenvSwitch was given."
                   " OpenvSwitch will not have an outside connection. This"
                   " might not be what you want.")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Create and configure Open vSwitch

    """
    if self.master_ndparams[constants.ND_OVS]:
      result = self.rpc.call_node_configure_ovs(
                 self.master_uuid,
                 self.master_ndparams[constants.ND_OVS_NAME],
                 self.master_ndparams.get(constants.ND_OVS_LINK, None))
      result.Raise("Could not successfully configure Open vSwitch")

    return True

313 314 -class ClusterQuery(QueryBase):
315 FIELDS = query.CLUSTER_FIELDS 316 317 #: Do not sort (there is only one item) 318 SORT_FIELD = None 319
320 - def ExpandNames(self, lu):
321 lu.needed_locks = {} 322 323 # The following variables interact with _QueryBase._GetNames 324 self.wanted = locking.ALL_SET 325 self.do_locking = self.use_locking 326 327 if self.do_locking: 328 raise errors.OpPrereqError("Can not use locking for cluster queries", 329 errors.ECODE_INVAL)
330
331 - def DeclareLocks(self, lu, level):
332 pass
333
334 - def _GetQueryData(self, lu):
335 """Computes the list of nodes and their attributes. 336 337 """ 338 if query.CQ_CONFIG in self.requested_data: 339 cluster = lu.cfg.GetClusterInfo() 340 nodes = lu.cfg.GetAllNodesInfo() 341 else: 342 cluster = NotImplemented 343 nodes = NotImplemented 344 345 if query.CQ_QUEUE_DRAINED in self.requested_data: 346 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) 347 else: 348 drain_flag = NotImplemented 349 350 if query.CQ_WATCHER_PAUSE in self.requested_data: 351 master_node_uuid = lu.cfg.GetMasterNode() 352 353 result = lu.rpc.call_get_watcher_pause(master_node_uuid) 354 result.Raise("Can't retrieve watcher pause from master node '%s'" % 355 lu.cfg.GetMasterNodeName()) 356 357 watcher_pause = result.payload 358 else: 359 watcher_pause = NotImplemented 360 361 return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)
362
363 364 -class LUClusterQuery(NoHooksLU):
365 """Query cluster configuration. 366 367 """ 368 REQ_BGL = False 369
370 - def ExpandNames(self):
371 self.needed_locks = {}
372
373 - def Exec(self, feedback_fn):
374 """Return cluster config. 375 376 """ 377 cluster = self.cfg.GetClusterInfo() 378 os_hvp = {} 379 380 # Filter just for enabled hypervisors 381 for os_name, hv_dict in cluster.os_hvp.items(): 382 os_hvp[os_name] = {} 383 for hv_name, hv_params in hv_dict.items(): 384 if hv_name in cluster.enabled_hypervisors: 385 os_hvp[os_name][hv_name] = hv_params 386 387 # Convert ip_family to ip_version 388 primary_ip_version = constants.IP4_VERSION 389 if cluster.primary_ip_family == netutils.IP6Address.family: 390 primary_ip_version = constants.IP6_VERSION 391 392 result = { 393 "software_version": constants.RELEASE_VERSION, 394 "protocol_version": constants.PROTOCOL_VERSION, 395 "config_version": constants.CONFIG_VERSION, 396 "os_api_version": max(constants.OS_API_VERSIONS), 397 "export_version": constants.EXPORT_VERSION, 398 "vcs_version": constants.VCS_VERSION, 399 "architecture": runtime.GetArchInfo(), 400 "name": cluster.cluster_name, 401 "master": self.cfg.GetMasterNodeName(), 402 "default_hypervisor": cluster.primary_hypervisor, 403 "enabled_hypervisors": cluster.enabled_hypervisors, 404 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 405 for hypervisor_name in cluster.enabled_hypervisors]), 406 "os_hvp": os_hvp, 407 "beparams": cluster.beparams, 408 "osparams": cluster.osparams, 409 "ipolicy": cluster.ipolicy, 410 "nicparams": cluster.nicparams, 411 "ndparams": cluster.ndparams, 412 "diskparams": cluster.diskparams, 413 "candidate_pool_size": cluster.candidate_pool_size, 414 "max_running_jobs": cluster.max_running_jobs, 415 "max_tracked_jobs": cluster.max_tracked_jobs, 416 "mac_prefix": cluster.mac_prefix, 417 "master_netdev": cluster.master_netdev, 418 "master_netmask": cluster.master_netmask, 419 "use_external_mip_script": cluster.use_external_mip_script, 420 "volume_group_name": cluster.volume_group_name, 421 "drbd_usermode_helper": cluster.drbd_usermode_helper, 422 "file_storage_dir": cluster.file_storage_dir, 423 "shared_file_storage_dir": cluster.shared_file_storage_dir, 424 "maintain_node_health": cluster.maintain_node_health, 425 "ctime": cluster.ctime, 426 "mtime": cluster.mtime, 427 "uuid": cluster.uuid, 428 "tags": list(cluster.GetTags()), 429 "uid_pool": cluster.uid_pool, 430 "default_iallocator": cluster.default_iallocator, 431 "default_iallocator_params": cluster.default_iallocator_params, 432 "reserved_lvs": cluster.reserved_lvs, 433 "primary_ip_version": primary_ip_version, 434 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 435 "hidden_os": cluster.hidden_os, 436 "blacklisted_os": cluster.blacklisted_os, 437 "enabled_disk_templates": cluster.enabled_disk_templates, 438 "install_image": cluster.install_image, 439 "instance_communication_network": cluster.instance_communication_network, 440 "compression_tools": cluster.compression_tools, 441 "enabled_user_shutdown": cluster.enabled_user_shutdown, 442 } 443 444 return result
445
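
# Illustrative usage sketch, not part of the original module: the dictionary
# returned by LUClusterQuery.Exec above is what "gnt-cluster info" renders.
# Picking a few of its keys on the client side might look like this:
def _ExampleShowClusterSummary():
  from ganeti import cli  # not imported by this module; illustration only
  info = cli.SubmitOpCode(opcodes.OpClusterQuery(), cl=cli.GetClient())
  return "%s (master: %s, hypervisors: %s)" % (
    info["name"], info["master"], ", ".join(info["enabled_hypervisors"]))
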

class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
    }
    self.share_locks = ShareAll()

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    RedistributeAncillaryFiles(self)

469 470 -class LUClusterRename(LogicalUnit):
471 """Rename the cluster. 472 473 """ 474 HPATH = "cluster-rename" 475 HTYPE = constants.HTYPE_CLUSTER 476
477 - def BuildHooksEnv(self):
478 """Build hooks env. 479 480 """ 481 return { 482 "OP_TARGET": self.cfg.GetClusterName(), 483 "NEW_NAME": self.op.name, 484 }
485
486 - def BuildHooksNodes(self):
487 """Build hooks nodes. 488 489 """ 490 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
491
492 - def CheckPrereq(self):
493 """Verify that the passed name is a valid one. 494 495 """ 496 hostname = netutils.GetHostname(name=self.op.name, 497 family=self.cfg.GetPrimaryIPFamily()) 498 499 new_name = hostname.name 500 self.ip = new_ip = hostname.ip 501 old_name = self.cfg.GetClusterName() 502 old_ip = self.cfg.GetMasterIP() 503 if new_name == old_name and new_ip == old_ip: 504 raise errors.OpPrereqError("Neither the name nor the IP address of the" 505 " cluster has changed", 506 errors.ECODE_INVAL) 507 if new_ip != old_ip: 508 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 509 raise errors.OpPrereqError("The given cluster IP address (%s) is" 510 " reachable on the network" % 511 new_ip, errors.ECODE_NOTUNIQUE) 512 513 self.op.name = new_name
514
515 - def Exec(self, feedback_fn):
516 """Rename the cluster. 517 518 """ 519 clustername = self.op.name 520 new_ip = self.ip 521 522 # shutdown the master IP 523 master_params = self.cfg.GetMasterNetworkParameters() 524 ems = self.cfg.GetUseExternalMipScript() 525 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 526 master_params, ems) 527 result.Raise("Could not disable the master role") 528 529 try: 530 cluster = self.cfg.GetClusterInfo() 531 cluster.cluster_name = clustername 532 cluster.master_ip = new_ip 533 self.cfg.Update(cluster, feedback_fn) 534 535 # update the known hosts file 536 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) 537 node_list = self.cfg.GetOnlineNodeList() 538 try: 539 node_list.remove(master_params.uuid) 540 except ValueError: 541 pass 542 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) 543 finally: 544 master_params.ip = new_ip 545 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 546 master_params, ems) 547 result.Warn("Could not re-enable the master role on the master," 548 " please restart manually", self.LogWarning) 549 550 return clustername
551
552 553 -class LUClusterRepairDiskSizes(NoHooksLU):
554 """Verifies the cluster disks sizes. 555 556 """ 557 REQ_BGL = False 558
559 - def ExpandNames(self):
560 if self.op.instances: 561 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances) 562 # Not getting the node allocation lock as only a specific set of 563 # instances (and their nodes) is going to be acquired 564 self.needed_locks = { 565 locking.LEVEL_NODE_RES: [], 566 locking.LEVEL_INSTANCE: self.wanted_names, 567 } 568 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 569 else: 570 self.wanted_names = None 571 self.needed_locks = { 572 locking.LEVEL_NODE_RES: locking.ALL_SET, 573 locking.LEVEL_INSTANCE: locking.ALL_SET, 574 575 # This opcode is acquires the node locks for all instances 576 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 577 } 578 579 self.share_locks = { 580 locking.LEVEL_NODE_RES: 1, 581 locking.LEVEL_INSTANCE: 0, 582 locking.LEVEL_NODE_ALLOC: 1, 583 }
584
585 - def DeclareLocks(self, level):
586 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: 587 self._LockInstancesNodes(primary_only=True, level=level)
588
589 - def CheckPrereq(self):
590 """Check prerequisites. 591 592 This only checks the optional instance list against the existing names. 593 594 """ 595 if self.wanted_names is None: 596 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) 597 598 self.wanted_instances = \ 599 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
600
  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.DT_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

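  # Worked example (illustration, not part of the original module): for a
  # DRBD8 disk of size 1024 whose first child (the data volume) reports only
  # 1000, the method above grows the child to 1024, recurses into that child,
  # and returns True so the caller knows the configuration must be updated;
  # for any non-DRBD8 disk it returns False without touching anything.
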
624 - def Exec(self, feedback_fn):
625 """Verify the size of cluster disks. 626 627 """ 628 # TODO: check child disks too 629 # TODO: check differences in size between primary/secondary nodes 630 per_node_disks = {} 631 for instance in self.wanted_instances: 632 pnode = instance.primary_node 633 if pnode not in per_node_disks: 634 per_node_disks[pnode] = [] 635 for idx, disk in enumerate(self.cfg.GetInstanceDisks(instance.uuid)): 636 per_node_disks[pnode].append((instance, idx, disk)) 637 638 assert not (frozenset(per_node_disks.keys()) - 639 frozenset(self.owned_locks(locking.LEVEL_NODE_RES))), \ 640 "Not owning correct locks" 641 assert not self.owned_locks(locking.LEVEL_NODE) 642 643 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, 644 per_node_disks.keys()) 645 646 changed = [] 647 for node_uuid, dskl in per_node_disks.items(): 648 if not dskl: 649 # no disks on the node 650 continue 651 652 newl = [([v[2].Copy()], v[0]) for v in dskl] 653 node_name = self.cfg.GetNodeName(node_uuid) 654 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl) 655 if result.fail_msg: 656 self.LogWarning("Failure in blockdev_getdimensions call to node" 657 " %s, ignoring", node_name) 658 continue 659 if len(result.payload) != len(dskl): 660 logging.warning("Invalid result from node %s: len(dksl)=%d," 661 " result.payload=%s", node_name, len(dskl), 662 result.payload) 663 self.LogWarning("Invalid result from node %s, ignoring node results", 664 node_name) 665 continue 666 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload): 667 if dimensions is None: 668 self.LogWarning("Disk %d of instance %s did not return size" 669 " information, ignoring", idx, instance.name) 670 continue 671 if not isinstance(dimensions, (tuple, list)): 672 self.LogWarning("Disk %d of instance %s did not return valid" 673 " dimension information, ignoring", idx, 674 instance.name) 675 continue 676 (size, spindles) = dimensions 677 if not isinstance(size, (int, long)): 678 self.LogWarning("Disk %d of instance %s did not return valid" 679 " size information, ignoring", idx, instance.name) 680 continue 681 size = size >> 20 682 if size != disk.size: 683 self.LogInfo("Disk %d of instance %s has mismatched size," 684 " correcting: recorded %d, actual %d", idx, 685 instance.name, disk.size, size) 686 disk.size = size 687 self.cfg.Update(disk, feedback_fn) 688 changed.append((instance.name, idx, "size", size)) 689 if es_flags[node_uuid]: 690 if spindles is None: 691 self.LogWarning("Disk %d of instance %s did not return valid" 692 " spindles information, ignoring", idx, 693 instance.name) 694 elif disk.spindles is None or disk.spindles != spindles: 695 self.LogInfo("Disk %d of instance %s has mismatched spindles," 696 " correcting: recorded %s, actual %s", 697 idx, instance.name, disk.spindles, spindles) 698 disk.spindles = spindles 699 self.cfg.Update(disk, feedback_fn) 700 changed.append((instance.name, idx, "spindles", disk.spindles)) 701 if self._EnsureChildSizes(disk): 702 self.cfg.Update(disk, feedback_fn) 703 changed.append((instance.name, idx, "size", disk.size)) 704 return changed
705

def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: cluster configuration
  @type netmask: int
  @param netmask: netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)

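# Illustrative example, not part of the original module: the netmask here is a
# CIDR prefix length, validated against the cluster's primary IP family.  On
# an IPv4 cluster a value such as 24 passes, while 33 raises
# errors.OpPrereqError; on an IPv6 cluster the upper bound is 128 instead.
#
#   _ValidateNetmask(cfg, 24)   # fine for IPv4
#   _ValidateNetmask(cfg, 33)   # raises OpPrereqError for IPv4
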

def CheckFileBasedStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates,
    file_disk_template):
  """Checks whether the given file-based storage directory is acceptable.

  Note: This function is public, because it is also used in bootstrap.py.

  @type logging_warn_fn: function
  @param logging_warn_fn: function which accepts a string and logs it
  @type file_storage_dir: string
  @param file_storage_dir: the directory to be used for file-based instances
  @type enabled_disk_templates: list of string
  @param enabled_disk_templates: the list of enabled disk templates
  @type file_disk_template: string
  @param file_disk_template: the file-based disk template for which the
      path should be checked

  """
  assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
      constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
  ))

  file_storage_enabled = file_disk_template in enabled_disk_templates
  if file_storage_dir is not None:
    if file_storage_dir == "":
      if file_storage_enabled:
        raise errors.OpPrereqError(
            "Unsetting the '%s' storage directory while having '%s' storage"
            " enabled is not permitted." %
            (file_disk_template, file_disk_template))
    else:
      if not file_storage_enabled:
        logging_warn_fn(
            "Specified a %s storage directory, although %s storage is not"
            " enabled." % (file_disk_template, file_disk_template))
  else:
    raise errors.ProgrammerError("Received %s storage dir with value"
                                 " 'None'." % file_disk_template)


def CheckFileStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates):
  """Checks whether the given file storage directory is acceptable.

  @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}

  """
  CheckFileBasedStoragePathVsEnabledDiskTemplates(
      logging_warn_fn, file_storage_dir, enabled_disk_templates,
      constants.DT_FILE)


def CheckSharedFileStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates):
  """Checks whether the given shared file storage directory is acceptable.

  @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}

  """
  CheckFileBasedStoragePathVsEnabledDiskTemplates(
      logging_warn_fn, file_storage_dir, enabled_disk_templates,
      constants.DT_SHARED_FILE)


def CheckGlusterStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates):
  """Checks whether the given gluster storage directory is acceptable.

  @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}

  """
  CheckFileBasedStoragePathVsEnabledDiskTemplates(
      logging_warn_fn, file_storage_dir, enabled_disk_templates,
      constants.DT_GLUSTER)

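# Illustrative usage sketch, not part of the original module: how callers such
# as bootstrap or LUClusterSetParams use these checks.  With only 'file' (and
# 'plain') storage enabled, naming a shared-file directory merely warns, while
# unsetting the file storage directory raises an error.  The paths are
# examples only:
def _ExampleStorageDirChecks(warn_fn):
  enabled = [constants.DT_FILE, constants.DT_PLAIN]
  # accepted: the template is enabled and a directory is given
  CheckFileStoragePathVsEnabledDiskTemplates(
    warn_fn, "/srv/ganeti/file-storage", enabled)
  # warns only: shared-file storage is not enabled, but a directory was given
  CheckSharedFileStoragePathVsEnabledDiskTemplates(
    warn_fn, "/srv/ganeti/shared-file-storage", enabled)
  # raises OpPrereqError: unsetting the directory while 'file' is enabled
  CheckFileStoragePathVsEnabledDiskTemplates(warn_fn, "", enabled)
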

def CheckCompressionTools(tools):
  """Check whether the provided compression tools look like executables.

  @type tools: list of string
  @param tools: The tools provided as opcode input

  """
  regex = re.compile('^[-_a-zA-Z0-9]+$')
  illegal_tools = [t for t in tools if not regex.match(t)]

  if illegal_tools:
    raise errors.OpPrereqError(
        "The tools '%s' contain illegal characters: only alphanumeric values,"
        " dashes, and underscores are allowed" % ", ".join(illegal_tools)
    )

  if constants.IEC_GZIP not in tools:
    raise errors.OpPrereqError("For compatibility reasons, the %s utility must"
                               " be present among the compression tools" %
                               constants.IEC_GZIP)

  if constants.IEC_NONE in tools:
    raise errors.OpPrereqError("%s is a reserved value used for no compression,"
                               " and cannot be used as the name of a tool" %
                               constants.IEC_NONE)

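# Illustrative examples, not part of the original module: the check above only
# accepts names made of letters, digits, dashes and underscores, requires the
# gzip tool to remain available, and rejects the reserved "none" entry.
#
#   CheckCompressionTools(["gzip", "lzop"])      # accepted
#   CheckCompressionTools(["lzop"])              # raises: gzip missing
#   CheckCompressionTools(["gzip", "my tool"])   # raises: space not allowed
#   CheckCompressionTools(["gzip", "none"])      # raises: reserved name
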
830 831 -class LUClusterSetParams(LogicalUnit):
832 """Change the parameters of the cluster. 833 834 """ 835 HPATH = "cluster-modify" 836 HTYPE = constants.HTYPE_CLUSTER 837 REQ_BGL = False 838
839 - def CheckArguments(self):
840 """Check parameters 841 842 """ 843 if self.op.uid_pool: 844 uidpool.CheckUidPool(self.op.uid_pool) 845 846 if self.op.add_uids: 847 uidpool.CheckUidPool(self.op.add_uids) 848 849 if self.op.remove_uids: 850 uidpool.CheckUidPool(self.op.remove_uids) 851 852 if self.op.mac_prefix: 853 self.op.mac_prefix = \ 854 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix) 855 856 if self.op.master_netmask is not None: 857 _ValidateNetmask(self.cfg, self.op.master_netmask) 858 859 if self.op.diskparams: 860 for dt_params in self.op.diskparams.values(): 861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 862 try: 863 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) 864 CheckDiskAccessModeValidity(self.op.diskparams) 865 except errors.OpPrereqError, err: 866 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 867 errors.ECODE_INVAL) 868 869 if self.op.install_image is not None: 870 CheckImageValidity(self.op.install_image, 871 "Install image must be an absolute path or a URL")
872
873 - def ExpandNames(self):
874 # FIXME: in the future maybe other cluster params won't require checking on 875 # all nodes to be modified. 876 # FIXME: This opcode changes cluster-wide settings. Is acquiring all 877 # resource locks the right thing, shouldn't it be the BGL instead? 878 self.needed_locks = { 879 locking.LEVEL_NODE: locking.ALL_SET, 880 locking.LEVEL_INSTANCE: locking.ALL_SET, 881 locking.LEVEL_NODEGROUP: locking.ALL_SET, 882 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 883 } 884 self.share_locks = ShareAll()
885
886 - def BuildHooksEnv(self):
887 """Build hooks env. 888 889 """ 890 return { 891 "OP_TARGET": self.cfg.GetClusterName(), 892 "NEW_VG_NAME": self.op.vg_name, 893 }
894
895 - def BuildHooksNodes(self):
896 """Build hooks nodes. 897 898 """ 899 mn = self.cfg.GetMasterNode() 900 return ([mn], [mn])
901
902 - def _CheckVgName(self, node_uuids, enabled_disk_templates, 903 new_enabled_disk_templates):
904 """Check the consistency of the vg name on all nodes and in case it gets 905 unset whether there are instances still using it. 906 907 """ 908 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates) 909 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates, 910 new_enabled_disk_templates) 911 current_vg_name = self.cfg.GetVGName() 912 913 if self.op.vg_name == '': 914 if lvm_is_enabled: 915 raise errors.OpPrereqError("Cannot unset volume group if lvm-based" 916 " disk templates are or get enabled.") 917 918 if self.op.vg_name is None: 919 if current_vg_name is None and lvm_is_enabled: 920 raise errors.OpPrereqError("Please specify a volume group when" 921 " enabling lvm-based disk-templates.") 922 923 if self.op.vg_name is not None and not self.op.vg_name: 924 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN): 925 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 926 " instances exist", errors.ECODE_INVAL) 927 928 if (self.op.vg_name is not None and lvm_is_enabled) or \ 929 (self.cfg.GetVGName() is not None and lvm_gets_enabled): 930 self._CheckVgNameOnNodes(node_uuids)
931
932 - def _CheckVgNameOnNodes(self, node_uuids):
933 """Check the status of the volume group on each node. 934 935 """ 936 vglist = self.rpc.call_vg_list(node_uuids) 937 for node_uuid in node_uuids: 938 msg = vglist[node_uuid].fail_msg 939 if msg: 940 # ignoring down node 941 self.LogWarning("Error while gathering data on node %s" 942 " (ignoring node): %s", 943 self.cfg.GetNodeName(node_uuid), msg) 944 continue 945 vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload, 946 self.op.vg_name, 947 constants.MIN_VG_SIZE) 948 if vgstatus: 949 raise errors.OpPrereqError("Error on node '%s': %s" % 950 (self.cfg.GetNodeName(node_uuid), vgstatus), 951 errors.ECODE_ENVIRON)

  @staticmethod
  def _GetDiskTemplateSetsInner(op_enabled_disk_templates,
                                old_enabled_disk_templates):
    """Computes three sets of disk templates.

    @see: C{_GetDiskTemplateSets} for more details.

    """
    enabled_disk_templates = None
    new_enabled_disk_templates = []
    disabled_disk_templates = []
    if op_enabled_disk_templates:
      enabled_disk_templates = op_enabled_disk_templates
      new_enabled_disk_templates = \
        list(set(enabled_disk_templates)
             - set(old_enabled_disk_templates))
      disabled_disk_templates = \
        list(set(old_enabled_disk_templates)
             - set(enabled_disk_templates))
    else:
      enabled_disk_templates = old_enabled_disk_templates
    return (enabled_disk_templates, new_enabled_disk_templates,
            disabled_disk_templates)

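  # Worked example (illustration, not part of the original module): with the
  # cluster currently at ["drbd", "plain"] and an opcode requesting
  # ["plain", "rbd"], the helper above returns
  #   enabled disk templates:       ["plain", "rbd"]
  #   newly enabled disk templates: ["rbd"]
  #   disabled disk templates:      ["drbd"]
  # An opcode that leaves enabled_disk_templates unset gets the old list back
  # together with two empty lists.
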
977 - def _GetDiskTemplateSets(self, cluster):
978 """Computes three sets of disk templates. 979 980 The three sets are: 981 - disk templates that will be enabled after this operation (no matter if 982 they were enabled before or not) 983 - disk templates that get enabled by this operation (thus haven't been 984 enabled before.) 985 - disk templates that get disabled by this operation 986 987 """ 988 return self._GetDiskTemplateSetsInner(self.op.enabled_disk_templates, 989 cluster.enabled_disk_templates)
990
991 - def _CheckIpolicy(self, cluster, enabled_disk_templates):
992 """Checks the ipolicy. 993 994 @type cluster: C{objects.Cluster} 995 @param cluster: the cluster's configuration 996 @type enabled_disk_templates: list of string 997 @param enabled_disk_templates: list of (possibly newly) enabled disk 998 templates 999 1000 """ 1001 # FIXME: write unit tests for this 1002 if self.op.ipolicy: 1003 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, 1004 group_policy=False) 1005 1006 CheckIpolicyVsDiskTemplates(self.new_ipolicy, 1007 enabled_disk_templates) 1008 1009 all_instances = self.cfg.GetAllInstancesInfo().values() 1010 violations = set() 1011 for group in self.cfg.GetAllNodeGroupsInfo().values(): 1012 instances = frozenset( 1013 [inst for inst in all_instances 1014 if compat.any(nuuid in group.members 1015 for nuuid in self.cfg.GetInstanceNodes(inst.uuid))]) 1016 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) 1017 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group) 1018 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances, 1019 self.cfg) 1020 if new: 1021 violations.update(new) 1022 1023 if violations: 1024 self.LogWarning("After the ipolicy change the following instances" 1025 " violate them: %s", 1026 utils.CommaJoin(utils.NiceSort(violations))) 1027 else: 1028 CheckIpolicyVsDiskTemplates(cluster.ipolicy, 1029 enabled_disk_templates)
1030
1031 - def _CheckDrbdHelperOnNodes(self, drbd_helper, node_uuids):
1032 """Checks whether the set DRBD helper actually exists on the nodes. 1033 1034 @type drbd_helper: string 1035 @param drbd_helper: path of the drbd usermode helper binary 1036 @type node_uuids: list of strings 1037 @param node_uuids: list of node UUIDs to check for the helper 1038 1039 """ 1040 # checks given drbd helper on all nodes 1041 helpers = self.rpc.call_drbd_helper(node_uuids) 1042 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids): 1043 if ninfo.offline: 1044 self.LogInfo("Not checking drbd helper on offline node %s", 1045 ninfo.name) 1046 continue 1047 msg = helpers[ninfo.uuid].fail_msg 1048 if msg: 1049 raise errors.OpPrereqError("Error checking drbd helper on node" 1050 " '%s': %s" % (ninfo.name, msg), 1051 errors.ECODE_ENVIRON) 1052 node_helper = helpers[ninfo.uuid].payload 1053 if node_helper != drbd_helper: 1054 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 1055 (ninfo.name, node_helper), 1056 errors.ECODE_ENVIRON)
1057
1058 - def _CheckDrbdHelper(self, node_uuids, drbd_enabled, drbd_gets_enabled):
1059 """Check the DRBD usermode helper. 1060 1061 @type node_uuids: list of strings 1062 @param node_uuids: a list of nodes' UUIDs 1063 @type drbd_enabled: boolean 1064 @param drbd_enabled: whether DRBD will be enabled after this operation 1065 (no matter if it was disabled before or not) 1066 @type drbd_gets_enabled: boolen 1067 @param drbd_gets_enabled: true if DRBD was disabled before this 1068 operation, but will be enabled afterwards 1069 1070 """ 1071 if self.op.drbd_helper == '': 1072 if drbd_enabled: 1073 raise errors.OpPrereqError("Cannot disable drbd helper while" 1074 " DRBD is enabled.") 1075 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8): 1076 raise errors.OpPrereqError("Cannot disable drbd helper while" 1077 " drbd-based instances exist", 1078 errors.ECODE_INVAL) 1079 1080 else: 1081 if self.op.drbd_helper is not None and drbd_enabled: 1082 self._CheckDrbdHelperOnNodes(self.op.drbd_helper, node_uuids) 1083 else: 1084 if drbd_gets_enabled: 1085 current_drbd_helper = self.cfg.GetClusterInfo().drbd_usermode_helper 1086 if current_drbd_helper is not None: 1087 self._CheckDrbdHelperOnNodes(current_drbd_helper, node_uuids) 1088 else: 1089 raise errors.OpPrereqError("Cannot enable DRBD without a" 1090 " DRBD usermode helper set.")
1091
1092 - def _CheckInstancesOfDisabledDiskTemplates( 1093 self, disabled_disk_templates):
1094 """Check whether we try to disable a disk template that is in use. 1095 1096 @type disabled_disk_templates: list of string 1097 @param disabled_disk_templates: list of disk templates that are going to 1098 be disabled by this operation 1099 1100 """ 1101 for disk_template in disabled_disk_templates: 1102 if self.cfg.HasAnyDiskOfType(disk_template): 1103 raise errors.OpPrereqError( 1104 "Cannot disable disk template '%s', because there is at least one" 1105 " instance using it." % disk_template)
1106 1107 @staticmethod
1108 - def _CheckInstanceCommunicationNetwork(network, warning_fn):
1109 """Check whether an existing network is configured for instance 1110 communication. 1111 1112 Checks whether an existing network is configured with the 1113 parameters that are advisable for instance communication, and 1114 otherwise issue security warnings. 1115 1116 @type network: L{ganeti.objects.Network} 1117 @param network: L{ganeti.objects.Network} object whose 1118 configuration is being checked 1119 @type warning_fn: function 1120 @param warning_fn: function used to print warnings 1121 @rtype: None 1122 @return: None 1123 1124 """ 1125 def _MaybeWarn(err, val, default): 1126 if val != default: 1127 warning_fn("Supplied instance communication network '%s' %s '%s'," 1128 " this might pose a security risk (default is '%s').", 1129 network.name, err, val, default)
1130 1131 if network.network is None: 1132 raise errors.OpPrereqError("Supplied instance communication network '%s'" 1133 " must have an IPv4 network address.", 1134 network.name) 1135 1136 _MaybeWarn("has an IPv4 gateway", network.gateway, None) 1137 _MaybeWarn("has a non-standard IPv4 network address", network.network, 1138 constants.INSTANCE_COMMUNICATION_NETWORK4) 1139 _MaybeWarn("has an IPv6 gateway", network.gateway6, None) 1140 _MaybeWarn("has a non-standard IPv6 network address", network.network6, 1141 constants.INSTANCE_COMMUNICATION_NETWORK6) 1142 _MaybeWarn("has a non-standard MAC prefix", network.mac_prefix, 1143 constants.INSTANCE_COMMUNICATION_MAC_PREFIX)
1144
1145 - def CheckPrereq(self):
1146 """Check prerequisites. 1147 1148 This checks whether the given params don't conflict and 1149 if the given volume group is valid. 1150 1151 """ 1152 node_uuids = self.owned_locks(locking.LEVEL_NODE) 1153 self.cluster = cluster = self.cfg.GetClusterInfo() 1154 1155 vm_capable_node_uuids = [node.uuid 1156 for node in self.cfg.GetAllNodesInfo().values() 1157 if node.uuid in node_uuids and node.vm_capable] 1158 1159 (enabled_disk_templates, new_enabled_disk_templates, 1160 disabled_disk_templates) = self._GetDiskTemplateSets(cluster) 1161 self._CheckInstancesOfDisabledDiskTemplates(disabled_disk_templates) 1162 1163 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates, 1164 new_enabled_disk_templates) 1165 1166 if self.op.file_storage_dir is not None: 1167 CheckFileStoragePathVsEnabledDiskTemplates( 1168 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates) 1169 1170 if self.op.shared_file_storage_dir is not None: 1171 CheckSharedFileStoragePathVsEnabledDiskTemplates( 1172 self.LogWarning, self.op.shared_file_storage_dir, 1173 enabled_disk_templates) 1174 1175 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates 1176 drbd_gets_enabled = constants.DT_DRBD8 in new_enabled_disk_templates 1177 self._CheckDrbdHelper(vm_capable_node_uuids, 1178 drbd_enabled, drbd_gets_enabled) 1179 1180 # validate params changes 1181 if self.op.beparams: 1182 objects.UpgradeBeParams(self.op.beparams) 1183 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 1184 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 1185 1186 if self.op.ndparams: 1187 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 1188 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 1189 1190 # TODO: we need a more general way to handle resetting 1191 # cluster-level parameters to default values 1192 if self.new_ndparams["oob_program"] == "": 1193 self.new_ndparams["oob_program"] = \ 1194 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 1195 1196 if self.op.hv_state: 1197 new_hv_state = MergeAndVerifyHvState(self.op.hv_state, 1198 self.cluster.hv_state_static) 1199 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) 1200 for hv, values in new_hv_state.items()) 1201 1202 if self.op.disk_state: 1203 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state, 1204 self.cluster.disk_state_static) 1205 self.new_disk_state = \ 1206 dict((storage, dict((name, cluster.SimpleFillDiskState(values)) 1207 for name, values in svalues.items())) 1208 for storage, svalues in new_disk_state.items()) 1209 1210 self._CheckIpolicy(cluster, enabled_disk_templates) 1211 1212 if self.op.nicparams: 1213 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 1214 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 1215 objects.NIC.CheckParameterSyntax(self.new_nicparams) 1216 nic_errors = [] 1217 1218 # check all instances for consistency 1219 for instance in self.cfg.GetAllInstancesInfo().values(): 1220 for nic_idx, nic in enumerate(instance.nics): 1221 params_copy = copy.deepcopy(nic.nicparams) 1222 params_filled = objects.FillDict(self.new_nicparams, params_copy) 1223 1224 # check parameter syntax 1225 try: 1226 objects.NIC.CheckParameterSyntax(params_filled) 1227 except errors.ConfigurationError, err: 1228 nic_errors.append("Instance %s, nic/%d: %s" % 1229 (instance.name, nic_idx, err)) 1230 1231 # if we're moving instances to routed, check that they have an ip 1232 target_mode = params_filled[constants.NIC_MODE] 1233 if target_mode == 
constants.NIC_MODE_ROUTED and not nic.ip: 1234 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 1235 " address" % (instance.name, nic_idx)) 1236 if nic_errors: 1237 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 1238 "\n".join(nic_errors), errors.ECODE_INVAL) 1239 1240 # hypervisor list/parameters 1241 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 1242 if self.op.hvparams: 1243 for hv_name, hv_dict in self.op.hvparams.items(): 1244 if hv_name not in self.new_hvparams: 1245 self.new_hvparams[hv_name] = hv_dict 1246 else: 1247 self.new_hvparams[hv_name].update(hv_dict) 1248 1249 # disk template parameters 1250 self.new_diskparams = objects.FillDict(cluster.diskparams, {}) 1251 if self.op.diskparams: 1252 for dt_name, dt_params in self.op.diskparams.items(): 1253 if dt_name not in self.new_diskparams: 1254 self.new_diskparams[dt_name] = dt_params 1255 else: 1256 self.new_diskparams[dt_name].update(dt_params) 1257 CheckDiskAccessModeConsistency(self.op.diskparams, self.cfg) 1258 1259 # os hypervisor parameters 1260 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 1261 if self.op.os_hvp: 1262 for os_name, hvs in self.op.os_hvp.items(): 1263 if os_name not in self.new_os_hvp: 1264 self.new_os_hvp[os_name] = hvs 1265 else: 1266 for hv_name, hv_dict in hvs.items(): 1267 if hv_dict is None: 1268 # Delete if it exists 1269 self.new_os_hvp[os_name].pop(hv_name, None) 1270 elif hv_name not in self.new_os_hvp[os_name]: 1271 self.new_os_hvp[os_name][hv_name] = hv_dict 1272 else: 1273 self.new_os_hvp[os_name][hv_name].update(hv_dict) 1274 1275 # os parameters 1276 self._BuildOSParams(cluster) 1277 1278 # changes to the hypervisor list 1279 if self.op.enabled_hypervisors is not None: 1280 for hv in self.op.enabled_hypervisors: 1281 # if the hypervisor doesn't already exist in the cluster 1282 # hvparams, we initialize it to empty, and then (in both 1283 # cases) we make sure to fill the defaults, as we might not 1284 # have a complete defaults list if the hypervisor wasn't 1285 # enabled before 1286 if hv not in new_hvp: 1287 new_hvp[hv] = {} 1288 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 1289 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 1290 1291 if self.op.hvparams or self.op.enabled_hypervisors is not None: 1292 # either the enabled list has changed, or the parameters have, validate 1293 for hv_name, hv_params in self.new_hvparams.items(): 1294 if ((self.op.hvparams and hv_name in self.op.hvparams) or 1295 (self.op.enabled_hypervisors and 1296 hv_name in self.op.enabled_hypervisors)): 1297 # either this is a new hypervisor, or its parameters have changed 1298 hv_class = hypervisor.GetHypervisorClass(hv_name) 1299 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1300 hv_class.CheckParameterSyntax(hv_params) 1301 CheckHVParams(self, node_uuids, hv_name, hv_params) 1302 1303 self._CheckDiskTemplateConsistency() 1304 1305 if self.op.os_hvp: 1306 # no need to check any newly-enabled hypervisors, since the 1307 # defaults have already been checked in the above code-block 1308 for os_name, os_hvp in self.new_os_hvp.items(): 1309 for hv_name, hv_params in os_hvp.items(): 1310 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1311 # we need to fill in the new os_hvp on top of the actual hv_p 1312 cluster_defaults = self.new_hvparams.get(hv_name, {}) 1313 new_osp = objects.FillDict(cluster_defaults, hv_params) 1314 hv_class = hypervisor.GetHypervisorClass(hv_name) 1315 
hv_class.CheckParameterSyntax(new_osp) 1316 CheckHVParams(self, node_uuids, hv_name, new_osp) 1317 1318 if self.op.default_iallocator: 1319 alloc_script = utils.FindFile(self.op.default_iallocator, 1320 constants.IALLOCATOR_SEARCH_PATH, 1321 os.path.isfile) 1322 if alloc_script is None: 1323 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 1324 " specified" % self.op.default_iallocator, 1325 errors.ECODE_INVAL) 1326 1327 if self.op.instance_communication_network: 1328 network_name = self.op.instance_communication_network 1329 1330 try: 1331 network_uuid = self.cfg.LookupNetwork(network_name) 1332 except errors.OpPrereqError: 1333 network_uuid = None 1334 1335 if network_uuid is not None: 1336 network = self.cfg.GetNetwork(network_uuid) 1337 self._CheckInstanceCommunicationNetwork(network, self.LogWarning) 1338 1339 if self.op.compression_tools: 1340 CheckCompressionTools(self.op.compression_tools)
1341
1342 - def _BuildOSParams(self, cluster):
1343 "Calculate the new OS parameters for this operation." 1344 1345 def _GetNewParams(source, new_params): 1346 "Wrapper around GetUpdatedParams." 1347 if new_params is None: 1348 return source 1349 result = objects.FillDict(source, {}) # deep copy of source 1350 for os_name in new_params: 1351 result[os_name] = GetUpdatedParams(result.get(os_name, {}), 1352 new_params[os_name], 1353 use_none=True) 1354 if not result[os_name]: 1355 del result[os_name] # we removed all parameters 1356 return result
1357 1358 self.new_osp = _GetNewParams(cluster.osparams, 1359 self.op.osparams) 1360 self.new_osp_private = _GetNewParams(cluster.osparams_private_cluster, 1361 self.op.osparams_private_cluster) 1362 1363 # Remove os validity check 1364 changed_oses = (set(self.new_osp.keys()) | set(self.new_osp_private.keys())) 1365 for os_name in changed_oses: 1366 os_params = cluster.SimpleFillOS( 1367 os_name, 1368 self.new_osp.get(os_name, {}), 1369 os_params_private=self.new_osp_private.get(os_name, {}) 1370 ) 1371 # check the parameter validity (remote check) 1372 CheckOSParams(self, False, [self.cfg.GetMasterNode()], 1373 os_name, os_params, False) 1374
1375 - def _CheckDiskTemplateConsistency(self):
1376 """Check whether the disk templates that are going to be disabled 1377 are still in use by some instances. 1378 1379 """ 1380 if self.op.enabled_disk_templates: 1381 cluster = self.cfg.GetClusterInfo() 1382 instances = self.cfg.GetAllInstancesInfo() 1383 1384 disk_templates_to_remove = set(cluster.enabled_disk_templates) \ 1385 - set(self.op.enabled_disk_templates) 1386 for instance in instances.itervalues(): 1387 if instance.disk_template in disk_templates_to_remove: 1388 raise errors.OpPrereqError("Cannot disable disk template '%s'," 1389 " because instance '%s' is using it." % 1390 (instance.disk_template, instance.name))
1391
1392 - def _SetVgName(self, feedback_fn):
1393 """Determines and sets the new volume group name. 1394 1395 """ 1396 if self.op.vg_name is not None: 1397 new_volume = self.op.vg_name 1398 if not new_volume: 1399 new_volume = None 1400 if new_volume != self.cfg.GetVGName(): 1401 self.cfg.SetVGName(new_volume) 1402 else: 1403 feedback_fn("Cluster LVM configuration already in desired" 1404 " state, not changing")
1405
1406 - def _SetFileStorageDir(self, feedback_fn):
1407 """Set the file storage directory. 1408 1409 """ 1410 if self.op.file_storage_dir is not None: 1411 if self.cluster.file_storage_dir == self.op.file_storage_dir: 1412 feedback_fn("Global file storage dir already set to value '%s'" 1413 % self.cluster.file_storage_dir) 1414 else: 1415 self.cluster.file_storage_dir = self.op.file_storage_dir
1416
1417 - def _SetSharedFileStorageDir(self, feedback_fn):
1418 """Set the shared file storage directory. 1419 1420 """ 1421 if self.op.shared_file_storage_dir is not None: 1422 if self.cluster.shared_file_storage_dir == \ 1423 self.op.shared_file_storage_dir: 1424 feedback_fn("Global shared file storage dir already set to value '%s'" 1425 % self.cluster.shared_file_storage_dir) 1426 else: 1427 self.cluster.shared_file_storage_dir = self.op.shared_file_storage_dir
1428
1429 - def _SetDrbdHelper(self, feedback_fn):
1430 """Set the DRBD usermode helper. 1431 1432 """ 1433 if self.op.drbd_helper is not None: 1434 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates: 1435 feedback_fn("Note that you specified a drbd user helper, but did not" 1436 " enable the drbd disk template.") 1437 new_helper = self.op.drbd_helper 1438 if not new_helper: 1439 new_helper = None 1440 if new_helper != self.cfg.GetDRBDHelper(): 1441 self.cfg.SetDRBDHelper(new_helper) 1442 else: 1443 feedback_fn("Cluster DRBD helper already in desired state," 1444 " not changing")
1445 1446 @staticmethod
1447 - def _EnsureInstanceCommunicationNetwork(cfg, network_name):
1448 """Ensure that the instance communication network exists and is 1449 connected to all groups. 1450 1451 The instance communication network given by L{network_name} it is 1452 created, if necessary, via the opcode 'OpNetworkAdd'. Also, the 1453 instance communication network is connected to all existing node 1454 groups, if necessary, via the opcode 'OpNetworkConnect'. 1455 1456 @type cfg: L{config.ConfigWriter} 1457 @param cfg: cluster configuration 1458 1459 @type network_name: string 1460 @param network_name: instance communication network name 1461 1462 @rtype: L{ganeti.cmdlib.ResultWithJobs} or L{None} 1463 @return: L{ganeti.cmdlib.ResultWithJobs} if the instance 1464 communication needs to be created or it needs to be 1465 connected to a group, otherwise L{None} 1466 1467 """ 1468 jobs = [] 1469 1470 try: 1471 network_uuid = cfg.LookupNetwork(network_name) 1472 network_exists = True 1473 except errors.OpPrereqError: 1474 network_exists = False 1475 1476 if not network_exists: 1477 jobs.append(AddInstanceCommunicationNetworkOp(network_name)) 1478 1479 for group_uuid in cfg.GetNodeGroupList(): 1480 group = cfg.GetNodeGroup(group_uuid) 1481 1482 if network_exists: 1483 network_connected = network_uuid in group.networks 1484 else: 1485 # The network was created asynchronously by the previous 1486 # opcode and, therefore, we don't have access to its 1487 # network_uuid. As a result, we assume that the network is 1488 # not connected to any group yet. 1489 network_connected = False 1490 1491 if not network_connected: 1492 op = ConnectInstanceCommunicationNetworkOp(group_uuid, network_name) 1493 jobs.append(op) 1494 1495 if jobs: 1496 return ResultWithJobs([jobs]) 1497 else: 1498 return None
1499 1500 @staticmethod
1501 - def _ModifyInstanceCommunicationNetwork(cfg, network_name, feedback_fn):
1502 """Update the instance communication network stored in the cluster 1503 configuration. 1504 1505 Compares the user-supplied instance communication network against 1506 the one stored in the Ganeti cluster configuration. If there is a 1507 change, the instance communication network may be possibly created 1508 and connected to all groups (see 1509 L{LUClusterSetParams._EnsureInstanceCommunicationNetwork}). 1510 1511 @type cfg: L{config.ConfigWriter} 1512 @param cfg: cluster configuration 1513 1514 @type network_name: string 1515 @param network_name: instance communication network name 1516 1517 @type feedback_fn: function 1518 @param feedback_fn: see L{ganeti.cmdlist.base.LogicalUnit} 1519 1520 @rtype: L{LUClusterSetParams._EnsureInstanceCommunicationNetwork} or L{None} 1521 @return: see L{LUClusterSetParams._EnsureInstanceCommunicationNetwork} 1522 1523 """ 1524 config_network_name = cfg.GetInstanceCommunicationNetwork() 1525 1526 if network_name == config_network_name: 1527 feedback_fn("Instance communication network already is '%s', nothing to" 1528 " do." % network_name) 1529 else: 1530 try: 1531 cfg.LookupNetwork(config_network_name) 1532 feedback_fn("Previous instance communication network '%s'" 1533 " should be removed manually." % config_network_name) 1534 except errors.OpPrereqError: 1535 pass 1536 1537 if network_name: 1538 feedback_fn("Changing instance communication network to '%s', only new" 1539 " instances will be affected." 1540 % network_name) 1541 else: 1542 feedback_fn("Disabling instance communication network, only new" 1543 " instances will be affected.") 1544 1545 cfg.SetInstanceCommunicationNetwork(network_name) 1546 1547 if network_name: 1548 return LUClusterSetParams._EnsureInstanceCommunicationNetwork( 1549 cfg, 1550 network_name) 1551 else: 1552 return None
1553
1554 - def Exec(self, feedback_fn):
1555 """Change the parameters of the cluster. 1556 1557 """ 1558 # re-read the fresh configuration 1559 self.cluster = self.cfg.GetClusterInfo() 1560 if self.op.enabled_disk_templates: 1561 self.cluster.enabled_disk_templates = \ 1562 list(self.op.enabled_disk_templates) 1563 # save the changes 1564 self.cfg.Update(self.cluster, feedback_fn) 1565 1566 self._SetVgName(feedback_fn) 1567 1568 self.cluster = self.cfg.GetClusterInfo() 1569 self._SetFileStorageDir(feedback_fn) 1570 self._SetSharedFileStorageDir(feedback_fn) 1571 self.cfg.Update(self.cluster, feedback_fn) 1572 self._SetDrbdHelper(feedback_fn) 1573 1574 # re-read the fresh configuration again 1575 self.cluster = self.cfg.GetClusterInfo() 1576 1577 ensure_kvmd = False 1578 1579 if self.op.hvparams: 1580 self.cluster.hvparams = self.new_hvparams 1581 if self.op.os_hvp: 1582 self.cluster.os_hvp = self.new_os_hvp 1583 if self.op.enabled_hypervisors is not None: 1584 self.cluster.hvparams = self.new_hvparams 1585 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 1586 ensure_kvmd = True 1587 if self.op.beparams: 1588 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 1589 if self.op.nicparams: 1590 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 1591 if self.op.ipolicy: 1592 self.cluster.ipolicy = self.new_ipolicy 1593 if self.op.osparams: 1594 self.cluster.osparams = self.new_osp 1595 if self.op.osparams_private_cluster: 1596 self.cluster.osparams_private_cluster = self.new_osp_private 1597 if self.op.ndparams: 1598 self.cluster.ndparams = self.new_ndparams 1599 if self.op.diskparams: 1600 self.cluster.diskparams = self.new_diskparams 1601 if self.op.hv_state: 1602 self.cluster.hv_state_static = self.new_hv_state 1603 if self.op.disk_state: 1604 self.cluster.disk_state_static = self.new_disk_state 1605 1606 if self.op.candidate_pool_size is not None: 1607 self.cluster.candidate_pool_size = self.op.candidate_pool_size 1608 # we need to update the pool size here, otherwise the save will fail 1609 AdjustCandidatePool(self, []) 1610 1611 if self.op.max_running_jobs is not None: 1612 self.cluster.max_running_jobs = self.op.max_running_jobs 1613 1614 if self.op.max_tracked_jobs is not None: 1615 self.cluster.max_tracked_jobs = self.op.max_tracked_jobs 1616 1617 if self.op.maintain_node_health is not None: 1618 if self.op.maintain_node_health and not constants.ENABLE_CONFD: 1619 feedback_fn("Note: CONFD was disabled at build time, node health" 1620 " maintenance is not useful (still enabling it)") 1621 self.cluster.maintain_node_health = self.op.maintain_node_health 1622 1623 if self.op.modify_etc_hosts is not None: 1624 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts 1625 1626 if self.op.prealloc_wipe_disks is not None: 1627 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 1628 1629 if self.op.add_uids is not None: 1630 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 1631 1632 if self.op.remove_uids is not None: 1633 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 1634 1635 if self.op.uid_pool is not None: 1636 self.cluster.uid_pool = self.op.uid_pool 1637 1638 if self.op.default_iallocator is not None: 1639 self.cluster.default_iallocator = self.op.default_iallocator 1640 1641 if self.op.default_iallocator_params is not None: 1642 self.cluster.default_iallocator_params = self.op.default_iallocator_params 1643 1644 if self.op.reserved_lvs is not None: 1645 self.cluster.reserved_lvs = self.op.reserved_lvs 1646 1647 if 
self.op.use_external_mip_script is not None: 1648 self.cluster.use_external_mip_script = self.op.use_external_mip_script 1649 1650 if self.op.enabled_user_shutdown is not None and \ 1651 self.cluster.enabled_user_shutdown != self.op.enabled_user_shutdown: 1652 self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown 1653 ensure_kvmd = True 1654 1655 def helper_os(aname, mods, desc): 1656 desc += " OS list" 1657 lst = getattr(self.cluster, aname) 1658 for key, val in mods: 1659 if key == constants.DDM_ADD: 1660 if val in lst: 1661 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 1662 else: 1663 lst.append(val) 1664 elif key == constants.DDM_REMOVE: 1665 if val in lst: 1666 lst.remove(val) 1667 else: 1668 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 1669 else: 1670 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1671 1672 if self.op.hidden_os: 1673 helper_os("hidden_os", self.op.hidden_os, "hidden") 1674 1675 if self.op.blacklisted_os: 1676 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 1677 1678 if self.op.mac_prefix: 1679 self.cluster.mac_prefix = self.op.mac_prefix 1680 1681 if self.op.master_netdev: 1682 master_params = self.cfg.GetMasterNetworkParameters() 1683 ems = self.cfg.GetUseExternalMipScript() 1684 feedback_fn("Shutting down master ip on the current netdev (%s)" % 1685 self.cluster.master_netdev) 1686 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid, 1687 master_params, ems) 1688 if not self.op.force: 1689 result.Raise("Could not disable the master ip") 1690 else: 1691 if result.fail_msg: 1692 msg = ("Could not disable the master ip (continuing anyway): %s" % 1693 result.fail_msg) 1694 feedback_fn(msg) 1695 feedback_fn("Changing master_netdev from %s to %s" % 1696 (master_params.netdev, self.op.master_netdev)) 1697 self.cluster.master_netdev = self.op.master_netdev 1698 1699 if self.op.master_netmask: 1700 master_params = self.cfg.GetMasterNetworkParameters() 1701 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) 1702 result = self.rpc.call_node_change_master_netmask( 1703 master_params.uuid, master_params.netmask, 1704 self.op.master_netmask, master_params.ip, 1705 master_params.netdev) 1706 result.Warn("Could not change the master IP netmask", feedback_fn) 1707 self.cluster.master_netmask = self.op.master_netmask 1708 1709 if self.op.install_image: 1710 self.cluster.install_image = self.op.install_image 1711 1712 if self.op.zeroing_image is not None: 1713 CheckImageValidity(self.op.zeroing_image, 1714 "Zeroing image must be an absolute path or a URL") 1715 self.cluster.zeroing_image = self.op.zeroing_image 1716 1717 self.cfg.Update(self.cluster, feedback_fn) 1718 1719 if self.op.master_netdev: 1720 master_params = self.cfg.GetMasterNetworkParameters() 1721 feedback_fn("Starting the master ip on the new master netdev (%s)" % 1722 self.op.master_netdev) 1723 ems = self.cfg.GetUseExternalMipScript() 1724 result = self.rpc.call_node_activate_master_ip(master_params.uuid, 1725 master_params, ems) 1726 result.Warn("Could not re-enable the master ip on the master," 1727 " please restart manually", self.LogWarning) 1728 1729 # Even though 'self.op.enabled_user_shutdown' is being tested 1730 # above, the RPCs can only be done after 'self.cfg.Update' because 1731 # this will update the cluster object and sync 'Ssconf', and kvmd 1732 # uses 'Ssconf'. 1733 if ensure_kvmd: 1734 EnsureKvmdOnNodes(self, feedback_fn) 1735 1736 if self.op.compression_tools is not None: 1737 self.cfg.SetCompressionTools(self.op.compression_tools) 1738 1739 network_name = self.op.instance_communication_network 1740 if network_name is not None: 1741 return self._ModifyInstanceCommunicationNetwork(self.cfg, 1742 network_name, feedback_fn) 1743 else: 1744 return None 1745
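# Illustrative sketch, not part of the original module: the hidden_os and
# blacklisted_os opcode parameters consumed by helper_os above are lists of
# (action, os_name) pairs, where the action is constants.DDM_ADD or
# constants.DDM_REMOVE.  Plain strings stand in for the ganeti constants in
# this standalone rendering of the same add/remove logic.

def _apply_os_mods_sketch(os_list, mods):
  """Apply add/remove modifications to a copy of an OS name list."""
  result = list(os_list)
  for action, name in mods:
    if action == "add" and name not in result:
      result.append(name)
    elif action == "remove" and name in result:
      result.remove(name)
  return result

# _apply_os_mods_sketch(["lenny-image"],
#                       [("add", "squeeze-image"), ("remove", "lenny-image")])
# returns ["squeeze-image"]; duplicates and unknown names are silently
# ignored here, whereas helper_os reports them through feedback_fn.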
1746 1747 -class LUClusterVerify(NoHooksLU):
1748 """Submits all jobs necessary to verify the cluster. 1749 1750 """ 1751 REQ_BGL = False 1752
1753 - def ExpandNames(self):
1754 self.needed_locks = {}
1755
1756 - def Exec(self, feedback_fn):
1757 jobs = [] 1758 1759 if self.op.group_name: 1760 groups = [self.op.group_name] 1761 depends_fn = lambda: None 1762 else: 1763 groups = self.cfg.GetNodeGroupList() 1764 1765 # Verify global configuration 1766 jobs.append([ 1767 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), 1768 ]) 1769 1770 # Always depend on global verification 1771 depends_fn = lambda: [(-len(jobs), [])] 1772 1773 jobs.extend( 1774 [opcodes.OpClusterVerifyGroup(group_name=group, 1775 ignore_errors=self.op.ignore_errors, 1776 depends=depends_fn())] 1777 for group in groups) 1778 1779 # Fix up all parameters 1780 for op in itertools.chain(*jobs): # pylint: disable=W0142 1781 op.debug_simulate_errors = self.op.debug_simulate_errors 1782 op.verbose = self.op.verbose 1783 op.error_codes = self.op.error_codes 1784 try: 1785 op.skip_checks = self.op.skip_checks 1786 except AttributeError: 1787 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 1788 1789 return ResultWithJobs(jobs)
1790
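# Illustrative sketch, not part of the original module: the shape of the job
# list built by LUClusterVerify.Exec above.  Each entry in "jobs" is itself a
# list of opcodes (one job); when no group was named explicitly, every group
# job carries a relative dependency (a negative job index) that points back
# to the OpClusterVerifyConfig job submitted first.  Plain dicts stand in for
# the opcode objects, and the empty list in each dependency entry mirrors the
# status list used by the original code.

def _build_verify_jobs_sketch(group_names):
  jobs = [[{"OP_ID": "OP_CLUSTER_VERIFY_CONFIG"}]]
  for group in group_names:
    # -len(jobs) is a relative job ID: counted backwards from the job being
    # added, it always resolves to the config-verification job at position 0.
    jobs.append([{"OP_ID": "OP_CLUSTER_VERIFY_GROUP",
                  "group_name": group,
                  "depends": [(-len(jobs), [])]}])
  return jobs

# _build_verify_jobs_sketch(["default", "rack2"]) yields three jobs: the
# config check, then one group check per group, each depending on the first.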
1791 1792 -class _VerifyErrors(object):
1793 """Mix-in for cluster/group verify LUs. 1794 1795 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 1796 self.op and self._feedback_fn to be available.) 1797 1798 """ 1799 1800 ETYPE_FIELD = "code" 1801 ETYPE_ERROR = constants.CV_ERROR 1802 ETYPE_WARNING = constants.CV_WARNING 1803
1804 - def _Error(self, ecode, item, msg, *args, **kwargs):
1805 """Format an error message. 1806 1807 Based on the opcode's error_codes parameter, either format a 1808 parseable error code, or a simpler error string. 1809 1810 This must be called only from Exec and functions called from Exec. 1811 1812 """ 1813 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1814 itype, etxt, _ = ecode 1815 # If the error code is in the list of ignored errors, demote the error to a 1816 # warning 1817 if etxt in self.op.ignore_errors: # pylint: disable=E1101 1818 ltype = self.ETYPE_WARNING 1819 # first complete the msg 1820 if args: 1821 msg = msg % args 1822 # then format the whole message 1823 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 1824 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1825 else: 1826 if item: 1827 item = " " + item 1828 else: 1829 item = "" 1830 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1831 # and finally report it via the feedback_fn 1832 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101 1833 # do not mark the operation as failed for WARN cases only 1834 if ltype == self.ETYPE_ERROR: 1835 self.bad = True
1836
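# Illustrative sketch, not part of the original module: the two message
# formats produced by _Error above.  With op.error_codes set, the message is
# machine-parseable (colon-separated fields); otherwise a shorter human
# readable form is emitted.  The error code is a tuple such as
# constants.CV_ECLUSTERCFG, whose first two fields are the item type and the
# error text.

def _format_error_sketch(ecode, item, msg, parseable, ltype="ERROR"):
  itype, etxt, _ = ecode
  if parseable:
    return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  return "%s: %s%s: %s" % (ltype, itype, " " + item if item else "", msg)

# _format_error_sketch(("cluster", "ECLUSTERCFG", ""), "", "bad config", True)
#   -> 'ERROR:ECLUSTERCFG:cluster::bad config'
# _format_error_sketch(("node", "ENODESSH", ""), "node1", "ssh failed", False)
#   -> 'ERROR: node node1: ssh failed'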
1837 - def _ErrorIf(self, cond, *args, **kwargs):
1838 """Log an error message if the passed condition is True. 1839 1840 """ 1841 if (bool(cond) 1842 or self.op.debug_simulate_errors): # pylint: disable=E1101 1843 self._Error(*args, **kwargs)
1844
1845 1846 -def _GetAllHypervisorParameters(cluster, instances):
1847 """Compute the set of all hypervisor parameters. 1848 1849 @type cluster: L{objects.Cluster} 1850 @param cluster: the cluster object 1851 @type instances: list of L{objects.Instance} 1852 @param instances: additional instances from which to obtain parameters 1853 @rtype: list of (origin, hypervisor, parameters) 1854 @return: a list with all parameters found, indicating the hypervisor they 1855 apply to, and the origin (can be "cluster", "os X", or "instance Y") 1856 1857 """ 1858 hvp_data = [] 1859 1860 for hv_name in cluster.enabled_hypervisors: 1861 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 1862 1863 for os_name, os_hvp in cluster.os_hvp.items(): 1864 for hv_name, hv_params in os_hvp.items(): 1865 if hv_params: 1866 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 1867 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 1868 1869 # TODO: collapse identical parameter values in a single one 1870 for instance in instances: 1871 if instance.hvparams: 1872 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 1873 cluster.FillHV(instance))) 1874 1875 return hvp_data
1876
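# Illustrative sketch, not part of the original module: the structure
# returned by _GetAllHypervisorParameters above is a flat list of
# (origin, hypervisor, parameters) triples, which _VerifyHVP below simply
# iterates over.  The names and values shown here are made up for the
# example.

_HVP_DATA_EXAMPLE = [
  ("cluster", "kvm", {"kernel_path": "/boot/vmlinuz"}),
  ("os debian-installer", "kvm", {"kernel_path": "", "acpi": True}),
  ("instance web1.example.com", "xen-pvm", {"bootloader_path": ""}),
]

def _iter_hvp_sketch(hvp_data):
  for origin, hv_name, hv_params in hvp_data:
    yield "%s/%s: %d parameter(s)" % (origin, hv_name, len(hv_params))

# list(_iter_hvp_sketch(_HVP_DATA_EXAMPLE))[0] == 'cluster/kvm: 1 parameter(s)'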
1877 1878 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1879 """Verifies the cluster config. 1880 1881 """ 1882 REQ_BGL = False 1883
1884 - def _VerifyHVP(self, hvp_data):
1885 """Verifies locally the syntax of the hypervisor parameters. 1886 1887 """ 1888 for item, hv_name, hv_params in hvp_data: 1889 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 1890 (item, hv_name)) 1891 try: 1892 hv_class = hypervisor.GetHypervisorClass(hv_name) 1893 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1894 hv_class.CheckParameterSyntax(hv_params) 1895 except errors.GenericError, err: 1896 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1897
1898 - def ExpandNames(self):
1899 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 1900 self.share_locks = ShareAll()
1901
1902 - def CheckPrereq(self):
1903 """Check prerequisites. 1904 1905 """ 1906 # Retrieve all information 1907 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 1908 self.all_node_info = self.cfg.GetAllNodesInfo() 1909 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1910
1911 - def Exec(self, feedback_fn):
1912 """Verify integrity of cluster, performing various tests on nodes. 1913 1914 """ 1915 self.bad = False 1916 self._feedback_fn = feedback_fn 1917 1918 feedback_fn("* Verifying cluster config") 1919 1920 for msg in self.cfg.VerifyConfig(): 1921 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 1922 1923 feedback_fn("* Verifying cluster certificate files") 1924 1925 for cert_filename in pathutils.ALL_CERT_FILES: 1926 (errcode, msg) = utils.VerifyCertificate(cert_filename) 1927 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 1928 1929 self._ErrorIf(not utils.CanRead(constants.LUXID_USER, 1930 pathutils.NODED_CERT_FILE), 1931 constants.CV_ECLUSTERCERT, 1932 None, 1933 pathutils.NODED_CERT_FILE + " must be accessible by the " + 1934 constants.LUXID_USER + " user") 1935 1936 feedback_fn("* Verifying hypervisor parameters") 1937 1938 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 1939 self.all_inst_info.values())) 1940 1941 feedback_fn("* Verifying all nodes belong to an existing group") 1942 1943 # We do this verification here because, should this bogus circumstance 1944 # occur, it would never be caught by VerifyGroup, which only acts on 1945 # nodes/instances reachable from existing node groups. 1946 1947 dangling_nodes = set(node for node in self.all_node_info.values() 1948 if node.group not in self.all_group_info) 1949 1950 dangling_instances = {} 1951 no_node_instances = [] 1952 1953 for inst in self.all_inst_info.values(): 1954 if inst.primary_node in [node.uuid for node in dangling_nodes]: 1955 dangling_instances.setdefault(inst.primary_node, []).append(inst) 1956 elif inst.primary_node not in self.all_node_info: 1957 no_node_instances.append(inst) 1958 1959 pretty_dangling = [ 1960 "%s (%s)" % 1961 (node.name, 1962 utils.CommaJoin(inst.name for 1963 inst in dangling_instances.get(node.uuid, []))) 1964 for node in dangling_nodes] 1965 1966 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 1967 None, 1968 "the following nodes (and their instances) belong to a non" 1969 " existing group: %s", utils.CommaJoin(pretty_dangling)) 1970 1971 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 1972 None, 1973 "the following instances have a non-existing primary-node:" 1974 " %s", utils.CommaJoin(inst.name for 1975 inst in no_node_instances)) 1976 1977 return not self.bad
1978
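# Illustrative, self-contained sketch, not part of the original module: the
# dangling-node and dangling-instance detection performed in Exec above,
# using namedtuples and plain dicts instead of the configuration objects.

import collections

_NodeSketch = collections.namedtuple("_NodeSketch", ["uuid", "name", "group"])
_InstSketch = collections.namedtuple("_InstSketch", ["name", "primary_node"])

def _find_dangling_sketch(nodes, group_uuids, instances):
  """Return (dangling nodes, their instances, instances with no node)."""
  dangling_nodes = set(n for n in nodes.values() if n.group not in group_uuids)
  dangling_uuids = set(n.uuid for n in dangling_nodes)
  dangling_insts = [i for i in instances.values()
                    if i.primary_node in dangling_uuids]
  no_node_insts = [i for i in instances.values()
                   if i.primary_node not in nodes]
  return dangling_nodes, dangling_insts, no_node_insts

# A node whose group no longer exists is reported via
# CV_ECLUSTERDANGLINGNODES together with its instances; instances whose
# primary node is unknown altogether end up in CV_ECLUSTERDANGLINGINST.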
1979 1980 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1981 """Verifies the status of a node group. 1982 1983 """ 1984 HPATH = "cluster-verify" 1985 HTYPE = constants.HTYPE_CLUSTER 1986 REQ_BGL = False 1987 1988 _HOOKS_INDENT_RE = re.compile("^", re.M) 1989
1990 - class NodeImage(object):
1991 """A class representing the logical and physical status of a node. 1992 1993 @type uuid: string 1994 @ivar uuid: the node UUID to which this object refers 1995 @ivar volumes: a structure as returned from 1996 L{ganeti.backend.GetVolumeList} (runtime) 1997 @ivar instances: a list of running instances (runtime) 1998 @ivar pinst: list of configured primary instances (config) 1999 @ivar sinst: list of configured secondary instances (config) 2000 @ivar sbp: dictionary of {primary-node: list of instances} for all 2001 instances for which this node is secondary (config) 2002 @ivar mfree: free memory, as reported by hypervisor (runtime) 2003 @ivar dfree: free disk, as reported by the node (runtime) 2004 @ivar offline: the offline status (config) 2005 @type rpc_fail: boolean 2006 @ivar rpc_fail: whether the RPC verify call was successful (overall, 2007 not whether the individual keys were correct) (runtime) 2008 @type lvm_fail: boolean 2009 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 2010 @type hyp_fail: boolean 2011 @ivar hyp_fail: whether the RPC call didn't return the instance list 2012 @type ghost: boolean 2013 @ivar ghost: whether this is a known node or not (config) 2014 @type os_fail: boolean 2015 @ivar os_fail: whether the RPC call didn't return valid OS data 2016 @type oslist: list 2017 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 2018 @type vm_capable: boolean 2019 @ivar vm_capable: whether the node can host instances 2020 @type pv_min: float 2021 @ivar pv_min: size in MiB of the smallest PVs 2022 @type pv_max: float 2023 @ivar pv_max: size in MiB of the biggest PVs 2024 2025 """
2026 - def __init__(self, offline=False, uuid=None, vm_capable=True):
2027 self.uuid = uuid 2028 self.volumes = {} 2029 self.instances = [] 2030 self.pinst = [] 2031 self.sinst = [] 2032 self.sbp = {} 2033 self.mfree = 0 2034 self.dfree = 0 2035 self.offline = offline 2036 self.vm_capable = vm_capable 2037 self.rpc_fail = False 2038 self.lvm_fail = False 2039 self.hyp_fail = False 2040 self.ghost = False 2041 self.os_fail = False 2042 self.oslist = {} 2043 self.pv_min = None 2044 self.pv_max = None
2045
2046 - def ExpandNames(self):
2047 # This raises errors.OpPrereqError on its own: 2048 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 2049 2050 # Get instances in node group; this is unsafe and needs verification later 2051 inst_uuids = \ 2052 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2053 2054 self.needed_locks = { 2055 locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids), 2056 locking.LEVEL_NODEGROUP: [self.group_uuid], 2057 locking.LEVEL_NODE: [], 2058 2059 # This opcode is run by watcher every five minutes and acquires all nodes 2060 # for a group. It doesn't run for a long time, so it's better to acquire 2061 # the node allocation lock as well. 2062 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 2063 } 2064 2065 self.share_locks = ShareAll()
2066
2067 - def DeclareLocks(self, level):
2068 if level == locking.LEVEL_NODE: 2069 # Get members of node group; this is unsafe and needs verification later 2070 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 2071 2072 # In Exec(), we warn about mirrored instances that have primary and 2073 # secondary living in separate node groups. To fully verify that 2074 # volumes for these instances are healthy, we will need to do an 2075 # extra call to their secondaries. We ensure here those nodes will 2076 # be locked. 2077 for inst_name in self.owned_locks(locking.LEVEL_INSTANCE): 2078 # Important: access only the instances whose lock is owned 2079 instance = self.cfg.GetInstanceInfoByName(inst_name) 2080 if instance.disk_template in constants.DTS_INT_MIRROR: 2081 nodes.update(self.cfg.GetInstanceSecondaryNodes(instance.uuid)) 2082 2083 self.needed_locks[locking.LEVEL_NODE] = nodes
2084
2085 - def CheckPrereq(self):
2086 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 2087 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 2088 2089 group_node_uuids = set(self.group_info.members) 2090 group_inst_uuids = \ 2091 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2092 2093 unlocked_node_uuids = \ 2094 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE)) 2095 2096 unlocked_inst_uuids = \ 2097 group_inst_uuids.difference( 2098 [self.cfg.GetInstanceInfoByName(name).uuid 2099 for name in self.owned_locks(locking.LEVEL_INSTANCE)]) 2100 2101 if unlocked_node_uuids: 2102 raise errors.OpPrereqError( 2103 "Missing lock for nodes: %s" % 2104 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)), 2105 errors.ECODE_STATE) 2106 2107 if unlocked_inst_uuids: 2108 raise errors.OpPrereqError( 2109 "Missing lock for instances: %s" % 2110 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)), 2111 errors.ECODE_STATE) 2112 2113 self.all_node_info = self.cfg.GetAllNodesInfo() 2114 self.all_inst_info = self.cfg.GetAllInstancesInfo() 2115 2116 self.my_node_uuids = group_node_uuids 2117 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid]) 2118 for node_uuid in group_node_uuids) 2119 2120 self.my_inst_uuids = group_inst_uuids 2121 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid]) 2122 for inst_uuid in group_inst_uuids) 2123 2124 # We detect here the nodes that will need the extra RPC calls for verifying 2125 # split LV volumes; they should be locked. 2126 extra_lv_nodes = set() 2127 2128 for inst in self.my_inst_info.values(): 2129 if inst.disk_template in constants.DTS_INT_MIRROR: 2130 inst_nodes = self.cfg.GetInstanceNodes(inst.uuid) 2131 for nuuid in inst_nodes: 2132 if self.all_node_info[nuuid].group != self.group_uuid: 2133 extra_lv_nodes.add(nuuid) 2134 2135 unlocked_lv_nodes = \ 2136 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2137 2138 if unlocked_lv_nodes: 2139 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 2140 utils.CommaJoin(unlocked_lv_nodes), 2141 errors.ECODE_STATE) 2142 self.extra_lv_nodes = list(extra_lv_nodes)
2143
2144 - def _VerifyNode(self, ninfo, nresult):
2145 """Perform some basic validation on data returned from a node. 2146 2147 - check the result data structure is well formed and has all the 2148 mandatory fields 2149 - check ganeti version 2150 2151 @type ninfo: L{objects.Node} 2152 @param ninfo: the node to check 2153 @param nresult: the results from the node 2154 @rtype: boolean 2155 @return: whether overall this call was successful (and we can expect 2156 reasonable values in the response) 2157 2158 """ 2159 # main result, nresult should be a non-empty dict 2160 test = not nresult or not isinstance(nresult, dict) 2161 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, 2162 "unable to verify node: no data returned") 2163 if test: 2164 return False 2165 2166 # compares ganeti version 2167 local_version = constants.PROTOCOL_VERSION 2168 remote_version = nresult.get("version", None) 2169 test = not (remote_version and 2170 isinstance(remote_version, (list, tuple)) and 2171 len(remote_version) == 2) 2172 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name, 2173 "connection to node returned invalid data") 2174 if test: 2175 return False 2176 2177 test = local_version != remote_version[0] 2178 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name, 2179 "incompatible protocol versions: master %s," 2180 " node %s", local_version, remote_version[0]) 2181 if test: 2182 return False 2183 2184 # node seems compatible, we can actually try to look into its results 2185 2186 # full package version 2187 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2188 constants.CV_ENODEVERSION, ninfo.name, 2189 "software version mismatch: master %s, node %s", 2190 constants.RELEASE_VERSION, remote_version[1], 2191 code=self.ETYPE_WARNING) 2192 2193 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2194 if ninfo.vm_capable and isinstance(hyp_result, dict): 2195 for hv_name, hv_result in hyp_result.iteritems(): 2196 test = hv_result is not None 2197 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2198 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2199 2200 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2201 if ninfo.vm_capable and isinstance(hvp_result, list): 2202 for item, hv_name, hv_result in hvp_result: 2203 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name, 2204 "hypervisor %s parameter verify failure (source %s): %s", 2205 hv_name, item, hv_result) 2206 2207 test = nresult.get(constants.NV_NODESETUP, 2208 ["Missing NODESETUP results"]) 2209 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name, 2210 "node setup error: %s", "; ".join(test)) 2211 2212 return True
2213
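# Illustrative sketch, not part of the original module: the version
# comparison done in _VerifyNode above.  The node reports a
# (protocol_version, release_version) pair; a protocol mismatch is a hard
# error that stops further checks, a release mismatch only produces a
# warning.  The values used below are made up.

def _check_versions_sketch(local_protocol, local_release, remote_version):
  if not (isinstance(remote_version, (list, tuple)) and
          len(remote_version) == 2):
    return "error: invalid version data"
  if remote_version[0] != local_protocol:
    return "error: protocol mismatch (%s vs %s)" % (local_protocol,
                                                    remote_version[0])
  if remote_version[1] != local_release:
    return "warning: release mismatch (%s vs %s)" % (local_release,
                                                     remote_version[1])
  return "ok"

# _check_versions_sketch(40, "2.16.0", (40, "2.15.2"))
#   -> 'warning: release mismatch (2.16.0 vs 2.15.2)'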
2214 - def _VerifyNodeTime(self, ninfo, nresult, 2215 nvinfo_starttime, nvinfo_endtime):
2216 """Check the node time. 2217 2218 @type ninfo: L{objects.Node} 2219 @param ninfo: the node to check 2220 @param nresult: the remote results for the node 2221 @param nvinfo_starttime: the start time of the RPC call 2222 @param nvinfo_endtime: the end time of the RPC call 2223 2224 """ 2225 ntime = nresult.get(constants.NV_TIME, None) 2226 try: 2227 ntime_merged = utils.MergeTime(ntime) 2228 except (ValueError, TypeError): 2229 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name, 2230 "Node returned invalid time") 2231 return 2232 2233 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2234 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2235 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2236 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2237 else: 2238 ntime_diff = None 2239 2240 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name, 2241 "Node time diverges by at least %s from master node time", 2242 ntime_diff)
2243
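# Illustrative sketch, not part of the original module: the clock-skew window
# applied by _VerifyNodeTime above.  The node's merged timestamp must fall
# within [rpc_start - max_skew, rpc_end + max_skew]; otherwise the divergence
# is reported via CV_ENODETIME.

def _clock_skew_sketch(node_time, rpc_start, rpc_end, max_skew):
  """Return the divergence in seconds, or None if within tolerance."""
  if node_time < rpc_start - max_skew:
    return rpc_start - node_time
  if node_time > rpc_end + max_skew:
    return node_time - rpc_end
  return None

# _clock_skew_sketch(1000.0, 1200.0, 1201.0, 150) -> 200.0 (node lags behind)
# _clock_skew_sketch(1190.0, 1200.0, 1201.0, 150) -> None (within the window)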
2244 - def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2245 """Check the node LVM results and update info for cross-node checks. 2246 2247 @type ninfo: L{objects.Node} 2248 @param ninfo: the node to check 2249 @param nresult: the remote results for the node 2250 @param vg_name: the configured VG name 2251 @type nimg: L{NodeImage} 2252 @param nimg: node image 2253 2254 """ 2255 if vg_name is None: 2256 return 2257 2258 # checks vg existence and size > 20G 2259 vglist = nresult.get(constants.NV_VGLIST, None) 2260 test = not vglist 2261 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name, 2262 "unable to check volume groups") 2263 if not test: 2264 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2265 constants.MIN_VG_SIZE) 2266 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus) 2267 2268 # Check PVs 2269 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage) 2270 for em in errmsgs: 2271 self._Error(constants.CV_ENODELVM, ninfo.name, em) 2272 if pvminmax is not None: 2273 (nimg.pv_min, nimg.pv_max) = pvminmax
2274
2275 - def _VerifyGroupDRBDVersion(self, node_verify_infos):
2276 """Check cross-node DRBD version consistency. 2277 2278 @type node_verify_infos: dict 2279 @param node_verify_infos: infos about nodes as returned from the 2280 node_verify call. 2281 2282 """ 2283 node_versions = {} 2284 for node_uuid, ndata in node_verify_infos.items(): 2285 nresult = ndata.payload 2286 if nresult: 2287 version = nresult.get(constants.NV_DRBDVERSION, None) 2288 if version: 2289 node_versions[node_uuid] = version 2290 2291 if len(set(node_versions.values())) > 1: 2292 for node_uuid, version in sorted(node_versions.items()): 2293 msg = "DRBD version mismatch: %s" % version 2294 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg, 2295 code=self.ETYPE_WARNING)
2296
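# Illustrative sketch, not part of the original module: the cross-node DRBD
# version consistency rule applied above.  Versions are collected per node
# and, as soon as more than one distinct version is present in the group,
# every node is listed with its version as a warning.

def _drbd_mismatch_sketch(node_versions):
  """Return the nodes to warn about, or an empty dict if consistent."""
  if len(set(node_versions.values())) > 1:
    return dict(sorted(node_versions.items()))
  return {}

# _drbd_mismatch_sketch({"node1": "8.4.11", "node2": "8.4.11"}) -> {}
# _drbd_mismatch_sketch({"node1": "8.4.11", "node2": "9.0.30"})
#   -> {'node1': '8.4.11', 'node2': '9.0.30'}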
2297 - def _VerifyGroupLVM(self, node_image, vg_name):
2298 """Check cross-node consistency in LVM. 2299 2300 @type node_image: dict 2301 @param node_image: info about nodes, mapping from node names to 2302 L{NodeImage} objects 2303 @param vg_name: the configured VG name 2304 2305 """ 2306 if vg_name is None: 2307 return 2308 2309 # Only exclusive storage needs this kind of check 2310 if not self._exclusive_storage: 2311 return 2312 2313 # exclusive_storage wants all PVs to have the same size (approximately), 2314 # if the smallest and the biggest ones are okay, everything is fine. 2315 # pv_min is None iff pv_max is None 2316 vals = filter((lambda ni: ni.pv_min is not None), node_image.values()) 2317 if not vals: 2318 return 2319 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals) 2320 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals) 2321 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax) 2322 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name, 2323 "PV sizes differ too much in the group; smallest (%s MB) is" 2324 " on %s, biggest (%s MB) is on %s", 2325 pvmin, self.cfg.GetNodeName(minnode_uuid), 2326 pvmax, self.cfg.GetNodeName(maxnode_uuid))
2327
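# Illustrative sketch, not part of the original module: the exclusive-storage
# PV size comparison above.  The real decision is delegated to
# utils.LvmExclusiveTestBadPvSizes; the 20% relative-difference threshold
# used here is an assumption for illustration only, not the actual policy.

def _pv_sizes_sketch(node_images, max_rel_diff=0.2):
  """Return (smallest PV, biggest PV, too-different flag) or None."""
  vals = [ni for ni in node_images if ni.get("pv_min") is not None]
  if not vals:
    return None
  pvmin = min(ni["pv_min"] for ni in vals)
  pvmax = max(ni["pv_max"] for ni in vals)
  return (pvmin, pvmax, (pvmax - pvmin) > max_rel_diff * pvmin)

# _pv_sizes_sketch([{"pv_min": 10240.0, "pv_max": 10240.0},
#                   {"pv_min": 4096.0, "pv_max": 10240.0}])
#   -> (4096.0, 10240.0, True), which maps to CV_EGROUPDIFFERENTPVSIZE.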
2328 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329 """Check the node bridges. 2330 2331 @type ninfo: L{objects.Node} 2332 @param ninfo: the node to check 2333 @param nresult: the remote results for the node 2334 @param bridges: the expected list of bridges 2335 2336 """ 2337 if not bridges: 2338 return 2339 2340 missing = nresult.get(constants.NV_BRIDGES, None) 2341 test = not isinstance(missing, list) 2342 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2343 "did not return valid bridge information") 2344 if not test: 2345 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name, 2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2347
2348 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user script presence and executability on the node. 2350 2351 @type ninfo: L{objects.Node} 2352 @param ninfo: the node to check 2353 @param nresult: the remote results for the node 2354 2355 """ 2356 test = not constants.NV_USERSCRIPTS in nresult 2357 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name, 2358 "did not return user scripts information") 2359 2360 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) 2361 if not test: 2362 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name, 2363 "user scripts not present or not executable: %s" % 2364 utils.CommaJoin(sorted(broken_scripts)))
2365
2366 - def _VerifyNodeNetwork(self, ninfo, nresult):
2367 """Check the node network connectivity results. 2368 2369 @type ninfo: L{objects.Node} 2370 @param ninfo: the node to check 2371 @param nresult: the remote results for the node 2372 2373 """ 2374 test = constants.NV_NODELIST not in nresult 2375 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name, 2376 "node hasn't returned node ssh connectivity data") 2377 if not test: 2378 if nresult[constants.NV_NODELIST]: 2379 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2380 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name, 2381 "ssh communication with node '%s': %s", a_node, a_msg) 2382 2383 test = constants.NV_NODENETTEST not in nresult 2384 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2385 "node hasn't returned node tcp connectivity data") 2386 if not test: 2387 if nresult[constants.NV_NODENETTEST]: 2388 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2389 for anode in nlist: 2390 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, 2391 "tcp communication with node '%s': %s", 2392 anode, nresult[constants.NV_NODENETTEST][anode]) 2393 2394 test = constants.NV_MASTERIP not in nresult 2395 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name, 2396 "node hasn't returned node master IP reachability data") 2397 if not test: 2398 if not nresult[constants.NV_MASTERIP]: 2399 if ninfo.uuid == self.master_node: 2400 msg = "the master node cannot reach the master IP (not configured?)" 2401 else: 2402 msg = "cannot reach the master IP" 2403 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
2404
2405 - def _VerifyInstance(self, instance, node_image, diskstatus):
2406 """Verify an instance. 2407 2408 This function checks to see if the required block devices are 2409 available on the instance's node, and that the nodes are in the correct 2410 state. 2411 2412 """ 2413 pnode_uuid = instance.primary_node 2414 pnode_img = node_image[pnode_uuid] 2415 groupinfo = self.cfg.GetAllNodeGroupsInfo() 2416 2417 node_vol_should = {} 2418 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should) 2419 2420 cluster = self.cfg.GetClusterInfo() 2421 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 2422 self.group_info) 2423 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg) 2424 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name, 2425 utils.CommaJoin(err), code=self.ETYPE_WARNING) 2426 2427 for node_uuid in node_vol_should: 2428 n_img = node_image[node_uuid] 2429 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2430 # ignore missing volumes on offline or broken nodes 2431 continue 2432 for volume in node_vol_should[node_uuid]: 2433 test = volume not in n_img.volumes 2434 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name, 2435 "volume %s missing on node %s", volume, 2436 self.cfg.GetNodeName(node_uuid)) 2437 2438 if instance.admin_state == constants.ADMINST_UP: 2439 test = instance.uuid not in pnode_img.instances and not pnode_img.offline 2440 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name, 2441 "instance not running on its primary node %s", 2442 self.cfg.GetNodeName(pnode_uuid)) 2443 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, 2444 instance.name, "instance is marked as running and lives on" 2445 " offline node %s", self.cfg.GetNodeName(pnode_uuid)) 2446 2447 diskdata = [(nname, success, status, idx) 2448 for (nname, disks) in diskstatus.items() 2449 for idx, (success, status) in enumerate(disks)] 2450 2451 for nname, success, bdev_status, idx in diskdata: 2452 # the 'ghost node' construction in Exec() ensures that we have a 2453 # node here 2454 snode = node_image[nname] 2455 bad_snode = snode.ghost or snode.offline 2456 self._ErrorIf(instance.disks_active and 2457 not success and not bad_snode, 2458 constants.CV_EINSTANCEFAULTYDISK, instance.name, 2459 "couldn't retrieve status for disk/%s on %s: %s", 2460 idx, self.cfg.GetNodeName(nname), bdev_status) 2461 2462 if instance.disks_active and success and \ 2463 (bdev_status.is_degraded or 2464 bdev_status.ldisk_status != constants.LDS_OKAY): 2465 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname)) 2466 if bdev_status.is_degraded: 2467 msg += " is degraded" 2468 if bdev_status.ldisk_status != constants.LDS_OKAY: 2469 msg += "; state is '%s'" % \ 2470 constants.LDS_NAMES[bdev_status.ldisk_status] 2471 2472 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg) 2473 2474 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2475 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid), 2476 "instance %s, connection to primary node failed", 2477 instance.name) 2478 2479 secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid) 2480 self._ErrorIf(len(secondary_nodes) > 1, 2481 constants.CV_EINSTANCELAYOUT, instance.name, 2482 "instance has multiple secondary nodes: %s", 2483 utils.CommaJoin(secondary_nodes), 2484 code=self.ETYPE_WARNING) 2485 2486 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid) 2487 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes) 2488 if any(es_flags.values()): 2489 if instance.disk_template not in constants.DTS_EXCL_STORAGE: 2490 # Disk 
template not compatible with exclusive_storage: no instance 2491 # node should have the flag set 2492 es_nodes = [n 2493 for (n, es) in es_flags.items() 2494 if es] 2495 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name, 2496 "instance has template %s, which is not supported on nodes" 2497 " that have exclusive storage set: %s", 2498 instance.disk_template, 2499 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes))) 2500 for (idx, disk) in enumerate(self.cfg.GetInstanceDisks(instance.uuid)): 2501 self._ErrorIf(disk.spindles is None, 2502 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name, 2503 "number of spindles not configured for disk %s while" 2504 " exclusive storage is enabled, try running" 2505 " gnt-cluster repair-disk-sizes", idx) 2506 2507 if instance.disk_template in constants.DTS_INT_MIRROR: 2508 instance_nodes = utils.NiceSort(inst_nodes) 2509 instance_groups = {} 2510 2511 for node_uuid in instance_nodes: 2512 instance_groups.setdefault(self.all_node_info[node_uuid].group, 2513 []).append(node_uuid) 2514 2515 pretty_list = [ 2516 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)), 2517 groupinfo[group].name) 2518 # Sort so that we always list the primary node first. 2519 for group, nodes in sorted(instance_groups.items(), 2520 key=lambda (_, nodes): pnode_uuid in nodes, 2521 reverse=True)] 2522 2523 self._ErrorIf(len(instance_groups) > 1, 2524 constants.CV_EINSTANCESPLITGROUPS, 2525 instance.name, "instance has primary and secondary nodes in" 2526 " different groups: %s", utils.CommaJoin(pretty_list), 2527 code=self.ETYPE_WARNING) 2528 2529 inst_nodes_offline = [] 2530 for snode in secondary_nodes: 2531 s_img = node_image[snode] 2532 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 2533 self.cfg.GetNodeName(snode), 2534 "instance %s, connection to secondary node failed", 2535 instance.name) 2536 2537 if s_img.offline: 2538 inst_nodes_offline.append(snode) 2539 2540 # warn that the instance lives on offline nodes 2541 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, 2542 instance.name, "instance has offline secondary node(s) %s", 2543 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline))) 2544 # ... or ghost/non-vm_capable nodes 2545 for node_uuid in inst_nodes: 2546 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE, 2547 instance.name, "instance lives on ghost node %s", 2548 self.cfg.GetNodeName(node_uuid)) 2549 self._ErrorIf(not node_image[node_uuid].vm_capable, 2550 constants.CV_EINSTANCEBADNODE, instance.name, 2551 "instance lives on non-vm_capable node %s", 2552 self.cfg.GetNodeName(node_uuid))
2553
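# Illustrative sketch, not part of the original module: the diskstatus
# flattening done in _VerifyInstance above.  The per-node lists of
# (success, status) pairs become (node, success, status, index) tuples so
# that every disk of every node can be checked in one loop; plain strings
# stand in for the block-device status objects.

_DISKSTATUS_EXAMPLE = {
  "node1": [(True, "ok"), (False, "timeout")],
  "node2": [(True, "ok"), (True, "ok")],
}

_DISKDATA_EXAMPLE = [(nname, success, status, idx)
                     for (nname, disks) in sorted(_DISKSTATUS_EXAMPLE.items())
                     for idx, (success, status) in enumerate(disks)]

# _DISKDATA_EXAMPLE[1] == ('node1', False, 'timeout', 1): disk/1 on node1
# could not report its status and would be flagged CV_EINSTANCEFAULTYDISK
# if the instance's disks are active.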
2554 - def _VerifyOrphanVolumes(self, vg_name, node_vol_should, node_image, 2555 reserved):
2556 """Verify if there are any unknown volumes in the cluster. 2557 2558 The .os, .swap and backup volumes are ignored. All other volumes are 2559 reported as unknown. 2560 2561 @type vg_name: string 2562 @param vg_name: the name of the Ganeti-administered volume group 2563 @type reserved: L{ganeti.utils.FieldSet} 2564 @param reserved: a FieldSet of reserved volume names 2565 2566 """ 2567 for node_uuid, n_img in node_image.items(): 2568 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2569 self.all_node_info[node_uuid].group != self.group_uuid): 2570 # skip non-healthy nodes 2571 continue 2572 for volume in n_img.volumes: 2573 # skip volumes not belonging to the ganeti-administered volume group 2574 if volume.split('/')[0] != vg_name: 2575 continue 2576 2577 test = ((node_uuid not in node_vol_should or 2578 volume not in node_vol_should[node_uuid]) and 2579 not reserved.Matches(volume)) 2580 self._ErrorIf(test, constants.CV_ENODEORPHANLV, 2581 self.cfg.GetNodeName(node_uuid), 2582 "volume %s is unknown", volume, 2583 code=_VerifyErrors.ETYPE_WARNING)
2584
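# Illustrative sketch, not part of the original module: the orphan-volume
# test above.  A logical volume is reported when it lives in the
# Ganeti-administered VG but is neither expected on that node nor matched by
# the reserved-LV patterns; a plain set stands in for the FieldSet of
# reserved names used by the real code.

def _orphan_lvs_sketch(vg_name, node_volumes, node_vol_should, reserved):
  orphans = []
  for volume in node_volumes:
    if volume.split("/")[0] != vg_name:
      continue
    if volume not in node_vol_should and volume not in reserved:
      orphans.append(volume)
  return orphans

# _orphan_lvs_sketch("xenvg",
#                    ["xenvg/inst1-disk0", "xenvg/stray-lv", "othervg/foo"],
#                    ["xenvg/inst1-disk0"], set())
#   -> ['xenvg/stray-lv'], reported as CV_ENODEORPHANLV (warning).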
2585 - def _VerifyNPlusOneMemory(self, node_image, all_insts):
2586 """Verify N+1 Memory Resilience. 2587 2588 Check that if one single node dies we can still start all the 2589 instances it was primary for. 2590 2591 """ 2592 cluster_info = self.cfg.GetClusterInfo() 2593 for node_uuid, n_img in node_image.items(): 2594 # This code checks that every node which is now listed as 2595 # secondary has enough memory to host all instances it is 2596 # supposed to, should a single other node in the cluster fail. 2597 # FIXME: not ready for failover to an arbitrary node 2598 # FIXME: does not support file-backed instances 2599 # WARNING: we currently take into account down instances as well 2600 # as up ones, considering that even if they're down someone 2601 # might want to start them even in the event of a node failure. 2602 if n_img.offline or \ 2603 self.all_node_info[node_uuid].group != self.group_uuid: 2604 # we're skipping nodes marked offline and nodes in other groups from 2605 # the N+1 warning, since most likely we don't have good memory 2606 # information from them; we already list instances living on such 2607 # nodes, and that's enough warning 2608 continue 2609 #TODO(dynmem): also consider ballooning out other instances 2610 for prinode, inst_uuids in n_img.sbp.items(): 2611 needed_mem = 0 2612 for inst_uuid in inst_uuids: 2613 bep = cluster_info.FillBE(all_insts[inst_uuid]) 2614 if bep[constants.BE_AUTO_BALANCE]: 2615 needed_mem += bep[constants.BE_MINMEM] 2616 test = n_img.mfree < needed_mem 2617 self._ErrorIf(test, constants.CV_ENODEN1, 2618 self.cfg.GetNodeName(node_uuid), 2619 "not enough memory to accommodate instance failovers" 2620 " should node %s fail (%dMiB needed, %dMiB available)", 2621 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2622
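# Illustrative sketch, not part of the original module: the N+1 memory check
# above.  For every primary node whose instances this node would have to
# take over (the sbp mapping), the minimum memory of all auto-balanced
# instances is summed and compared against this node's free memory.

def _n_plus_one_sketch(mfree, sbp, inst_minmem, inst_autobalance):
  """Return the primaries whose failover this node could not absorb."""
  failing = []
  for prinode, inst_uuids in sbp.items():
    needed = sum(inst_minmem[u] for u in inst_uuids if inst_autobalance[u])
    if mfree < needed:
      failing.append((prinode, needed))
  return failing

# _n_plus_one_sketch(2048,
#                    {"nodeA": ["i1", "i2"]},
#                    {"i1": 1024, "i2": 2048},
#                    {"i1": True, "i2": True})
#   -> [('nodeA', 3072)]: 3072 MiB needed but only 2048 MiB free (CV_ENODEN1).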
2623 - def _VerifyClientCertificates(self, nodes, all_nvinfo):
2624 """Verifies the consistency of the client certificates. 2625 2626 This includes several aspects: 2627 - the individual validation of all nodes' certificates 2628 - the consistency of the master candidate certificate map 2629 - the consistency of the master candidate certificate map with the 2630 certificates that the master candidates are actually using. 2631 2632 @param nodes: the list of nodes to consider in this verification 2633 @param all_nvinfo: the map of results of the verify_node call to 2634 all nodes 2635 2636 """ 2637 candidate_certs = self.cfg.GetClusterInfo().candidate_certs 2638 if candidate_certs is None or len(candidate_certs) == 0: 2639 self._ErrorIf( 2640 True, constants.CV_ECLUSTERCLIENTCERT, None, 2641 "The cluster's list of master candidate certificates is empty." 2642 " If you just updated the cluster, please run" 2643 " 'gnt-cluster renew-crypto --new-node-certificates'.") 2644 return 2645 2646 self._ErrorIf( 2647 len(candidate_certs) != len(set(candidate_certs.values())), 2648 constants.CV_ECLUSTERCLIENTCERT, None, 2649 "There are at least two master candidates configured to use the same" 2650 " certificate.") 2651 2652 # collect the client certificate 2653 for node in nodes: 2654 if node.offline: 2655 continue 2656 2657 nresult = all_nvinfo[node.uuid] 2658 if nresult.fail_msg or not nresult.payload: 2659 continue 2660 2661 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None) 2662 2663 self._ErrorIf( 2664 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None, 2665 "Client certificate of node '%s' failed validation: %s (code '%s')", 2666 node.uuid, msg, errcode) 2667 2668 if not errcode: 2669 digest = msg 2670 if node.master_candidate: 2671 if node.uuid in candidate_certs: 2672 self._ErrorIf( 2673 digest != candidate_certs[node.uuid], 2674 constants.CV_ECLUSTERCLIENTCERT, None, 2675 "Client certificate digest of master candidate '%s' does not" 2676 " match its entry in the cluster's map of master candidate" 2677 " certificates. Expected: %s Got: %s", node.uuid, 2678 digest, candidate_certs[node.uuid]) 2679 else: 2680 self._ErrorIf( 2681 True, constants.CV_ECLUSTERCLIENTCERT, None, 2682 "The master candidate '%s' does not have an entry in the" 2683 " map of candidate certificates.", node.uuid) 2684 self._ErrorIf( 2685 digest in candidate_certs.values(), 2686 constants.CV_ECLUSTERCLIENTCERT, None, 2687 "Master candidate '%s' is using a certificate of another node.", 2688 node.uuid) 2689 else: 2690 self._ErrorIf( 2691 node.uuid in candidate_certs, 2692 constants.CV_ECLUSTERCLIENTCERT, None, 2693 "Node '%s' is not a master candidate, but still listed in the" 2694 " map of master candidate certificates.", node.uuid) 2695 self._ErrorIf( 2696 (node.uuid not in candidate_certs) and 2697 (digest in candidate_certs.values()), 2698 constants.CV_ECLUSTERCLIENTCERT, None, 2699 "Node '%s' is not a master candidate and is incorrectly using a" 2700 " certificate of another node which is master candidate.", 2701 node.uuid)
2702
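# Illustrative sketch, not part of the original module: the master-candidate
# certificate map rules enforced above.  Digests in the map must be unique,
# every master candidate must report a digest matching its entry, and
# non-candidates must neither appear in the map nor reuse one of its digests.

def _cert_map_issues_sketch(candidate_certs, reported_digests, candidates):
  issues = []
  if len(candidate_certs) != len(set(candidate_certs.values())):
    issues.append("duplicate digests in the candidate map")
  for node, digest in reported_digests.items():
    if node in candidates:
      if candidate_certs.get(node) != digest:
        issues.append("digest mismatch for candidate %s" % node)
    elif node in candidate_certs or digest in candidate_certs.values():
      issues.append("non-candidate %s present in or reusing the map" % node)
  return issues

# _cert_map_issues_sketch({"uuid1": "ab12"}, {"uuid1": "ab12"}, {"uuid1"})
#   -> [] (consistent); any returned issue maps to CV_ECLUSTERCLIENTCERT.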
2703 - def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, 2704 (files_all, files_opt, files_mc, files_vm)):
2705 """Verifies file checksums collected from all nodes. 2706 2707 @param nodes: List of L{objects.Node} objects 2708 @param master_node_uuid: UUID of master node 2709 @param all_nvinfo: RPC results 2710 2711 """ 2712 # Define functions determining which nodes to consider for a file 2713 files2nodefn = [ 2714 (files_all, None), 2715 (files_mc, lambda node: (node.master_candidate or 2716 node.uuid == master_node_uuid)), 2717 (files_vm, lambda node: node.vm_capable), 2718 ] 2719 2720 # Build mapping from filename to list of nodes which should have the file 2721 nodefiles = {} 2722 for (files, fn) in files2nodefn: 2723 if fn is None: 2724 filenodes = nodes 2725 else: 2726 filenodes = filter(fn, nodes) 2727 nodefiles.update((filename, 2728 frozenset(map(operator.attrgetter("uuid"), filenodes))) 2729 for filename in files) 2730 2731 assert set(nodefiles) == (files_all | files_mc | files_vm) 2732 2733 fileinfo = dict((filename, {}) for filename in nodefiles) 2734 ignore_nodes = set() 2735 2736 for node in nodes: 2737 if node.offline: 2738 ignore_nodes.add(node.uuid) 2739 continue 2740 2741 nresult = all_nvinfo[node.uuid] 2742 2743 if nresult.fail_msg or not nresult.payload: 2744 node_files = None 2745 else: 2746 fingerprints = nresult.payload.get(constants.NV_FILELIST, {}) 2747 node_files