31 """Logical units dealing with the cluster."""
32
33 import copy
34 import itertools
35 import logging
36 import operator
37 import os
38 import re
39 import time
40
41 from ganeti import compat
42 from ganeti import constants
43 from ganeti import errors
44 from ganeti import hypervisor
45 from ganeti import locking
46 from ganeti import masterd
47 from ganeti import netutils
48 from ganeti import objects
49 from ganeti import opcodes
50 from ganeti import pathutils
51 from ganeti import query
52 import ganeti.rpc.node as rpc
53 from ganeti import runtime
54 from ganeti import ssh
55 from ganeti import uidpool
56 from ganeti import utils
57 from ganeti import vcluster
58
59 from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
60 ResultWithJobs
61 from ganeti.cmdlib.common import ShareAll, RunPostHook, \
62 ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
63 GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
64 GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
65 CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
66 ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
67 CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \
68 CheckDiskAccessModeConsistency, GetClientCertDigest, \
69 AddInstanceCommunicationNetworkOp, ConnectInstanceCommunicationNetworkOp, \
70 CheckImageValidity, CheckDiskAccessModeConsistency, EnsureKvmdOnNodes
71
72 import ganeti.masterd.instance


class LUClusterRenewCrypto(NoHooksLU):
76 """Renew the cluster's crypto tokens.
77
78 Note that most of this operation is done in gnt_cluster.py, this LU only
79 takes care of the renewal of the client SSL certificates.
80
81 """
  _MAX_NUM_RETRIES = 3

  def Exec(self, feedback_fn):
    master_uuid = self.cfg.GetMasterNode()
    cluster = self.cfg.GetClusterInfo()

    logging.debug("Renewing the master's SSL node certificate."
                  " Master's UUID: %s.", master_uuid)

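    # Collect the client-certificate digests of the master and of every
    # reachable master candidate; the collected map is written back to the
    # configuration in one go at the end of this method.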
    digest_map = {}
    master_digest = utils.GetCertificateDigest(
      cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
    digest_map[master_uuid] = master_digest
    logging.debug("Adding the master's SSL node certificate digest to the"
                  " configuration. Master's UUID: %s, Digest: %s",
                  master_uuid, master_digest)

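    # Fetch each non-master node's certificate digest, retrying a few times
    # per node; nodes that still fail after all retries are collected in
    # node_errors and reported to the user at the end.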
    node_errors = {}
    nodes = self.cfg.GetAllNodesInfo()
    logging.debug("Renewing non-master nodes' node certificates.")
    for (node_uuid, node_info) in nodes.items():
      if node_info.offline:
        feedback_fn("* Skipping offline node %s" % node_info.name)
        logging.debug("Skipping offline node %s (UUID: %s).",
                      node_info.name, node_uuid)
        continue
      if node_uuid != master_uuid:
        logging.debug("Adding certificate digest of node '%s'.", node_uuid)
        last_exception = None
        for i in range(self._MAX_NUM_RETRIES):
          try:
            if node_info.master_candidate:
              node_digest = GetClientCertDigest(self, node_uuid)
              digest_map[node_uuid] = node_digest
              logging.debug("Added the node's certificate to candidate"
                            " certificate list. Current list: %s.",
                            str(cluster.candidate_certs))
            break
          except errors.OpExecError as e:
            last_exception = e
            logging.error("Could not fetch a non-master node's SSL node"
                          " certificate at attempt no. %s. The node's UUID"
                          " is %s, and the error was: %s.",
                          str(i), node_uuid, e)
        else:
          if last_exception:
            node_errors[node_uuid] = last_exception

    if node_errors:
      msg = ("Some nodes' SSL client certificates could not be fetched."
             " Please make sure those nodes are reachable and rerun"
             " the operation. The affected nodes and their errors are:\n")
      for uuid, e in node_errors.items():
        msg += "Node %s: %s\n" % (uuid, e)
      feedback_fn(msg)

    self.cfg.SetCandidateCerts(digest_map)


class LUClusterActivateMasterIp(NoHooksLU):
143 """Activate the master IP on the master node.
144
145 """
146 - def Exec(self, feedback_fn):
155

class LUClusterDeactivateMasterIp(NoHooksLU):
158 """Deactivate the master IP on the master node.
159
160 """
161 - def Exec(self, feedback_fn):
170

class LUClusterConfigQuery(NoHooksLU):
173 """Return configuration values.
174
175 """
176 REQ_BGL = False
177
179 self.cq = ClusterQuery(None, self.op.output_fields, False)
180
183
186
187 - def Exec(self, feedback_fn):
188 result = self.cq.OldStyleQuery(self)
189
190 assert len(result) == 1
191
192 return result[0]


class LUClusterDestroy(LogicalUnit):
196 """Logical unit for destroying the cluster.
197
198 """
199 HPATH = "cluster-destroy"
200 HTYPE = constants.HTYPE_CLUSTER
201
202
203
204
205 clusterHasBeenDestroyed = False
206
208 """Build hooks env.
209
210 """
211 return {
212 "OP_TARGET": self.cfg.GetClusterName(),
213 }
214
216 """Build hooks nodes.
217
218 """
219 return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):

class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def CheckArguments(self):
    self.master_uuid = self.cfg.GetMasterNode()
    self.master_ndparams = self.cfg.GetNdParams(self.cfg.GetMasterNodeInfo())

    if (self.master_ndparams[constants.ND_OVS] and not
        self.master_ndparams.get(constants.ND_OVS_LINK, None)):
      self.LogInfo("No physical interface for Open vSwitch was given."
                   " Open vSwitch will not have an outside connection. This"
                   " might not be what you want.")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Create and configure Open vSwitch.

    """
    if self.master_ndparams[constants.ND_OVS]:
      result = self.rpc.call_node_configure_ovs(
        self.master_uuid,
        self.master_ndparams[constants.ND_OVS_NAME],
        self.master_ndparams.get(constants.ND_OVS_LINK, None))
      result.Raise("Could not successfully configure Open vSwitch")

    return True



class LUClusterQuery(NoHooksLU):
365 """Query cluster configuration.
366
367 """
368 REQ_BGL = False
369
371 self.needed_locks = {}
372
  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

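    # Expose only the OS hypervisor parameters that belong to hypervisors
    # which are currently enabled on the cluster.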
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "vcs_version": constants.VCS_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": self.cfg.GetMasterNodeName(),
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "max_running_jobs": cluster.max_running_jobs,
      "max_tracked_jobs": cluster.max_tracked_jobs,
      "mac_prefix": cluster.mac_prefix,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "default_iallocator_params": cluster.default_iallocator_params,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      "enabled_disk_templates": cluster.enabled_disk_templates,
      "install_image": cluster.install_image,
      "instance_communication_network":
        cluster.instance_communication_network,
      "compression_tools": cluster.compression_tools,
      "enabled_user_shutdown": cluster.enabled_user_shutdown,
      }

    return result



class LUClusterRedistConf(NoHooksLU):
448 """Force the redistribution of cluster configuration.
449
450 This is a very simple LU.
451
452 """
453 REQ_BGL = False
454
461
462 - def Exec(self, feedback_fn):
468

class LUClusterRename(LogicalUnit):
471 """Rename the cluster.
472
473 """
474 HPATH = "cluster-rename"
475 HTYPE = constants.HTYPE_CLUSTER
476
478 """Build hooks env.
479
480 """
481 return {
482 "OP_TARGET": self.cfg.GetClusterName(),
483 "NEW_NAME": self.op.name,
484 }
485


  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

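    # The master IP has to be taken down while the new name and address are
    # written to the configuration; it is re-activated with the new address
    # in the finally block below.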
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.uuid)
      except ValueError:
        pass
      UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                     master_params, ems)
      result.Warn("Could not re-enable the master role on the master,"
                  " please restart manually", self.LogWarning)

    return clustername


class LUClusterRepairDiskSizes(NoHooksLU):
554 """Verifies the cluster disks sizes.
555
556 """
557 REQ_BGL = False
558
560 if self.op.instances:
561 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
562
563
564 self.needed_locks = {
565 locking.LEVEL_NODE_RES: [],
566 locking.LEVEL_INSTANCE: self.wanted_names,
567 }
568 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
569 else:
570 self.wanted_names = None
571 self.needed_locks = {
572 locking.LEVEL_NODE_RES: locking.ALL_SET,
573 locking.LEVEL_INSTANCE: locking.ALL_SET,
574
575
576 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
577 }
578
579 self.share_locks = {
580 locking.LEVEL_NODE_RES: 1,
581 locking.LEVEL_INSTANCE: 0,
582 locking.LEVEL_NODE_ALLOC: 1,
583 }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

590 """Check prerequisites.
591
592 This only checks the optional instance list against the existing names.
593
594 """
595 if self.wanted_names is None:
596 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
597
598 self.wanted_instances = \
599 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.DT_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # recurse on the first child only, not on the metadata device
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(self.cfg.GetInstanceDisks(instance.uuid)):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                frozenset(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
                                               per_node_disks.keys())

    changed = []
    for node_uuid, dskl in per_node_disks.items():
      if not dskl:
        # no disks on this node
        continue

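      # Pass copies of the disk objects (paired with their owning instance)
      # to the RPC call; the originals are only modified below once the node
      # has reported its actual dimensions.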
      newl = [([v[2].Copy()], v[0]) for v in dskl]
      node_name = self.cfg.GetNodeName(node_uuid)
      result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getdimensions call to node"
                        " %s, ignoring", node_name)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node_name, len(dskl),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node_name)
        continue
      for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
        if dimensions is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(dimensions, (tuple, list)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " dimension information, ignoring", idx,
                          instance.name)
          continue
        (size, spindles) = dimensions
        if not isinstance(size, int):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
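        # The node reports the size in bytes while the configuration stores
        # mebibytes, so shift by 20 bits before comparing.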
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(disk, feedback_fn)
          changed.append((instance.name, idx, "size", size))
        if es_flags[node_uuid]:
          if spindles is None:
            self.LogWarning("Disk %d of instance %s did not return valid"
                            " spindles information, ignoring", idx,
                            instance.name)
          elif disk.spindles is None or disk.spindles != spindles:
            self.LogInfo("Disk %d of instance %s has mismatched spindles,"
                         " correcting: recorded %s, actual %s",
                         idx, instance.name, disk.spindles, spindles)
            disk.spindles = spindles
            self.cfg.Update(disk, feedback_fn)
            changed.append((instance.name, idx, "spindles", disk.spindles))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(disk, feedback_fn)
          changed.append((instance.name, idx, "size", disk.size))
    return changed


def CheckFileBasedStoragePathVsEnabledDiskTemplates(
    logging_warn_fn, file_storage_dir, enabled_disk_templates,
    file_disk_template):
731 """Checks whether the given file-based storage directory is acceptable.
732
733 Note: This function is public, because it is also used in bootstrap.py.
734
735 @type logging_warn_fn: function
736 @param logging_warn_fn: function which accepts a string and logs it
737 @type file_storage_dir: string
738 @param file_storage_dir: the directory to be used for file-based instances
739 @type enabled_disk_templates: list of string
740 @param enabled_disk_templates: the list of enabled disk templates
741 @type file_disk_template: string
742 @param file_disk_template: the file-based disk template for which the
743 path should be checked
744
745 """
746 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
747 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
748 ))
749
750 file_storage_enabled = file_disk_template in enabled_disk_templates
751 if file_storage_dir is not None:
752 if file_storage_dir == "":
753 if file_storage_enabled:
754 raise errors.OpPrereqError(
755 "Unsetting the '%s' storage directory while having '%s' storage"
756 " enabled is not permitted." %
757 (file_disk_template, file_disk_template))
758 else:
759 if not file_storage_enabled:
760 logging_warn_fn(
761 "Specified a %s storage directory, although %s storage is not"
762 " enabled." % (file_disk_template, file_disk_template))
763 else:
764 raise errors.ProgrammerError("Received %s storage dir with value"
765 " 'None'." % file_disk_template)


class LUClusterSetParams(LogicalUnit):
832 """Change the parameters of the cluster.
833
834 """
835 HPATH = "cluster-modify"
836 HTYPE = constants.HTYPE_CLUSTER
837 REQ_BGL = False
838

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def _CheckVgName(self, node_uuids, enabled_disk_templates,
                   new_enabled_disk_templates):
    """Check the consistency of the VG name on all nodes and, in case it
    gets unset, whether there are instances still using it.

    """
    lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates)
    lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates,
                                            new_enabled_disk_templates)
    current_vg_name = self.cfg.GetVGName()

    if self.op.vg_name == '':
      if lvm_is_enabled:
        raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
                                   " disk templates are or get enabled.")

    if self.op.vg_name is None:
      if current_vg_name is None and lvm_is_enabled:
        raise errors.OpPrereqError("Please specify a volume group when"
                                   " enabling lvm-based disk-templates.")

    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if (self.op.vg_name is not None and lvm_is_enabled) or \
        (self.cfg.GetVGName() is not None and lvm_gets_enabled):
      self._CheckVgNameOnNodes(node_uuids)

  @staticmethod
  def _GetDiskTemplateSetsInner(op_enabled_disk_templates,
                                old_enabled_disk_templates):
    """Computes three sets of disk templates.

    @see: C{_GetDiskTemplateSets} for more details.

    """
    enabled_disk_templates = None
    new_enabled_disk_templates = []
    disabled_disk_templates = []
    if op_enabled_disk_templates:
      enabled_disk_templates = op_enabled_disk_templates
      new_enabled_disk_templates = \
        list(set(enabled_disk_templates)
             - set(old_enabled_disk_templates))
      disabled_disk_templates = \
        list(set(old_enabled_disk_templates)
             - set(enabled_disk_templates))
    else:
      enabled_disk_templates = old_enabled_disk_templates
    return (enabled_disk_templates, new_enabled_disk_templates,
            disabled_disk_templates)

  def _GetDiskTemplateSets(self, cluster):
    """Computes three sets of disk templates.

    The three sets are:
      - disk templates that will be enabled after this operation (no matter
        if they were enabled before or not)
      - disk templates that get enabled by this operation (thus haven't been
        enabled before)
      - disk templates that get disabled by this operation

    """
    return self._GetDiskTemplateSetsInner(self.op.enabled_disk_templates,
                                          cluster.enabled_disk_templates)

  def _CheckIpolicy(self, cluster, enabled_disk_templates):
    """Checks the ipolicy.

    @type cluster: C{objects.Cluster}
    @param cluster: the cluster's configuration
    @type enabled_disk_templates: list of string
    @param enabled_disk_templates: list of (possibly newly) enabled disk
      templates

    """
    if self.op.ipolicy:
      self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                           group_policy=False)

      CheckIpolicyVsDiskTemplates(self.new_ipolicy,
                                  enabled_disk_templates)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset(
          [inst for inst in all_instances
           if compat.any(nuuid in group.members
                         for nuuid in self.cfg.GetInstanceNodes(inst.uuid))])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
                                           self.cfg)
        if new:
          violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate it: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
    else:
      CheckIpolicyVsDiskTemplates(cluster.ipolicy,
                                  enabled_disk_templates)

  def _CheckDrbdHelperOnNodes(self, drbd_helper, node_uuids):
    """Checks whether the set DRBD helper actually exists on the nodes.

    @type drbd_helper: string
    @param drbd_helper: path of the drbd usermode helper binary
    @type node_uuids: list of strings
    @param node_uuids: list of node UUIDs to check for the helper

    """
    helpers = self.rpc.call_drbd_helper(node_uuids)
    for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
      if ninfo.offline:
        self.LogInfo("Not checking drbd helper on offline node %s",
                     ninfo.name)
        continue
      msg = helpers[ninfo.uuid].fail_msg
      if msg:
        raise errors.OpPrereqError("Error checking drbd helper on node"
                                   " '%s': %s" % (ninfo.name, msg),
                                   errors.ECODE_ENVIRON)
      node_helper = helpers[ninfo.uuid].payload
      if node_helper != drbd_helper:
        raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                   (ninfo.name, node_helper),
                                   errors.ECODE_ENVIRON)

  def _CheckDrbdHelper(self, node_uuids, drbd_enabled, drbd_gets_enabled):
    """Check the DRBD usermode helper.

    @type node_uuids: list of strings
    @param node_uuids: a list of nodes' UUIDs
    @type drbd_enabled: boolean
    @param drbd_enabled: whether DRBD will be enabled after this operation
      (no matter if it was disabled before or not)
    @type drbd_gets_enabled: boolean
    @param drbd_gets_enabled: true if DRBD was disabled before this
      operation, but will be enabled afterwards

    """
    if self.op.drbd_helper == '':
      if drbd_enabled:
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " DRBD is enabled.")
      if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    else:
      if self.op.drbd_helper is not None and drbd_enabled:
        self._CheckDrbdHelperOnNodes(self.op.drbd_helper, node_uuids)
      else:
        if drbd_gets_enabled:
          current_drbd_helper = self.cfg.GetClusterInfo().drbd_usermode_helper
          if current_drbd_helper is not None:
            self._CheckDrbdHelperOnNodes(current_drbd_helper, node_uuids)
          else:
            raise errors.OpPrereqError("Cannot enable DRBD without a"
                                       " DRBD usermode helper set.")

  def _CheckInstancesOfDisabledDiskTemplates(
      self, disabled_disk_templates):
    """Check whether we try to disable a disk template that is in use.

    @type disabled_disk_templates: list of string
    @param disabled_disk_templates: list of disk templates that are going to
      be disabled by this operation

    """
    for disk_template in disabled_disk_templates:
      if self.cfg.HasAnyDiskOfType(disk_template):
        raise errors.OpPrereqError(
          "Cannot disable disk template '%s', because there is at least one"
          " instance using it." % disk_template)

  @staticmethod
  def _CheckInstanceCommunicationNetwork(network, warning_fn):
    """Check whether an existing network is configured for instance
    communication.

    Checks whether an existing network is configured with the
    parameters that are advisable for instance communication, and
    otherwise issues security warnings.

    @type network: L{ganeti.objects.Network}
    @param network: L{ganeti.objects.Network} object whose
      configuration is being checked
    @type warning_fn: function
    @param warning_fn: function used to print warnings
    @rtype: None
    @return: None

    """
    def _MaybeWarn(err, val, default):
      if val != default:
        warning_fn("Supplied instance communication network '%s' %s '%s',"
                   " this might pose a security risk (default is '%s').",
                   network.name, err, val, default)

    if network.network is None:
      raise errors.OpPrereqError("Supplied instance communication network '%s'"
                                 " must have an IPv4 network address." %
                                 network.name)

    _MaybeWarn("has an IPv4 gateway", network.gateway, None)
    _MaybeWarn("has a non-standard IPv4 network address", network.network,
               constants.INSTANCE_COMMUNICATION_NETWORK4)
    _MaybeWarn("has an IPv6 gateway", network.gateway6, None)
    _MaybeWarn("has a non-standard IPv6 network address", network.network6,
               constants.INSTANCE_COMMUNICATION_NETWORK6)
    _MaybeWarn("has a non-standard MAC prefix", network.mac_prefix,
               constants.INSTANCE_COMMUNICATION_MAC_PREFIX)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    node_uuids = self.owned_locks(locking.LEVEL_NODE)
    self.cluster = cluster = self.cfg.GetClusterInfo()

    vm_capable_node_uuids = [node.uuid
                             for node in self.cfg.GetAllNodesInfo().values()
                             if node.uuid in node_uuids and node.vm_capable]

    (enabled_disk_templates, new_enabled_disk_templates,
     disabled_disk_templates) = self._GetDiskTemplateSets(cluster)
    self._CheckInstancesOfDisabledDiskTemplates(disabled_disk_templates)

    self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
                      new_enabled_disk_templates)

    if self.op.file_storage_dir is not None:
      CheckFileStoragePathVsEnabledDiskTemplates(
        self.LogWarning, self.op.file_storage_dir, enabled_disk_templates)

    if self.op.shared_file_storage_dir is not None:
      CheckSharedFileStoragePathVsEnabledDiskTemplates(
        self.LogWarning, self.op.shared_file_storage_dir,
        enabled_disk_templates)

    drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
    drbd_gets_enabled = constants.DT_DRBD8 in new_enabled_disk_templates
    self._CheckDrbdHelper(vm_capable_node_uuids,
                          drbd_enabled, drbd_gets_enabled)

    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
                                           self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
                                               self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    self._CheckIpolicy(cluster, enabled_disk_templates)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

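      # Check how the new default NIC parameters play together with the
      # NICs of every existing instance.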
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError as err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # routed NICs must have an IP address
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)

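    # Build the new cluster-wide hypervisor parameters by layering the
    # requested changes on top of the current values.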
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

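    # Merge the requested disk parameters on top of the current cluster
    # defaults and re-check the access-mode consistency.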
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
      CheckDiskAccessModeConsistency(self.op.diskparams, self.cfg)

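    # Merge the per-OS hypervisor parameters the same way; a hypervisor
    # entry of None removes that hypervisor's parameters for the OS.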
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_dict is None:
              # delete the entry if it exists
              self.new_os_hvp[os_name].pop(hv_name, None)
            elif hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    self._BuildOSParams(cluster)

    if self.op.enabled_hypervisors is not None:
      for hv in self.op.enabled_hypervisors:
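        # If the hypervisor doesn't already have an entry in the cluster's
        # hvparams, initialize it as empty; either way, fill in the
        # defaults, since the existing parameter set may be incomplete if
        # the hypervisor wasn't enabled before.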
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
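      # Either the enabled list or the parameters have changed; re-validate
      # the parameters of every affected hypervisor.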
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisorClass(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          CheckHVParams(self, node_uuids, hv_name, hv_params)

    self._CheckDiskTemplateConsistency()

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the defaults
      # have already been checked in the block above
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

          # fill the per-OS parameters on top of the (new) cluster defaults
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisorClass(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          CheckHVParams(self, node_uuids, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

    if self.op.instance_communication_network:
      network_name = self.op.instance_communication_network

      try:
        network_uuid = self.cfg.LookupNetwork(network_name)
      except errors.OpPrereqError:
        network_uuid = None

      if network_uuid is not None:
        network = self.cfg.GetNetwork(network_uuid)
        self._CheckInstanceCommunicationNetwork(network, self.LogWarning)

    if self.op.compression_tools:
      CheckCompressionTools(self.op.compression_tools)

  def _BuildOSParams(self, cluster):
    "Calculate the new OS parameters for this operation."

    def _GetNewParams(source, new_params):
      "Wrapper around GetUpdatedParams."
      if new_params is None:
        return source
      result = objects.FillDict(source, {})
      for os_name in new_params:
        result[os_name] = GetUpdatedParams(result.get(os_name, {}),
                                           new_params[os_name],
                                           use_none=True)
        if not result[os_name]:
          # we removed all parameters
          del result[os_name]
      return result

    self.new_osp = _GetNewParams(cluster.osparams,
                                 self.op.osparams)
    self.new_osp_private = _GetNewParams(cluster.osparams_private_cluster,
                                         self.op.osparams_private_cluster)

    # re-validate the merged parameters of every OS that was touched
    changed_oses = (set(self.new_osp.keys()) | set(self.new_osp_private.keys()))
    for os_name in changed_oses:
      os_params = cluster.SimpleFillOS(
        os_name,
        self.new_osp.get(os_name, {}),
        os_params_private=self.new_osp_private.get(os_name, {})
      )

      CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                    os_name, os_params, False)

  def _CheckDiskTemplateConsistency(self):
    """Check whether the disk templates that are going to be disabled
    are still in use by some instances.

    """
    if self.op.enabled_disk_templates:
      cluster = self.cfg.GetClusterInfo()
      instances = self.cfg.GetAllInstancesInfo()

      disk_templates_to_remove = set(cluster.enabled_disk_templates) \
        - set(self.op.enabled_disk_templates)
      for instance in instances.values():
        if instance.disk_template in disk_templates_to_remove:
          raise errors.OpPrereqError("Cannot disable disk template '%s',"
                                     " because instance '%s' is using it." %
                                     (instance.disk_template, instance.name))

  def _SetVgName(self, feedback_fn):
    """Determines and sets the new volume group name.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")

  def _SetFileStorageDir(self, feedback_fn):
    """Set the file storage directory.

    """
    if self.op.file_storage_dir is not None:
      if self.cluster.file_storage_dir == self.op.file_storage_dir:
        feedback_fn("Global file storage dir already set to value '%s'"
                    % self.cluster.file_storage_dir)
      else:
        self.cluster.file_storage_dir = self.op.file_storage_dir

  def _SetSharedFileStorageDir(self, feedback_fn):
    """Set the shared file storage directory.

    """
    if self.op.shared_file_storage_dir is not None:
      if self.cluster.shared_file_storage_dir == \
          self.op.shared_file_storage_dir:
        feedback_fn("Global shared file storage dir already set to value '%s'"
                    % self.cluster.shared_file_storage_dir)
      else:
        self.cluster.shared_file_storage_dir = self.op.shared_file_storage_dir

  def _SetDrbdHelper(self, feedback_fn):
    """Set the DRBD usermode helper.

    """
    if self.op.drbd_helper is not None:
      if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
        feedback_fn("Note that you specified a drbd user helper, but did not"
                    " enable the drbd disk template.")
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")

  @staticmethod
  def _EnsureInstanceCommunicationNetwork(cfg, network_name):
    """Ensure that the instance communication network exists and is
    connected to all groups.

    The instance communication network given by L{network_name} is
    created, if necessary, via the opcode 'OpNetworkAdd'.  Also, the
    instance communication network is connected to all existing node
    groups, if necessary, via the opcode 'OpNetworkConnect'.

    @type cfg: L{config.ConfigWriter}
    @param cfg: cluster configuration

    @type network_name: string
    @param network_name: instance communication network name

    @rtype: L{ganeti.cmdlib.ResultWithJobs} or L{None}
    @return: L{ganeti.cmdlib.ResultWithJobs} if the instance communication
      network needs to be created or connected to a group, otherwise
      L{None}

    """
    jobs = []

    try:
      network_uuid = cfg.LookupNetwork(network_name)
      network_exists = True
    except errors.OpPrereqError:
      network_exists = False

    if not network_exists:
      jobs.append(AddInstanceCommunicationNetworkOp(network_name))

    for group_uuid in cfg.GetNodeGroupList():
      group = cfg.GetNodeGroup(group_uuid)

      if network_exists:
        network_connected = network_uuid in group.networks
      else:
        # The network will only be created asynchronously by the job
        # submitted above, so its UUID is not available here; assume it is
        # not connected to any group yet.
        network_connected = False

      if not network_connected:
        op = ConnectInstanceCommunicationNetworkOp(group_uuid, network_name)
        jobs.append(op)

    if jobs:
      return ResultWithJobs([jobs])
    else:
      return None

  @staticmethod
  def _ModifyInstanceCommunicationNetwork(cfg, network_name, feedback_fn):
    """Update the instance communication network stored in the cluster
    configuration.

    Compares the user-supplied instance communication network against
    the one stored in the Ganeti cluster configuration. If there is a
    change, the instance communication network may be created and
    connected to all groups (see
    L{LUClusterSetParams._EnsureInstanceCommunicationNetwork}).

    @type cfg: L{config.ConfigWriter}
    @param cfg: cluster configuration

    @type network_name: string
    @param network_name: instance communication network name

    @type feedback_fn: function
    @param feedback_fn: see L{ganeti.cmdlib.base.LogicalUnit}

    @rtype: L{LUClusterSetParams._EnsureInstanceCommunicationNetwork} or L{None}
    @return: see L{LUClusterSetParams._EnsureInstanceCommunicationNetwork}

    """
    config_network_name = cfg.GetInstanceCommunicationNetwork()

    if network_name == config_network_name:
      feedback_fn("Instance communication network already is '%s', nothing to"
                  " do." % network_name)
    else:
      try:
        cfg.LookupNetwork(config_network_name)
        feedback_fn("Previous instance communication network '%s'"
                    " should be removed manually." % config_network_name)
      except errors.OpPrereqError:
        pass

      if network_name:
        feedback_fn("Changing instance communication network to '%s', only new"
                    " instances will be affected."
                    % network_name)
      else:
        feedback_fn("Disabling instance communication network, only new"
                    " instances will be affected.")

      cfg.SetInstanceCommunicationNetwork(network_name)

      if network_name:
        return LUClusterSetParams._EnsureInstanceCommunicationNetwork(
          cfg,
          network_name)
      else:
        return None

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    self.cluster = self.cfg.GetClusterInfo()
    if self.op.enabled_disk_templates:
      self.cluster.enabled_disk_templates = \
        list(self.op.enabled_disk_templates)

    self.cfg.Update(self.cluster, feedback_fn)

    self._SetVgName(feedback_fn)

    self.cluster = self.cfg.GetClusterInfo()
    self._SetFileStorageDir(feedback_fn)
    self._SetSharedFileStorageDir(feedback_fn)
    self.cfg.Update(self.cluster, feedback_fn)
    self._SetDrbdHelper(feedback_fn)

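    # Re-read the cluster object: the helper methods above write through
    # the configuration, so the local copy may be out of date.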
    self.cluster = self.cfg.GetClusterInfo()

    ensure_kvmd = False

    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
      ensure_kvmd = True
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.osparams_private_cluster:
      self.cluster.osparams_private_cluster = self.new_osp_private
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      AdjustCandidatePool(self, [])

    if self.op.max_running_jobs is not None:
      self.cluster.max_running_jobs = self.op.max_running_jobs

    if self.op.max_tracked_jobs is not None:
      self.cluster.max_tracked_jobs = self.op.max_tracked_jobs

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.modify_etc_hosts is not None:
      self.cluster.modify_etc_hosts = self.op.modify_etc_hosts

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.default_iallocator_params is not None:
      self.cluster.default_iallocator_params = \
        self.op.default_iallocator_params

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    if self.op.enabled_user_shutdown is not None and \
        self.cluster.enabled_user_shutdown != self.op.enabled_user_shutdown:
      self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown
      ensure_kvmd = True

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.mac_prefix:
      self.cluster.mac_prefix = self.op.mac_prefix

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                       master_params, ems)
      if not self.op.force:
        result.Raise("Could not disable the master ip")
      else:
        if result.fail_msg:
          msg = ("Could not disable the master ip (continuing anyway): %s" %
                 result.fail_msg)
          feedback_fn(msg)
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(
        master_params.uuid, master_params.netmask,
        self.op.master_netmask, master_params.ip,
        master_params.netdev)
      result.Warn("Could not change the master IP netmask", feedback_fn)
      self.cluster.master_netmask = self.op.master_netmask

    if self.op.install_image:
      self.cluster.install_image = self.op.install_image

    if self.op.zeroing_image is not None:
      CheckImageValidity(self.op.zeroing_image,
                         "Zeroing image must be an absolute path or a URL")
      self.cluster.zeroing_image = self.op.zeroing_image

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                     master_params, ems)
      result.Warn("Could not re-enable the master ip on the master,"
                  " please restart manually", self.LogWarning)

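    # The KVM daemon can only be (de)activated once the updated cluster
    # object has been written out above, since the nodes read the new
    # settings from the configuration.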
    if ensure_kvmd:
      EnsureKvmdOnNodes(self, feedback_fn)

    if self.op.compression_tools is not None:
      self.cfg.SetCompressionTools(self.op.compression_tools)

    network_name = self.op.instance_communication_network
    if network_name is not None:
      return self._ModifyInstanceCommunicationNetwork(self.cfg,
                                                      network_name,
                                                      feedback_fn)
    else:
      return None


class LUClusterVerify(NoHooksLU):
1748 """Submits all jobs necessary to verify the cluster.
1749
1750 """
1751 REQ_BGL = False
1752
1754 self.needed_locks = {}
1755
  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()
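
      # Verify the cluster configuration first and make all group
      # verifications depend on it (see depends_fn below).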
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])
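
      # Each group job depends on the config-verification job submitted
      # above, expressed as a relative job dependency.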
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)
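
    # Propagate the verbosity and error options of this opcode to every
    # job submitted above.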
    for op in itertools.chain(*jobs):
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class _VerifyErrors(object):
1793 """Mix-in for cluster/group verify LUs.
1794
1795 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1796 self.op and self._feedback_fn to be available.)
1797
1798 """
1799
1800 ETYPE_FIELD = "code"
1801 ETYPE_ERROR = constants.CV_ERROR
1802 ETYPE_WARNING = constants.CV_WARNING
1803
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode

    # if the error code is ignored, demote it to a warning
    if etxt in self.op.ignore_errors:
      ltype = self.ETYPE_WARNING

    if args:
      msg = msg % args

    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)

    self._feedback_fn(" - %s" % msg)

    # only full errors mark the operation as failed
    if ltype == self.ETYPE_ERROR:
      self.bad = True
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    if (bool(cond)
        or self.op.debug_simulate_errors):
      self._Error(*args, **kwargs)


def _GetAllHypervisorParameters(cluster, instances):
1847 """Compute the set of all hypervisor parameters.
1848
1849 @type cluster: L{objects.Cluster}
1850 @param cluster: the cluster object
1851 @param instances: list of L{objects.Instance}
1852 @param instances: additional instances from which to obtain parameters
1853 @rtype: list of (origin, hypervisor, parameters)
1854 @return: a list with all parameters found, indicating the hypervisor they
1855 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1856
1857 """
1858 hvp_data = []
1859
1860 for hv_name in cluster.enabled_hypervisors:
1861 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1862
1863 for os_name, os_hvp in cluster.os_hvp.items():
1864 for hv_name, hv_params in os_hvp.items():
1865 if hv_params:
1866 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1867 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1868
1869
1870 for instance in instances:
1871 if instance.hvparams:
1872 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1873 cluster.FillHV(instance)))
1874
1875 return hvp_data


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1879 """Verifies the cluster config.
1880
1881 """
1882 REQ_BGL = False
1883
1897
1901
1910
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = utils.VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
                                    pathutils.NODED_CERT_FILE),
                  constants.CV_ECLUSTERCERT,
                  None,
                  pathutils.NODED_CERT_FILE + " must be accessible by the " +
                  constants.LUXID_USER + " user")

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

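    # This check is done here rather than in LUClusterVerifyGroup because a
    # node in a group that no longer exists would never be visited by the
    # per-group verification.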
    dangling_nodes = set(node for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in [node.uuid for node in dangling_nodes]:
        dangling_instances.setdefault(inst.primary_node, []).append(inst)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(inst.name for
                         inst in dangling_instances.get(node.uuid, [])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(inst.name for
                                         inst in no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1981 """Verifies the status of a node group.
1982
1983 """
1984 HPATH = "cluster-verify"
1985 HTYPE = constants.HTYPE_CLUSTER
1986 REQ_BGL = False
1987
1988 _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
1991 """A class representing the logical and physical status of a node.
1992
1993 @type uuid: string
1994 @ivar uuid: the node UUID to which this object refers
1995 @ivar volumes: a structure as returned from
1996 L{ganeti.backend.GetVolumeList} (runtime)
1997 @ivar instances: a list of running instances (runtime)
1998 @ivar pinst: list of configured primary instances (config)
1999 @ivar sinst: list of configured secondary instances (config)
2000 @ivar sbp: dictionary of {primary-node: list of instances} for all
2001 instances for which this node is secondary (config)
2002 @ivar mfree: free memory, as reported by hypervisor (runtime)
2003 @ivar dfree: free disk, as reported by the node (runtime)
2004 @ivar offline: the offline status (config)
2005 @type rpc_fail: boolean
2006 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2007 not whether the individual keys were correct) (runtime)
2008 @type lvm_fail: boolean
2009 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2010 @type hyp_fail: boolean
2011 @ivar hyp_fail: whether the RPC call didn't return the instance list
2012 @type ghost: boolean
2013 @ivar ghost: whether this is a known node or not (config)
2014 @type os_fail: boolean
2015 @ivar os_fail: whether the RPC call didn't return valid OS data
2016 @type oslist: list
2017 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2018 @type vm_capable: boolean
2019 @ivar vm_capable: whether the node can host instances
2020 @type pv_min: float
2021 @ivar pv_min: size in MiB of the smallest PVs
2022 @type pv_max: float
2023 @ivar pv_max: size in MiB of the biggest PVs
2024
2025 """
    def __init__(self, offline=False, uuid=None, vm_capable=True):
      self.uuid = uuid
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
      self.pv_min = None
      self.pv_max = None


  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_node_uuids = set(self.group_info.members)
    group_inst_uuids = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_node_uuids = \
      group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_inst_uuids = \
      group_inst_uuids.difference(
        [self.cfg.GetInstanceInfoByName(name).uuid
         for name in self.owned_locks(locking.LEVEL_INSTANCE)])

    if unlocked_node_uuids:
      raise errors.OpPrereqError(
        "Missing lock for nodes: %s" %
        utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
        errors.ECODE_STATE)

    if unlocked_inst_uuids:
      raise errors.OpPrereqError(
        "Missing lock for instances: %s" %
        utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
        errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_uuids = group_node_uuids
    self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
                             for node_uuid in group_node_uuids)

    self.my_inst_uuids = group_inst_uuids
    self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
                             for inst_uuid in group_inst_uuids)

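    # Nodes outside this group may hold LVs of mirrored instances whose
    # primary is in the group; verifying those LVs needs extra RPC calls,
    # so such nodes must be locked as well.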
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        inst_nodes = self.cfg.GetInstanceNodes(inst.uuid)
        for nuuid in inst_nodes:
          if self.all_node_info[nuuid].group != self.group_uuid:
            extra_lv_nodes.add(nuuid)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)
2143
2144 def _VerifyNode(self, ninfo, nresult):
2145 """Perform some basic validation on data returned from a node.
2146
2147 - check the result data structure is well formed and has all the
2148 mandatory fields
2149 - check ganeti version
2150
2151 @type ninfo: L{objects.Node}
2152 @param ninfo: the node to check
2153 @param nresult: the results from the node
2154 @rtype: boolean
2155 @return: whether overall this call was successful (and we can expect
2156 reasonable values in the response)
2157
2158 """
2159
2160 test = not nresult or not isinstance(nresult, dict)
2161 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
2162 "unable to verify node: no data returned")
2163 if test:
2164 return False
2165
2166
2167 local_version = constants.PROTOCOL_VERSION
2168 remote_version = nresult.get("version", None)
2169 test = not (remote_version and
2170 isinstance(remote_version, (list, tuple)) and
2171 len(remote_version) == 2)
2172 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
2173 "connection to node returned invalid data")
2174 if test:
2175 return False
2176
2177 test = local_version != remote_version[0]
2178 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
2179 "incompatible protocol versions: master %s,"
2180 " node %s", local_version, remote_version[0])
2181 if test:
2182 return False
2183
2184 # node seems compatible, we can actually try to look into its results
2185
2186 # full package version
2187 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2188 constants.CV_ENODEVERSION, ninfo.name,
2189 "software version mismatch: master %s, node %s",
2190 constants.RELEASE_VERSION, remote_version[1],
2191 code=self.ETYPE_WARNING)
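# Illustrative note (added commentary): nresult["version"] is a 2-tuple of
# (protocol version, release version), e.g. something like (2070000, "3.0.2");
# the protocol number must match the master's exactly, while a differing
# release version only produces a warning.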
2192
2193 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2194 if ninfo.vm_capable and isinstance(hyp_result, dict):
2195 for hv_name, hv_result in hyp_result.items():
2196 test = hv_result is not None
2197 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2198 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2199
2200 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2201 if ninfo.vm_capable and isinstance(hvp_result, list):
2202 for item, hv_name, hv_result in hvp_result:
2203 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
2204 "hypervisor %s parameter verify failure (source %s): %s",
2205 hv_name, item, hv_result)
2206
2207 test = nresult.get(constants.NV_NODESETUP,
2208 ["Missing NODESETUP results"])
2209 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
2210 "node setup error: %s", "; ".join(test))
2211
2212 return True
2213
2214 def _VerifyNodeTime(self, ninfo, nresult,
2215 nvinfo_starttime, nvinfo_endtime):
2216 """Check the node time.
2217
2218 @type ninfo: L{objects.Node}
2219 @param ninfo: the node to check
2220 @param nresult: the remote results for the node
2221 @param nvinfo_starttime: the start time of the RPC call
2222 @param nvinfo_endtime: the end time of the RPC call
2223
2224 """
2225 ntime = nresult.get(constants.NV_TIME, None)
2226 try:
2227 ntime_merged = utils.MergeTime(ntime)
2228 except (ValueError, TypeError):
2229 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
2230 "Node returned invalid time")
2231 return
2232
2233 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2234 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2235 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2236 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2237 else:
2238 ntime_diff = None
2239
2240 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
2241 "Node time diverges by at least %s from master node time",
2242 ntime_diff)
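# Worked example (added commentary, hypothetical numbers): with
# NODE_MAX_CLOCK_SKEW = 150s, nvinfo_starttime = 1000.0 and a node reporting
# ntime_merged = 800.0, the node is outside the allowed window
# (800.0 < 1000.0 - 150), so ntime_diff becomes "200.0s" and a
# CV_ENODETIME error is reported.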
2243
2244 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2245 """Check the node LVM results and update info for cross-node checks.
2246
2247 @type ninfo: L{objects.Node}
2248 @param ninfo: the node to check
2249 @param nresult: the remote results for the node
2250 @param vg_name: the configured VG name
2251 @type nimg: L{NodeImage}
2252 @param nimg: node image
2253
2254 """
2255 if vg_name is None:
2256 return
2257
2258 # checks vg existence and size > 20G
2259 vglist = nresult.get(constants.NV_VGLIST, None)
2260 test = not vglist
2261 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
2262 "unable to check volume groups")
2263 if not test:
2264 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2265 constants.MIN_VG_SIZE)
2266 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
2267
2268
2269 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
2270 for em in errmsgs:
2271 self._Error(constants.CV_ENODELVM, ninfo.name, em)
2272 if pvminmax is not None:
2273 (nimg.pv_min, nimg.pv_max) = pvminmax
2274
2275 def _VerifyGroupDRBDVersion(self, node_verify_infos):
2276 """Check cross-node DRBD version consistency.
2277
2278 @type node_verify_infos: dict
2279 @param node_verify_infos: infos about nodes as returned from the
2280 node_verify call.
2281
2282 """
2283 node_versions = {}
2284 for node_uuid, ndata in node_verify_infos.items():
2285 nresult = ndata.payload
2286 if nresult:
2287 version = nresult.get(constants.NV_DRBDVERSION, None)
2288 if version:
2289 node_versions[node_uuid] = version
2290
2291 if len(set(node_versions.values())) > 1:
2292 for node_uuid, version in sorted(node_versions.items()):
2293 msg = "DRBD version mismatch: %s" % version
2294 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
2295 code=self.ETYPE_WARNING)
2296
2297 def _VerifyGroupLVM(self, node_image, vg_name):
2298 """Check cross-node consistency in LVM.
2299
2300 @type node_image: dict
2301 @param node_image: info about nodes, mapping from node UUIDs to
2302 L{NodeImage} objects
2303 @param vg_name: the configured VG name
2304
2305 """
2306 if vg_name is None:
2307 return
2308
2309
2310 if not self._exclusive_storage:
2311 return
2312
2313
2314
2315
2316 vals = [ni for ni in node_image.values() if ni.pv_min is not None]
2317 if not vals:
2318 return
2319 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
2320 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
2321 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2322 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2323 "PV sizes differ too much in the group; smallest (%s MB) is"
2324 " on %s, biggest (%s MB) is on %s",
2325 pvmin, self.cfg.GetNodeName(minnode_uuid),
2326 pvmax, self.cfg.GetNodeName(maxnode_uuid))
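# Illustrative example (added commentary, hypothetical sizes): with exclusive
# storage enabled, if the smallest PV in the group is 100 GiB on one node and
# the biggest is 200 GiB on another, LvmExclusiveTestBadPvSizes flags the
# spread as too large and CV_EGROUPDIFFERENTPVSIZE names both nodes.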
2327
2328 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329 """Check the node bridges.
2330
2331 @type ninfo: L{objects.Node}
2332 @param ninfo: the node to check
2333 @param nresult: the remote results for the node
2334 @param bridges: the expected list of bridges
2335
2336 """
2337 if not bridges:
2338 return
2339
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2343 "did not return valid bridge information")
2344 if not test:
2345 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2347
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2350
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2354
2355 """
2356 test = constants.NV_USERSCRIPTS not in nresult
2357 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
2358 "did not return user scripts information")
2359
2360 broken_scripts = utils.CommaJoin(nresult.get(constants.NV_USERSCRIPTS, []))
2361 if broken_scripts:
2362 self._ErrorIf(True, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
2363 "user scripts not present or not executable: %s",
2364 broken_scripts)
2365
2366 def _VerifyNodeNetwork(self, ninfo, nresult):
2367 """Check the node network connectivity results.
2368
2369 @type ninfo: L{objects.Node}
2370 @param ninfo: the node to check
2371 @param nresult: the remote results for the node
2372
2373 """
2374 test = constants.NV_NODELIST not in nresult
2375 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
2376 "node hasn't returned node ssh connectivity data")
2377 if not test:
2378 if nresult[constants.NV_NODELIST]:
2379 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2380 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
2381 "ssh communication with node '%s': %s", a_node, a_msg)
2382
2383 test = constants.NV_NODENETTEST not in nresult
2384 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2385 "node hasn't returned node tcp connectivity data")
2386 if not test:
2387 if nresult[constants.NV_NODENETTEST]:
2388 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2389 for anode in nlist:
2390 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
2391 "tcp communication with node '%s': %s",
2392 anode, nresult[constants.NV_NODENETTEST][anode])
2393
2394 test = constants.NV_MASTERIP not in nresult
2395 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2396 "node hasn't returned node master IP reachability data")
2397 if not test:
2398 if not nresult[constants.NV_MASTERIP]:
2399 if ninfo.uuid == self.master_node:
2400 msg = "the master node cannot reach the master IP (not configured?)"
2401 else:
2402 msg = "cannot reach the master IP"
2403 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
2404
2405 def _VerifyInstance(self, instance, node_image, diskstatus):
2406 """Verify an instance.
2407
2408 This function checks to see if the required block devices are
2409 available on the instance's node, and that the nodes are in the correct
2410 state.
2411
2412 """
2413 pnode_uuid = instance.primary_node
2414 pnode_img = node_image[pnode_uuid]
2415 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2416
2417 node_vol_should = {}
2418 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
2419
2420 cluster = self.cfg.GetClusterInfo()
2421 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2422 self.group_info)
2423 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
2424 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
2425 utils.CommaJoin(err), code=self.ETYPE_WARNING)
2426
2427 for node_uuid in node_vol_should:
2428 n_img = node_image[node_uuid]
2429 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2430
2431 continue
2432 for volume in node_vol_should[node_uuid]:
2433 test = volume not in n_img.volumes
2434 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
2435 "volume %s missing on node %s", volume,
2436 self.cfg.GetNodeName(node_uuid))
2437
2438 if instance.admin_state == constants.ADMINST_UP:
2439 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
2440 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
2441 "instance not running on its primary node %s",
2442 self.cfg.GetNodeName(pnode_uuid))
2443 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
2444 instance.name, "instance is marked as running and lives on"
2445 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
2446
2447 diskdata = [(nname, success, status, idx)
2448 for (nname, disks) in diskstatus.items()
2449 for idx, (success, status) in enumerate(disks)]
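# Illustrative shape (added commentary): diskstatus is
# {node_uuid: [(success, bdev_status), ...]} with one entry per disk index,
# so diskdata flattens it into tuples like ("node-uuid", True, status, 0).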
2450
2451 for nname, success, bdev_status, idx in diskdata:
2452
2453
2454 snode = node_image[nname]
2455 bad_snode = snode.ghost or snode.offline
2456 self._ErrorIf(instance.disks_active and
2457 not success and not bad_snode,
2458 constants.CV_EINSTANCEFAULTYDISK, instance.name,
2459 "couldn't retrieve status for disk/%s on %s: %s",
2460 idx, self.cfg.GetNodeName(nname), bdev_status)
2461
2462 if instance.disks_active and success and \
2463 (bdev_status.is_degraded or
2464 bdev_status.ldisk_status != constants.LDS_OKAY):
2465 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
2466 if bdev_status.is_degraded:
2467 msg += " is degraded"
2468 if bdev_status.ldisk_status != constants.LDS_OKAY:
2469 msg += "; state is '%s'" % \
2470 constants.LDS_NAMES[bdev_status.ldisk_status]
2471
2472 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
2473
2474 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2475 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
2476 "instance %s, connection to primary node failed",
2477 instance.name)
2478
2479 secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid)
2480 self._ErrorIf(len(secondary_nodes) > 1,
2481 constants.CV_EINSTANCELAYOUT, instance.name,
2482 "instance has multiple secondary nodes: %s",
2483 utils.CommaJoin(secondary_nodes),
2484 code=self.ETYPE_WARNING)
2485
2486 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
2487 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes)
2488 if any(es_flags.values()):
2489 if instance.disk_template not in constants.DTS_EXCL_STORAGE:
2490
2491
2492 es_nodes = [n
2493 for (n, es) in es_flags.items()
2494 if es]
2495 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
2496 "instance has template %s, which is not supported on nodes"
2497 " that have exclusive storage set: %s",
2498 instance.disk_template,
2499 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
2500 for (idx, disk) in enumerate(self.cfg.GetInstanceDisks(instance.uuid)):
2501 self._ErrorIf(disk.spindles is None,
2502 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
2503 "number of spindles not configured for disk %s while"
2504 " exclusive storage is enabled, try running"
2505 " gnt-cluster repair-disk-sizes", idx)
2506
2507 if instance.disk_template in constants.DTS_INT_MIRROR:
2508 instance_nodes = utils.NiceSort(inst_nodes)
2509 instance_groups = {}
2510
2511 for node_uuid in instance_nodes:
2512 instance_groups.setdefault(self.all_node_info[node_uuid].group,
2513 []).append(node_uuid)
2514
2515 pretty_list = [
2516 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
2517 groupinfo[group].name)
2518
2519 for group, nodes in sorted(instance_groups.items(),
2520 key=lambda item: pnode_uuid in item[1],
2521 reverse=True)]
2522
2523 self._ErrorIf(len(instance_groups) > 1,
2524 constants.CV_EINSTANCESPLITGROUPS,
2525 instance.name, "instance has primary and secondary nodes in"
2526 " different groups: %s", utils.CommaJoin(pretty_list),
2527 code=self.ETYPE_WARNING)
2528
2529 inst_nodes_offline = []
2530 for snode in secondary_nodes:
2531 s_img = node_image[snode]
2532 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2533 self.cfg.GetNodeName(snode),
2534 "instance %s, connection to secondary node failed",
2535 instance.name)
2536
2537 if s_img.offline:
2538 inst_nodes_offline.append(snode)
2539
2540
2541 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
2542 instance.name, "instance has offline secondary node(s) %s",
2543 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
2544
2545 for node_uuid in inst_nodes:
2546 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
2547 instance.name, "instance lives on ghost node %s",
2548 self.cfg.GetNodeName(node_uuid))
2549 self._ErrorIf(not node_image[node_uuid].vm_capable,
2550 constants.CV_EINSTANCEBADNODE, instance.name,
2551 "instance lives on non-vm_capable node %s",
2552 self.cfg.GetNodeName(node_uuid))
2553
2554 def _VerifyOrphanVolumes(self, vg_name, node_vol_should, node_image,
2555 reserved):
2556 """Verify if there are any unknown volumes in the cluster.
2557
2558 The .os, .swap and backup volumes are ignored. All other volumes are
2559 reported as unknown.
2560
2561 @type vg_name: string
2562 @param vg_name: the name of the Ganeti-administered volume group
2563 @type reserved: L{ganeti.utils.FieldSet}
2564 @param reserved: a FieldSet of reserved volume names
2565
2566 """
2567 for node_uuid, n_img in node_image.items():
2568 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2569 self.all_node_info[node_uuid].group != self.group_uuid):
2570
2571 continue
2572 for volume in n_img.volumes:
2573
2574 if volume.split('/')[0] != vg_name:
2575 continue
2576
2577 test = ((node_uuid not in node_vol_should or
2578 volume not in node_vol_should[node_uuid]) and
2579 not reserved.Matches(volume))
2580 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
2581 self.cfg.GetNodeName(node_uuid),
2582 "volume %s is unknown", volume,
2583 code=_VerifyErrors.ETYPE_WARNING)
2584
2585 def _VerifyNPlusOneMemory(self, node_image, all_insts):
2586 """Verify N+1 Memory Resilience.
2587
2588 Check that if one single node dies we can still start all the
2589 instances it was primary for.
2590
2591 """
2592 cluster_info = self.cfg.GetClusterInfo()
2593 for node_uuid, n_img in node_image.items():
2594
2595
2596
2597
2598
2599
2600
2601
2602 if n_img.offline or \
2603 self.all_node_info[node_uuid].group != self.group_uuid:
2604
2605
2606
2607
2608 continue
2609
2610 for prinode, inst_uuids in n_img.sbp.items():
2611 needed_mem = 0
2612 for inst_uuid in inst_uuids:
2613 bep = cluster_info.FillBE(all_insts[inst_uuid])
2614 if bep[constants.BE_AUTO_BALANCE]:
2615 needed_mem += bep[constants.BE_MINMEM]
2616 test = n_img.mfree < needed_mem
2617 self._ErrorIf(test, constants.CV_ENODEN1,
2618 self.cfg.GetNodeName(node_uuid),
2619 "not enough memory to accommodate instance failovers"
2620 " should node %s fail (%dMiB needed, %dMiB available)",
2621 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
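# Worked example (added commentary, hypothetical values): if this node holds
# the secondaries of two auto-balanced instances whose primary is node B,
# with BE_MINMEM of 1024 and 2048 MiB, then needed_mem = 3072 MiB; should
# the node report mfree = 2048 MiB, CV_ENODEN1 is raised because a failover
# of node B could not be accommodated.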
2622
2623 def _VerifyClientCertificates(self, nodes, all_nvinfo):
2624 """Verifies the consistency of the client certificates.
2625
2626 This includes several aspects:
2627 - the individual validation of all nodes' certificates
2628 - the consistency of the master candidate certificate map
2629 - the consistency of the master candidate certificate map with the
2630 certificates that the master candidates are actually using.
2631
2632 @param nodes: the list of nodes to consider in this verification
2633 @param all_nvinfo: the map of results of the verify_node call to
2634 all nodes
2635
2636 """
2637 candidate_certs = self.cfg.GetClusterInfo().candidate_certs
2638 if candidate_certs is None or len(candidate_certs) == 0:
2639 self._ErrorIf(
2640 True, constants.CV_ECLUSTERCLIENTCERT, None,
2641 "The cluster's list of master candidate certificates is empty."
2642 " If you just updated the cluster, please run"
2643 " 'gnt-cluster renew-crypto --new-node-certificates'.")
2644 return
2645
2646 self._ErrorIf(
2647 len(candidate_certs) != len(set(candidate_certs.values())),
2648 constants.CV_ECLUSTERCLIENTCERT, None,
2649 "There are at least two master candidates configured to use the same"
2650 " certificate.")
2651
2652
2653 for node in nodes:
2654 if node.offline:
2655 continue
2656
2657 nresult = all_nvinfo[node.uuid]
2658 if nresult.fail_msg or not nresult.payload:
2659 continue
2660
2661 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None)
2662
2663 self._ErrorIf(
2664 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None,
2665 "Client certificate of node '%s' failed validation: %s (code '%s')",
2666 node.uuid, msg, errcode)
2667
2668 if not errcode:
2669 digest = msg
2670 if node.master_candidate:
2671 if node.uuid in candidate_certs:
2672 self._ErrorIf(
2673 digest != candidate_certs[node.uuid],
2674 constants.CV_ECLUSTERCLIENTCERT, None,
2675 "Client certificate digest of master candidate '%s' does not"
2676 " match its entry in the cluster's map of master candidate"
2677 " certificates. Expected: %s Got: %s", node.uuid,
2678 digest, candidate_certs[node.uuid])
2679 else:
2680 self._ErrorIf(
2681 True, constants.CV_ECLUSTERCLIENTCERT, None,
2682 "The master candidate '%s' does not have an entry in the"
2683 " map of candidate certificates.", node.uuid)
2684 self._ErrorIf(
2685 digest in candidate_certs.values(),
2686 constants.CV_ECLUSTERCLIENTCERT, None,
2687 "Master candidate '%s' is using a certificate of another node.",
2688 node.uuid)
2689 else:
2690 self._ErrorIf(
2691 node.uuid in candidate_certs,
2692 constants.CV_ECLUSTERCLIENTCERT, None,
2693 "Node '%s' is not a master candidate, but still listed in the"
2694 " map of master candidate certificates.", node.uuid)
2695 self._ErrorIf(
2696 (node.uuid not in candidate_certs) and
2697 (digest in candidate_certs.values()),
2698 constants.CV_ECLUSTERCLIENTCERT, None,
2699 "Node '%s' is not a master candidate and is incorrectly using a"
2700 " certificate of another node which is master candidate.",
2701 node.uuid)
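# Illustrative shape (added commentary, made-up digests): candidate_certs
# maps master candidate node UUIDs to their client certificate digests, e.g.
# {"node1-uuid": "5a1b2c...", "node2-uuid": "9f0e8d..."}; the checks above
# enforce uniqueness and agreement with what the nodes actually present.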
2702
2703 def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, filemap):
2704
2705 """Verifies file checksums collected from all nodes.
2706
2707 @param nodes: List of L{objects.Node} objects
2708 @param master_node_uuid: UUID of master node
2709 @param all_nvinfo: RPC results
2710
2711 """
2712 (files_all, files_opt, files_mc, files_vm) = filemap
2713 files2nodefn = [
2714 (files_all, None),
2715 (files_mc, lambda node: (node.master_candidate or
2716 node.uuid == master_node_uuid)),
2717 (files_vm, lambda node: node.vm_capable),
2718 ]
2719
2720
2721 nodefiles = {}
2722 for (files, fn) in files2nodefn:
2723 if fn is None:
2724 filenodes = nodes
2725 else:
2726 filenodes = list(filter(fn, nodes))
2727 nodefiles.update((filename,
2728 frozenset(map(operator.attrgetter("uuid"), filenodes)))
2729 for filename in files)
2730
2731 assert set(nodefiles) == (files_all | files_mc | files_vm)
2732
2733 fileinfo = dict((filename, {}) for filename in nodefiles)
2734 ignore_nodes = set()
2735
2736 for node in nodes:
2737 if node.offline:
2738 ignore_nodes.add(node.uuid)
2739 continue
2740
2741 nresult = all_nvinfo[node.uuid]
2742
2743 if nresult.fail_msg or not nresult.payload:
2744 node_files = None
2745 else:
2746 fingerprints = nresult.payload.get(constants.NV_FILELIST, {})
2747 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2748 for (key, value) in fingerprints.items())
2749 del fingerprints
2750
2751 test = not (node_files and isinstance(node_files, dict))
2752 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
2753 "Node did not return file checksum data")
2754 if test:
2755 ignore_nodes.add(node.uuid)
2756 continue
2757
2758
2759 for (filename, checksum) in node_files.items():
2760 assert filename in nodefiles
2761 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
2762
2763 for (filename, checksums) in fileinfo.items():
2764 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2765
2766
2767 with_file = frozenset(node_uuid
2768 for node_uuids in fileinfo[filename].values()
2769 for node_uuid in node_uuids) - ignore_nodes
2770
2771 expected_nodes = nodefiles[filename] - ignore_nodes
2772
2773
2774 missing_file = expected_nodes - with_file
2775
2776 if filename in files_opt:
2777
2778 self._ErrorIf(missing_file and missing_file != expected_nodes,
2779 constants.CV_ECLUSTERFILECHECK, None,
2780 "File %s is optional, but it must exist on all or no"
2781 " nodes (not found on %s)",
2782 filename,
2783 utils.CommaJoin(
2784 utils.NiceSort(
2785 map(self.cfg.GetNodeName, missing_file))))
2786 else:
2787 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2788 "File %s is missing from node(s) %s", filename,
2789 utils.CommaJoin(
2790 utils.NiceSort(
2791 map(self.cfg.GetNodeName, missing_file))))
2792
2793
2794 unexpected = with_file - expected_nodes
2795 self._ErrorIf(unexpected,
2796 constants.CV_ECLUSTERFILECHECK, None,
2797 "File %s should not exist on node(s) %s",
2798 filename, utils.CommaJoin(
2799 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
2800
2801
2802 test = len(checksums) > 1
2803 if test:
2804 variants = ["variant %s on %s" %
2805 (idx + 1,
2806 utils.CommaJoin(utils.NiceSort(
2807 map(self.cfg.GetNodeName, node_uuids))))
2808 for (idx, (checksum, node_uuids)) in
2809 enumerate(sorted(checksums.items()))]
2810 else:
2811 variants = []
2812
2813 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
2814 "File %s found with %s different checksums (%s)",
2815 filename, len(checksums), "; ".join(variants))
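# Illustrative shape (added commentary, made-up digests): for each tracked
# file, fileinfo collects {filename: {checksum: set(node_uuids)}}, e.g.
# {".../config.data": {"ab12...": {"n1", "n2"}, "cd34...": {"n3"}}} would be
# reported as two checksum variants for that file.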
2816
2817 def _VerifyNodeDrbdHelper(self, ninfo, nresult, drbd_helper):
2818 """Verify the drbd helper.
2819
2820 """
2821 if drbd_helper:
2822 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2823 test = (helper_result is None)
2824 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2825 "no drbd usermode helper returned")
2826 if helper_result:
2827 status, payload = helper_result
2828 test = not status
2829 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2830 "drbd usermode helper check unsuccessful: %s", payload)
2831 test = status and (payload != drbd_helper)
2832 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2833 "wrong drbd usermode helper: %s", payload)
2834
2835 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2836 drbd_map):
2837 """Verifies the node DRBD status.
2838
2839 @type ninfo: L{objects.Node}
2840 @param ninfo: the node to check
2841 @param nresult: the remote results for the node
2842 @param instanceinfo: the dict of instances
2843 @param drbd_helper: the configured DRBD usermode helper
2844 @param drbd_map: the DRBD map as returned by
2845 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2846
2847 """
2848 self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
2849
2850
2851 node_drbd = {}
2852 for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2853 test = inst_uuid not in instanceinfo
2854 self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2855 "ghost instance '%s' in temporary DRBD map", inst_uuid)
2856
2857
2858
2859 if test:
2860 node_drbd[minor] = (inst_uuid, False)
2861 else:
2862 instance = instanceinfo[inst_uuid]
2863 node_drbd[minor] = (inst_uuid, instance.disks_active)
2864
2865
2866 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2867 test = not isinstance(used_minors, (tuple, list))
2868 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2869 "cannot parse drbd status file: %s", str(used_minors))
2870 if test:
2871
2872 return
2873
2874 for minor, (inst_uuid, must_exist) in node_drbd.items():
2875 test = minor not in used_minors and must_exist
2876 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2877 "drbd minor %d of instance %s is not active", minor,
2878 self.cfg.GetInstanceName(inst_uuid))
2879 for minor in used_minors:
2880 test = minor not in node_drbd
2881 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2882 "unallocated drbd minor %d is in use", minor)
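# Illustrative example (added commentary, hypothetical minors):
# drbd_map[node_uuid] maps DRBD minors to instance UUIDs, e.g.
# {0: "inst1-uuid", 1: "inst2-uuid"}, while NV_DRBDLIST lists the minors
# actually in use on the node, e.g. [0, 3]; here minor 1 would be flagged as
# inactive (if that instance's disks are active) and minor 3 as unallocated.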
2883
2884 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2885 """Builds the node OS structures.
2886
2887 @type ninfo: L{objects.Node}
2888 @param ninfo: the node to check
2889 @param nresult: the remote results for the node
2890 @param nimg: the node image object
2891
2892 """
2893 remote_os = nresult.get(constants.NV_OSLIST, None)
2894 test = (not isinstance(remote_os, list) or
2895 not compat.all(isinstance(v, list) and len(v) == 8
2896 for v in remote_os))
2897
2898 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2899 "node hasn't returned valid OS data")
2900
2901 nimg.os_fail = test
2902
2903 if test:
2904 return
2905
2906 os_dict = {}
2907
2908 for (name, os_path, status, diagnose,
2909 variants, parameters, api_ver,
2910 trusted) in nresult[constants.NV_OSLIST]:
2911
2912 if name not in os_dict:
2913 os_dict[name] = []
2914
2915
2916
2917 parameters = [tuple(v) for v in parameters]
2918 os_dict[name].append((os_path, status, diagnose,
2919 set(variants), set(parameters), set(api_ver),
2920 trusted))
2921
2922 nimg.oslist = os_dict
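# Illustrative shape (added commentary, made-up values): nimg.oslist maps OS
# names to lists of (path, status, diagnose, variants, parameters,
# api_versions, trusted) tuples, e.g. {"debootstrap":
# [("/srv/ganeti/os/debootstrap", True, "", {"buster"}, set(), {20}, True)]};
# more than one entry per name means shadowed duplicates, which
# _VerifyNodeOS reports.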
2923
2924 def _VerifyNodeOS(self, ninfo, nimg, base):
2925 """Verifies the node OS list.
2926
2927 @type ninfo: L{objects.Node}
2928 @param ninfo: the node to check
2929 @param nimg: the node image object
2930 @param base: the 'template' node we match against (e.g. from the master)
2931
2932 """
2933 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2934
2935 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2936 for os_name, os_data in nimg.oslist.items():
2937 assert os_data, "Empty OS status for OS %s?!" % os_name
2938 f_path, f_status, f_diag, f_var, f_param, f_api, f_trusted = os_data[0]
2939 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
2940 "Invalid OS %s (located at %s): %s",
2941 os_name, f_path, f_diag)
2942 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
2943 "OS '%s' has multiple entries"
2944 " (first one shadows the rest): %s",
2945 os_name, utils.CommaJoin([v[0] for v in os_data]))
2946
2947 test = os_name not in base.oslist
2948 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2949 "Extra OS %s not present on reference node (%s)",
2950 os_name, self.cfg.GetNodeName(base.uuid))
2951 if test:
2952 continue
2953 assert base.oslist[os_name], "Base node has empty OS status?"
2954 _, b_status, _, b_var, b_param, b_api, b_trusted = base.oslist[os_name][0]
2955 if not b_status:
2956
2957 continue
2958 for kind, a, b in [("API version", f_api, b_api),
2959 ("variants list", f_var, b_var),
2960 ("parameters", beautify_params(f_param),
2961 beautify_params(b_param))]:
2962 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
2963 "OS %s for %s differs from reference node %s:"
2964 " [%s] vs. [%s]", kind, os_name,
2965 self.cfg.GetNodeName(base.uuid),
2966 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2967 for kind, a, b in [("trusted", f_trusted, b_trusted)]:
2968 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
2969 "OS %s for %s differs from reference node %s:"
2970 " %s vs. %s", kind, os_name,
2971 self.cfg.GetNodeName(base.uuid), a, b)
2972
2973
2974 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2975 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
2976 "OSes present on reference node %s"
2977 " but missing on this node: %s",
2978 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2979
2980 def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
2981 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2982
2983 @type ninfo: L{objects.Node}
2984 @param ninfo: the node to check
2985 @param nresult: the remote results for the node
2986 @type is_master: bool
2987 @param is_master: Whether node is the master node
2988
2989 """
2990 cluster = self.cfg.GetClusterInfo()
2991 if (is_master and
2992 (cluster.IsFileStorageEnabled() or
2993 cluster.IsSharedFileStorageEnabled())):
2994 try:
2995 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
2996 except KeyError:
2997
2998 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2999 "Node did not return forbidden file storage paths")
3000 else:
3001 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
3002 "Found forbidden file storage paths: %s",
3003 utils.CommaJoin(fspaths))
3004 else:
3005 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
3006 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
3007 "Node should not have returned forbidden file storage"
3008 " paths")
3009
3010 def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
3011 verify_key, error_key):
3012 """Verifies (file) storage paths.
3013
3014 @type ninfo: L{objects.Node}
3015 @param ninfo: the node to check
3016 @param nresult: the remote results for the node
3017 @type file_disk_template: string
3018 @param file_disk_template: file-based disk template, whose directory
3019 is supposed to be verified
3020 @type verify_key: string
3021 @param verify_key: key for the verification map of this file
3022 verification step
3023 @param error_key: error key to be added to the verification results
3024 in case something goes wrong in this verification step
3025
3026 """
3027 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
3028 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
3029 ))
3030
3031 cluster = self.cfg.GetClusterInfo()
3032 if cluster.IsDiskTemplateEnabled(file_disk_template):
3033 self._ErrorIf(
3034 verify_key in nresult,
3035 error_key, ninfo.name,
3036 "The configured %s storage path is unusable: %s" %
3037 (file_disk_template, nresult.get(verify_key)))
3038
3049
3060
3071
3072 def _VerifyOob(self, ninfo, nresult):
3073 """Verifies out of band functionality of a node.
3074
3075 @type ninfo: L{objects.Node}
3076 @param ninfo: the node to check
3077 @param nresult: the remote results for the node
3078
3079 """
3080
3081
3082 if ((ninfo.master_candidate or ninfo.master_capable) and
3083 constants.NV_OOB_PATHS in nresult):
3084 for path_result in nresult[constants.NV_OOB_PATHS]:
3085 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
3086 ninfo.name, path_result)
3087
3088 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3089 """Verifies and updates the node volume data.
3090
3091 This function will update a L{NodeImage}'s internal structures
3092 with data from the remote call.
3093
3094 @type ninfo: L{objects.Node}
3095 @param ninfo: the node to check
3096 @param nresult: the remote results for the node
3097 @param nimg: the node image object
3098 @param vg_name: the configured VG name
3099
3100 """
3101 nimg.lvm_fail = True
3102 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3103 if vg_name is None:
3104 pass
3105 elif isinstance(lvdata, str):
3106 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
3107 "LVM problem on node: %s", utils.SafeEncode(lvdata))
3108 elif not isinstance(lvdata, dict):
3109 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
3110 "rpc call to node failed (lvlist)")
3111 else:
3112 nimg.volumes = lvdata
3113 nimg.lvm_fail = False
3114
3115 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3116 """Verifies and updates the node instance list.
3117
3118 If the listing was successful, then updates this node's instance
3119 list. Otherwise, it marks the RPC call as failed for the instance
3120 list key.
3121
3122 @type ninfo: L{objects.Node}
3123 @param ninfo: the node to check
3124 @param nresult: the remote results for the node
3125 @param nimg: the node image object
3126
3127 """
3128 idata = nresult.get(constants.NV_INSTANCELIST, None)
3129 test = not isinstance(idata, list)
3130 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3131 "rpc call to node failed (instancelist): %s",
3132 utils.SafeEncode(str(idata)))
3133 if test:
3134 nimg.hyp_fail = True
3135 else:
3136 nimg.instances = [uuid for (uuid, _) in
3137 self.cfg.GetMultiInstanceInfoByName(idata)]
3138
3139 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3140 """Verifies and computes a node information map.
3141
3142 @type ninfo: L{objects.Node}
3143 @param ninfo: the node to check
3144 @param nresult: the remote results for the node
3145 @param nimg: the node image object
3146 @param vg_name: the configured VG name
3147
3148 """
3149
3150 hv_info = nresult.get(constants.NV_HVINFO, None)
3151 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3152 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3153 "rpc call to node failed (hvinfo)")
3154 if not test:
3155 try:
3156 nimg.mfree = int(hv_info["memory_free"])
3157 except (ValueError, TypeError):
3158 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
3159 "node returned invalid nodeinfo, check hypervisor")
3160
3161
3162 if vg_name is not None:
3163 test = (constants.NV_VGLIST not in nresult or
3164 vg_name not in nresult[constants.NV_VGLIST])
3165 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
3166 "node didn't return data for the volume group '%s'"
3167 " - it is either missing or broken", vg_name)
3168 if not test:
3169 try:
3170 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3171 except (ValueError, TypeError):
3172 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
3173 "node returned invalid LVM info, check LVM status")
3174
3175 def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
3176 """Gets per-disk status information for all instances.
3177
3178 @type node_uuids: list of strings
3179 @param node_uuids: Node UUIDs
3180 @type node_image: dict of (UUID, L{NodeImage})
3181 @param node_image: node images
3182 @type instanceinfo: dict of (UUID, L{objects.Instance})
3183 @param instanceinfo: Instance objects
3184 @rtype: {instance: {node: [(success, payload)]}}
3185 @return: a dictionary of per-instance dictionaries with nodes as
3186 keys and disk information as values; the disk information is a
3187 list of tuples (success, payload)
3188
3189 """
3190 node_disks = {}
3191 node_disks_dev_inst_only = {}
3192 diskless_instances = set()
3193 nodisk_instances = set()
3194 diskless = constants.DT_DISKLESS
3195
3196 for nuuid in node_uuids:
3197 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
3198 node_image[nuuid].sinst))
3199 diskless_instances.update(uuid for uuid in node_inst_uuids
3200 if instanceinfo[uuid].disk_template == diskless)
3201 disks = [(inst_uuid, disk)
3202 for inst_uuid in node_inst_uuids
3203 for disk in self.cfg.GetInstanceDisks(inst_uuid)]
3204
3205 if not disks:
3206 nodisk_instances.update(uuid for uuid in node_inst_uuids
3207 if instanceinfo[uuid].disk_template != diskless)
3208
3209 continue
3210
3211 node_disks[nuuid] = disks
3212
3213
3214 dev_inst_only = []
3215 for (inst_uuid, dev) in disks:
3216 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
3217 self.cfg)
3218 dev_inst_only.append((anno_disk, instanceinfo[inst_uuid]))
3219
3220 node_disks_dev_inst_only[nuuid] = dev_inst_only
3221
3222 assert len(node_disks) == len(node_disks_dev_inst_only)
3223
3224
3225 result = self.rpc.call_blockdev_getmirrorstatus_multi(
3226 node_disks.keys(), node_disks_dev_inst_only)
3227
3228 assert len(result) == len(node_disks)
3229
3230 instdisk = {}
3231
3232 for (nuuid, nres) in result.items():
3233 node = self.cfg.GetNodeInfo(nuuid)
3234 disks = node_disks[node.uuid]
3235
3236 if nres.offline:
3237
3238 data = len(disks) * [(False, "node offline")]
3239 else:
3240 msg = nres.fail_msg
3241 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
3242 "while getting disk information: %s", msg)
3243 if msg:
3244
3245 data = len(disks) * [(False, msg)]
3246 else:
3247 data = []
3248 for idx, i in enumerate(nres.payload):
3249 if isinstance(i, (tuple, list)) and len(i) == 2:
3250 data.append(i)
3251 else:
3252 logging.warning("Invalid result from node %s, entry %d: %s",
3253 node.name, idx, i)
3254 data.append((False, "Invalid result from the remote node"))
3255
3256 for ((inst_uuid, _), status) in zip(disks, data):
3257 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
3258 .append(status)
3259
3260
3261 for inst_uuid in diskless_instances:
3262 assert inst_uuid not in instdisk
3263 instdisk[inst_uuid] = {}
3264
3265 for inst_uuid in nodisk_instances:
3266 assert inst_uuid not in instdisk
3267 instdisk[inst_uuid] = {}
3268
3269 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3270 len(nuuids) <= len(
3271 self.cfg.GetInstanceNodes(instanceinfo[inst].uuid)) and
3272 compat.all(isinstance(s, (tuple, list)) and
3273 len(s) == 2 for s in statuses)
3274 for inst, nuuids in instdisk.items()
3275 for nuuid, statuses in nuuids.items())
3276 if __debug__:
3277 instdisk_keys = set(instdisk)
3278 instanceinfo_keys = set(instanceinfo)
3279 assert instdisk_keys == instanceinfo_keys, \
3280 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3281 (instdisk_keys, instanceinfo_keys))
3282
3283 return instdisk
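# Illustrative shape (added commentary): the returned instdisk looks like
# {"inst1-uuid": {"node1-uuid": [(True, status0), (True, status1)]}},
# i.e. one (success, payload) tuple per disk index on each of the instance's
# nodes; diskless instances map to an empty dict.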
3284
3285 @staticmethod
3286 def _SshNodeSelector(group_uuid, all_nodes):
3287 """Create endless iterators for all potential SSH check hosts.
3288
3289 """
3290 nodes = [node for node in all_nodes
3291 if (node.group != group_uuid and
3292 not node.offline)]
3293 keyfunc = operator.attrgetter("group")
3294
3295 return [itertools.cycle(sorted(map(operator.attrgetter("name"), names)))
3296 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3297 keyfunc)]
3298
3299
3300 @classmethod
3301 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3302 """Choose which nodes should talk to which other nodes.
3303
3304 We will make nodes contact all nodes in their group, and one node from
3305 every other group.
3306
3307 @warning: This algorithm has a known issue if one node group is much
3308 smaller than others (e.g. just one node). In such a case all other
3309 nodes will talk to the single node.
3310
3311 """
3312 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3313 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3314
3315 return (online_nodes,
3316 dict((name, sorted(next(i) for i in sel))
3317 for name in online_nodes))
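# Illustrative example (added commentary, hypothetical groups): with this
# group's nodes [n1, n2] and two other groups {g2: [a, b], g3: [c]}, each
# online node is paired with one peer from every other group, cycling where
# a group has several nodes, e.g. n1 -> [a, c] and n2 -> [b, c].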
3318
3319 def BuildHooksEnv(self):
3320 """Build hooks env.
3321
3322 Cluster-Verify hooks are run only in the post phase; if they fail, their
3323 output is logged in the verify output and the verification fails.
3324
3325 """
3326 env = {
3327 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3328 }
3329
3330 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3331 for node in self.my_node_info.values())
3332
3333 return env
3334
3335 def BuildHooksNodes(self):
3336 """Build hooks nodes.
3337
3338 """
3339 return ([], list(self.my_node_info.keys()))
3340
3341 def Exec(self, feedback_fn):
3342 """Verify integrity of the node group, performing various tests on nodes.
3343
3344 """
3345
3346 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3347
3348 if not self.my_node_uuids:
3349
3350 feedback_fn("* Empty node group, skipping verification")
3351 return True
3352
3353 self.bad = False
3354 verbose = self.op.verbose
3355 self._feedback_fn = feedback_fn
3356
3357 vg_name = self.cfg.GetVGName()
3358 drbd_helper = self.cfg.GetDRBDHelper()
3359 cluster = self.cfg.GetClusterInfo()
3360 hypervisors = cluster.enabled_hypervisors
3361 node_data_list = self.my_node_info.values()
3362
3363 i_non_redundant = []
3364 i_non_a_balanced = []
3365 i_offline = 0
3366 n_offline = 0
3367 n_drained = 0
3368 node_vol_should = {}
3369
3370
3371
3372
3373 filemap = ComputeAncillaryFiles(cluster, False)
3374
3375
3376 master_node_uuid = self.master_node = self.cfg.GetMasterNode()
3377 master_ip = self.cfg.GetMasterIP()
3378
3379 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
3380
3381 user_scripts = []
3382 if self.cfg.GetUseExternalMipScript():
3383 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3384
3385 node_verify_param = {
3386 constants.NV_FILELIST:
3387 list(map(vcluster.MakeVirtualPath,
3388 utils.UniqueSequence(filename
3389 for files in filemap
3390 for filename in files))),
3391 constants.NV_NODELIST:
3392 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3393 self.all_node_info.values()),
3394 constants.NV_HYPERVISOR: hypervisors,
3395 constants.NV_HVPARAMS:
3396 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3397 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3398 for node in node_data_list
3399 if not node.offline],
3400 constants.NV_INSTANCELIST: hypervisors,
3401 constants.NV_VERSION: None,
3402 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3403 constants.NV_NODESETUP: None,
3404 constants.NV_TIME: None,
3405 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
3406 constants.NV_OSLIST: None,
3407 constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(),
3408 constants.NV_USERSCRIPTS: user_scripts,
3409 constants.NV_CLIENT_CERT: None,
3410 }
3411
3412 if vg_name is not None:
3413 node_verify_param[constants.NV_VGLIST] = None
3414 node_verify_param[constants.NV_LVLIST] = vg_name
3415 node_verify_param[constants.NV_PVLIST] = [vg_name]
3416
3417 if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8):
3418 if drbd_helper:
3419 node_verify_param[constants.NV_DRBDVERSION] = None
3420 node_verify_param[constants.NV_DRBDLIST] = None
3421 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3422
3423 if cluster.IsFileStorageEnabled() or \
3424 cluster.IsSharedFileStorageEnabled():
3425
3426 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
3427 self.cfg.GetMasterNodeName()
3428 if cluster.IsFileStorageEnabled():
3429 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
3430 cluster.file_storage_dir
3431 if cluster.IsSharedFileStorageEnabled():
3432 node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \
3433 cluster.shared_file_storage_dir
3434
3435
3436
3437 bridges = set()
3438 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3439 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3440 bridges.add(default_nicpp[constants.NIC_LINK])
3441 for instance in self.my_inst_info.values():
3442 for nic in instance.nics:
3443 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3444 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3445 bridges.add(full_nic[constants.NIC_LINK])
3446
3447 if bridges:
3448 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3449
3450
3451 node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
3452 uuid=node.uuid,
3453 vm_capable=node.vm_capable))
3454 for node in node_data_list)
3455
3456
3457 oob_paths = []
3458 for node in self.all_node_info.values():
3459 path = SupportsOob(self.cfg, node)
3460 if path and path not in oob_paths:
3461 oob_paths.append(path)
3462
3463 if oob_paths:
3464 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3465
3466 for inst_uuid in self.my_inst_uuids:
3467 instance = self.my_inst_info[inst_uuid]
3468 if instance.admin_state == constants.ADMINST_OFFLINE:
3469 i_offline += 1
3470
3471 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
3472 for nuuid in inst_nodes:
3473 if nuuid not in node_image:
3474 gnode = self.NodeImage(uuid=nuuid)
3475 gnode.ghost = (nuuid not in self.all_node_info)
3476 node_image[nuuid] = gnode
3477
3478 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
3479
3480 pnode = instance.primary_node
3481 node_image[pnode].pinst.append(instance.uuid)
3482
3483 for snode in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
3484 nimg = node_image[snode]
3485 nimg.sinst.append(instance.uuid)
3486 if pnode not in nimg.sbp:
3487 nimg.sbp[pnode] = []
3488 nimg.sbp[pnode].append(instance.uuid)
3489
3490 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
3491 self.my_node_info.keys())
3492
3493
3494 self._exclusive_storage = compat.any(es_flags.values())
3495 if self._exclusive_storage:
3496 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3497
3498 node_group_uuids = dict(map(lambda n: (n.name, n.group),
3499 self.cfg.GetAllNodesInfo().values()))
3500 groups_config = self.cfg.GetAllNodeGroupsInfoDict()
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515 with self.cfg.GetConfigManager(shared=True):
3516 feedback_fn("* Gathering information about nodes (%s nodes)" %
3517 len(self.my_node_uuids))
3518
3519 self.cfg.FlushConfig()
3520
3521
3522
3523
3524 nvinfo_starttime = time.time()
3525
3526
3527
3528
3529
3530 cluster_name = self.cfg.GetClusterName()
3531 hvparams = self.cfg.GetClusterInfo().hvparams
3532 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
3533 node_verify_param,
3534 cluster_name,
3535 hvparams,
3536 node_group_uuids,
3537 groups_config)
3538 nvinfo_endtime = time.time()
3539
3540 if self.extra_lv_nodes and vg_name is not None:
3541 feedback_fn("* Gathering information about extra nodes (%s nodes)" %
3542 len(self.extra_lv_nodes))
3543 extra_lv_nvinfo = \
3544 self.rpc.call_node_verify(self.extra_lv_nodes,
3545 {constants.NV_LVLIST: vg_name},
3546 self.cfg.GetClusterName(),
3547 self.cfg.GetClusterInfo().hvparams,
3548 node_group_uuids,
3549 groups_config)
3550 else:
3551 extra_lv_nvinfo = {}
3552
3553
3554
3555 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
3556 if absent_node_uuids:
3557 vf_nvinfo = all_nvinfo.copy()
3558 vf_node_info = list(self.my_node_info.values())
3559 additional_node_uuids = []
3560 if master_node_uuid not in self.my_node_info:
3561 additional_node_uuids.append(master_node_uuid)
3562 vf_node_info.append(self.all_node_info[master_node_uuid])
3563
3564
3565 for node_uuid in absent_node_uuids:
3566 nodeinfo = self.all_node_info[node_uuid]
3567 if (nodeinfo.vm_capable and not nodeinfo.offline and
3568 node_uuid != master_node_uuid):
3569 additional_node_uuids.append(node_uuid)
3570 vf_node_info.append(self.all_node_info[node_uuid])
3571 break
3572 key = constants.NV_FILELIST
3573
3574 feedback_fn("* Gathering information about the master node")
3575 vf_nvinfo.update(self.rpc.call_node_verify(
3576 additional_node_uuids, {key: node_verify_param[key]},
3577 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams,
3578 node_group_uuids,
3579 groups_config))
3580 else:
3581 vf_nvinfo = all_nvinfo
3582 vf_node_info = self.my_node_info.values()
3583
3584 all_drbd_map = self.cfg.ComputeDRBDMap()
3585
3586 feedback_fn("* Gathering disk information (%s nodes)" %
3587 len(self.my_node_uuids))
3588 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
3589 self.my_inst_info)
3590
3591 feedback_fn("* Verifying configuration file consistency")
3592
3593 self._VerifyClientCertificates(self.my_node_info.values(), all_nvinfo)
3594
3595 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
3596
3597 feedback_fn("* Verifying node status")
3598
3599 refos_img = None
3600
3601 for node_i in node_data_list:
3602 nimg = node_image[node_i.uuid]
3603
3604 if node_i.offline:
3605 if verbose:
3606 feedback_fn("* Skipping offline node %s" % (node_i.name,))
3607 n_offline += 1
3608 continue
3609
3610 if node_i.uuid == master_node_uuid:
3611 ntype = "master"
3612 elif node_i.master_candidate:
3613 ntype = "master candidate"
3614 elif node_i.drained:
3615 ntype = "drained"
3616 n_drained += 1
3617 else:
3618 ntype = "regular"
3619 if verbose:
3620 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))
3621
3622 msg = all_nvinfo[node_i.uuid].fail_msg
3623 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
3624 "while contacting node: %s", msg)
3625 if msg:
3626 nimg.rpc_fail = True
3627 continue
3628
3629 nresult = all_nvinfo[node_i.uuid].payload
3630
3631 nimg.call_ok = self._VerifyNode(node_i, nresult)
3632 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3633 self._VerifyNodeNetwork(node_i, nresult)
3634 self._VerifyNodeUserScripts(node_i, nresult)
3635 self._VerifyOob(node_i, nresult)
3636 self._VerifyAcceptedFileStoragePaths(node_i, nresult,
3637 node_i.uuid == master_node_uuid)
3638 self._VerifyFileStoragePaths(node_i, nresult)
3639 self._VerifySharedFileStoragePaths(node_i, nresult)
3640 self._VerifyGlusterStoragePaths(node_i, nresult)
3641
3642 if nimg.vm_capable:
3643 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3644 if constants.DT_DRBD8 in cluster.enabled_disk_templates:
3645 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3646 all_drbd_map)
3647
3648 if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \
3649 (constants.DT_DRBD8 in cluster.enabled_disk_templates):
3650 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3651 self._UpdateNodeInstances(node_i, nresult, nimg)
3652 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3653 self._UpdateNodeOS(node_i, nresult, nimg)
3654
3655 if not nimg.os_fail:
3656 if refos_img is None:
3657 refos_img = nimg
3658 self._VerifyNodeOS(node_i, nimg, refos_img)
3659 self._VerifyNodeBridges(node_i, nresult, bridges)
3660
3661
3662
3663
3664 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)
3665
3666 for inst_uuid in non_primary_inst_uuids:
3667 test = inst_uuid in self.all_inst_info
3668 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
3669 self.cfg.GetInstanceName(inst_uuid),
3670 "instance should not run on node %s", node_i.name)
3671 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3672 "node is running unknown instance %s", inst_uuid)
3673
3674 self._VerifyGroupDRBDVersion(all_nvinfo)
3675 self._VerifyGroupLVM(node_image, vg_name)
3676
3677 for node_uuid, result in extra_lv_nvinfo.items():
3678 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
3679 node_image[node_uuid], vg_name)
3680
3681 feedback_fn("* Verifying instance status")
3682 for inst_uuid in self.my_inst_uuids:
3683 instance = self.my_inst_info[inst_uuid]
3684 if verbose:
3685 feedback_fn("* Verifying instance %s" % instance.name)
3686 self._VerifyInstance(instance, node_image, instdisk[inst_uuid])
3687
3688
3689
3690 if instance.disk_template not in constants.DTS_MIRRORED:
3691 i_non_redundant.append(instance)
3692
3693 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
3694 i_non_a_balanced.append(instance)
3695
3696 feedback_fn("* Verifying orphan volumes")
3697 reserved = utils.FieldSet(*cluster.reserved_lvs)
3698
3699
3700
3701
3702 for instance in self.all_inst_info.values():
3703 for secondary in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
3704 if (secondary in self.my_node_info
3705 and instance.name not in self.my_inst_info):
3706 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
3707 break
3708
3709 self._VerifyOrphanVolumes(vg_name, node_vol_should, node_image, reserved)
3710
3711 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3712 feedback_fn("* Verifying N+1 Memory redundancy")
3713 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3714
3715 feedback_fn("* Other Notes")
3716 if i_non_redundant:
3717 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3718 % len(i_non_redundant))
3719
3720 if i_non_a_balanced:
3721 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3722 % len(i_non_a_balanced))
3723
3724 if i_offline:
3725 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3726
3727 if n_offline:
3728 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3729
3730 if n_drained:
3731 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3732
3733 return not self.bad
3734
3735 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3736 """Analyze the post-hooks' result
3737
3738 This method analyses the hook result, handles it, and sends some
3739 nicely-formatted feedback back to the user.
3740
3741 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3742 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3743 @param hooks_results: the results of the multi-node hooks rpc call
3744 @param feedback_fn: function used to send feedback back to the caller
3745 @param lu_result: previous Exec result
3746 @return: the new Exec result, based on the previous result
3747 and hook results
3748
3749 """
3750
3751
3752 if not self.my_node_uuids:
3753
3754 pass
3755 elif phase == constants.HOOKS_PHASE_POST:
3756
3757 feedback_fn("* Hooks Results")
3758 assert hooks_results, "invalid result from hooks"
3759
3760 for node_name in hooks_results:
3761 res = hooks_results[node_name]
3762 msg = res.fail_msg
3763 test = msg and not res.offline
3764 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3765 "Communication failure in hooks execution: %s", msg)
3766 if test:
3767 lu_result = False
3768 continue
3769 if res.offline:
3770
3771 continue
3772 for script, hkr, output in res.payload:
3773 test = hkr == constants.HKR_FAIL
3774 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3775 "Script %s failed, output:", script)
3776 if test:
3777 output = self._HOOKS_INDENT_RE.sub(" ", output)
3778 feedback_fn("%s" % output)
3779 lu_result = False
3780
3781 return lu_result
3782
3783
3784 class LUClusterVerifyDisks(NoHooksLU):
3785 """Verifies the cluster disks status.
3786
3787 """
3788 REQ_BGL = False
3789
3790 def ExpandNames(self):
3791 self.share_locks = ShareAll()
3792 self.needed_locks = {
3793 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3794 }
3795
3796 def Exec(self, feedback_fn):
3797 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3798
3799 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3800 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3801 for group in group_names])
3802