31 """Logical units dealing with the cluster."""
32
33 import OpenSSL
34
35 import copy
36 import itertools
37 import logging
38 import operator
39 import os
40 import re
41 import time
42
43 from ganeti import compat
44 from ganeti import constants
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import masterd
49 from ganeti import netutils
50 from ganeti import objects
51 from ganeti import opcodes
52 from ganeti import pathutils
53 from ganeti import query
54 from ganeti import rpc
55 from ganeti import runtime
56 from ganeti import ssh
57 from ganeti import uidpool
58 from ganeti import utils
59 from ganeti import vcluster
60
61 from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
62 ResultWithJobs
63 from ganeti.cmdlib.common import ShareAll, RunPostHook, \
64 ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
65 GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
66 GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
67 CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
68 ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
69 CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \
70 CheckDiskAccessModeConsistency
71
72 import ganeti.masterd.instance
76 """Activate the master IP on the master node.
77
78 """
79 def Exec(self, feedback_fn):
88
91 """Deactivate the master IP on the master node.
92
93 """
94 def Exec(self, feedback_fn):
103
106 """Return configuration values.
107
108 """
109 REQ_BGL = False
110
112 self.cq = ClusterQuery(None, self.op.output_fields, False)
113
116
119
120 def Exec(self, feedback_fn):
121 result = self.cq.OldStyleQuery(self)
122
123 assert len(result) == 1
124
125 return result[0]
126
129 """Logical unit for destroying the cluster.
130
131 """
132 HPATH = "cluster-destroy"
133 HTYPE = constants.HTYPE_CLUSTER
134
136 """Build hooks env.
137
138 """
139 return {
140 "OP_TARGET": self.cfg.GetClusterName(),
141 }
142
144 """Build hooks nodes.
145
146 """
147 return ([], [])
148
150 """Check prerequisites.
151
152 This checks whether the cluster is empty.
153
154 Any errors are signaled by raising errors.OpPrereqError.
155
156 """
157 master = self.cfg.GetMasterNode()
158
159 nodelist = self.cfg.GetNodeList()
160 if len(nodelist) != 1 or nodelist[0] != master:
161 raise errors.OpPrereqError("There are still %d node(s) in"
162 " this cluster." % (len(nodelist) - 1),
163 errors.ECODE_INVAL)
164 instancelist = self.cfg.GetInstanceList()
165 if instancelist:
166 raise errors.OpPrereqError("There are still %d instance(s) in"
167 " this cluster." % len(instancelist),
168 errors.ECODE_INVAL)
169
170 def Exec(self, feedback_fn):
184
185
186 class LUClusterPostInit(LogicalUnit):
187 """Logical unit for running hooks after cluster initialization.
188
189 """
190 HPATH = "cluster-init"
191 HTYPE = constants.HTYPE_CLUSTER
192
193 def CheckArguments(self):
194 self.master_uuid = self.cfg.GetMasterNode()
195 self.master_ndparams = self.cfg.GetNdParams(self.cfg.GetMasterNodeInfo())
196
197
198
199
200
201
202 if (self.master_ndparams[constants.ND_OVS] and not
203 self.master_ndparams.get(constants.ND_OVS_LINK, None)):
204 self.LogInfo("No physical interface for OpenvSwitch was given."
205 " OpenvSwitch will not have an outside connection. This"
206 " might not be what you want.")
207
208 def BuildHooksEnv(self):
209 """Build hooks env.
210
211 """
212 return {
213 "OP_TARGET": self.cfg.GetClusterName(),
214 }
215
216 def BuildHooksNodes(self):
217 """Build hooks nodes.
218
219 """
220 return ([], [self.cfg.GetMasterNode()])
221
222 def Exec(self, feedback_fn):
223 """Create and configure Open vSwitch
224
225 """
226 if self.master_ndparams[constants.ND_OVS]:
227 result = self.rpc.call_node_configure_ovs(
228 self.master_uuid,
229 self.master_ndparams[constants.ND_OVS_NAME],
230 self.master_ndparams.get(constants.ND_OVS_LINK, None))
231 result.Raise("Could not successfully configure Open vSwitch")
232 return True
233
289
292 """Query cluster configuration.
293
294 """
295 REQ_BGL = False
296
298 self.needed_locks = {}
299
300 def Exec(self, feedback_fn):
301 """Return cluster config.
302
303 """
304 cluster = self.cfg.GetClusterInfo()
305 os_hvp = {}
306
307 # Filter just for enabled hypervisors
308 for os_name, hv_dict in cluster.os_hvp.items():
309 os_hvp[os_name] = {}
310 for hv_name, hv_params in hv_dict.items():
311 if hv_name in cluster.enabled_hypervisors:
312 os_hvp[os_name][hv_name] = hv_params
313
314
315 primary_ip_version = constants.IP4_VERSION
316 if cluster.primary_ip_family == netutils.IP6Address.family:
317 primary_ip_version = constants.IP6_VERSION
318
319 result = {
320 "software_version": constants.RELEASE_VERSION,
321 "protocol_version": constants.PROTOCOL_VERSION,
322 "config_version": constants.CONFIG_VERSION,
323 "os_api_version": max(constants.OS_API_VERSIONS),
324 "export_version": constants.EXPORT_VERSION,
325 "vcs_version": constants.VCS_VERSION,
326 "architecture": runtime.GetArchInfo(),
327 "name": cluster.cluster_name,
328 "master": self.cfg.GetMasterNodeName(),
329 "default_hypervisor": cluster.primary_hypervisor,
330 "enabled_hypervisors": cluster.enabled_hypervisors,
331 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
332 for hypervisor_name in cluster.enabled_hypervisors]),
333 "os_hvp": os_hvp,
334 "beparams": cluster.beparams,
335 "osparams": cluster.osparams,
336 "ipolicy": cluster.ipolicy,
337 "nicparams": cluster.nicparams,
338 "ndparams": cluster.ndparams,
339 "diskparams": cluster.diskparams,
340 "candidate_pool_size": cluster.candidate_pool_size,
341 "master_netdev": cluster.master_netdev,
342 "master_netmask": cluster.master_netmask,
343 "use_external_mip_script": cluster.use_external_mip_script,
344 "volume_group_name": cluster.volume_group_name,
345 "drbd_usermode_helper": cluster.drbd_usermode_helper,
346 "file_storage_dir": cluster.file_storage_dir,
347 "shared_file_storage_dir": cluster.shared_file_storage_dir,
348 "maintain_node_health": cluster.maintain_node_health,
349 "ctime": cluster.ctime,
350 "mtime": cluster.mtime,
351 "uuid": cluster.uuid,
352 "tags": list(cluster.GetTags()),
353 "uid_pool": cluster.uid_pool,
354 "default_iallocator": cluster.default_iallocator,
355 "reserved_lvs": cluster.reserved_lvs,
356 "primary_ip_version": primary_ip_version,
357 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
358 "hidden_os": cluster.hidden_os,
359 "blacklisted_os": cluster.blacklisted_os,
360 "enabled_disk_templates": cluster.enabled_disk_templates,
361 }
362
363 return result
364
367 """Force the redistribution of cluster configuration.
368
369 This is a very simple LU.
370
371 """
372 REQ_BGL = False
373
380
381 def Exec(self, feedback_fn):
387
390 """Rename the cluster.
391
392 """
393 HPATH = "cluster-rename"
394 HTYPE = constants.HTYPE_CLUSTER
395
397 """Build hooks env.
398
399 """
400 return {
401 "OP_TARGET": self.cfg.GetClusterName(),
402 "NEW_NAME": self.op.name,
403 }
404
410
433
434 def Exec(self, feedback_fn):
435 """Rename the cluster.
436
437 """
438 clustername = self.op.name
439 new_ip = self.ip
440
441 # shutdown the master IP
442 master_params = self.cfg.GetMasterNetworkParameters()
443 ems = self.cfg.GetUseExternalMipScript()
444 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
445 master_params, ems)
446 result.Raise("Could not disable the master role")
447
448 try:
449 cluster = self.cfg.GetClusterInfo()
450 cluster.cluster_name = clustername
451 cluster.master_ip = new_ip
452 self.cfg.Update(cluster, feedback_fn)
453
454 # update the known hosts file
455 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
456 node_list = self.cfg.GetOnlineNodeList()
457 try:
458 node_list.remove(master_params.uuid)
459 except ValueError:
460 pass
461 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
462 finally:
463 master_params.ip = new_ip
464 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
465 master_params, ems)
466 result.Warn("Could not re-enable the master role on the master,"
467 " please restart manually", self.LogWarning)
468
469 return clustername
470
473 """Verifies the cluster disks sizes.
474
475 """
476 REQ_BGL = False
477
479 if self.op.instances:
480 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
481 # Not getting the node allocation lock as only a specific set of
482 # instances (and their nodes) is going to be acquired
483 self.needed_locks = {
484 locking.LEVEL_NODE_RES: [],
485 locking.LEVEL_INSTANCE: self.wanted_names,
486 }
487 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
488 else:
489 self.wanted_names = None
490 self.needed_locks = {
491 locking.LEVEL_NODE_RES: locking.ALL_SET,
492 locking.LEVEL_INSTANCE: locking.ALL_SET,
493
494 # This opcode acquires the node locks for all instances
495 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
496 }
497
498 self.share_locks = {
499 locking.LEVEL_NODE_RES: 1,
500 locking.LEVEL_INSTANCE: 0,
501 locking.LEVEL_NODE_ALLOC: 1,
502 }
503
505 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
506 self._LockInstancesNodes(primary_only=True, level=level)
507
509 """Check prerequisites.
510
511 This only checks the optional instance list against the existing names.
512
513 """
514 if self.wanted_names is None:
515 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
516
517 self.wanted_instances = \
518 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
519
521 """Ensure children of the disk have the needed disk size.
522
523 This is valid mainly for DRBD8 and fixes an issue where the
524 children have smaller disk size.
525
526 @param disk: an L{ganeti.objects.Disk} object
527
528 """
529 if disk.dev_type == constants.DT_DRBD8:
530 assert disk.children, "Empty children for DRBD8?"
531 fchild = disk.children[0]
532 mismatch = fchild.size < disk.size
533 if mismatch:
534 self.LogInfo("Child disk has size %d, parent %d, fixing",
535 fchild.size, disk.size)
536 fchild.size = disk.size
537
538 # and we recurse on this child only, not on the metadev
539 return self._EnsureChildSizes(fchild) or mismatch
540 else:
541 return False
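# Note: the boolean result tells the caller whether any child size was
# adjusted, so the configuration only gets rewritten when a fix was made.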
542
543 def Exec(self, feedback_fn):
544 """Verify the size of cluster disks.
545
546 """
547 # TODO: check child disks too
548 # TODO: check differences in size between primary/secondary nodes
549 per_node_disks = {}
550 for instance in self.wanted_instances:
551 pnode = instance.primary_node
552 if pnode not in per_node_disks:
553 per_node_disks[pnode] = []
554 for idx, disk in enumerate(instance.disks):
555 per_node_disks[pnode].append((instance, idx, disk))
556
557 assert not (frozenset(per_node_disks.keys()) -
558 self.owned_locks(locking.LEVEL_NODE_RES)), \
559 "Not owning correct locks"
560 assert not self.owned_locks(locking.LEVEL_NODE)
561
562 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
563 per_node_disks.keys())
564
565 changed = []
566 for node_uuid, dskl in per_node_disks.items():
567 if not dskl:
568 # no disks on the node
569 continue
570
571 newl = [([v[2].Copy()], v[0]) for v in dskl]
572 node_name = self.cfg.GetNodeName(node_uuid)
573 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
574 if result.fail_msg:
575 self.LogWarning("Failure in blockdev_getdimensions call to node"
576 " %s, ignoring", node_name)
577 continue
578 if len(result.payload) != len(dskl):
579 logging.warning("Invalid result from node %s: len(dskl)=%d,"
580 " result.payload=%s", node_name, len(dskl),
581 result.payload)
582 self.LogWarning("Invalid result from node %s, ignoring node results",
583 node_name)
584 continue
585 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
586 if dimensions is None:
587 self.LogWarning("Disk %d of instance %s did not return size"
588 " information, ignoring", idx, instance.name)
589 continue
590 if not isinstance(dimensions, (tuple, list)):
591 self.LogWarning("Disk %d of instance %s did not return valid"
592 " dimension information, ignoring", idx,
593 instance.name)
594 continue
595 (size, spindles) = dimensions
596 if not isinstance(size, (int, long)):
597 self.LogWarning("Disk %d of instance %s did not return valid"
598 " size information, ignoring", idx, instance.name)
599 continue
600 size = size >> 20
601 if size != disk.size:
602 self.LogInfo("Disk %d of instance %s has mismatched size,"
603 " correcting: recorded %d, actual %d", idx,
604 instance.name, disk.size, size)
605 disk.size = size
606 self.cfg.Update(instance, feedback_fn)
607 changed.append((instance.name, idx, "size", size))
608 if es_flags[node_uuid]:
609 if spindles is None:
610 self.LogWarning("Disk %d of instance %s did not return valid"
611 " spindles information, ignoring", idx,
612 instance.name)
613 elif disk.spindles is None or disk.spindles != spindles:
614 self.LogInfo("Disk %d of instance %s has mismatched spindles,"
615 " correcting: recorded %s, actual %s",
616 idx, instance.name, disk.spindles, spindles)
617 disk.spindles = spindles
618 self.cfg.Update(instance, feedback_fn)
619 changed.append((instance.name, idx, "spindles", disk.spindles))
620 if self._EnsureChildSizes(disk):
621 self.cfg.Update(instance, feedback_fn)
622 changed.append((instance.name, idx, "size", disk.size))
623 return changed
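# Each entry of the returned list is an (instance name, disk index,
# parameter, new value) tuple describing one applied correction; this is
# what the "gnt-cluster repair-disk-sizes" command reports back.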
624
645
650 """Checks whether the given file-based storage directory is acceptable.
651
652 Note: This function is public, because it is also used in bootstrap.py.
653
654 @type logging_warn_fn: function
655 @param logging_warn_fn: function which accepts a string and logs it
656 @type file_storage_dir: string
657 @param file_storage_dir: the directory to be used for file-based instances
658 @type enabled_disk_templates: list of string
659 @param enabled_disk_templates: the list of enabled disk templates
660 @type file_disk_template: string
661 @param file_disk_template: the file-based disk template for which the
662 path should be checked
663
664 """
665 assert (file_disk_template in
666 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
667 file_storage_enabled = file_disk_template in enabled_disk_templates
668 if file_storage_dir is not None:
669 if file_storage_dir == "":
670 if file_storage_enabled:
671 raise errors.OpPrereqError(
672 "Unsetting the '%s' storage directory while having '%s' storage"
673 " enabled is not permitted." %
674 (file_disk_template, file_disk_template))
675 else:
676 if not file_storage_enabled:
677 logging_warn_fn(
678 "Specified a %s storage directory, although %s storage is not"
679 " enabled." % (file_disk_template, file_disk_template))
680 else:
681 raise errors.ProgrammerError("Received %s storage dir with value"
682 " 'None'." % file_disk_template)
683
695
707
710 """Change the parameters of the cluster.
711
712 """
713 HPATH = "cluster-modify"
714 HTYPE = constants.HTYPE_CLUSTER
715 REQ_BGL = False
716
742
755
757 """Build hooks env.
758
759 """
760 return {
761 "OP_TARGET": self.cfg.GetClusterName(),
762 "NEW_VG_NAME": self.op.vg_name,
763 }
764
766 """Build hooks nodes.
767
768 """
769 mn = self.cfg.GetMasterNode()
770 return ([mn], [mn])
771
772 def _CheckVgName(self, node_uuids, enabled_disk_templates,
773 new_enabled_disk_templates):
774 """Check the consistency of the VG name on all nodes and, in case it
775 gets unset, whether there are instances still using it.
776
777 """
778 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates)
779 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates,
780 new_enabled_disk_templates)
781 current_vg_name = self.cfg.GetVGName()
782
783 if self.op.vg_name == '':
784 if lvm_is_enabled:
785 raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
786 " disk templates are or get enabled.")
787
788 if self.op.vg_name is None:
789 if current_vg_name is None and lvm_is_enabled:
790 raise errors.OpPrereqError("Please specify a volume group when"
791 " enabling lvm-based disk-templates.")
792
793 if self.op.vg_name is not None and not self.op.vg_name:
794 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN):
795 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
796 " instances exist", errors.ECODE_INVAL)
797
798 if (self.op.vg_name is not None and lvm_is_enabled) or \
799 (self.cfg.GetVGName() is not None and lvm_gets_enabled):
800 self._CheckVgNameOnNodes(node_uuids)
801
822
823 @staticmethod
826 """Computes three sets of disk templates.
827
828 @see: C{_GetDiskTemplateSets} for more details.
829
830 """
831 enabled_disk_templates = None
832 new_enabled_disk_templates = []
833 disabled_disk_templates = []
834 if op_enabled_disk_templates:
835 enabled_disk_templates = op_enabled_disk_templates
836 new_enabled_disk_templates = \
837 list(set(enabled_disk_templates)
838 - set(old_enabled_disk_templates))
839 disabled_disk_templates = \
840 list(set(old_enabled_disk_templates)
841 - set(enabled_disk_templates))
842 else:
843 enabled_disk_templates = old_enabled_disk_templates
844 return (enabled_disk_templates, new_enabled_disk_templates,
845 disabled_disk_templates)
846
848 """Computes three sets of disk templates.
849
850 The three sets are:
851 - disk templates that will be enabled after this operation (no matter if
852 they were enabled before or not)
853 - disk templates that get enabled by this operation (thus haven't been
854 enabled before)
855 - disk templates that get disabled by this operation
856
857 """
858 return self._GetDiskTemplateSetsInner(self.op.enabled_disk_templates,
859 cluster.enabled_disk_templates)
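# Worked example (hypothetical template lists): with the cluster currently
# enabling ["plain", "drbd"] and the opcode requesting ["drbd", "file"],
# this returns (["drbd", "file"], ["file"], ["plain"]): "file" gets
# enabled, "plain" gets disabled.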
860
862 """Checks the ipolicy.
863
864 @type cluster: C{objects.Cluster}
865 @param cluster: the cluster's configuration
866 @type enabled_disk_templates: list of string
867 @param enabled_disk_templates: list of (possibly newly) enabled disk
868 templates
869
870 """
871
872 if self.op.ipolicy:
873 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
874 group_policy=False)
875
876 CheckIpolicyVsDiskTemplates(self.new_ipolicy,
877 enabled_disk_templates)
878
879 all_instances = self.cfg.GetAllInstancesInfo().values()
880 violations = set()
881 for group in self.cfg.GetAllNodeGroupsInfo().values():
882 instances = frozenset([inst for inst in all_instances
883 if compat.any(nuuid in group.members
884 for nuuid in inst.all_nodes)])
885 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
886 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
887 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
888 self.cfg)
889 if new:
890 violations.update(new)
891
892 if violations:
893 self.LogWarning("After the ipolicy change the following instances"
894 " violate them: %s",
895 utils.CommaJoin(utils.NiceSort(violations)))
896 else:
897 CheckIpolicyVsDiskTemplates(cluster.ipolicy,
898 enabled_disk_templates)
899
901 """Checks whether the set DRBD helper actually exists on the nodes.
902
903 @type drbd_helper: string
904 @param drbd_helper: path of the drbd usermode helper binary
905 @type node_uuids: list of strings
906 @param node_uuids: list of node UUIDs to check for the helper
907
908 """
909
910 helpers = self.rpc.call_drbd_helper(node_uuids)
911 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
912 if ninfo.offline:
913 self.LogInfo("Not checking drbd helper on offline node %s",
914 ninfo.name)
915 continue
916 msg = helpers[ninfo.uuid].fail_msg
917 if msg:
918 raise errors.OpPrereqError("Error checking drbd helper on node"
919 " '%s': %s" % (ninfo.name, msg),
920 errors.ECODE_ENVIRON)
921 node_helper = helpers[ninfo.uuid].payload
922 if node_helper != drbd_helper:
923 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
924 (ninfo.name, node_helper),
925 errors.ECODE_ENVIRON)
926
928 """Check the DRBD usermode helper.
929
930 @type node_uuids: list of strings
931 @param node_uuids: a list of nodes' UUIDs
932 @type drbd_enabled: boolean
933 @param drbd_enabled: whether DRBD will be enabled after this operation
934 (no matter if it was disabled before or not)
935 @type drbd_gets_enabled: boolean
936 @param drbd_gets_enabled: true if DRBD was disabled before this
937 operation, but will be enabled afterwards
938
939 """
940 if self.op.drbd_helper == '':
941 if drbd_enabled:
942 raise errors.OpPrereqError("Cannot disable drbd helper while"
943 " DRBD is enabled.")
944 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8):
945 raise errors.OpPrereqError("Cannot disable drbd helper while"
946 " drbd-based instances exist",
947 errors.ECODE_INVAL)
948
949 else:
950 if self.op.drbd_helper is not None and drbd_enabled:
951 self._CheckDrbdHelperOnNodes(self.op.drbd_helper, node_uuids)
952 else:
953 if drbd_gets_enabled:
954 current_drbd_helper = self.cfg.GetClusterInfo().drbd_usermode_helper
955 if current_drbd_helper is not None:
956 self._CheckDrbdHelperOnNodes(current_drbd_helper, node_uuids)
957 else:
958 raise errors.OpPrereqError("Cannot enable DRBD without a"
959 " DRBD usermode helper set.")
960
963 """Check whether we try to disable a disk template that is in use.
964
965 @type disabled_disk_templates: list of string
966 @param disabled_disk_templates: list of disk templates that are going to
967 be disabled by this operation
968
969 """
970 for disk_template in disabled_disk_templates:
971 if self.cfg.HasAnyDiskOfType(disk_template):
972 raise errors.OpPrereqError(
973 "Cannot disable disk template '%s', because there is at least one"
974 " instance using it." % disk_template)
975
977 """Check prerequisites.
978
979 This checks whether the given params don't conflict and
980 if the given volume group is valid.
981
982 """
983 node_uuids = self.owned_locks(locking.LEVEL_NODE)
984 self.cluster = cluster = self.cfg.GetClusterInfo()
985
986 vm_capable_node_uuids = [node.uuid
987 for node in self.cfg.GetAllNodesInfo().values()
988 if node.uuid in node_uuids and node.vm_capable]
989
990 (enabled_disk_templates, new_enabled_disk_templates,
991 disabled_disk_templates) = self._GetDiskTemplateSets(cluster)
992 self._CheckInstancesOfDisabledDiskTemplates(disabled_disk_templates)
993
994 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
995 new_enabled_disk_templates)
996
997 if self.op.file_storage_dir is not None:
998 CheckFileStoragePathVsEnabledDiskTemplates(
999 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates)
1000
1001 if self.op.shared_file_storage_dir is not None:
1002 CheckSharedFileStoragePathVsEnabledDiskTemplates(
1003 self.LogWarning, self.op.shared_file_storage_dir,
1004 enabled_disk_templates)
1005
1006 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1007 drbd_gets_enabled = constants.DT_DRBD8 in new_enabled_disk_templates
1008 self._CheckDrbdHelper(vm_capable_node_uuids,
1009 drbd_enabled, drbd_gets_enabled)
1010
1011 # validate params changes
1012 if self.op.beparams:
1013 objects.UpgradeBeParams(self.op.beparams)
1014 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1015 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
1016
1017 if self.op.ndparams:
1018 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
1019 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
1020
1021 # TODO: we need a more general way to handle resetting
1022 # cluster-level parameters to default values
1023 if self.new_ndparams["oob_program"] == "":
1024 self.new_ndparams["oob_program"] = \
1025 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
1026
1027 if self.op.hv_state:
1028 new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
1029 self.cluster.hv_state_static)
1030 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
1031 for hv, values in new_hv_state.items())
1032
1033 if self.op.disk_state:
1034 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
1035 self.cluster.disk_state_static)
1036 self.new_disk_state = \
1037 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
1038 for name, values in svalues.items()))
1039 for storage, svalues in new_disk_state.items())
1040
1041 self._CheckIpolicy(cluster, enabled_disk_templates)
1042
1043 if self.op.nicparams:
1044 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1045 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
1046 objects.NIC.CheckParameterSyntax(self.new_nicparams)
1047 nic_errors = []
1048
1049 # check all instances for consistency
1050 for instance in self.cfg.GetAllInstancesInfo().values():
1051 for nic_idx, nic in enumerate(instance.nics):
1052 params_copy = copy.deepcopy(nic.nicparams)
1053 params_filled = objects.FillDict(self.new_nicparams, params_copy)
1054
1055 # check parameter syntax
1056 try:
1057 objects.NIC.CheckParameterSyntax(params_filled)
1058 except errors.ConfigurationError, err:
1059 nic_errors.append("Instance %s, nic/%d: %s" %
1060 (instance.name, nic_idx, err))
1061
1062 # if we're moving instances to routed, check that they have an ip
1063 target_mode = params_filled[constants.NIC_MODE]
1064 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
1065 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
1066 " address" % (instance.name, nic_idx))
1067 if nic_errors:
1068 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
1069 "\n".join(nic_errors), errors.ECODE_INVAL)
1070
1071 # hypervisor list/parameters
1072 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
1073 if self.op.hvparams:
1074 for hv_name, hv_dict in self.op.hvparams.items():
1075 if hv_name not in self.new_hvparams:
1076 self.new_hvparams[hv_name] = hv_dict
1077 else:
1078 self.new_hvparams[hv_name].update(hv_dict)
1079
1080 # disk template parameters
1081 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
1082 if self.op.diskparams:
1083 for dt_name, dt_params in self.op.diskparams.items():
1084 if dt_name not in self.new_diskparams:
1085 self.new_diskparams[dt_name] = dt_params
1086 else:
1087 self.new_diskparams[dt_name].update(dt_params)
1088 CheckDiskAccessModeConsistency(self.op.diskparams, self.cfg)
1089
1090 # os hypervisor parameters
1091 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
1092 if self.op.os_hvp:
1093 for os_name, hvs in self.op.os_hvp.items():
1094 if os_name not in self.new_os_hvp:
1095 self.new_os_hvp[os_name] = hvs
1096 else:
1097 for hv_name, hv_dict in hvs.items():
1098 if hv_dict is None:
1099 # Delete if it exists
1100 self.new_os_hvp[os_name].pop(hv_name, None)
1101 elif hv_name not in self.new_os_hvp[os_name]:
1102 self.new_os_hvp[os_name][hv_name] = hv_dict
1103 else:
1104 self.new_os_hvp[os_name][hv_name].update(hv_dict)
1105
1106 # os parameters
1107 self.new_osp = objects.FillDict(cluster.osparams, {})
1108 if self.op.osparams:
1109 for os_name, osp in self.op.osparams.items():
1110 if os_name not in self.new_osp:
1111 self.new_osp[os_name] = {}
1112
1113 self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp,
1114 use_none=True)
1115
1116 if not self.new_osp[os_name]:
1117 # we removed all parameters
1118 del self.new_osp[os_name]
1119 else:
1120 # check the parameter validity (remote check)
1121 CheckOSParams(self, False, [self.cfg.GetMasterNode()],
1122 os_name, self.new_osp[os_name])
1123
1124
1125 if self.op.enabled_hypervisors is not None:
1126 self.hv_list = self.op.enabled_hypervisors
1127 for hv in self.hv_list:
1128 # if the hypervisor doesn't already exist in the cluster
1129 # hvparams, we initialize it to empty, and then (in both
1130 # cases) we make sure to fill the defaults, as we might not
1131 # have a complete defaults list if the hypervisor wasn't
1132 # enabled before
1133 if hv not in new_hvp:
1134 new_hvp[hv] = {}
1135 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
1136 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
1137 else:
1138 self.hv_list = cluster.enabled_hypervisors
1139
1140 if self.op.hvparams or self.op.enabled_hypervisors is not None:
1141 # either the enabled list has changed, or the parameters have, validate
1142 for hv_name, hv_params in self.new_hvparams.items():
1143 if ((self.op.hvparams and hv_name in self.op.hvparams) or
1144 (self.op.enabled_hypervisors and
1145 hv_name in self.op.enabled_hypervisors)):
1146 # either this is a new hypervisor, or its parameters have changed
1147 hv_class = hypervisor.GetHypervisorClass(hv_name)
1148 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1149 hv_class.CheckParameterSyntax(hv_params)
1150 CheckHVParams(self, node_uuids, hv_name, hv_params)
1151
1152 self._CheckDiskTemplateConsistency()
1153
1154 if self.op.os_hvp:
1155 # no need to check any newly-enabled hypervisors, since the
1156 # defaults have already been checked in the above code-block
1157 for os_name, os_hvp in self.new_os_hvp.items():
1158 for hv_name, hv_params in os_hvp.items():
1159 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1160 # fill the new os_hvp on top of the cluster-level hvparams defaults
1161 cluster_defaults = self.new_hvparams.get(hv_name, {})
1162 new_osp = objects.FillDict(cluster_defaults, hv_params)
1163 hv_class = hypervisor.GetHypervisorClass(hv_name)
1164 hv_class.CheckParameterSyntax(new_osp)
1165 CheckHVParams(self, node_uuids, hv_name, new_osp)
1166
1167 if self.op.default_iallocator:
1168 alloc_script = utils.FindFile(self.op.default_iallocator,
1169 constants.IALLOCATOR_SEARCH_PATH,
1170 os.path.isfile)
1171 if alloc_script is None:
1172 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
1173 " specified" % self.op.default_iallocator,
1174 errors.ECODE_INVAL)
1175
1177 """Check whether the disk templates that are going to be disabled
1178 are still in use by some instances.
1179
1180 """
1181 if self.op.enabled_disk_templates:
1182 cluster = self.cfg.GetClusterInfo()
1183 instances = self.cfg.GetAllInstancesInfo()
1184
1185 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
1186 - set(self.op.enabled_disk_templates)
1187 for instance in instances.itervalues():
1188 if instance.disk_template in disk_templates_to_remove:
1189 raise errors.OpPrereqError("Cannot disable disk template '%s',"
1190 " because instance '%s' is using it." %
1191 (instance.disk_template, instance.name))
1192
1194 """Determines and sets the new volume group name.
1195
1196 """
1197 if self.op.vg_name is not None:
1198 new_volume = self.op.vg_name
1199 if not new_volume:
1200 new_volume = None
1201 if new_volume != self.cfg.GetVGName():
1202 self.cfg.SetVGName(new_volume)
1203 else:
1204 feedback_fn("Cluster LVM configuration already in desired"
1205 " state, not changing")
1206
1208 """Set the file storage directory.
1209
1210 """
1211 if self.op.file_storage_dir is not None:
1212 if self.cluster.file_storage_dir == self.op.file_storage_dir:
1213 feedback_fn("Global file storage dir already set to value '%s'"
1214 % self.cluster.file_storage_dir)
1215 else:
1216 self.cluster.file_storage_dir = self.op.file_storage_dir
1217
1219 """Set the shared file storage directory.
1220
1221 """
1222 if self.op.shared_file_storage_dir is not None:
1223 if self.cluster.shared_file_storage_dir == \
1224 self.op.shared_file_storage_dir:
1225 feedback_fn("Global shared file storage dir already set to value '%s'"
1226 % self.cluster.shared_file_storage_dir)
1227 else:
1228 self.cluster.shared_file_storage_dir = self.op.shared_file_storage_dir
1229
1231 """Set the DRBD usermode helper.
1232
1233 """
1234 if self.op.drbd_helper is not None:
1235 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
1236 feedback_fn("Note that you specified a drbd user helper, but did not"
1237 " enable the drbd disk template.")
1238 new_helper = self.op.drbd_helper
1239 if not new_helper:
1240 new_helper = None
1241 if new_helper != self.cfg.GetDRBDHelper():
1242 self.cfg.SetDRBDHelper(new_helper)
1243 else:
1244 feedback_fn("Cluster DRBD helper already in desired state,"
1245 " not changing")
1246
1247 def Exec(self, feedback_fn):
1248 """Change the parameters of the cluster.
1249
1250 """
1251 if self.op.enabled_disk_templates:
1252 self.cluster.enabled_disk_templates = \
1253 list(self.op.enabled_disk_templates)
1254
1255 self._SetVgName(feedback_fn)
1256 self._SetFileStorageDir(feedback_fn)
1257 self._SetSharedFileStorageDir(feedback_fn)
1258 self._SetDrbdHelper(feedback_fn)
1259
1260 if self.op.hvparams:
1261 self.cluster.hvparams = self.new_hvparams
1262 if self.op.os_hvp:
1263 self.cluster.os_hvp = self.new_os_hvp
1264 if self.op.enabled_hypervisors is not None:
1265 self.cluster.hvparams = self.new_hvparams
1266 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1267 if self.op.beparams:
1268 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1269 if self.op.nicparams:
1270 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1271 if self.op.ipolicy:
1272 self.cluster.ipolicy = self.new_ipolicy
1273 if self.op.osparams:
1274 self.cluster.osparams = self.new_osp
1275 if self.op.ndparams:
1276 self.cluster.ndparams = self.new_ndparams
1277 if self.op.diskparams:
1278 self.cluster.diskparams = self.new_diskparams
1279 if self.op.hv_state:
1280 self.cluster.hv_state_static = self.new_hv_state
1281 if self.op.disk_state:
1282 self.cluster.disk_state_static = self.new_disk_state
1283
1284 if self.op.candidate_pool_size is not None:
1285 self.cluster.candidate_pool_size = self.op.candidate_pool_size
1286
1287 AdjustCandidatePool(self, [])
1288
1289 if self.op.maintain_node_health is not None:
1290 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
1291 feedback_fn("Note: CONFD was disabled at build time, node health"
1292 " maintenance is not useful (still enabling it)")
1293 self.cluster.maintain_node_health = self.op.maintain_node_health
1294
1295 if self.op.modify_etc_hosts is not None:
1296 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts
1297
1298 if self.op.prealloc_wipe_disks is not None:
1299 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
1300
1301 if self.op.add_uids is not None:
1302 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
1303
1304 if self.op.remove_uids is not None:
1305 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
1306
1307 if self.op.uid_pool is not None:
1308 self.cluster.uid_pool = self.op.uid_pool
1309
1310 if self.op.default_iallocator is not None:
1311 self.cluster.default_iallocator = self.op.default_iallocator
1312
1313 if self.op.reserved_lvs is not None:
1314 self.cluster.reserved_lvs = self.op.reserved_lvs
1315
1316 if self.op.use_external_mip_script is not None:
1317 self.cluster.use_external_mip_script = self.op.use_external_mip_script
1318
1319 def helper_os(aname, mods, desc):
1320 desc += " OS list"
1321 lst = getattr(self.cluster, aname)
1322 for key, val in mods:
1323 if key == constants.DDM_ADD:
1324 if val in lst:
1325 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
1326 else:
1327 lst.append(val)
1328 elif key == constants.DDM_REMOVE:
1329 if val in lst:
1330 lst.remove(val)
1331 else:
1332 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
1333 else:
1334 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1335
1336 if self.op.hidden_os:
1337 helper_os("hidden_os", self.op.hidden_os, "hidden")
1338
1339 if self.op.blacklisted_os:
1340 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
1341
1342 if self.op.master_netdev:
1343 master_params = self.cfg.GetMasterNetworkParameters()
1344 ems = self.cfg.GetUseExternalMipScript()
1345 feedback_fn("Shutting down master ip on the current netdev (%s)" %
1346 self.cluster.master_netdev)
1347 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
1348 master_params, ems)
1349 if not self.op.force:
1350 result.Raise("Could not disable the master ip")
1351 else:
1352 if result.fail_msg:
1353 msg = ("Could not disable the master ip (continuing anyway): %s" %
1354 result.fail_msg)
1355 feedback_fn(msg)
1356 feedback_fn("Changing master_netdev from %s to %s" %
1357 (master_params.netdev, self.op.master_netdev))
1358 self.cluster.master_netdev = self.op.master_netdev
1359
1360 if self.op.master_netmask:
1361 master_params = self.cfg.GetMasterNetworkParameters()
1362 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
1363 result = self.rpc.call_node_change_master_netmask(
1364 master_params.uuid, master_params.netmask,
1365 self.op.master_netmask, master_params.ip,
1366 master_params.netdev)
1367 result.Warn("Could not change the master IP netmask", feedback_fn)
1368 self.cluster.master_netmask = self.op.master_netmask
1369
1370 self.cfg.Update(self.cluster, feedback_fn)
1371
1372 if self.op.master_netdev:
1373 master_params = self.cfg.GetMasterNetworkParameters()
1374 feedback_fn("Starting the master ip on the new master netdev (%s)" %
1375 self.op.master_netdev)
1376 ems = self.cfg.GetUseExternalMipScript()
1377 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
1378 master_params, ems)
1379 result.Warn("Could not re-enable the master ip on the master,"
1380 " please restart manually", self.LogWarning)
1381
1384 """Submits all jobs necessary to verify the cluster.
1385
1386 """
1387 REQ_BGL = False
1388
1390 self.needed_locks = {}
1391
1392 def Exec(self, feedback_fn):
1393 jobs = []
1394
1395 if self.op.group_name:
1396 groups = [self.op.group_name]
1397 depends_fn = lambda: None
1398 else:
1399 groups = self.cfg.GetNodeGroupList()
1400
1401 # Verify global configuration
1402 jobs.append([
1403 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1404 ])
1405
1406 # Always depend on the global verification job (relative dependency)
1407 depends_fn = lambda: [(-len(jobs), [])]
1408
1409 jobs.extend(
1410 [opcodes.OpClusterVerifyGroup(group_name=group,
1411 ignore_errors=self.op.ignore_errors,
1412 depends=depends_fn())]
1413 for group in groups)
1414
1415 # Fix up all parameters
1416 for op in itertools.chain(*jobs):
1417 op.debug_simulate_errors = self.op.debug_simulate_errors
1418 op.verbose = self.op.verbose
1419 op.error_codes = self.op.error_codes
1420 try:
1421 op.skip_checks = self.op.skip_checks
1422 except AttributeError:
1423 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1424
1425 return ResultWithJobs(jobs)
1426
1429 """Mix-in for cluster/group verify LUs.
1430
1431 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1432 self.op and self._feedback_fn to be available.)
1433
1434 """
1435
1436 ETYPE_FIELD = "code"
1437 ETYPE_ERROR = "ERROR"
1438 ETYPE_WARNING = "WARNING"
1439
1440 def _Error(self, ecode, item, msg, *args, **kwargs):
1441 """Format an error message.
1442
1443 Based on the opcode's error_codes parameter, either format a
1444 parseable error code, or a simpler error string.
1445
1446 This must be called only from Exec and functions called from Exec.
1447
1448 """
1449 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1450 itype, etxt, _ = ecode
1451 # If the error code is in the list of ignored errors, demote the
1452 # error to a warning
1453 if etxt in self.op.ignore_errors:
1454 ltype = self.ETYPE_WARNING
1455
1456 if args:
1457 msg = msg % args
1458
1459 if self.op.error_codes:
1460 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1461 else:
1462 if item:
1463 item = " " + item
1464 else:
1465 item = ""
1466 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1467
1468 self._feedback_fn(" - %s" % msg)
1469
1470 if ltype == self.ETYPE_ERROR:
1471 self.bad = True
1472
1473 def _ErrorIf(self, cond, *args, **kwargs):
1474 """Log an error message if the passed condition is True.
1475
1476 """
1477 if (bool(cond)
1478 or self.op.debug_simulate_errors):
1479 self._Error(*args, **kwargs)
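# Typical invocation, mirroring the verification routines below
# (hypothetical condition):
#   self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
#                 "unable to verify node: no data returned")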
1480
1483 """Verifies a certificate for L{LUClusterVerifyConfig}.
1484
1485 @type filename: string
1486 @param filename: Path to PEM file
1487
1488 """
1489 try:
1490 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1491 utils.ReadFile(filename))
1492 except Exception, err:
1493 return (LUClusterVerifyConfig.ETYPE_ERROR,
1494 "Failed to load X509 certificate %s: %s" % (filename, err))
1495
1496 (errcode, msg) = \
1497 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1498 constants.SSL_CERT_EXPIRATION_ERROR)
1499
1500 if msg:
1501 fnamemsg = "While verifying %s: %s" % (filename, msg)
1502 else:
1503 fnamemsg = None
1504
1505 if errcode is None:
1506 return (None, fnamemsg)
1507 elif errcode == utils.CERT_WARNING:
1508 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1509 elif errcode == utils.CERT_ERROR:
1510 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1511
1512 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
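# A (None, None) result means the certificate is valid; certificates close
# to expiry yield (ETYPE_WARNING, msg) and expired or unreadable ones
# (ETYPE_ERROR, msg).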
1513
1516 """Compute the set of all hypervisor parameters.
1517
1518 @type cluster: L{objects.Cluster}
1519 @param cluster: the cluster object
1520 @type instances: list of L{objects.Instance}
1521 @param instances: additional instances from which to obtain parameters
1522 @rtype: list of (origin, hypervisor, parameters)
1523 @return: a list with all parameters found, indicating the hypervisor they
1524 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1525
1526 """
1527 hvp_data = []
1528
1529 for hv_name in cluster.enabled_hypervisors:
1530 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1531
1532 for os_name, os_hvp in cluster.os_hvp.items():
1533 for hv_name, hv_params in os_hvp.items():
1534 if hv_params:
1535 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1536 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1537
1538
1539 for instance in instances:
1540 if instance.hvparams:
1541 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1542 cluster.FillHV(instance)))
1543
1544 return hvp_data
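# Example of the resulting structure (hypothetical names):
#   [("cluster", "kvm", {...}), ("os debian-image", "kvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]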
1545
1548 """Verifies the cluster config.
1549
1550 """
1551 REQ_BGL = False
1552
1566
1570
1579
1580 def Exec(self, feedback_fn):
1581 """Verify integrity of cluster, performing various test on nodes.
1582
1583 """
1584 self.bad = False
1585 self._feedback_fn = feedback_fn
1586
1587 feedback_fn("* Verifying cluster config")
1588
1589 for msg in self.cfg.VerifyConfig():
1590 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1591
1592 feedback_fn("* Verifying cluster certificate files")
1593
1594 for cert_filename in pathutils.ALL_CERT_FILES:
1595 (errcode, msg) = _VerifyCertificate(cert_filename)
1596 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1597
1598 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
1599 pathutils.NODED_CERT_FILE),
1600 constants.CV_ECLUSTERCERT,
1601 None,
1602 pathutils.NODED_CERT_FILE + " must be accessible by the " +
1603 constants.LUXID_USER + " user")
1604
1605 feedback_fn("* Verifying hypervisor parameters")
1606
1607 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1608 self.all_inst_info.values()))
1609
1610 feedback_fn("* Verifying all nodes belong to an existing group")
1611
1612 # We do this verification here because, should this bogus circumstance
1613 # occur, it would never be caught by VerifyGroup, which only acts on
1614 # nodes/instances reachable from existing node groups.
1615
1616 dangling_nodes = set(node for node in self.all_node_info.values()
1617 if node.group not in self.all_group_info)
1618
1619 dangling_instances = {}
1620 no_node_instances = []
1621
1622 for inst in self.all_inst_info.values():
1623 if inst.primary_node in [node.uuid for node in dangling_nodes]:
1624 dangling_instances.setdefault(inst.primary_node, []).append(inst)
1625 elif inst.primary_node not in self.all_node_info:
1626 no_node_instances.append(inst)
1627
1628 pretty_dangling = [
1629 "%s (%s)" %
1630 (node.name,
1631 utils.CommaJoin(inst.name for
1632 inst in dangling_instances.get(node.uuid, [])))
1633 for node in dangling_nodes]
1634
1635 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1636 None,
1637 "the following nodes (and their instances) belong to a non"
1638 " existing group: %s", utils.CommaJoin(pretty_dangling))
1639
1640 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1641 None,
1642 "the following instances have a non-existing primary-node:"
1643 " %s", utils.CommaJoin(inst.name for
1644 inst in no_node_instances))
1645
1646 return not self.bad
1647
1650 """Verifies the status of a node group.
1651
1652 """
1653 HPATH = "cluster-verify"
1654 HTYPE = constants.HTYPE_CLUSTER
1655 REQ_BGL = False
1656
1657 _HOOKS_INDENT_RE = re.compile("^", re.M)
1658
1660 """A class representing the logical and physical status of a node.
1661
1662 @type uuid: string
1663 @ivar uuid: the node UUID to which this object refers
1664 @ivar volumes: a structure as returned from
1665 L{ganeti.backend.GetVolumeList} (runtime)
1666 @ivar instances: a list of running instances (runtime)
1667 @ivar pinst: list of configured primary instances (config)
1668 @ivar sinst: list of configured secondary instances (config)
1669 @ivar sbp: dictionary of {primary-node: list of instances} for all
1670 instances for which this node is secondary (config)
1671 @ivar mfree: free memory, as reported by hypervisor (runtime)
1672 @ivar dfree: free disk, as reported by the node (runtime)
1673 @ivar offline: the offline status (config)
1674 @type rpc_fail: boolean
1675 @ivar rpc_fail: whether the RPC verify call failed (overall,
1676 not whether the individual keys were correct) (runtime)
1677 @type lvm_fail: boolean
1678 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1679 @type hyp_fail: boolean
1680 @ivar hyp_fail: whether the RPC call didn't return the instance list
1681 @type ghost: boolean
1682 @ivar ghost: whether this is a known node or not (config)
1683 @type os_fail: boolean
1684 @ivar os_fail: whether the RPC call didn't return valid OS data
1685 @type oslist: list
1686 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1687 @type vm_capable: boolean
1688 @ivar vm_capable: whether the node can host instances
1689 @type pv_min: float
1690 @ivar pv_min: size in MiB of the smallest PVs
1691 @type pv_max: float
1692 @ivar pv_max: size in MiB of the biggest PVs
1693
1694 """
1695 def __init__(self, offline=False, uuid=None, vm_capable=True):
1696 self.uuid = uuid
1697 self.volumes = {}
1698 self.instances = []
1699 self.pinst = []
1700 self.sinst = []
1701 self.sbp = {}
1702 self.mfree = 0
1703 self.dfree = 0
1704 self.offline = offline
1705 self.vm_capable = vm_capable
1706 self.rpc_fail = False
1707 self.lvm_fail = False
1708 self.hyp_fail = False
1709 self.ghost = False
1710 self.os_fail = False
1711 self.oslist = {}
1712 self.pv_min = None
1713 self.pv_max = None
1714
1735
1753
1755 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1756 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1757
1758 group_node_uuids = set(self.group_info.members)
1759 group_inst_uuids = \
1760 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1761
1762 unlocked_node_uuids = \
1763 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
1764
1765 unlocked_inst_uuids = \
1766 group_inst_uuids.difference(
1767 [self.cfg.GetInstanceInfoByName(name).uuid
1768 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
1769
1770 if unlocked_node_uuids:
1771 raise errors.OpPrereqError(
1772 "Missing lock for nodes: %s" %
1773 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
1774 errors.ECODE_STATE)
1775
1776 if unlocked_inst_uuids:
1777 raise errors.OpPrereqError(
1778 "Missing lock for instances: %s" %
1779 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
1780 errors.ECODE_STATE)
1781
1782 self.all_node_info = self.cfg.GetAllNodesInfo()
1783 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1784
1785 self.my_node_uuids = group_node_uuids
1786 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
1787 for node_uuid in group_node_uuids)
1788
1789 self.my_inst_uuids = group_inst_uuids
1790 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
1791 for inst_uuid in group_inst_uuids)
1792
1793 # We detect here the nodes that will need the extra RPC calls for
1794 # verifying split LV volumes; they should be locked.
1795 extra_lv_nodes = set()
1796
1797 for inst in self.my_inst_info.values():
1798 if inst.disk_template in constants.DTS_INT_MIRROR:
1799 for nuuid in inst.all_nodes:
1800 if self.all_node_info[nuuid].group != self.group_uuid:
1801 extra_lv_nodes.add(nuuid)
1802
1803 unlocked_lv_nodes = \
1804 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1805
1806 if unlocked_lv_nodes:
1807 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1808 utils.CommaJoin(unlocked_lv_nodes),
1809 errors.ECODE_STATE)
1810 self.extra_lv_nodes = list(extra_lv_nodes)
1811
1813 """Perform some basic validation on data returned from a node.
1814
1815 - check the result data structure is well formed and has all the
1816 mandatory fields
1817 - check ganeti version
1818
1819 @type ninfo: L{objects.Node}
1820 @param ninfo: the node to check
1821 @param nresult: the results from the node
1822 @rtype: boolean
1823 @return: whether overall this call was successful (and we can expect
1824 reasonable values in the response)
1825
1826 """
1827 # main result, nresult should be a non-empty dict
1828 test = not nresult or not isinstance(nresult, dict)
1829 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1830 "unable to verify node: no data returned")
1831 if test:
1832 return False
1833
1834 # compares ganeti version
1835 local_version = constants.PROTOCOL_VERSION
1836 remote_version = nresult.get("version", None)
1837 test = not (remote_version and
1838 isinstance(remote_version, (list, tuple)) and
1839 len(remote_version) == 2)
1840 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1841 "connection to node returned invalid data")
1842 if test:
1843 return False
1844
1845 test = local_version != remote_version[0]
1846 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
1847 "incompatible protocol versions: master %s,"
1848 " node %s", local_version, remote_version[0])
1849 if test:
1850 return False
1851
1852 # node seems compatible, we can actually try to look into its results
1853
1854 # full package version
1855 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1856 constants.CV_ENODEVERSION, ninfo.name,
1857 "software version mismatch: master %s, node %s",
1858 constants.RELEASE_VERSION, remote_version[1],
1859 code=self.ETYPE_WARNING)
1860
1861 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1862 if ninfo.vm_capable and isinstance(hyp_result, dict):
1863 for hv_name, hv_result in hyp_result.iteritems():
1864 test = hv_result is not None
1865 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1866 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1867
1868 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1869 if ninfo.vm_capable and isinstance(hvp_result, list):
1870 for item, hv_name, hv_result in hvp_result:
1871 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
1872 "hypervisor %s parameter verify failure (source %s): %s",
1873 hv_name, item, hv_result)
1874
1875 test = nresult.get(constants.NV_NODESETUP,
1876 ["Missing NODESETUP results"])
1877 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
1878 "node setup error: %s", "; ".join(test))
1879
1880 return True
1881
1882 def _VerifyNodeTime(self, ninfo, nresult,
1883 nvinfo_starttime, nvinfo_endtime):
1884 """Check the node time.
1885
1886 @type ninfo: L{objects.Node}
1887 @param ninfo: the node to check
1888 @param nresult: the remote results for the node
1889 @param nvinfo_starttime: the start time of the RPC call
1890 @param nvinfo_endtime: the end time of the RPC call
1891
1892 """
1893 ntime = nresult.get(constants.NV_TIME, None)
1894 try:
1895 ntime_merged = utils.MergeTime(ntime)
1896 except (ValueError, TypeError):
1897 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
1898 "Node returned invalid time")
1899 return
1900
1901 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1902 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1903 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1904 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1905 else:
1906 ntime_diff = None
1907
1908 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
1909 "Node time diverges by at least %s from master node time",
1910 ntime_diff)
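# A node is only flagged when its clock, merged via utils.MergeTime,
# drifts more than constants.NODE_MAX_CLOCK_SKEW seconds outside the
# [start, end] window of the verify RPC call.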
1911
1913 """Check the node LVM results and update info for cross-node checks.
1914
1915 @type ninfo: L{objects.Node}
1916 @param ninfo: the node to check
1917 @param nresult: the remote results for the node
1918 @param vg_name: the configured VG name
1919 @type nimg: L{NodeImage}
1920 @param nimg: node image
1921
1922 """
1923 if vg_name is None:
1924 return
1925
1926 # checks vg existence and size > 20G
1927 vglist = nresult.get(constants.NV_VGLIST, None)
1928 test = not vglist
1929 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
1930 "unable to check volume groups")
1931 if not test:
1932 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1933 constants.MIN_VG_SIZE)
1934 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
1935
1936 # Check PVs
1937 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
1938 for em in errmsgs:
1939 self._Error(constants.CV_ENODELVM, ninfo.name, em)
1940 if pvminmax is not None:
1941 (nimg.pv_min, nimg.pv_max) = pvminmax
1942
1944 """Check cross-node DRBD version consistency.
1945
1946 @type node_verify_infos: dict
1947 @param node_verify_infos: infos about nodes as returned from the
1948 node_verify call.
1949
1950 """
1951 node_versions = {}
1952 for node_uuid, ndata in node_verify_infos.items():
1953 nresult = ndata.payload
1954 if nresult:
1955 version = nresult.get(constants.NV_DRBDVERSION, None)
1956 if version:
1957 node_versions[node_uuid] = version
1958
1959 if len(set(node_versions.values())) > 1:
1960 for node_uuid, version in sorted(node_versions.items()):
1961 msg = "DRBD version mismatch: %s" % version
1962 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
1963 code=self.ETYPE_WARNING)
1964
1966 """Check cross-node consistency in LVM.
1967
1968 @type node_image: dict
1969 @param node_image: info about nodes, mapping from node to names to
1970 L{NodeImage} objects
1971 @param vg_name: the configured VG name
1972
1973 """
1974 if vg_name is None:
1975 return
1976
1977 # Only exclusive storage needs this kind of checks
1978 if not self._exclusive_storage:
1979 return
1980
1981 # exclusive_storage wants all PVs to have the same size (approximately),
1982 # if the smallest and the biggest ones are okay, everything is fine.
1983 # pv_min is None iff pv_max is None
1984 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
1985 if not vals:
1986 return
1987 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
1988 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
1989 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
1990 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
1991 "PV sizes differ too much in the group; smallest (%s MB) is"
1992 " on %s, biggest (%s MB) is on %s",
1993 pvmin, self.cfg.GetNodeName(minnode_uuid),
1994 pvmax, self.cfg.GetNodeName(maxnode_uuid))
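# Only the extreme PV sizes need checking: if the smallest and largest PV
# in the group are within tolerance, every pair in between is as well.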
1995
1997 """Check the node bridges.
1998
1999 @type ninfo: L{objects.Node}
2000 @param ninfo: the node to check
2001 @param nresult: the remote results for the node
2002 @param bridges: the expected list of bridges
2003
2004 """
2005 if not bridges:
2006 return
2007
2008 missing = nresult.get(constants.NV_BRIDGES, None)
2009 test = not isinstance(missing, list)
2010 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2011 "did not return valid bridge information")
2012 if not test:
2013 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
2014 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2015
2033
2035 """Check the node network connectivity results.
2036
2037 @type ninfo: L{objects.Node}
2038 @param ninfo: the node to check
2039 @param nresult: the remote results for the node
2040
2041 """
2042 test = constants.NV_NODELIST not in nresult
2043 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
2044 "node hasn't returned node ssh connectivity data")
2045 if not test:
2046 if nresult[constants.NV_NODELIST]:
2047 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2048 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
2049 "ssh communication with node '%s': %s", a_node, a_msg)
2050
2051 test = constants.NV_NODENETTEST not in nresult
2052 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2053 "node hasn't returned node tcp connectivity data")
2054 if not test:
2055 if nresult[constants.NV_NODENETTEST]:
2056 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2057 for anode in nlist:
2058 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
2059 "tcp communication with node '%s': %s",
2060 anode, nresult[constants.NV_NODENETTEST][anode])
2061
2062 test = constants.NV_MASTERIP not in nresult
2063 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2064 "node hasn't returned node master IP reachability data")
2065 if not test:
2066 if not nresult[constants.NV_MASTERIP]:
2067 if ninfo.uuid == self.master_node:
2068 msg = "the master node cannot reach the master IP (not configured?)"
2069 else:
2070 msg = "cannot reach the master IP"
2071 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
2072
2074 """Verify an instance.
2075
2076 This function checks to see if the required block devices are
2077 available on the instance's node, and that the nodes are in the correct
2078 state.
2079
2080 """
2081 pnode_uuid = instance.primary_node
2082 pnode_img = node_image[pnode_uuid]
2083 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2084
2085 node_vol_should = {}
2086 instance.MapLVsByNode(node_vol_should)
2087
2088 cluster = self.cfg.GetClusterInfo()
2089 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2090 self.group_info)
2091 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
2092 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
2093 utils.CommaJoin(err), code=self.ETYPE_WARNING)
2094
2095 for node_uuid in node_vol_should:
2096 n_img = node_image[node_uuid]
2097 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2098 # ignore missing volumes on offline or broken nodes
2099 continue
2100 for volume in node_vol_should[node_uuid]:
2101 test = volume not in n_img.volumes
2102 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
2103 "volume %s missing on node %s", volume,
2104 self.cfg.GetNodeName(node_uuid))
2105
2106 if instance.admin_state == constants.ADMINST_UP:
2107 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
2108 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
2109 "instance not running on its primary node %s",
2110 self.cfg.GetNodeName(pnode_uuid))
2111 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
2112 instance.name, "instance is marked as running and lives on"
2113 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
2114
2115 diskdata = [(nname, success, status, idx)
2116 for (nname, disks) in diskstatus.items()
2117 for idx, (success, status) in enumerate(disks)]
2118
2119 for nname, success, bdev_status, idx in diskdata:
2120
2121
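# the 'ghost node' construction in Exec() ensures that we have a
# node_image entry here even for nodes outside this group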
2122 snode = node_image[nname]
2123 bad_snode = snode.ghost or snode.offline
2124 self._ErrorIf(instance.disks_active and
2125 not success and not bad_snode,
2126 constants.CV_EINSTANCEFAULTYDISK, instance.name,
2127 "couldn't retrieve status for disk/%s on %s: %s",
2128 idx, self.cfg.GetNodeName(nname), bdev_status)
2129
2130 if instance.disks_active and success and \
2131 (bdev_status.is_degraded or
2132 bdev_status.ldisk_status != constants.LDS_OKAY):
2133 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
2134 if bdev_status.is_degraded:
2135 msg += " is degraded"
2136 if bdev_status.ldisk_status != constants.LDS_OKAY:
2137 msg += "; state is '%s'" % \
2138 constants.LDS_NAMES[bdev_status.ldisk_status]
2139
2140 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
2141
2142 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2143 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
2144 "instance %s, connection to primary node failed",
2145 instance.name)
2146
2147 self._ErrorIf(len(instance.secondary_nodes) > 1,
2148 constants.CV_EINSTANCELAYOUT, instance.name,
2149 "instance has multiple secondary nodes: %s",
2150 utils.CommaJoin(instance.secondary_nodes),
2151 code=self.ETYPE_WARNING)
2152
2153 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes)
2154 if any(es_flags.values()):
2155 if instance.disk_template not in constants.DTS_EXCL_STORAGE:
2156
2157
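# Disk template not compatible with exclusive storage: collect the
# offending nodes and report them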
2158 es_nodes = [n
2159 for (n, es) in es_flags.items()
2160 if es]
2161 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
2162 "instance has template %s, which is not supported on nodes"
2163 " that have exclusive storage set: %s",
2164 instance.disk_template,
2165 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
2166 for (idx, disk) in enumerate(instance.disks):
2167 self._ErrorIf(disk.spindles is None,
2168 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
2169 "number of spindles not configured for disk %s while"
2170 " exclusive storage is enabled, try running"
2171 " gnt-cluster repair-disk-sizes", idx)
2172
2173 if instance.disk_template in constants.DTS_INT_MIRROR:
2174 instance_nodes = utils.NiceSort(instance.all_nodes)
2175 instance_groups = {}
2176
2177 for node_uuid in instance_nodes:
2178 instance_groups.setdefault(self.all_node_info[node_uuid].group,
2179 []).append(node_uuid)
2180
2181 pretty_list = [
2182 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
2183 groupinfo[group].name)
2184
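# sort so that the group containing the primary node is listed first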
2185 for group, nodes in sorted(instance_groups.items(),
2186 key=lambda (_, nodes): pnode_uuid in nodes,
2187 reverse=True)]
2188
2189 self._ErrorIf(len(instance_groups) > 1,
2190 constants.CV_EINSTANCESPLITGROUPS,
2191 instance.name, "instance has primary and secondary nodes in"
2192 " different groups: %s", utils.CommaJoin(pretty_list),
2193 code=self.ETYPE_WARNING)
2194
2195 inst_nodes_offline = []
2196 for snode in instance.secondary_nodes:
2197 s_img = node_image[snode]
2198 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2199 self.cfg.GetNodeName(snode),
2200 "instance %s, connection to secondary node failed",
2201 instance.name)
2202
2203 if s_img.offline:
2204 inst_nodes_offline.append(snode)
2205
2206
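# warn if the instance has any offline secondary node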
2207 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
2208 instance.name, "instance has offline secondary node(s) %s",
2209 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
2210
2211 for node_uuid in instance.all_nodes:
2212 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
2213 instance.name, "instance lives on ghost node %s",
2214 self.cfg.GetNodeName(node_uuid))
2215 self._ErrorIf(not node_image[node_uuid].vm_capable,
2216 constants.CV_EINSTANCEBADNODE, instance.name,
2217 "instance lives on non-vm_capable node %s",
2218 self.cfg.GetNodeName(node_uuid))
2219
2220 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2221 """Verify if there are any unknown volumes in the cluster.
2222
2223 The .os, .swap and backup volumes are ignored. All other volumes are
2224 reported as unknown.
2225
2226 @type reserved: L{ganeti.utils.FieldSet}
2227 @param reserved: a FieldSet of reserved volume names
2228
2229 """
2230 for node_uuid, n_img in node_image.items():
2231 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2232 self.all_node_info[node_uuid].group != self.group_uuid):
2233
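# skip non-healthy nodes and nodes from other groups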
2234 continue
2235 for volume in n_img.volumes:
2236 test = ((node_uuid not in node_vol_should or
2237 volume not in node_vol_should[node_uuid]) and
2238 not reserved.Matches(volume))
2239 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
2240 self.cfg.GetNodeName(node_uuid),
2241 "volume %s is unknown", volume)
2242
2243 - def _VerifyNPlusOneMemory(self, node_image, all_insts):
2244 """Verify N+1 Memory Resilience.
2245
2246 Check that if one single node dies we can still start all the
2247 instances it was primary for.
2248
2249 """
2250 cluster_info = self.cfg.GetClusterInfo()
2251 for node_uuid, n_img in node_image.items():
2252
2253
2254
2255
2256
2257
2258
2259
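# This code checks that every node which is now listed as secondary
# has enough memory to host all instances it is supposed to take
# over, should their primary node fail. Down instances are counted
# too, since someone might want to start them in the event of a
# node failure.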
2260 if n_img.offline or \
2261 self.all_node_info[node_uuid].group != self.group_uuid:
2262
2263
2264
2265
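# nodes marked offline or belonging to other groups are skipped,
# since we most likely don't have good memory information for them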
2266 continue
2267
2268 for prinode, inst_uuids in n_img.sbp.items():
2269 needed_mem = 0
2270 for inst_uuid in inst_uuids:
2271 bep = cluster_info.FillBE(all_insts[inst_uuid])
2272 if bep[constants.BE_AUTO_BALANCE]:
2273 needed_mem += bep[constants.BE_MINMEM]
2274 test = n_img.mfree < needed_mem
2275 self._ErrorIf(test, constants.CV_ENODEN1,
2276 self.cfg.GetNodeName(node_uuid),
2277 "not enough memory to accomodate instance failovers"
2278 " should node %s fail (%dMiB needed, %dMiB available)",
2279 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2280
2281 - def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
2282 (files_all, files_opt, files_mc, files_vm)):
2283 """Verifies file checksums collected from all nodes.
2284
2285 @param nodes: List of L{objects.Node} objects
2286 @param master_node_uuid: UUID of master node
2287 @param all_nvinfo: RPC results
2288
2289 """
2290
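# Define functions determining which nodes to consider for a file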
2291 files2nodefn = [
2292 (files_all, None),
2293 (files_mc, lambda node: (node.master_candidate or
2294 node.uuid == master_node_uuid)),
2295 (files_vm, lambda node: node.vm_capable),
2296 ]
2297
2298
2299 nodefiles = {}
2300 for (files, fn) in files2nodefn:
2301 if fn is None:
2302 filenodes = nodes
2303 else:
2304 filenodes = filter(fn, nodes)
2305 nodefiles.update((filename,
2306 frozenset(map(operator.attrgetter("uuid"), filenodes)))
2307 for filename in files)
2308
2309 assert set(nodefiles) == (files_all | files_mc | files_vm)
2310
2311 fileinfo = dict((filename, {}) for filename in nodefiles)
2312 ignore_nodes = set()
2313
2314 for node in nodes:
2315 if node.offline:
2316 ignore_nodes.add(node.uuid)
2317 continue
2318
2319 nresult = all_nvinfo[node.uuid]
2320
2321 if nresult.fail_msg or not nresult.payload:
2322 node_files = None
2323 else:
2324 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2325 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2326 for (key, value) in fingerprints.items())
2327 del fingerprints
2328
2329 test = not (node_files and isinstance(node_files, dict))
2330 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
2331 "Node did not return file checksum data")
2332 if test:
2333 ignore_nodes.add(node.uuid)
2334 continue
2335
2336
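# build the per-checksum mapping of files to the nodes having them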
2337 for (filename, checksum) in node_files.items():
2338 assert filename in nodefiles
2339 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
2340
2341 for (filename, checksums) in fileinfo.items():
2342 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2343
2344
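# Nodes having the file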
2345 with_file = frozenset(node_uuid
2346 for node_uuids in fileinfo[filename].values()
2347 for node_uuid in node_uuids) - ignore_nodes
2348
2349 expected_nodes = nodefiles[filename] - ignore_nodes
2350
2351
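# Nodes missing the file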
2352 missing_file = expected_nodes - with_file
2353
2354 if filename in files_opt:
2355
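# Optional files must exist on all nodes or on none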
2356 self._ErrorIf(missing_file and missing_file != expected_nodes,
2357 constants.CV_ECLUSTERFILECHECK, None,
2358 "File %s is optional, but it must exist on all or no"
2359 " nodes (not found on %s)",
2360 filename,
2361 utils.CommaJoin(
2362 utils.NiceSort(
2363 map(self.cfg.GetNodeName, missing_file))))
2364 else:
2365 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2366 "File %s is missing from node(s) %s", filename,
2367 utils.CommaJoin(
2368 utils.NiceSort(
2369 map(self.cfg.GetNodeName, missing_file))))
2370
2371
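# Warn if a node has a file it should not have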
2372 unexpected = with_file - expected_nodes
2373 self._ErrorIf(unexpected,
2374 constants.CV_ECLUSTERFILECHECK, None,
2375 "File %s should not exist on node(s) %s",
2376 filename, utils.CommaJoin(
2377 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
2378
2379
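# See if there are multiple versions of the file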
2380 test = len(checksums) > 1
2381 if test:
2382 variants = ["variant %s on %s" %
2383 (idx + 1,
2384 utils.CommaJoin(utils.NiceSort(
2385 map(self.cfg.GetNodeName, node_uuids))))
2386 for (idx, (checksum, node_uuids)) in
2387 enumerate(sorted(checksums.items()))]
2388 else:
2389 variants = []
2390
2391 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
2392 "File %s found with %s different checksums (%s)",
2393 filename, len(checksums), "; ".join(variants))
2394
2395 - def _VerifyNodeDrbdHelper(self, ninfo, nresult, drbd_helper):
2396 """Verify the drbd helper.
2397
2398 """
2399 if drbd_helper:
2400 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2401 test = (helper_result is None)
2402 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2403 "no drbd usermode helper returned")
2404 if helper_result:
2405 status, payload = helper_result
2406 test = not status
2407 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2408 "drbd usermode helper check unsuccessful: %s", payload)
2409 test = status and (payload != drbd_helper)
2410 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2411 "wrong drbd usermode helper: %s", payload)
2412
2413 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2414 drbd_map):
2415 """Verifies and the node DRBD status.
2416
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param instanceinfo: the dict of instances
2421 @param drbd_helper: the configured DRBD usermode helper
2422 @param drbd_map: the DRBD map as returned by
2423 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2424
2425 """
2426 self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
2427
2428
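# build the expected DRBD minors for this node from the DRBD map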
2429 node_drbd = {}
2430 for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2431 test = inst_uuid not in instanceinfo
2432 self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2433 "ghost instance '%s' in temporary DRBD map", inst_uuid)
2434
2435
2436
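# ghost instances should not be running, but otherwise we don't
# give double warnings (both ghost instance and unallocated minor
# in use)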
2437 if test:
2438 node_drbd[minor] = (inst_uuid, False)
2439 else:
2440 instance = instanceinfo[inst_uuid]
2441 node_drbd[minor] = (inst_uuid, instance.disks_active)
2442
2443
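# and now check the used minors against the expected ones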
2444 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2445 test = not isinstance(used_minors, (tuple, list))
2446 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2447 "cannot parse drbd status file: %s", str(used_minors))
2448 if test:
2449
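# we cannot check the drbd status if the minor list is unparsable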
2450 return
2451
2452 for minor, (inst_uuid, must_exist) in node_drbd.items():
2453 test = minor not in used_minors and must_exist
2454 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2455 "drbd minor %d of instance %s is not active", minor,
2456 self.cfg.GetInstanceName(inst_uuid))
2457 for minor in used_minors:
2458 test = minor not in node_drbd
2459 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2460 "unallocated drbd minor %d is in use", minor)
2461
2462 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2463 """Builds the node OS structures.
2464
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param nimg: the node image object
2469
2470 """
2471 remote_os = nresult.get(constants.NV_OSLIST, None)
2472 test = (not isinstance(remote_os, list) or
2473 not compat.all(isinstance(v, list) and len(v) == 7
2474 for v in remote_os))
2475
2476 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2477 "node hasn't returned valid OS data")
2478
2479 nimg.os_fail = test
2480
2481 if test:
2482 return
2483
2484 os_dict = {}
2485
2486 for (name, os_path, status, diagnose,
2487 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2488
2489 if name not in os_dict:
2490 os_dict[name] = []
2491
2492
2493
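# parameters come as a list of lists instead of a list of tuples,
# since JSON lacks a real tuple type; convert them here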
2494 parameters = [tuple(v) for v in parameters]
2495 os_dict[name].append((os_path, status, diagnose,
2496 set(variants), set(parameters), set(api_ver)))
2497
2498 nimg.oslist = os_dict
2499
2500 - def _VerifyNodeOS(self, ninfo, nimg, base):
2501 """Verifies the node OS list.
2502
2503 @type ninfo: L{objects.Node}
2504 @param ninfo: the node to check
2505 @param nimg: the node image object
2506 @param base: the 'template' node we match against (e.g. from the master)
2507
2508 """
2509 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2510
2511 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2512 for os_name, os_data in nimg.oslist.items():
2513 assert os_data, "Empty OS status for OS %s?!" % os_name
2514 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2515 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
2516 "Invalid OS %s (located at %s): %s",
2517 os_name, f_path, f_diag)
2518 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
2519 "OS '%s' has multiple entries"
2520 " (first one shadows the rest): %s",
2521 os_name, utils.CommaJoin([v[0] for v in os_data]))
2522
2523 test = os_name not in base.oslist
2524 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2525 "Extra OS %s not present on reference node (%s)",
2526 os_name, self.cfg.GetNodeName(base.uuid))
2527 if test:
2528 continue
2529 assert base.oslist[os_name], "Base node has empty OS status?"
2530 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2531 if not b_status:
2532
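# base OS is invalid, skipping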
2533 continue
2534 for kind, a, b in [("API version", f_api, b_api),
2535 ("variants list", f_var, b_var),
2536 ("parameters", beautify_params(f_param),
2537 beautify_params(b_param))]:
2538 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
2539 "OS %s for %s differs from reference node %s:"
2540 " [%s] vs. [%s]", kind, os_name,
2541 self.cfg.GetNodeName(base.uuid),
2542 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2543
2544
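# check any OSes missing from this node compared to the base node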
2545 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2546 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
2547 "OSes present on reference node %s"
2548 " but missing on this node: %s",
2549 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2550
2551 - def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
2552 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2553
2554 @type ninfo: L{objects.Node}
2555 @param ninfo: the node to check
2556 @param nresult: the remote results for the node
2557 @type is_master: bool
2558 @param is_master: Whether node is the master node
2559
2560 """
2561 cluster = self.cfg.GetClusterInfo()
2562 if (is_master and
2563 (cluster.IsFileStorageEnabled() or
2564 cluster.IsSharedFileStorageEnabled())):
2565 try:
2566 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
2567 except KeyError:
2568
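# the query asked for this key, so it should never be missing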
2569 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2570 "Node did not return forbidden file storage paths")
2571 else:
2572 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2573 "Found forbidden file storage paths: %s",
2574 utils.CommaJoin(fspaths))
2575 else:
2576 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
2577 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2578 "Node should not have returned forbidden file storage"
2579 " paths")
2580
2581 - def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
2582 verify_key, error_key):
2583 """Verifies (file) storage paths.
2584
2585 @type ninfo: L{objects.Node}
2586 @param ninfo: the node to check
2587 @param nresult: the remote results for the node
2588 @type file_disk_template: string
2589 @param file_disk_template: file-based disk template, whose directory
2590 is supposed to be verified
2591 @type verify_key: string
2592 @param verify_key: key for the verification map of this file
2593 verification step
2594 @param error_key: error key to be added to the verification results
2595 in case something goes wrong in this verification step
2596
2597 """
2598 assert (file_disk_template in
2599 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
2600 cluster = self.cfg.GetClusterInfo()
2601 if cluster.IsDiskTemplateEnabled(file_disk_template):
2602 self._ErrorIf(
2603 verify_key in nresult,
2604 error_key, ninfo.name,
2605 "The configured %s storage path is unusable: %s" %
2606 (file_disk_template, nresult.get(verify_key)))
2607
2608 - def _VerifyFileStoragePaths(self, ninfo, nresult):
2618
2619 - def _VerifySharedFileStoragePaths(self, ninfo, nresult):
2629
2630 - def _VerifyOob(self, ninfo, nresult):
2631 """Verifies out of band functionality of a node.
2632
2633 @type ninfo: L{objects.Node}
2634 @param ninfo: the node to check
2635 @param nresult: the remote results for the node
2636
2637 """
2638
2639
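# the oob helper is invoked from the master, so only master-capable
# nodes and master candidates need their OOB paths verified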
2640 if ((ninfo.master_candidate or ninfo.master_capable) and
2641 constants.NV_OOB_PATHS in nresult):
2642 for path_result in nresult[constants.NV_OOB_PATHS]:
2643 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
2644 ninfo.name, path_result)
2645
2646 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2647 """Verifies and updates the node volume data.
2648
2649 This function will update a L{NodeImage}'s internal structures
2650 with data from the remote call.
2651
2652 @type ninfo: L{objects.Node}
2653 @param ninfo: the node to check
2654 @param nresult: the remote results for the node
2655 @param nimg: the node image object
2656 @param vg_name: the configured VG name
2657
2658 """
2659 nimg.lvm_fail = True
2660 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2661 if vg_name is None:
2662 pass
2663 elif isinstance(lvdata, basestring):
2664 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2665 "LVM problem on node: %s", utils.SafeEncode(lvdata))
2666 elif not isinstance(lvdata, dict):
2667 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2668 "rpc call to node failed (lvlist)")
2669 else:
2670 nimg.volumes = lvdata
2671 nimg.lvm_fail = False
2672
2673 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2674 """Verifies and updates the node instance list.
2675
2676 If the listing was successful, this node's instance list is
2677 updated. Otherwise, the RPC call is marked as failed for the
2678 instance list key.
2679
2680 @type ninfo: L{objects.Node}
2681 @param ninfo: the node to check
2682 @param nresult: the remote results for the node
2683 @param nimg: the node image object
2684
2685 """
2686 idata = nresult.get(constants.NV_INSTANCELIST, None)
2687 test = not isinstance(idata, list)
2688 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2689 "rpc call to node failed (instancelist): %s",
2690 utils.SafeEncode(str(idata)))
2691 if test:
2692 nimg.hyp_fail = True
2693 else:
2694 nimg.instances = [inst.uuid for (_, inst) in
2695 self.cfg.GetMultiInstanceInfoByName(idata)]
2696
2697 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2698 """Verifies and computes a node information map
2699
2700 @type ninfo: L{objects.Node}
2701 @param ninfo: the node to check
2702 @param nresult: the remote results for the node
2703 @param nimg: the node image object
2704 @param vg_name: the configured VG name
2705
2706 """
2707
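# try to read free memory (from the hypervisor)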
2708 hv_info = nresult.get(constants.NV_HVINFO, None)
2709 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2710 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2711 "rpc call to node failed (hvinfo)")
2712 if not test:
2713 try:
2714 nimg.mfree = int(hv_info["memory_free"])
2715 except (ValueError, TypeError):
2716 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2717 "node returned invalid nodeinfo, check hypervisor")
2718
2719
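# FIXME: devise a free space model for file-based instances as well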
2720 if vg_name is not None:
2721 test = (constants.NV_VGLIST not in nresult or
2722 vg_name not in nresult[constants.NV_VGLIST])
2723 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
2724 "node didn't return data for the volume group '%s'"
2725 " - it is either missing or broken", vg_name)
2726 if not test:
2727 try:
2728 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2729 except (ValueError, TypeError):
2730 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2731 "node returned invalid LVM info, check LVM status")
2732
2733 - def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
2734 """Gets per-disk status information for all instances.
2735
2736 @type node_uuids: list of strings
2737 @param node_uuids: Node UUIDs
2738 @type node_image: dict of (UUID, L{NodeImage})
2739 @param node_image: Node images
2740 @type instanceinfo: dict of (UUID, L{objects.Instance})
2741 @param instanceinfo: Instance objects
2742 @rtype: {instance: {node: [(success, payload)]}}
2743 @return: a dictionary of per-instance dictionaries with nodes as
2744 keys and disk information as values; the disk information is a
2745 list of tuples (success, payload)
2746
2747 """
2748 node_disks = {}
2749 node_disks_dev_inst_only = {}
2750 diskless_instances = set()
2751 nodisk_instances = set()
2752 diskless = constants.DT_DISKLESS
2753
2754 for nuuid in node_uuids:
2755 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
2756 node_image[nuuid].sinst))
2757 diskless_instances.update(uuid for uuid in node_inst_uuids
2758 if instanceinfo[uuid].disk_template == diskless)
2759 disks = [(inst_uuid, disk)
2760 for inst_uuid in node_inst_uuids
2761 for disk in instanceinfo[inst_uuid].disks]
2762
2763 if not disks:
2764 nodisk_instances.update(uuid for uuid in node_inst_uuids
2765 if instanceinfo[uuid].disk_template != diskless)
2766
2767 continue
2768
2769 node_disks[nuuid] = disks
2770
2771
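# annotate each disk with its instance's disk parameters for the RPC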
2772 dev_inst_only = []
2773 for (inst_uuid, dev) in disks:
2774 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
2775 self.cfg)
2776 dev_inst_only.append((anno_disk, instanceinfo[inst_uuid]))
2777
2778 node_disks_dev_inst_only[nuuid] = dev_inst_only
2779
2780 assert len(node_disks) == len(node_disks_dev_inst_only)
2781
2782
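# Collect the mirror status data from all nodes with disks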
2783 result = self.rpc.call_blockdev_getmirrorstatus_multi(
2784 node_disks.keys(), node_disks_dev_inst_only)
2785
2786 assert len(result) == len(node_disks)
2787
2788 instdisk = {}
2789
2790 for (nuuid, nres) in result.items():
2791 node = self.cfg.GetNodeInfo(nuuid)
2792 disks = node_disks[node.uuid]
2793
2794 if nres.offline:
2795
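# No data from this node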
2796 data = len(disks) * [(False, "node offline")]
2797 else:
2798 msg = nres.fail_msg
2799 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
2800 "while getting disk information: %s", msg)
2801 if msg:
2802
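# RPC failed, mark all disks of this node accordingly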
2803 data = len(disks) * [(False, msg)]
2804 else:
2805 data = []
2806 for idx, i in enumerate(nres.payload):
2807 if isinstance(i, (tuple, list)) and len(i) == 2:
2808 data.append(i)
2809 else:
2810 logging.warning("Invalid result from node %s, entry %d: %s",
2811 node.name, idx, i)
2812 data.append((False, "Invalid result from the remote node"))
2813
2814 for ((inst_uuid, _), status) in zip(disks, data):
2815 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
2816 .append(status)
2817
2818
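# Add empty entries for diskless instances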
2819 for inst_uuid in diskless_instances:
2820 assert inst_uuid not in instdisk
2821 instdisk[inst_uuid] = {}
2822
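# ...and for instances whose nodes reported no disks at all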
2823 for inst_uuid in nodisk_instances:
2824 assert inst_uuid not in instdisk
2825 instdisk[inst_uuid] = {}
2826
2827 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2828 len(nuuids) <= len(instanceinfo[inst].all_nodes) and
2829 compat.all(isinstance(s, (tuple, list)) and
2830 len(s) == 2 for s in statuses)
2831 for inst, nuuids in instdisk.items()
2832 for nuuid, statuses in nuuids.items())
2833 if __debug__:
2834 instdisk_keys = set(instdisk)
2835 instanceinfo_keys = set(instanceinfo)
2836 assert instdisk_keys == instanceinfo_keys, \
2837 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
2838 (instdisk_keys, instanceinfo_keys))
2839
2840 return instdisk
2841
2842 @staticmethod
2843 - def _SshNodeSelector(group_uuid, all_nodes):
2844 """Create endless iterators for all potential SSH check hosts.
2845
2846 """
2847 nodes = [node for node in all_nodes
2848 if (node.group != group_uuid and
2849 not node.offline)]
2850 keyfunc = operator.attrgetter("group")
2851
2852 return map(itertools.cycle,
2853 [sorted(map(operator.attrgetter("name"), names))
2854 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2855 keyfunc)])
2856
2857 @classmethod
2858 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2859 """Choose which nodes should talk to which other nodes.
2860
2861 We will make nodes contact all nodes in their group, and one node from
2862 every other group.
2863
2864 @warning: This algorithm has a known issue if one node group is much
2865 smaller than others (e.g. just one node). In such a case all other
2866 nodes will talk to the single node.
2867
2868 """
2869 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2870 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2871
2872 return (online_nodes,
2873 dict((name, sorted([i.next() for i in sel]))
2874 for name in online_nodes))
2875
2876 - def BuildHooksEnv(self):
2877 """Build hooks env.
2878
2879 Cluster-Verify hooks are run in the post phase only; their failure is
2880 logged in the verify output and makes the verification fail.
2881
2882 """
2883 env = {
2884 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
2885 }
2886
2887 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2888 for node in self.my_node_info.values())
2889
2890 return env
2891
2892 - def BuildHooksNodes(self):
2893 """Build hooks nodes.
2894
2895 """
2896 return ([], list(self.my_node_info.keys()))
2897
2898 - def Exec(self, feedback_fn):
2899 """Verify integrity of the node group, performing various test on nodes.
2900
2901 """
2902
2903 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2904
2905 if not self.my_node_uuids:
2906
2907 feedback_fn("* Empty node group, skipping verification")
2908 return True
2909
2910 self.bad = False
2911 verbose = self.op.verbose
2912 self._feedback_fn = feedback_fn
2913
2914 vg_name = self.cfg.GetVGName()
2915 drbd_helper = self.cfg.GetDRBDHelper()
2916 cluster = self.cfg.GetClusterInfo()
2917 hypervisors = cluster.enabled_hypervisors
2918 node_data_list = self.my_node_info.values()
2919
2920 i_non_redundant = []
2921 i_non_a_balanced = []
2922 i_offline = 0
2923 n_offline = 0
2924 n_drained = 0
2925 node_vol_should = {}
2926
2927
2928
2929
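# File verification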
2930 filemap = ComputeAncillaryFiles(cluster, False)
2931
2932
2933 master_node_uuid = self.master_node = self.cfg.GetMasterNode()
2934 master_ip = self.cfg.GetMasterIP()
2935
2936 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
2937
2938 user_scripts = []
2939 if self.cfg.GetUseExternalMipScript():
2940 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
2941
2942 node_verify_param = {
2943 constants.NV_FILELIST:
2944 map(vcluster.MakeVirtualPath,
2945 utils.UniqueSequence(filename
2946 for files in filemap
2947 for filename in files)),
2948 constants.NV_NODELIST:
2949 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2950 self.all_node_info.values()),
2951 constants.NV_HYPERVISOR: hypervisors,
2952 constants.NV_HVPARAMS:
2953 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2954 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2955 for node in node_data_list
2956 if not node.offline],
2957 constants.NV_INSTANCELIST: hypervisors,
2958 constants.NV_VERSION: None,
2959 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2960 constants.NV_NODESETUP: None,
2961 constants.NV_TIME: None,
2962 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
2963 constants.NV_OSLIST: None,
2964 constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(),
2965 constants.NV_USERSCRIPTS: user_scripts,
2966 }
2967
2968 if vg_name is not None:
2969 node_verify_param[constants.NV_VGLIST] = None
2970 node_verify_param[constants.NV_LVLIST] = vg_name
2971 node_verify_param[constants.NV_PVLIST] = [vg_name]
2972
2973 if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8):
2974 if drbd_helper:
2975 node_verify_param[constants.NV_DRBDVERSION] = None
2976 node_verify_param[constants.NV_DRBDLIST] = None
2977 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2978
2979 if cluster.IsFileStorageEnabled() or \
2980 cluster.IsSharedFileStorageEnabled():
2981
2982 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
2983 self.cfg.GetMasterNodeName()
2984 if cluster.IsFileStorageEnabled():
2985 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
2986 cluster.file_storage_dir
2987 if cluster.IsSharedFileStorageEnabled():
2988 node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \
2989 cluster.shared_file_storage_dir
2990
2991
2992
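# bridge checks
# FIXME: this needs to be changed per node-group, not cluster-wide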
2993 bridges = set()
2994 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2995 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2996 bridges.add(default_nicpp[constants.NIC_LINK])
2997 for instance in self.my_inst_info.values():
2998 for nic in instance.nics:
2999 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3000 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3001 bridges.add(full_nic[constants.NIC_LINK])
3002
3003 if bridges:
3004 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3005
3006
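# Build our expected cluster state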
3007 node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
3008 uuid=node.uuid,
3009 vm_capable=node.vm_capable))
3010 for node in node_data_list)
3011
3012
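# Gather OOB paths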
3013 oob_paths = []
3014 for node in self.all_node_info.values():
3015 path = SupportsOob(self.cfg, node)
3016 if path and path not in oob_paths:
3017 oob_paths.append(path)
3018
3019 if oob_paths:
3020 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3021
3022 for inst_uuid in self.my_inst_uuids:
3023 instance = self.my_inst_info[inst_uuid]
3024 if instance.admin_state == constants.ADMINST_OFFLINE:
3025 i_offline += 1
3026
3027 for nuuid in instance.all_nodes:
3028 if nuuid not in node_image:
3029 gnode = self.NodeImage(uuid=nuuid)
3030 gnode.ghost = (nuuid not in self.all_node_info)
3031 node_image[nuuid] = gnode
3032
3033 instance.MapLVsByNode(node_vol_should)
3034
3035 pnode = instance.primary_node
3036 node_image[pnode].pinst.append(instance.uuid)
3037
3038 for snode in instance.secondary_nodes:
3039 nimg = node_image[snode]
3040 nimg.sinst.append(instance.uuid)
3041 if pnode not in nimg.sbp:
3042 nimg.sbp[pnode] = []
3043 nimg.sbp[pnode].append(instance.uuid)
3044
3045 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
3046 self.my_node_info.keys())
3047
3048
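# The value of exclusive_storage should be the same across the group,
# so if it's True for at least one node we act as if it were set for
# all the nodes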
3049 self._exclusive_storage = compat.any(es_flags.values())
3050 if self._exclusive_storage:
3051 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3052
3053
3054
3055
3056
3057
3058
3059
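# Due to the way our RPC system works, exact response times cannot be
# guaranteed (e.g. a broken node could run into a timeout). By keeping
# the time before and after executing the request, we can at least
# have a time window.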
3060 nvinfo_starttime = time.time()
3061 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
3062 node_verify_param,
3063 self.cfg.GetClusterName(),
3064 self.cfg.GetClusterInfo().hvparams)
3065 nvinfo_endtime = time.time()
3066
3067 if self.extra_lv_nodes and vg_name is not None:
3068 extra_lv_nvinfo = \
3069 self.rpc.call_node_verify(self.extra_lv_nodes,
3070 {constants.NV_LVLIST: vg_name},
3071 self.cfg.GetClusterName(),
3072 self.cfg.GetClusterInfo().hvparams)
3073 else:
3074 extra_lv_nvinfo = {}
3075
3076 all_drbd_map = self.cfg.ComputeDRBDMap()
3077
3078 feedback_fn("* Gathering disk information (%s nodes)" %
3079 len(self.my_node_uuids))
3080 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
3081 self.my_inst_info)
3082
3083 feedback_fn("* Verifying configuration file consistency")
3084
3085
3086
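# If not all nodes are being checked, we need to make sure the master
# node and a non-checked vm_capable node are in the list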
3087 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
3088 if absent_node_uuids:
3089 vf_nvinfo = all_nvinfo.copy()
3090 vf_node_info = list(self.my_node_info.values())
3091 additional_node_uuids = []
3092 if master_node_uuid not in self.my_node_info:
3093 additional_node_uuids.append(master_node_uuid)
3094 vf_node_info.append(self.all_node_info[master_node_uuid])
3095
3096
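# Add the first vm_capable node we find which is not included,
# excluding the master node (which we already have)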
3097 for node_uuid in absent_node_uuids:
3098 nodeinfo = self.all_node_info[node_uuid]
3099 if (nodeinfo.vm_capable and not nodeinfo.offline and
3100 node_uuid != master_node_uuid):
3101 additional_node_uuids.append(node_uuid)
3102 vf_node_info.append(self.all_node_info[node_uuid])
3103 break
3104 key = constants.NV_FILELIST
3105 vf_nvinfo.update(self.rpc.call_node_verify(
3106 additional_node_uuids, {key: node_verify_param[key]},
3107 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams))
3108 else:
3109 vf_nvinfo = all_nvinfo
3110 vf_node_info = self.my_node_info.values()
3111
3112 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
3113
3114 feedback_fn("* Verifying node status")
3115
3116 refos_img = None
3117
3118 for node_i in node_data_list:
3119 nimg = node_image[node_i.uuid]
3120
3121 if node_i.offline:
3122 if verbose:
3123 feedback_fn("* Skipping offline node %s" % (node_i.name,))
3124 n_offline += 1
3125 continue
3126
3127 if node_i.uuid == master_node_uuid:
3128 ntype = "master"
3129 elif node_i.master_candidate:
3130 ntype = "master candidate"
3131 elif node_i.drained:
3132 ntype = "drained"
3133 n_drained += 1
3134 else:
3135 ntype = "regular"
3136 if verbose:
3137 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))
3138
3139 msg = all_nvinfo[node_i.uuid].fail_msg
3140 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
3141 "while contacting node: %s", msg)
3142 if msg:
3143 nimg.rpc_fail = True
3144 continue
3145
3146 nresult = all_nvinfo[node_i.uuid].payload
3147
3148 nimg.call_ok = self._VerifyNode(node_i, nresult)
3149 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3150 self._VerifyNodeNetwork(node_i, nresult)
3151 self._VerifyNodeUserScripts(node_i, nresult)
3152 self._VerifyOob(node_i, nresult)
3153 self._VerifyAcceptedFileStoragePaths(node_i, nresult,
3154 node_i.uuid == master_node_uuid)
3155 self._VerifyFileStoragePaths(node_i, nresult)
3156 self._VerifySharedFileStoragePaths(node_i, nresult)
3157
3158 if nimg.vm_capable:
3159 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3160 if constants.DT_DRBD8 in cluster.enabled_disk_templates:
3161 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3162 all_drbd_map)
3163
3164 if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \
3165 (constants.DT_DRBD8 in cluster.enabled_disk_templates):
3166 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3167 self._UpdateNodeInstances(node_i, nresult, nimg)
3168 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3169 self._UpdateNodeOS(node_i, nresult, nimg)
3170
3171 if not nimg.os_fail:
3172 if refos_img is None:
3173 refos_img = nimg
3174 self._VerifyNodeOS(node_i, nimg, refos_img)
3175 self._VerifyNodeBridges(node_i, nresult, bridges)
3176
3177
3178
3179
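# Check whether all running instances are primary for the node. (This
# can no longer be done from _VerifyInstance below, since some of the
# wrong instances could be from other node groups.)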
3180 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)
3181
3182 for inst_uuid in non_primary_inst_uuids:
3183 test = inst_uuid in self.all_inst_info
3184 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
3185 self.cfg.GetInstanceName(inst_uuid),
3186 "instance should not run on node %s", node_i.name)
3187 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3188 "node is running unknown instance %s", inst_uuid)
3189
3190 self._VerifyGroupDRBDVersion(all_nvinfo)
3191 self._VerifyGroupLVM(node_image, vg_name)
3192
3193 for node_uuid, result in extra_lv_nvinfo.items():
3194 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
3195 node_image[node_uuid], vg_name)
3196
3197 feedback_fn("* Verifying instance status")
3198 for inst_uuid in self.my_inst_uuids:
3199 instance = self.my_inst_info[inst_uuid]
3200 if verbose:
3201 feedback_fn("* Verifying instance %s" % instance.name)
3202 self._VerifyInstance(instance, node_image, instdisk[inst_uuid])
3203
3204
3205
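# If the instance is non-redundant we cannot survive losing its
# primary node, so we are not N+1 compliant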
3206 if instance.disk_template not in constants.DTS_MIRRORED:
3207 i_non_redundant.append(instance)
3208
3209 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
3210 i_non_a_balanced.append(instance)
3211
3212 feedback_fn("* Verifying orphan volumes")
3213 reserved = utils.FieldSet(*cluster.reserved_lvs)
3214
3215
3216
3217
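# We will get spurious "unknown volume" warnings if any node of this
# group is secondary for an instance whose primary is in another
# group. To avoid them, we find these instances and add their volumes
# to node_vol_should.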
3218 for instance in self.all_inst_info.values():
3219 for secondary in instance.secondary_nodes:
3220 if (secondary in self.my_node_info
3221 and instance.uuid not in self.my_inst_info):
3222 instance.MapLVsByNode(node_vol_should)
3223 break
3224
3225 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3226
3227 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3228 feedback_fn("* Verifying N+1 Memory redundancy")
3229 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3230
3231 feedback_fn("* Other Notes")
3232 if i_non_redundant:
3233 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3234 % len(i_non_redundant))
3235
3236 if i_non_a_balanced:
3237 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3238 % len(i_non_a_balanced))
3239
3240 if i_offline:
3241 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3242
3243 if n_offline:
3244 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3245
3246 if n_drained:
3247 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3248
3249 return not self.bad
3250
3251 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3252 """Analyze the post-hooks' result
3253
3254 This method analyses the hook result, handles it, and sends some
3255 nicely-formatted feedback back to the user.
3256
3257 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3258 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3259 @param hooks_results: the results of the multi-node hooks rpc call
3260 @param feedback_fn: function used to send feedback back to the caller
3261 @param lu_result: previous Exec result
3262 @return: the new Exec result, based on the previous result
3263 and hook results
3264
3265 """
3266
3267
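# We only really run POST phase hooks, only for non-empty groups,
# and are only interested in their results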
3268 if not self.my_node_uuids:
3269
3270 pass
3271 elif phase == constants.HOOKS_PHASE_POST:
3272
3273 feedback_fn("* Hooks Results")
3274 assert hooks_results, "invalid result from hooks"
3275
3276 for node_name in hooks_results:
3277 res = hooks_results[node_name]
3278 msg = res.fail_msg
3279 test = msg and not res.offline
3280 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3281 "Communication failure in hooks execution: %s", msg)
3282 if test:
3283 lu_result = False
3284 continue
3285 if res.offline:
3286
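# No need to investigate payload if node is offline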
3287 continue
3288 for script, hkr, output in res.payload:
3289 test = hkr == constants.HKR_FAIL
3290 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3291 "Script %s failed, output:", script)
3292 if test:
3293 output = self._HOOKS_INDENT_RE.sub(" ", output)
3294 feedback_fn("%s" % output)
3295 lu_result = False
3296
3297 return lu_result
3298
3299
3300 -class LUClusterVerifyDisks(NoHooksLU):
3301 """Verifies the cluster disks status.
3302
3303 """
3304 REQ_BGL = False
3305
3306 - def ExpandNames(self):
3311
3312 - def Exec(self, feedback_fn):
3318