22 """Logical units dealing with the cluster."""
23
24 import OpenSSL
25
26 import copy
27 import itertools
28 import logging
29 import operator
30 import os
31 import re
32 import time
33
34 from ganeti import compat
35 from ganeti import constants
36 from ganeti import errors
37 from ganeti import hypervisor
38 from ganeti import locking
39 from ganeti import masterd
40 from ganeti import netutils
41 from ganeti import objects
42 from ganeti import opcodes
43 from ganeti import pathutils
44 from ganeti import query
45 from ganeti import rpc
46 from ganeti import runtime
47 from ganeti import ssh
48 from ganeti import uidpool
49 from ganeti import utils
50 from ganeti import vcluster
51
52 from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
53 ResultWithJobs
54 from ganeti.cmdlib.common import ShareAll, RunPostHook, \
55 ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
56 GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
57 GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
58 CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
59 ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
60 CheckIpolicyVsDiskTemplates
61
62 import ganeti.masterd.instance
66 """Activate the master IP on the master node.
67
68 """
69   def Exec(self, feedback_fn):
78
81 """Deactivate the master IP on the master node.
82
83 """
84   def Exec(self, feedback_fn):
93
96 """Return configuration values.
97
98 """
99 REQ_BGL = False
100
102 self.cq = ClusterQuery(None, self.op.output_fields, False)
103
106
109
110   def Exec(self, feedback_fn):
111 result = self.cq.OldStyleQuery(self)
112
113 assert len(result) == 1
114
115 return result[0]
116
119 """Logical unit for destroying the cluster.
120
121 """
122 HPATH = "cluster-destroy"
123 HTYPE = constants.HTYPE_CLUSTER
124
126 """Build hooks env.
127
128 """
129 return {
130 "OP_TARGET": self.cfg.GetClusterName(),
131 }
132
134 """Build hooks nodes.
135
136 """
137 return ([], [])
138
140 """Check prerequisites.
141
142 This checks whether the cluster is empty.
143
144 Any errors are signaled by raising errors.OpPrereqError.
145
146 """
147 master = self.cfg.GetMasterNode()
148
149 nodelist = self.cfg.GetNodeList()
150 if len(nodelist) != 1 or nodelist[0] != master:
151 raise errors.OpPrereqError("There are still %d node(s) in"
152 " this cluster." % (len(nodelist) - 1),
153 errors.ECODE_INVAL)
154 instancelist = self.cfg.GetInstanceList()
155 if instancelist:
156 raise errors.OpPrereqError("There are still %d instance(s) in"
157 " this cluster." % len(instancelist),
158 errors.ECODE_INVAL)
159
160   def Exec(self, feedback_fn):
174
175
176 class LUClusterPostInit(LogicalUnit):
177 """Logical unit for running hooks after cluster initialization.
178
179 """
180 HPATH = "cluster-init"
181 HTYPE = constants.HTYPE_CLUSTER
182
183   def BuildHooksEnv(self):
184 """Build hooks env.
185
186 """
187 return {
188 "OP_TARGET": self.cfg.GetClusterName(),
189 }
190
191   def BuildHooksNodes(self):
192 """Build hooks nodes.
193
194 """
195 return ([], [self.cfg.GetMasterNode()])
196
197   def Exec(self, feedback_fn):
198 """Nothing to do.
199
200 """
201 return True
202
258
261 """Query cluster configuration.
262
263 """
264 REQ_BGL = False
265
267 self.needed_locks = {}
268
269   def Exec(self, feedback_fn):
270 """Return cluster config.
271
272 """
273 cluster = self.cfg.GetClusterInfo()
274 os_hvp = {}
275
276
277 for os_name, hv_dict in cluster.os_hvp.items():
278 os_hvp[os_name] = {}
279 for hv_name, hv_params in hv_dict.items():
280 if hv_name in cluster.enabled_hypervisors:
281 os_hvp[os_name][hv_name] = hv_params
282
283
284 primary_ip_version = constants.IP4_VERSION
285 if cluster.primary_ip_family == netutils.IP6Address.family:
286 primary_ip_version = constants.IP6_VERSION
287
288 result = {
289 "software_version": constants.RELEASE_VERSION,
290 "protocol_version": constants.PROTOCOL_VERSION,
291 "config_version": constants.CONFIG_VERSION,
292 "os_api_version": max(constants.OS_API_VERSIONS),
293 "export_version": constants.EXPORT_VERSION,
294 "vcs_version": constants.VCS_VERSION,
295 "architecture": runtime.GetArchInfo(),
296 "name": cluster.cluster_name,
297 "master": self.cfg.GetMasterNodeName(),
298 "default_hypervisor": cluster.primary_hypervisor,
299 "enabled_hypervisors": cluster.enabled_hypervisors,
300 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
301 for hypervisor_name in cluster.enabled_hypervisors]),
302 "os_hvp": os_hvp,
303 "beparams": cluster.beparams,
304 "osparams": cluster.osparams,
305 "ipolicy": cluster.ipolicy,
306 "nicparams": cluster.nicparams,
307 "ndparams": cluster.ndparams,
308 "diskparams": cluster.diskparams,
309 "candidate_pool_size": cluster.candidate_pool_size,
310 "master_netdev": cluster.master_netdev,
311 "master_netmask": cluster.master_netmask,
312 "use_external_mip_script": cluster.use_external_mip_script,
313 "volume_group_name": cluster.volume_group_name,
314 "drbd_usermode_helper": cluster.drbd_usermode_helper,
315 "file_storage_dir": cluster.file_storage_dir,
316 "shared_file_storage_dir": cluster.shared_file_storage_dir,
317 "maintain_node_health": cluster.maintain_node_health,
318 "ctime": cluster.ctime,
319 "mtime": cluster.mtime,
320 "uuid": cluster.uuid,
321 "tags": list(cluster.GetTags()),
322 "uid_pool": cluster.uid_pool,
323 "default_iallocator": cluster.default_iallocator,
324 "reserved_lvs": cluster.reserved_lvs,
325 "primary_ip_version": primary_ip_version,
326 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
327 "hidden_os": cluster.hidden_os,
328 "blacklisted_os": cluster.blacklisted_os,
329 "enabled_disk_templates": cluster.enabled_disk_templates,
330 }
331
332 return result
333
336 """Force the redistribution of cluster configuration.
337
338 This is a very simple LU.
339
340 """
341 REQ_BGL = False
342
349
350   def Exec(self, feedback_fn):
356
359 """Rename the cluster.
360
361 """
362 HPATH = "cluster-rename"
363 HTYPE = constants.HTYPE_CLUSTER
364
366 """Build hooks env.
367
368 """
369 return {
370 "OP_TARGET": self.cfg.GetClusterName(),
371 "NEW_NAME": self.op.name,
372 }
373
379
402
403   def Exec(self, feedback_fn):
404 """Rename the cluster.
405
406 """
407 clustername = self.op.name
408 new_ip = self.ip
409
410
411 master_params = self.cfg.GetMasterNetworkParameters()
412 ems = self.cfg.GetUseExternalMipScript()
413 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
414 master_params, ems)
415 result.Raise("Could not disable the master role")
416
417 try:
418 cluster = self.cfg.GetClusterInfo()
419 cluster.cluster_name = clustername
420 cluster.master_ip = new_ip
421 self.cfg.Update(cluster, feedback_fn)
422
423
424 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
425 node_list = self.cfg.GetOnlineNodeList()
426 try:
427 node_list.remove(master_params.uuid)
428 except ValueError:
429 pass
430 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
431 finally:
432 master_params.ip = new_ip
433 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
434 master_params, ems)
435 result.Warn("Could not re-enable the master role on the master,"
436 " please restart manually", self.LogWarning)
437
438 return clustername
439
442 """Verifies the cluster disks sizes.
443
444 """
445 REQ_BGL = False
446
448 if self.op.instances:
449 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
450
451
452 self.needed_locks = {
453 locking.LEVEL_NODE_RES: [],
454 locking.LEVEL_INSTANCE: self.wanted_names,
455 }
456 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
457 else:
458 self.wanted_names = None
459 self.needed_locks = {
460 locking.LEVEL_NODE_RES: locking.ALL_SET,
461 locking.LEVEL_INSTANCE: locking.ALL_SET,
462
463
464 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
465 }
466
467 self.share_locks = {
468 locking.LEVEL_NODE_RES: 1,
469 locking.LEVEL_INSTANCE: 0,
470 locking.LEVEL_NODE_ALLOC: 1,
471 }
472
474 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
475 self._LockInstancesNodes(primary_only=True, level=level)
476
478 """Check prerequisites.
479
480 This only checks the optional instance list against the existing names.
481
482 """
483 if self.wanted_names is None:
484 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
485
486 self.wanted_instances = \
487 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
488
490 """Ensure children of the disk have the needed disk size.
491
492 This is valid mainly for DRBD8 and fixes an issue where the
493 children have a smaller disk size.
494
495 @param disk: an L{ganeti.objects.Disk} object
496
497 """
498 if disk.dev_type == constants.DT_DRBD8:
499 assert disk.children, "Empty children for DRBD8?"
500 fchild = disk.children[0]
501 mismatch = fchild.size < disk.size
502 if mismatch:
503 self.LogInfo("Child disk has size %d, parent %d, fixing",
504 fchild.size, disk.size)
505 fchild.size = disk.size
506
507
508 return self._EnsureChildSizes(fchild) or mismatch
509 else:
510 return False
511
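  # Illustrative sketch, not part of the upstream module (the sizes below are
  # invented): for a DRBD8 disk recorded at 10240 MiB whose first child LV
  # reports only 10230 MiB, _EnsureChildSizes() logs "Child disk has size
  # 10230, parent 10240, fixing", raises the child's size to 10240 and
  # returns True, which tells Exec() below that self.cfg.Update() must be
  # called for the owning instance.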
512   def Exec(self, feedback_fn):
513 """Verify the size of cluster disks.
514
515 """
516
517
518 per_node_disks = {}
519 for instance in self.wanted_instances:
520 pnode = instance.primary_node
521 if pnode not in per_node_disks:
522 per_node_disks[pnode] = []
523 for idx, disk in enumerate(instance.disks):
524 per_node_disks[pnode].append((instance, idx, disk))
525
526 assert not (frozenset(per_node_disks.keys()) -
527 self.owned_locks(locking.LEVEL_NODE_RES)), \
528 "Not owning correct locks"
529 assert not self.owned_locks(locking.LEVEL_NODE)
530
531 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
532 per_node_disks.keys())
533
534 changed = []
535 for node_uuid, dskl in per_node_disks.items():
536 newl = [v[2].Copy() for v in dskl]
537 for dsk in newl:
538 self.cfg.SetDiskID(dsk, node_uuid)
539 node_name = self.cfg.GetNodeName(node_uuid)
540 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
541 if result.fail_msg:
542 self.LogWarning("Failure in blockdev_getdimensions call to node"
543 " %s, ignoring", node_name)
544 continue
545 if len(result.payload) != len(dskl):
546 logging.warning("Invalid result from node %s: len(dksl)=%d,"
547 " result.payload=%s", node_name, len(dskl),
548 result.payload)
549 self.LogWarning("Invalid result from node %s, ignoring node results",
550 node_name)
551 continue
552 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
553 if dimensions is None:
554 self.LogWarning("Disk %d of instance %s did not return size"
555 " information, ignoring", idx, instance.name)
556 continue
557 if not isinstance(dimensions, (tuple, list)):
558 self.LogWarning("Disk %d of instance %s did not return valid"
559 " dimension information, ignoring", idx,
560 instance.name)
561 continue
562 (size, spindles) = dimensions
563 if not isinstance(size, (int, long)):
564 self.LogWarning("Disk %d of instance %s did not return valid"
565 " size information, ignoring", idx, instance.name)
566 continue
567 size = size >> 20
568 if size != disk.size:
569 self.LogInfo("Disk %d of instance %s has mismatched size,"
570 " correcting: recorded %d, actual %d", idx,
571 instance.name, disk.size, size)
572 disk.size = size
573 self.cfg.Update(instance, feedback_fn)
574 changed.append((instance.name, idx, "size", size))
575 if es_flags[node_uuid]:
576 if spindles is None:
577 self.LogWarning("Disk %d of instance %s did not return valid"
578 " spindles information, ignoring", idx,
579 instance.name)
580 elif disk.spindles is None or disk.spindles != spindles:
581 self.LogInfo("Disk %d of instance %s has mismatched spindles,"
582 " correcting: recorded %s, actual %s",
583 idx, instance.name, disk.spindles, spindles)
584 disk.spindles = spindles
585 self.cfg.Update(instance, feedback_fn)
586 changed.append((instance.name, idx, "spindles", disk.spindles))
587 if self._EnsureChildSizes(disk):
588 self.cfg.Update(instance, feedback_fn)
589 changed.append((instance.name, idx, "size", disk.size))
590 return changed
591
612
617 """Checks whether the given file-based storage directory is acceptable.
618
619 Note: This function is public, because it is also used in bootstrap.py.
620
621 @type logging_warn_fn: function
622 @param logging_warn_fn: function which accepts a string and logs it
623 @type file_storage_dir: string
624 @param file_storage_dir: the directory to be used for file-based instances
625 @type enabled_disk_templates: list of string
626 @param enabled_disk_templates: the list of enabled disk templates
627 @type file_disk_template: string
628 @param file_disk_template: the file-based disk template for which the
629 path should be checked
630
631 """
632 assert (file_disk_template in
633 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
634 file_storage_enabled = file_disk_template in enabled_disk_templates
635 if file_storage_dir is not None:
636 if file_storage_dir == "":
637 if file_storage_enabled:
638 raise errors.OpPrereqError(
639 "Unsetting the '%s' storage directory while having '%s' storage"
640 " enabled is not permitted." %
641 (file_disk_template, file_disk_template))
642 else:
643 if not file_storage_enabled:
644 logging_warn_fn(
645 "Specified a %s storage directory, although %s storage is not"
646 " enabled." % (file_disk_template, file_disk_template))
647 else:
648 raise errors.ProgrammerError("Received %s storage dir with value"
649 " 'None'." % file_disk_template)
650
662
674
677 """Change the parameters of the cluster.
678
679 """
680 HPATH = "cluster-modify"
681 HTYPE = constants.HTYPE_CLUSTER
682 REQ_BGL = False
683
708
721
723 """Build hooks env.
724
725 """
726 return {
727 "OP_TARGET": self.cfg.GetClusterName(),
728 "NEW_VG_NAME": self.op.vg_name,
729 }
730
732 """Build hooks nodes.
733
734 """
735 mn = self.cfg.GetMasterNode()
736 return ([mn], [mn])
737
738   def _CheckVgName(self, node_uuids, enabled_disk_templates,
739 new_enabled_disk_templates):
740 """Check the consistency of the vg name on all nodes and in case it gets
741 unset whether there are instances still using it.
742
743 """
744 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates)
745 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates,
746 new_enabled_disk_templates)
747 current_vg_name = self.cfg.GetVGName()
748
749 if self.op.vg_name == '':
750 if lvm_is_enabled:
751 raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
752 " disk templates are or get enabled.")
753
754 if self.op.vg_name is None:
755 if current_vg_name is None and lvm_is_enabled:
756 raise errors.OpPrereqError("Please specify a volume group when"
757 " enabling lvm-based disk-templates.")
758
759 if self.op.vg_name is not None and not self.op.vg_name:
760 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN):
761 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
762 " instances exist", errors.ECODE_INVAL)
763
764 if (self.op.vg_name is not None and lvm_is_enabled) or \
765 (self.cfg.GetVGName() is not None and lvm_gets_enabled):
766 self._CheckVgNameOnNodes(node_uuids)
767
788
789 @staticmethod
792 """Determines the enabled disk templates and the subset of disk templates
793 that are newly enabled by this operation.
794
795 """
796 enabled_disk_templates = None
797 new_enabled_disk_templates = []
798 if op_enabled_disk_templates:
799 enabled_disk_templates = op_enabled_disk_templates
800 new_enabled_disk_templates = \
801 list(set(enabled_disk_templates)
802 - set(old_enabled_disk_templates))
803 else:
804 enabled_disk_templates = old_enabled_disk_templates
805 return (enabled_disk_templates, new_enabled_disk_templates)
806
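  # Worked example (template names used for illustration only): with
  # op_enabled_disk_templates == ["drbd", "plain"] and
  # old_enabled_disk_templates == ["plain"], the helper above returns
  # (["drbd", "plain"], ["drbd"]) -- the full new list plus the templates
  # that were not enabled before; the second list comes from a set
  # difference, so its order is arbitrary.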
808 """Determines the enabled disk templates and the subset of disk templates
809 that are newly enabled by this operation.
810
811 """
812 return self._GetEnabledDiskTemplatesInner(self.op.enabled_disk_templates,
813 cluster.enabled_disk_templates)
814
816 """Checks the ipolicy.
817
818 @type cluster: C{objects.Cluster}
819 @param cluster: the cluster's configuration
820 @type enabled_disk_templates: list of string
821 @param enabled_disk_templates: list of (possibly newly) enabled disk
822 templates
823
824 """
825
826 if self.op.ipolicy:
827 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
828 group_policy=False)
829
830 CheckIpolicyVsDiskTemplates(self.new_ipolicy,
831 enabled_disk_templates)
832
833 all_instances = self.cfg.GetAllInstancesInfo().values()
834 violations = set()
835 for group in self.cfg.GetAllNodeGroupsInfo().values():
836 instances = frozenset([inst for inst in all_instances
837 if compat.any(nuuid in group.members
838 for nuuid in inst.all_nodes)])
839 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
840 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
841 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
842 self.cfg)
843 if new:
844 violations.update(new)
845
846 if violations:
847 self.LogWarning("After the ipolicy change the following instances"
848 " violate them: %s",
849 utils.CommaJoin(utils.NiceSort(violations)))
850 else:
851 CheckIpolicyVsDiskTemplates(cluster.ipolicy,
852 enabled_disk_templates)
853
855 """Check prerequisites.
856
857 This checks whether the given params don't conflict and
858 if the given volume group is valid.
859
860 """
861 if self.op.drbd_helper is not None and not self.op.drbd_helper:
862 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8):
863 raise errors.OpPrereqError("Cannot disable drbd helper while"
864 " drbd-based instances exist",
865 errors.ECODE_INVAL)
866
867 node_uuids = self.owned_locks(locking.LEVEL_NODE)
868 self.cluster = cluster = self.cfg.GetClusterInfo()
869
870 vm_capable_node_uuids = [node.uuid
871 for node in self.cfg.GetAllNodesInfo().values()
872 if node.uuid in node_uuids and node.vm_capable]
873
874 (enabled_disk_templates, new_enabled_disk_templates) = \
875 self._GetEnabledDiskTemplates(cluster)
876
877 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
878 new_enabled_disk_templates)
879
880 if self.op.file_storage_dir is not None:
881 CheckFileStoragePathVsEnabledDiskTemplates(
882 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates)
883
884 if self.op.shared_file_storage_dir is not None:
885 CheckSharedFileStoragePathVsEnabledDiskTemplates(
886 self.LogWarning, self.op.shared_file_storage_dir,
887 enabled_disk_templates)
888
889 if self.op.drbd_helper:
890
891 helpers = self.rpc.call_drbd_helper(node_uuids)
892 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
893 if ninfo.offline:
894 self.LogInfo("Not checking drbd helper on offline node %s",
895 ninfo.name)
896 continue
897 msg = helpers[ninfo.uuid].fail_msg
898 if msg:
899 raise errors.OpPrereqError("Error checking drbd helper on node"
900 " '%s': %s" % (ninfo.name, msg),
901 errors.ECODE_ENVIRON)
902 node_helper = helpers[ninfo.uuid].payload
903 if node_helper != self.op.drbd_helper:
904 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
905 (ninfo.name, node_helper),
906 errors.ECODE_ENVIRON)
907
908
909 if self.op.beparams:
910 objects.UpgradeBeParams(self.op.beparams)
911 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
912 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
913
914 if self.op.ndparams:
915 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
916 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
917
918
919
920 if self.new_ndparams["oob_program"] == "":
921 self.new_ndparams["oob_program"] = \
922 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
923
924 if self.op.hv_state:
925 new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
926 self.cluster.hv_state_static)
927 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
928 for hv, values in new_hv_state.items())
929
930 if self.op.disk_state:
931 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
932 self.cluster.disk_state_static)
933 self.new_disk_state = \
934 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
935 for name, values in svalues.items()))
936 for storage, svalues in new_disk_state.items())
937
938 self._CheckIpolicy(cluster, enabled_disk_templates)
939
940 if self.op.nicparams:
941 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
942 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
943 objects.NIC.CheckParameterSyntax(self.new_nicparams)
944 nic_errors = []
945
946
947 for instance in self.cfg.GetAllInstancesInfo().values():
948 for nic_idx, nic in enumerate(instance.nics):
949 params_copy = copy.deepcopy(nic.nicparams)
950 params_filled = objects.FillDict(self.new_nicparams, params_copy)
951
952
953 try:
954 objects.NIC.CheckParameterSyntax(params_filled)
955 except errors.ConfigurationError, err:
956 nic_errors.append("Instance %s, nic/%d: %s" %
957 (instance.name, nic_idx, err))
958
959
960 target_mode = params_filled[constants.NIC_MODE]
961 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
962 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
963 " address" % (instance.name, nic_idx))
964 if nic_errors:
965 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
966 "\n".join(nic_errors), errors.ECODE_INVAL)
967
968
969 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
970 if self.op.hvparams:
971 for hv_name, hv_dict in self.op.hvparams.items():
972 if hv_name not in self.new_hvparams:
973 self.new_hvparams[hv_name] = hv_dict
974 else:
975 self.new_hvparams[hv_name].update(hv_dict)
976
977
978 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
979 if self.op.diskparams:
980 for dt_name, dt_params in self.op.diskparams.items():
981 if dt_name not in self.new_diskparams:
982 self.new_diskparams[dt_name] = dt_params
983 else:
984 self.new_diskparams[dt_name].update(dt_params)
985
986
987 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
988 if self.op.os_hvp:
989 for os_name, hvs in self.op.os_hvp.items():
990 if os_name not in self.new_os_hvp:
991 self.new_os_hvp[os_name] = hvs
992 else:
993 for hv_name, hv_dict in hvs.items():
994 if hv_dict is None:
995
996 self.new_os_hvp[os_name].pop(hv_name, None)
997 elif hv_name not in self.new_os_hvp[os_name]:
998 self.new_os_hvp[os_name][hv_name] = hv_dict
999 else:
1000 self.new_os_hvp[os_name][hv_name].update(hv_dict)
1001
1002
1003 self.new_osp = objects.FillDict(cluster.osparams, {})
1004 if self.op.osparams:
1005 for os_name, osp in self.op.osparams.items():
1006 if os_name not in self.new_osp:
1007 self.new_osp[os_name] = {}
1008
1009 self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp,
1010 use_none=True)
1011
1012 if not self.new_osp[os_name]:
1013
1014 del self.new_osp[os_name]
1015 else:
1016
1017 CheckOSParams(self, False, [self.cfg.GetMasterNode()],
1018 os_name, self.new_osp[os_name])
1019
1020
1021 if self.op.enabled_hypervisors is not None:
1022 self.hv_list = self.op.enabled_hypervisors
1023 for hv in self.hv_list:
1024
1025
1026
1027
1028
1029 if hv not in new_hvp:
1030 new_hvp[hv] = {}
1031 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
1032 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
1033 else:
1034 self.hv_list = cluster.enabled_hypervisors
1035
1036 if self.op.hvparams or self.op.enabled_hypervisors is not None:
1037
1038 for hv_name, hv_params in self.new_hvparams.items():
1039 if ((self.op.hvparams and hv_name in self.op.hvparams) or
1040 (self.op.enabled_hypervisors and
1041 hv_name in self.op.enabled_hypervisors)):
1042
1043 hv_class = hypervisor.GetHypervisorClass(hv_name)
1044 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1045 hv_class.CheckParameterSyntax(hv_params)
1046 CheckHVParams(self, node_uuids, hv_name, hv_params)
1047
1048 self._CheckDiskTemplateConsistency()
1049
1050 if self.op.os_hvp:
1051
1052
1053 for os_name, os_hvp in self.new_os_hvp.items():
1054 for hv_name, hv_params in os_hvp.items():
1055 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1056
1057 cluster_defaults = self.new_hvparams.get(hv_name, {})
1058 new_osp = objects.FillDict(cluster_defaults, hv_params)
1059 hv_class = hypervisor.GetHypervisorClass(hv_name)
1060 hv_class.CheckParameterSyntax(new_osp)
1061 CheckHVParams(self, node_uuids, hv_name, new_osp)
1062
1063 if self.op.default_iallocator:
1064 alloc_script = utils.FindFile(self.op.default_iallocator,
1065 constants.IALLOCATOR_SEARCH_PATH,
1066 os.path.isfile)
1067 if alloc_script is None:
1068 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
1069 " specified" % self.op.default_iallocator,
1070 errors.ECODE_INVAL)
1071
1073 """Check whether the disk templates that are going to be disabled
1074 are still in use by some instances.
1075
1076 """
1077 if self.op.enabled_disk_templates:
1078 cluster = self.cfg.GetClusterInfo()
1079 instances = self.cfg.GetAllInstancesInfo()
1080
1081 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
1082 - set(self.op.enabled_disk_templates)
1083 for instance in instances.itervalues():
1084 if instance.disk_template in disk_templates_to_remove:
1085 raise errors.OpPrereqError("Cannot disable disk template '%s',"
1086 " because instance '%s' is using it." %
1087 (instance.disk_template, instance.name))
1088
1090 """Determines and sets the new volume group name.
1091
1092 """
1093 if self.op.vg_name is not None:
1094 new_volume = self.op.vg_name
1095 if not new_volume:
1096 new_volume = None
1097 if new_volume != self.cfg.GetVGName():
1098 self.cfg.SetVGName(new_volume)
1099 else:
1100 feedback_fn("Cluster LVM configuration already in desired"
1101 " state, not changing")
1102
1104 """Set the file storage directory.
1105
1106 """
1107 if self.op.file_storage_dir is not None:
1108 if self.cluster.file_storage_dir == self.op.file_storage_dir:
1109 feedback_fn("Global file storage dir already set to value '%s'"
1110 % self.cluster.file_storage_dir)
1111 else:
1112 self.cluster.file_storage_dir = self.op.file_storage_dir
1113
1114   def Exec(self, feedback_fn):
1115 """Change the parameters of the cluster.
1116
1117 """
1118 if self.op.enabled_disk_templates:
1119 self.cluster.enabled_disk_templates = \
1120 list(set(self.op.enabled_disk_templates))
1121
1122 self._SetVgName(feedback_fn)
1123 self._SetFileStorageDir(feedback_fn)
1124
1125 if self.op.drbd_helper is not None:
1126 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
1127 feedback_fn("Note that you specified a drbd user helper, but did"
1128 " enabled the drbd disk template.")
1129 new_helper = self.op.drbd_helper
1130 if not new_helper:
1131 new_helper = None
1132 if new_helper != self.cfg.GetDRBDHelper():
1133 self.cfg.SetDRBDHelper(new_helper)
1134 else:
1135 feedback_fn("Cluster DRBD helper already in desired state,"
1136 " not changing")
1137 if self.op.hvparams:
1138 self.cluster.hvparams = self.new_hvparams
1139 if self.op.os_hvp:
1140 self.cluster.os_hvp = self.new_os_hvp
1141 if self.op.enabled_hypervisors is not None:
1142 self.cluster.hvparams = self.new_hvparams
1143 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1144 if self.op.beparams:
1145 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1146 if self.op.nicparams:
1147 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1148 if self.op.ipolicy:
1149 self.cluster.ipolicy = self.new_ipolicy
1150 if self.op.osparams:
1151 self.cluster.osparams = self.new_osp
1152 if self.op.ndparams:
1153 self.cluster.ndparams = self.new_ndparams
1154 if self.op.diskparams:
1155 self.cluster.diskparams = self.new_diskparams
1156 if self.op.hv_state:
1157 self.cluster.hv_state_static = self.new_hv_state
1158 if self.op.disk_state:
1159 self.cluster.disk_state_static = self.new_disk_state
1160
1161 if self.op.candidate_pool_size is not None:
1162 self.cluster.candidate_pool_size = self.op.candidate_pool_size
1163
1164 AdjustCandidatePool(self, [])
1165
1166 if self.op.maintain_node_health is not None:
1167 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
1168 feedback_fn("Note: CONFD was disabled at build time, node health"
1169 " maintenance is not useful (still enabling it)")
1170 self.cluster.maintain_node_health = self.op.maintain_node_health
1171
1172 if self.op.modify_etc_hosts is not None:
1173 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts
1174
1175 if self.op.prealloc_wipe_disks is not None:
1176 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
1177
1178 if self.op.add_uids is not None:
1179 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
1180
1181 if self.op.remove_uids is not None:
1182 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
1183
1184 if self.op.uid_pool is not None:
1185 self.cluster.uid_pool = self.op.uid_pool
1186
1187 if self.op.default_iallocator is not None:
1188 self.cluster.default_iallocator = self.op.default_iallocator
1189
1190 if self.op.reserved_lvs is not None:
1191 self.cluster.reserved_lvs = self.op.reserved_lvs
1192
1193 if self.op.use_external_mip_script is not None:
1194 self.cluster.use_external_mip_script = self.op.use_external_mip_script
1195
1196 def helper_os(aname, mods, desc):
1197 desc += " OS list"
1198 lst = getattr(self.cluster, aname)
1199 for key, val in mods:
1200 if key == constants.DDM_ADD:
1201 if val in lst:
1202 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
1203 else:
1204 lst.append(val)
1205 elif key == constants.DDM_REMOVE:
1206 if val in lst:
1207 lst.remove(val)
1208 else:
1209 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
1210 else:
1211 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1212
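    # Illustrative modification list (OS names invented): self.op.hidden_os
    # could be [(constants.DDM_ADD, "debian-image"),
    # (constants.DDM_REMOVE, "old-os")], which appends "debian-image" to
    # cluster.hidden_os and removes "old-os" from it, emitting a feedback
    # message rather than an error when an entry is already present or
    # missing.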
1213 if self.op.hidden_os:
1214 helper_os("hidden_os", self.op.hidden_os, "hidden")
1215
1216 if self.op.blacklisted_os:
1217 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
1218
1219 if self.op.master_netdev:
1220 master_params = self.cfg.GetMasterNetworkParameters()
1221 ems = self.cfg.GetUseExternalMipScript()
1222 feedback_fn("Shutting down master ip on the current netdev (%s)" %
1223 self.cluster.master_netdev)
1224 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
1225 master_params, ems)
1226 if not self.op.force:
1227 result.Raise("Could not disable the master ip")
1228 else:
1229 if result.fail_msg:
1230 msg = ("Could not disable the master ip (continuing anyway): %s" %
1231 result.fail_msg)
1232 feedback_fn(msg)
1233 feedback_fn("Changing master_netdev from %s to %s" %
1234 (master_params.netdev, self.op.master_netdev))
1235 self.cluster.master_netdev = self.op.master_netdev
1236
1237 if self.op.master_netmask:
1238 master_params = self.cfg.GetMasterNetworkParameters()
1239 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
1240 result = self.rpc.call_node_change_master_netmask(
1241 master_params.uuid, master_params.netmask,
1242 self.op.master_netmask, master_params.ip,
1243 master_params.netdev)
1244 result.Warn("Could not change the master IP netmask", feedback_fn)
1245 self.cluster.master_netmask = self.op.master_netmask
1246
1247 self.cfg.Update(self.cluster, feedback_fn)
1248
1249 if self.op.master_netdev:
1250 master_params = self.cfg.GetMasterNetworkParameters()
1251 feedback_fn("Starting the master ip on the new master netdev (%s)" %
1252 self.op.master_netdev)
1253 ems = self.cfg.GetUseExternalMipScript()
1254 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
1255 master_params, ems)
1256 result.Warn("Could not re-enable the master ip on the master,"
1257 " please restart manually", self.LogWarning)
1258
1261 """Submits all jobs necessary to verify the cluster.
1262
1263 """
1264 REQ_BGL = False
1265
1267 self.needed_locks = {}
1268
1269   def Exec(self, feedback_fn):
1270 jobs = []
1271
1272 if self.op.group_name:
1273 groups = [self.op.group_name]
1274 depends_fn = lambda: None
1275 else:
1276 groups = self.cfg.GetNodeGroupList()
1277
1278
1279 jobs.append([
1280 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1281 ])
1282
1283
1284 depends_fn = lambda: [(-len(jobs), [])]
1285
1286 jobs.extend(
1287 [opcodes.OpClusterVerifyGroup(group_name=group,
1288 ignore_errors=self.op.ignore_errors,
1289 depends=depends_fn())]
1290 for group in groups)
1291
1292
1293 for op in itertools.chain(*jobs):
1294 op.debug_simulate_errors = self.op.debug_simulate_errors
1295 op.verbose = self.op.verbose
1296 op.error_codes = self.op.error_codes
1297 try:
1298 op.skip_checks = self.op.skip_checks
1299 except AttributeError:
1300 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1301
1302 return ResultWithJobs(jobs)
1303
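# Illustrative shape of the submitted job list (group names invented):
# verifying a cluster with two node groups yields
#
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
#
# i.e. one job per group, each with a relative dependency pointing back at
# the config-verification job submitted in the same batch; the offsets grow
# because depends_fn() is evaluated while the generator above is consumed
# and jobs already contains the previously added entries.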
1306 """Mix-in for cluster/group verify LUs.
1307
1308 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1309 self.op and self._feedback_fn to be available.)
1310
1311 """
1312
1313 ETYPE_FIELD = "code"
1314 ETYPE_ERROR = "ERROR"
1315 ETYPE_WARNING = "WARNING"
1316
1317   def _Error(self, ecode, item, msg, *args, **kwargs):
1318 """Format an error message.
1319
1320 Based on the opcode's error_codes parameter, either format a
1321 parseable error code, or a simpler error string.
1322
1323 This must be called only from Exec and functions called from Exec.
1324
1325 """
1326 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1327 itype, etxt, _ = ecode
1328
1329
1330 if etxt in self.op.ignore_errors:
1331 ltype = self.ETYPE_WARNING
1332
1333 if args:
1334 msg = msg % args
1335
1336 if self.op.error_codes:
1337 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1338 else:
1339 if item:
1340 item = " " + item
1341 else:
1342 item = ""
1343 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1344
1345 self._feedback_fn(" - %s" % msg)
1346
1347 if ltype == self.ETYPE_ERROR:
1348 self.bad = True
1349
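  # Illustrative output (the exact item type comes from the error-code
  # tuple): assuming constants.CV_ENODEHV carries the item type "node", a
  # call such as self._Error(constants.CV_ENODEHV, "node1.example.com",
  # "hypervisor check failed") with self.op.error_codes unset feeds back a
  # line like
  #
  #   - ERROR: node node1.example.com: hypervisor check failed
  #
  # while with error_codes set the colon-separated machine-parseable form is
  # produced instead.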
1350   def _ErrorIf(self, cond, *args, **kwargs):
1351 """Log an error message if the passed condition is True.
1352
1353 """
1354 if (bool(cond)
1355 or self.op.debug_simulate_errors):
1356 self._Error(*args, **kwargs)
1357
1360 """Verifies a certificate for L{LUClusterVerifyConfig}.
1361
1362 @type filename: string
1363 @param filename: Path to PEM file
1364
1365 """
1366 try:
1367 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1368 utils.ReadFile(filename))
1369 except Exception, err:
1370 return (LUClusterVerifyConfig.ETYPE_ERROR,
1371 "Failed to load X509 certificate %s: %s" % (filename, err))
1372
1373 (errcode, msg) = \
1374 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1375 constants.SSL_CERT_EXPIRATION_ERROR)
1376
1377 if msg:
1378 fnamemsg = "While verifying %s: %s" % (filename, msg)
1379 else:
1380 fnamemsg = None
1381
1382 if errcode is None:
1383 return (None, fnamemsg)
1384 elif errcode == utils.CERT_WARNING:
1385 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1386 elif errcode == utils.CERT_ERROR:
1387 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1388
1389 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1390
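# Usage sketch (illustrative): LUClusterVerifyConfig.Exec() calls this helper
# for every file in pathutils.ALL_CERT_FILES, e.g.
#
#   (errcode, msg) = _VerifyCertificate(pathutils.NODED_CERT_FILE)
#
# and receives (None, None) for a healthy certificate, or
# (LUClusterVerifyConfig.ETYPE_WARNING / ETYPE_ERROR,
#  "While verifying <path>: <reason>") when the certificate is close to or
# past expiration.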
1393 """Compute the set of all hypervisor parameters.
1394
1395 @type cluster: L{objects.Cluster}
1396 @param cluster: the cluster object
1397 @type instances: list of L{objects.Instance}
1398 @param instances: additional instances from which to obtain parameters
1399 @rtype: list of (origin, hypervisor, parameters)
1400 @return: a list with all parameters found, indicating the hypervisor they
1401 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1402
1403 """
1404 hvp_data = []
1405
1406 for hv_name in cluster.enabled_hypervisors:
1407 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1408
1409 for os_name, os_hvp in cluster.os_hvp.items():
1410 for hv_name, hv_params in os_hvp.items():
1411 if hv_params:
1412 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1413 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1414
1415
1416 for instance in instances:
1417 if instance.hvparams:
1418 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1419 cluster.FillHV(instance)))
1420
1421 return hvp_data
1422
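# Example of the returned structure (hypervisor, OS and instance names are
# purely illustrative):
#
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...defaults overridden by os_hvp...}),
#    ("instance inst1.example.com", "kvm", {...fully filled params...})]
#
# LUClusterVerifyConfig.Exec() below passes this list to its _VerifyHVP
# helper, one entry per parameter source.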
1425 """Verifies the cluster config.
1426
1427 """
1428 REQ_BGL = False
1429
1443
1447
1456
1457   def Exec(self, feedback_fn):
1458 """Verify integrity of cluster, performing various test on nodes.
1459
1460 """
1461 self.bad = False
1462 self._feedback_fn = feedback_fn
1463
1464 feedback_fn("* Verifying cluster config")
1465
1466 for msg in self.cfg.VerifyConfig():
1467 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1468
1469 feedback_fn("* Verifying cluster certificate files")
1470
1471 for cert_filename in pathutils.ALL_CERT_FILES:
1472 (errcode, msg) = _VerifyCertificate(cert_filename)
1473 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1474
1475 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
1476 pathutils.NODED_CERT_FILE),
1477 constants.CV_ECLUSTERCERT,
1478 None,
1479 pathutils.NODED_CERT_FILE + " must be accessible by the " +
1480 constants.LUXID_USER + " user")
1481
1482 feedback_fn("* Verifying hypervisor parameters")
1483
1484 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1485 self.all_inst_info.values()))
1486
1487 feedback_fn("* Verifying all nodes belong to an existing group")
1488
1489
1490
1491
1492
1493 dangling_nodes = set(node for node in self.all_node_info.values()
1494 if node.group not in self.all_group_info)
1495
1496 dangling_instances = {}
1497 no_node_instances = []
1498
1499 for inst in self.all_inst_info.values():
1500 if inst.primary_node in [node.uuid for node in dangling_nodes]:
1501 dangling_instances.setdefault(inst.primary_node, []).append(inst)
1502 elif inst.primary_node not in self.all_node_info:
1503 no_node_instances.append(inst)
1504
1505 pretty_dangling = [
1506 "%s (%s)" %
1507 (node.name,
1508 utils.CommaJoin(inst.name for
1509 inst in dangling_instances.get(node.uuid, [])))
1510 for node in dangling_nodes]
1511
1512 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1513 None,
1514 "the following nodes (and their instances) belong to a non"
1515 " existing group: %s", utils.CommaJoin(pretty_dangling))
1516
1517 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1518 None,
1519 "the following instances have a non-existing primary-node:"
1520 " %s", utils.CommaJoin(inst.name for
1521 inst in no_node_instances))
1522
1523 return not self.bad
1524
1527 """Verifies the status of a node group.
1528
1529 """
1530 HPATH = "cluster-verify"
1531 HTYPE = constants.HTYPE_CLUSTER
1532 REQ_BGL = False
1533
1534 _HOOKS_INDENT_RE = re.compile("^", re.M)
1535
1537 """A class representing the logical and physical status of a node.
1538
1539 @type uuid: string
1540 @ivar uuid: the node UUID to which this object refers
1541 @ivar volumes: a structure as returned from
1542 L{ganeti.backend.GetVolumeList} (runtime)
1543 @ivar instances: a list of running instances (runtime)
1544 @ivar pinst: list of configured primary instances (config)
1545 @ivar sinst: list of configured secondary instances (config)
1546 @ivar sbp: dictionary of {primary-node: list of instances} for all
1547 instances for which this node is secondary (config)
1548 @ivar mfree: free memory, as reported by hypervisor (runtime)
1549 @ivar dfree: free disk, as reported by the node (runtime)
1550 @ivar offline: the offline status (config)
1551 @type rpc_fail: boolean
1552 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1553 not whether the individual keys were correct) (runtime)
1554 @type lvm_fail: boolean
1555 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1556 @type hyp_fail: boolean
1557 @ivar hyp_fail: whether the RPC call didn't return the instance list
1558 @type ghost: boolean
1559 @ivar ghost: whether this is a known node or not (config)
1560 @type os_fail: boolean
1561 @ivar os_fail: whether the RPC call didn't return valid OS data
1562 @type oslist: list
1563 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1564 @type vm_capable: boolean
1565 @ivar vm_capable: whether the node can host instances
1566 @type pv_min: float
1567 @ivar pv_min: size in MiB of the smallest PVs
1568 @type pv_max: float
1569 @ivar pv_max: size in MiB of the biggest PVs
1570
1571 """
1572   def __init__(self, offline=False, uuid=None, vm_capable=True):
1573 self.uuid = uuid
1574 self.volumes = {}
1575 self.instances = []
1576 self.pinst = []
1577 self.sinst = []
1578 self.sbp = {}
1579 self.mfree = 0
1580 self.dfree = 0
1581 self.offline = offline
1582 self.vm_capable = vm_capable
1583 self.rpc_fail = False
1584 self.lvm_fail = False
1585 self.hyp_fail = False
1586 self.ghost = False
1587 self.os_fail = False
1588 self.oslist = {}
1589 self.pv_min = None
1590 self.pv_max = None
1591
1612
1630
1632 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1633 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1634
1635 group_node_uuids = set(self.group_info.members)
1636 group_inst_uuids = \
1637 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1638
1639 unlocked_node_uuids = \
1640 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
1641
1642 unlocked_inst_uuids = \
1643 group_inst_uuids.difference(
1644 [self.cfg.GetInstanceInfoByName(name).uuid
1645 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
1646
1647 if unlocked_node_uuids:
1648 raise errors.OpPrereqError(
1649 "Missing lock for nodes: %s" %
1650 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
1651 errors.ECODE_STATE)
1652
1653 if unlocked_inst_uuids:
1654 raise errors.OpPrereqError(
1655 "Missing lock for instances: %s" %
1656 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
1657 errors.ECODE_STATE)
1658
1659 self.all_node_info = self.cfg.GetAllNodesInfo()
1660 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1661
1662 self.my_node_uuids = group_node_uuids
1663 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
1664 for node_uuid in group_node_uuids)
1665
1666 self.my_inst_uuids = group_inst_uuids
1667 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
1668 for inst_uuid in group_inst_uuids)
1669
1670
1671
1672 extra_lv_nodes = set()
1673
1674 for inst in self.my_inst_info.values():
1675 if inst.disk_template in constants.DTS_INT_MIRROR:
1676 for nuuid in inst.all_nodes:
1677 if self.all_node_info[nuuid].group != self.group_uuid:
1678 extra_lv_nodes.add(nuuid)
1679
1680 unlocked_lv_nodes = \
1681 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1682
1683 if unlocked_lv_nodes:
1684 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1685 utils.CommaJoin(unlocked_lv_nodes),
1686 errors.ECODE_STATE)
1687 self.extra_lv_nodes = list(extra_lv_nodes)
1688
1690 """Perform some basic validation on data returned from a node.
1691
1692 - check the result data structure is well formed and has all the
1693 mandatory fields
1694 - check ganeti version
1695
1696 @type ninfo: L{objects.Node}
1697 @param ninfo: the node to check
1698 @param nresult: the results from the node
1699 @rtype: boolean
1700 @return: whether overall this call was successful (and we can expect
1701 reasonable values in the response)
1702
1703 """
1704
1705 test = not nresult or not isinstance(nresult, dict)
1706 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1707 "unable to verify node: no data returned")
1708 if test:
1709 return False
1710
1711
1712 local_version = constants.PROTOCOL_VERSION
1713 remote_version = nresult.get("version", None)
1714 test = not (remote_version and
1715 isinstance(remote_version, (list, tuple)) and
1716 len(remote_version) == 2)
1717 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1718 "connection to node returned invalid data")
1719 if test:
1720 return False
1721
1722 test = local_version != remote_version[0]
1723 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
1724 "incompatible protocol versions: master %s,"
1725 " node %s", local_version, remote_version[0])
1726 if test:
1727 return False
1728
1729
1730
1731
1732 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1733 constants.CV_ENODEVERSION, ninfo.name,
1734 "software version mismatch: master %s, node %s",
1735 constants.RELEASE_VERSION, remote_version[1],
1736 code=self.ETYPE_WARNING)
1737
1738 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1739 if ninfo.vm_capable and isinstance(hyp_result, dict):
1740 for hv_name, hv_result in hyp_result.iteritems():
1741 test = hv_result is not None
1742 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1743 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1744
1745 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1746 if ninfo.vm_capable and isinstance(hvp_result, list):
1747 for item, hv_name, hv_result in hvp_result:
1748 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
1749 "hypervisor %s parameter verify failure (source %s): %s",
1750 hv_name, item, hv_result)
1751
1752 test = nresult.get(constants.NV_NODESETUP,
1753 ["Missing NODESETUP results"])
1754 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
1755 "node setup error: %s", "; ".join(test))
1756
1757 return True
1758
1759   def _VerifyNodeTime(self, ninfo, nresult,
1760 nvinfo_starttime, nvinfo_endtime):
1761 """Check the node time.
1762
1763 @type ninfo: L{objects.Node}
1764 @param ninfo: the node to check
1765 @param nresult: the remote results for the node
1766 @param nvinfo_starttime: the start time of the RPC call
1767 @param nvinfo_endtime: the end time of the RPC call
1768
1769 """
1770 ntime = nresult.get(constants.NV_TIME, None)
1771 try:
1772 ntime_merged = utils.MergeTime(ntime)
1773 except (ValueError, TypeError):
1774 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
1775 "Node returned invalid time")
1776 return
1777
1778 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1779 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1780 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1781 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1782 else:
1783 ntime_diff = None
1784
1785 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
1786 "Node time diverges by at least %s from master node time",
1787 ntime_diff)
1788
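  # Worked example (numbers invented, and NODE_MAX_CLOCK_SKEW assumed to be
  # 150 seconds): with an RPC window of nvinfo_starttime=1000.0 ..
  # nvinfo_endtime=1002.0 and a node whose merged time is 1200.0, the
  # "ntime_merged > nvinfo_endtime + skew" branch triggers, ntime_diff
  # becomes "198.0s" (abs(1200.0 - 1002.0)) and a CV_ENODETIME error is
  # reported for that node.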
1790 """Check the node LVM results and update info for cross-node checks.
1791
1792 @type ninfo: L{objects.Node}
1793 @param ninfo: the node to check
1794 @param nresult: the remote results for the node
1795 @param vg_name: the configured VG name
1796 @type nimg: L{NodeImage}
1797 @param nimg: node image
1798
1799 """
1800 if vg_name is None:
1801 return
1802
1803
1804 vglist = nresult.get(constants.NV_VGLIST, None)
1805 test = not vglist
1806 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
1807 "unable to check volume groups")
1808 if not test:
1809 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1810 constants.MIN_VG_SIZE)
1811 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
1812
1813
1814 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
1815 for em in errmsgs:
1816 self._Error(constants.CV_ENODELVM, ninfo.name, em)
1817 if pvminmax is not None:
1818 (nimg.pv_min, nimg.pv_max) = pvminmax
1819
1821 """Check cross-node DRBD version consistency.
1822
1823 @type node_verify_infos: dict
1824 @param node_verify_infos: infos about nodes as returned from the
1825 node_verify call.
1826
1827 """
1828 node_versions = {}
1829 for node_uuid, ndata in node_verify_infos.items():
1830 nresult = ndata.payload
1831 if nresult:
1832 version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version")
1833 node_versions[node_uuid] = version
1834
1835 if len(set(node_versions.values())) > 1:
1836 for node_uuid, version in sorted(node_versions.items()):
1837 msg = "DRBD version mismatch: %s" % version
1838 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
1839 code=self.ETYPE_WARNING)
1840
1842 """Check cross-node consistency in LVM.
1843
1844 @type node_image: dict
1845 @param node_image: info about nodes, mapping from node to names to
1846 L{NodeImage} objects
1847 @param vg_name: the configured VG name
1848
1849 """
1850 if vg_name is None:
1851 return
1852
1853
1854 if not self._exclusive_storage:
1855 return
1856
1857
1858
1859
1860 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
1861 if not vals:
1862 return
1863 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
1864 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
1865 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
1866 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
1867 "PV sizes differ too much in the group; smallest (%s MB) is"
1868 " on %s, biggest (%s MB) is on %s",
1869 pvmin, self.cfg.GetNodeName(minnode_uuid),
1870 pvmax, self.cfg.GetNodeName(maxnode_uuid))
1871
1873 """Check the node bridges.
1874
1875 @type ninfo: L{objects.Node}
1876 @param ninfo: the node to check
1877 @param nresult: the remote results for the node
1878 @param bridges: the expected list of bridges
1879
1880 """
1881 if not bridges:
1882 return
1883
1884 missing = nresult.get(constants.NV_BRIDGES, None)
1885 test = not isinstance(missing, list)
1886 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1887 "did not return valid bridge information")
1888 if not test:
1889 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
1890 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1891
1909
1911 """Check the node network connectivity results.
1912
1913 @type ninfo: L{objects.Node}
1914 @param ninfo: the node to check
1915 @param nresult: the remote results for the node
1916
1917 """
1918 test = constants.NV_NODELIST not in nresult
1919 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
1920 "node hasn't returned node ssh connectivity data")
1921 if not test:
1922 if nresult[constants.NV_NODELIST]:
1923 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1924 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
1925 "ssh communication with node '%s': %s", a_node, a_msg)
1926
1927 test = constants.NV_NODENETTEST not in nresult
1928 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1929 "node hasn't returned node tcp connectivity data")
1930 if not test:
1931 if nresult[constants.NV_NODENETTEST]:
1932 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1933 for anode in nlist:
1934 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
1935 "tcp communication with node '%s': %s",
1936 anode, nresult[constants.NV_NODENETTEST][anode])
1937
1938 test = constants.NV_MASTERIP not in nresult
1939 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1940 "node hasn't returned node master IP reachability data")
1941 if not test:
1942 if not nresult[constants.NV_MASTERIP]:
1943 if ninfo.uuid == self.master_node:
1944 msg = "the master node cannot reach the master IP (not configured?)"
1945 else:
1946 msg = "cannot reach the master IP"
1947 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
1948
1950 """Verify an instance.
1951
1952 This function checks to see if the required block devices are
1953 available on the instance's node, and that the nodes are in the correct
1954 state.
1955
1956 """
1957 pnode_uuid = instance.primary_node
1958 pnode_img = node_image[pnode_uuid]
1959 groupinfo = self.cfg.GetAllNodeGroupsInfo()
1960
1961 node_vol_should = {}
1962 instance.MapLVsByNode(node_vol_should)
1963
1964 cluster = self.cfg.GetClusterInfo()
1965 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
1966 self.group_info)
1967 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
1968 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
1969 utils.CommaJoin(err), code=self.ETYPE_WARNING)
1970
1971 for node_uuid in node_vol_should:
1972 n_img = node_image[node_uuid]
1973 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1974
1975 continue
1976 for volume in node_vol_should[node_uuid]:
1977 test = volume not in n_img.volumes
1978 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
1979 "volume %s missing on node %s", volume,
1980 self.cfg.GetNodeName(node_uuid))
1981
1982 if instance.admin_state == constants.ADMINST_UP:
1983 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
1984 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
1985 "instance not running on its primary node %s",
1986 self.cfg.GetNodeName(pnode_uuid))
1987 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
1988 instance.name, "instance is marked as running and lives on"
1989 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
1990
1991 diskdata = [(nname, success, status, idx)
1992 for (nname, disks) in diskstatus.items()
1993 for idx, (success, status) in enumerate(disks)]
1994
1995 for nname, success, bdev_status, idx in diskdata:
1996
1997
1998 snode = node_image[nname]
1999 bad_snode = snode.ghost or snode.offline
2000 self._ErrorIf(instance.disks_active and
2001 not success and not bad_snode,
2002 constants.CV_EINSTANCEFAULTYDISK, instance.name,
2003 "couldn't retrieve status for disk/%s on %s: %s",
2004 idx, self.cfg.GetNodeName(nname), bdev_status)
2005
2006 if instance.disks_active and success and \
2007 (bdev_status.is_degraded or
2008 bdev_status.ldisk_status != constants.LDS_OKAY):
2009 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
2010 if bdev_status.is_degraded:
2011 msg += " is degraded"
2012 if bdev_status.ldisk_status != constants.LDS_OKAY:
2013 msg += "; state is '%s'" % \
2014 constants.LDS_NAMES[bdev_status.ldisk_status]
2015
2016 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
2017
2018 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2019 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
2020 "instance %s, connection to primary node failed",
2021 instance.name)
2022
2023 self._ErrorIf(len(instance.secondary_nodes) > 1,
2024 constants.CV_EINSTANCELAYOUT, instance.name,
2025 "instance has multiple secondary nodes: %s",
2026 utils.CommaJoin(instance.secondary_nodes),
2027 code=self.ETYPE_WARNING)
2028
2029 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes)
2030 if any(es_flags.values()):
2031 if instance.disk_template not in constants.DTS_EXCL_STORAGE:
2032
2033
2034 es_nodes = [n
2035 for (n, es) in es_flags.items()
2036 if es]
2037 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
2038 "instance has template %s, which is not supported on nodes"
2039 " that have exclusive storage set: %s",
2040 instance.disk_template,
2041 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
2042 for (idx, disk) in enumerate(instance.disks):
2043 self._ErrorIf(disk.spindles is None,
2044 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
2045 "number of spindles not configured for disk %s while"
2046 " exclusive storage is enabled, try running"
2047 " gnt-cluster repair-disk-sizes", idx)
2048
2049 if instance.disk_template in constants.DTS_INT_MIRROR:
2050 instance_nodes = utils.NiceSort(instance.all_nodes)
2051 instance_groups = {}
2052
2053 for node_uuid in instance_nodes:
2054 instance_groups.setdefault(self.all_node_info[node_uuid].group,
2055 []).append(node_uuid)
2056
2057 pretty_list = [
2058 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
2059 groupinfo[group].name)
2060
2061 for group, nodes in sorted(instance_groups.items(),
2062 key=lambda (_, nodes): pnode_uuid in nodes,
2063 reverse=True)]
2064
2065 self._ErrorIf(len(instance_groups) > 1,
2066 constants.CV_EINSTANCESPLITGROUPS,
2067 instance.name, "instance has primary and secondary nodes in"
2068 " different groups: %s", utils.CommaJoin(pretty_list),
2069 code=self.ETYPE_WARNING)
2070
2071 inst_nodes_offline = []
2072 for snode in instance.secondary_nodes:
2073 s_img = node_image[snode]
2074 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2075 self.cfg.GetNodeName(snode),
2076 "instance %s, connection to secondary node failed",
2077 instance.name)
2078
2079 if s_img.offline:
2080 inst_nodes_offline.append(snode)
2081
2082
2083 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
2084 instance.name, "instance has offline secondary node(s) %s",
2085 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
2086
2087 for node_uuid in instance.all_nodes:
2088 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
2089 instance.name, "instance lives on ghost node %s",
2090 self.cfg.GetNodeName(node_uuid))
2091 self._ErrorIf(not node_image[node_uuid].vm_capable,
2092 constants.CV_EINSTANCEBADNODE, instance.name,
2093 "instance lives on non-vm_capable node %s",
2094 self.cfg.GetNodeName(node_uuid))
2095
2096 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2097 """Verify if there are any unknown volumes in the cluster.
2098
2099 The .os, .swap and backup volumes are ignored. All other volumes are
2100 reported as unknown.
2101
2102 @type reserved: L{ganeti.utils.FieldSet}
2103 @param reserved: a FieldSet of reserved volume names
2104
2105 """
2106 for node_uuid, n_img in node_image.items():
2107 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2108 self.all_node_info[node_uuid].group != self.group_uuid):
2109
2110 continue
2111 for volume in n_img.volumes:
2112 test = ((node_uuid not in node_vol_should or
2113 volume not in node_vol_should[node_uuid]) and
2114 not reserved.Matches(volume))
2115 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
2116 self.cfg.GetNodeName(node_uuid),
2117 "volume %s is unknown", volume)
2118
2119 - def _VerifyNPlusOneMemory(self, node_image, all_insts):
2120 """Verify N+1 Memory Resilience.
2121
2122 Check that if one single node dies we can still start all the
2123 instances it was primary for.
2124
2125 """
2126 cluster_info = self.cfg.GetClusterInfo()
2127 for node_uuid, n_img in node_image.items():
2128
2129
2130
2131
2132
2133
2134
2135
2136 if n_img.offline or \
2137 self.all_node_info[node_uuid].group != self.group_uuid:
2138
2139
2140
2141
2142 continue
2143
2144 for prinode, inst_uuids in n_img.sbp.items():
2145 needed_mem = 0
2146 for inst_uuid in inst_uuids:
2147 bep = cluster_info.FillBE(all_insts[inst_uuid])
2148 if bep[constants.BE_AUTO_BALANCE]:
2149 needed_mem += bep[constants.BE_MINMEM]
2150 test = n_img.mfree < needed_mem
2151 self._ErrorIf(test, constants.CV_ENODEN1,
2152 self.cfg.GetNodeName(node_uuid),
2153 "not enough memory to accommodate instance failovers"
2154 " should node %s fail (%dMiB needed, %dMiB available)",
2155 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2156
2157 - def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
2158 (files_all, files_opt, files_mc, files_vm)):
2159 """Verifies file checksums collected from all nodes.
2160
2161 @param nodes: List of L{objects.Node} objects
2162 @param master_node_uuid: UUID of master node
2163 @param all_nvinfo: RPC results
2164
2165 """
2166
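# map each file category to a predicate selecting the nodes that must have
# it (None meaning all nodes)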
2167 files2nodefn = [
2168 (files_all, None),
2169 (files_mc, lambda node: (node.master_candidate or
2170 node.uuid == master_node_uuid)),
2171 (files_vm, lambda node: node.vm_capable),
2172 ]
2173
2174
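# build a dict mapping each filename to the frozenset of node UUIDs
# expected to hold it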
2175 nodefiles = {}
2176 for (files, fn) in files2nodefn:
2177 if fn is None:
2178 filenodes = nodes
2179 else:
2180 filenodes = filter(fn, nodes)
2181 nodefiles.update((filename,
2182 frozenset(map(operator.attrgetter("uuid"), filenodes)))
2183 for filename in files)
2184
2185 assert set(nodefiles) == (files_all | files_mc | files_vm)
2186
2187 fileinfo = dict((filename, {}) for filename in nodefiles)
2188 ignore_nodes = set()
2189
2190 for node in nodes:
2191 if node.offline:
2192 ignore_nodes.add(node.uuid)
2193 continue
2194
2195 nresult = all_nvinfo[node.uuid]
2196
2197 if nresult.fail_msg or not nresult.payload:
2198 node_files = None
2199 else:
2200 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2201 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2202 for (key, value) in fingerprints.items())
2203 del fingerprints
2204
2205 test = not (node_files and isinstance(node_files, dict))
2206 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
2207 "Node did not return file checksum data")
2208 if test:
2209 ignore_nodes.add(node.uuid)
2210 continue
2211
2212
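# record, per file, which checksum each node reported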
2213 for (filename, checksum) in node_files.items():
2214 assert filename in nodefiles
2215 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
2216
2217 for (filename, checksums) in fileinfo.items():
2218 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2219
2220
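# nodes (excluding ignored ones) that reported a checksum for this file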
2221 with_file = frozenset(node_uuid
2222 for node_uuids in fileinfo[filename].values()
2223 for node_uuid in node_uuids) - ignore_nodes
2224
2225 expected_nodes = nodefiles[filename] - ignore_nodes
2226
2227
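# nodes that were expected to have the file but did not report it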
2228 missing_file = expected_nodes - with_file
2229
2230 if filename in files_opt:
2231
2232 self._ErrorIf(missing_file and missing_file != expected_nodes,
2233 constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s is optional, but it must exist on all or no"
2235 " nodes (not found on %s)",
2236 filename,
2237 utils.CommaJoin(
2238 utils.NiceSort(
2239 map(self.cfg.GetNodeName, missing_file))))
2240 else:
2241 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2242 "File %s is missing from node(s) %s", filename,
2243 utils.CommaJoin(
2244 utils.NiceSort(
2245 map(self.cfg.GetNodeName, missing_file))))
2246
2247
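# nodes that reported the file although they were not expected to have it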
2248 unexpected = with_file - expected_nodes
2249 self._ErrorIf(unexpected,
2250 constants.CV_ECLUSTERFILECHECK, None,
2251 "File %s should not exist on node(s) %s",
2252 filename, utils.CommaJoin(
2253 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
2254
2255
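# finally, all reporting nodes should agree on a single checksum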
2256 test = len(checksums) > 1
2257 if test:
2258 variants = ["variant %s on %s" %
2259 (idx + 1,
2260 utils.CommaJoin(utils.NiceSort(
2261 map(self.cfg.GetNodeName, node_uuids))))
2262 for (idx, (checksum, node_uuids)) in
2263 enumerate(sorted(checksums.items()))]
2264 else:
2265 variants = []
2266
2267 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
2268 "File %s found with %s different checksums (%s)",
2269 filename, len(checksums), "; ".join(variants))
2270
2271 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2272 drbd_map):
2273 """Verifies the node DRBD status.
2274
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2278 @param instanceinfo: the dict of instances
2279 @param drbd_helper: the configured DRBD usermode helper
2280 @param drbd_map: the DRBD map as returned by
2281 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2282
2283 """
2284 if drbd_helper:
2285 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2286 test = (helper_result is None)
2287 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2288 "no drbd usermode helper returned")
2289 if helper_result:
2290 status, payload = helper_result
2291 test = not status
2292 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2293 "drbd usermode helper check unsuccessful: %s", payload)
2294 test = status and (payload != drbd_helper)
2295 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2296 "wrong drbd usermode helper: %s", payload)
2297
2298
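# build the map of DRBD minors this node should have, together with the
# owning instance and whether the minor must be active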
2299 node_drbd = {}
2300 for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2301 test = inst_uuid not in instanceinfo
2302 self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2303 "ghost instance '%s' in temporary DRBD map", inst_uuid)
2304
2305
2306
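# for a ghost instance, record the minor with must_exist=False so that no
# additional DRBD warnings are emitted beyond the ghost-instance error above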
2307 if test:
2308 node_drbd[minor] = (inst_uuid, False)
2309 else:
2310 instance = instanceinfo[inst_uuid]
2311 node_drbd[minor] = (inst_uuid, instance.disks_active)
2312
2313
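# now compare the expected minors against those actually in use on the node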
2314 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2315 test = not isinstance(used_minors, (tuple, list))
2316 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2317 "cannot parse drbd status file: %s", str(used_minors))
2318 if test:
2319
2320 return
2321
2322 for minor, (inst_uuid, must_exist) in node_drbd.items():
2323 test = minor not in used_minors and must_exist
2324 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2325 "drbd minor %d of instance %s is not active", minor,
2326 self.cfg.GetInstanceName(inst_uuid))
2327 for minor in used_minors:
2328 test = minor not in node_drbd
2329 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2330 "unallocated drbd minor %d is in use", minor)
2331
2332 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2333 """Builds the node OS structures.
2334
2335 @type ninfo: L{objects.Node}
2336 @param ninfo: the node to check
2337 @param nresult: the remote results for the node
2338 @param nimg: the node image object
2339
2340 """
2341 remote_os = nresult.get(constants.NV_OSLIST, None)
2342 test = (not isinstance(remote_os, list) or
2343 not compat.all(isinstance(v, list) and len(v) == 7
2344 for v in remote_os))
2345
2346 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2347 "node hasn't returned valid OS data")
2348
2349 nimg.os_fail = test
2350
2351 if test:
2352 return
2353
2354 os_dict = {}
2355
2356 for (name, os_path, status, diagnose,
2357 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2358
2359 if name not in os_dict:
2360 os_dict[name] = []
2361
2362
2363
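# the parameters arrive as lists of [name, value] pairs; convert them to
# tuples so they can be stored in a set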
2364 parameters = [tuple(v) for v in parameters]
2365 os_dict[name].append((os_path, status, diagnose,
2366 set(variants), set(parameters), set(api_ver)))
2367
2368 nimg.oslist = os_dict
2369
2370 - def _VerifyNodeOS(self, ninfo, nimg, base):
2371 """Verifies the node OS list.
2372
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nimg: the node image object
2376 @param base: the 'template' node we match against (e.g. from the master)
2377
2378 """
2379 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2380
2381 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2382 for os_name, os_data in nimg.oslist.items():
2383 assert os_data, "Empty OS status for OS %s?!" % os_name
2384 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2385 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
2386 "Invalid OS %s (located at %s): %s",
2387 os_name, f_path, f_diag)
2388 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
2389 "OS '%s' has multiple entries"
2390 " (first one shadows the rest): %s",
2391 os_name, utils.CommaJoin([v[0] for v in os_data]))
2392
2393 test = os_name not in base.oslist
2394 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2395 "Extra OS %s not present on reference node (%s)",
2396 os_name, self.cfg.GetNodeName(base.uuid))
2397 if test:
2398 continue
2399 assert base.oslist[os_name], "Base node has empty OS status?"
2400 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2401 if not b_status:
2402
2403 continue
2404 for kind, a, b in [("API version", f_api, b_api),
2405 ("variants list", f_var, b_var),
2406 ("parameters", beautify_params(f_param),
2407 beautify_params(b_param))]:
2408 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
2409 "OS %s for %s differs from reference node %s:"
2410 " [%s] vs. [%s]", kind, os_name,
2411 self.cfg.GetNodeName(base.uuid),
2412 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2413
2414
2415 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2416 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
2417 "OSes present on reference node %s"
2418 " but missing on this node: %s",
2419 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2420
2421 - def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
2422 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2423
2424 @type ninfo: L{objects.Node}
2425 @param ninfo: the node to check
2426 @param nresult: the remote results for the node
2427 @type is_master: bool
2428 @param is_master: Whether node is the master node
2429
2430 """
2431 cluster = self.cfg.GetClusterInfo()
2432 if (is_master and
2433 (cluster.IsFileStorageEnabled() or
2434 cluster.IsSharedFileStorageEnabled())):
2435 try:
2436 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
2437 except KeyError:
2438
2439 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2440 "Node did not return forbidden file storage paths")
2441 else:
2442 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2443 "Found forbidden file storage paths: %s",
2444 utils.CommaJoin(fspaths))
2445 else:
2446 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
2447 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2448 "Node should not have returned forbidden file storage"
2449 " paths")
2450
2451 - def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
2452 verify_key, error_key):
2453 """Verifies (file) storage paths.
2454
2455 @type ninfo: L{objects.Node}
2456 @param ninfo: the node to check
2457 @param nresult: the remote results for the node
2458 @type file_disk_template: string
2459 @param file_disk_template: file-based disk template, whose directory
2460 is supposed to be verified
2461 @type verify_key: string
2462 @param verify_key: key for the verification map of this file
2463 verification step
2464 @param error_key: error key to be added to the verification results
2465 in case something goes wrong in this verification step
2466
2467 """
2468 assert (file_disk_template in
2469 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
2470 cluster = self.cfg.GetClusterInfo()
2471 if cluster.IsDiskTemplateEnabled(file_disk_template):
2472 self._ErrorIf(
2473 verify_key in nresult,
2474 error_key, ninfo.name,
2475 "The configured %s storage path is unusable: %s" %
2476 (file_disk_template, nresult.get(verify_key)))
2477
2488
2499
2500 - def _VerifyOob(self, ninfo, nresult):
2501 """Verifies out of band functionality of a node.
2502
2503 @type ninfo: L{objects.Node}
2504 @param ninfo: the node to check
2505 @param nresult: the remote results for the node
2506
2507 """
2508
2509
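# out-of-band path problems are only reported for master candidates and
# master-capable nodes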
2510 if ((ninfo.master_candidate or ninfo.master_capable) and
2511 constants.NV_OOB_PATHS in nresult):
2512 for path_result in nresult[constants.NV_OOB_PATHS]:
2513 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
2514 ninfo.name, path_result)
2515
2516 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2517 """Verifies and updates the node volume data.
2518
2519 This function will update a L{NodeImage}'s internal structures
2520 with data from the remote call.
2521
2522 @type ninfo: L{objects.Node}
2523 @param ninfo: the node to check
2524 @param nresult: the remote results for the node
2525 @param nimg: the node image object
2526 @param vg_name: the configured VG name
2527
2528 """
2529 nimg.lvm_fail = True
2530 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2531 if vg_name is None:
2532 pass
2533 elif isinstance(lvdata, basestring):
2534 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2535 "LVM problem on node: %s", utils.SafeEncode(lvdata))
2536 elif not isinstance(lvdata, dict):
2537 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2538 "rpc call to node failed (lvlist)")
2539 else:
2540 nimg.volumes = lvdata
2541 nimg.lvm_fail = False
2542
2543 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2544 """Verifies and updates the node instance list.
2545
2546 If the listing was successful, then updates this node's instance
2547 list. Otherwise, it marks the RPC call as failed for the instance
2548 list key.
2549
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554
2555 """
2556 idata = nresult.get(constants.NV_INSTANCELIST, None)
2557 test = not isinstance(idata, list)
2558 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2559 "rpc call to node failed (instancelist): %s",
2560 utils.SafeEncode(str(idata)))
2561 if test:
2562 nimg.hyp_fail = True
2563 else:
2564 nimg.instances = [inst.uuid for (_, inst) in
2565 self.cfg.GetMultiInstanceInfoByName(idata)]
2566
2567 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2568 """Verifies and computes a node information map.
2569
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @param nimg: the node image object
2574 @param vg_name: the configured VG name
2575
2576 """
2577
2578 hv_info = nresult.get(constants.NV_HVINFO, None)
2579 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2580 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2581 "rpc call to node failed (hvinfo)")
2582 if not test:
2583 try:
2584 nimg.mfree = int(hv_info["memory_free"])
2585 except (ValueError, TypeError):
2586 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2587 "node returned invalid nodeinfo, check hypervisor")
2588
2589
2590 if vg_name is not None:
2591 test = (constants.NV_VGLIST not in nresult or
2592 vg_name not in nresult[constants.NV_VGLIST])
2593 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
2594 "node didn't return data for the volume group '%s'"
2595 " - it is either missing or broken", vg_name)
2596 if not test:
2597 try:
2598 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2599 except (ValueError, TypeError):
2600 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2601 "node returned invalid LVM info, check LVM status")
2602
2603 - def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
2604 """Gets per-disk status information for all instances.
2605
2606 @type node_uuids: list of strings
2607 @param node_uuids: Node UUIDs
2608 @type node_image: dict of (UUID, L{NodeImage})
2609 @param node_image: Node image objects
2610 @type instanceinfo: dict of (UUID, L{objects.Instance})
2611 @param instanceinfo: Instance objects
2612 @rtype: {instance: {node: [(success, payload)]}}
2613 @return: a dictionary of per-instance dictionaries with nodes as
2614 keys and disk information as values; the disk information is a
2615 list of tuples (success, payload)
2616
2617 """
2618 node_disks = {}
2619 node_disks_devonly = {}
2620 diskless_instances = set()
2621 nodisk_instances = set()
2622 diskless = constants.DT_DISKLESS
2623
2624 for nuuid in node_uuids:
2625 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
2626 node_image[nuuid].sinst))
2627 diskless_instances.update(uuid for uuid in node_inst_uuids
2628 if instanceinfo[uuid].disk_template == diskless)
2629 disks = [(inst_uuid, disk)
2630 for inst_uuid in node_inst_uuids
2631 for disk in instanceinfo[inst_uuid].disks]
2632
2633 if not disks:
2634 nodisk_instances.update(uuid for uuid in node_inst_uuids
2635 if instanceinfo[uuid].disk_template != diskless)
2636
2637 continue
2638
2639 node_disks[nuuid] = disks
2640
2641
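# annotate each disk with its effective parameters and set the
# node-specific disk IDs before querying mirror status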
2642 devonly = []
2643 for (inst_uuid, dev) in disks:
2644 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
2645 self.cfg)
2646 self.cfg.SetDiskID(anno_disk, nuuid)
2647 devonly.append(anno_disk)
2648
2649 node_disks_devonly[nuuid] = devonly
2650
2651 assert len(node_disks) == len(node_disks_devonly)
2652
2653
2654 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2655 node_disks_devonly)
2656
2657 assert len(result) == len(node_disks)
2658
2659 instdisk = {}
2660
2661 for (nuuid, nres) in result.items():
2662 node = self.cfg.GetNodeInfo(nuuid)
2663 disks = node_disks[node.uuid]
2664
2665 if nres.offline:
2666
2667 data = len(disks) * [(False, "node offline")]
2668 else:
2669 msg = nres.fail_msg
2670 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
2671 "while getting disk information: %s", msg)
2672 if msg:
2673
2674 data = len(disks) * [(False, msg)]
2675 else:
2676 data = []
2677 for idx, i in enumerate(nres.payload):
2678 if isinstance(i, (tuple, list)) and len(i) == 2:
2679 data.append(i)
2680 else:
2681 logging.warning("Invalid result from node %s, entry %d: %s",
2682 node.name, idx, i)
2683 data.append((False, "Invalid result from the remote node"))
2684
2685 for ((inst_uuid, _), status) in zip(disks, data):
2686 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
2687 .append(status)
2688
2689
2690 for inst_uuid in diskless_instances:
2691 assert inst_uuid not in instdisk
2692 instdisk[inst_uuid] = {}
2693
2694 for inst_uuid in nodisk_instances:
2695 assert inst_uuid not in instdisk
2696 instdisk[inst_uuid] = {}
2697
2698 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2699 len(nuuids) <= len(instanceinfo[inst].all_nodes) and
2700 compat.all(isinstance(s, (tuple, list)) and
2701 len(s) == 2 for s in statuses)
2702 for inst, nuuids in instdisk.items()
2703 for nuuid, statuses in nuuids.items())
2704 if __debug__:
2705 instdisk_keys = set(instdisk)
2706 instanceinfo_keys = set(instanceinfo)
2707 assert instdisk_keys == instanceinfo_keys, \
2708 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
2709 (instdisk_keys, instanceinfo_keys))
2710
2711 return instdisk
2712
2713 @staticmethod
2714 - def _SshNodeSelector(group_uuid, all_nodes):
2715 """Create endless iterators for all potential SSH check hosts.
2716
2717 """
2718 nodes = [node for node in all_nodes
2719 if (node.group != group_uuid and
2720 not node.offline)]
2721 keyfunc = operator.attrgetter("group")
2722
2723 return map(itertools.cycle,
2724 [sorted(map(operator.attrgetter("name"), names))
2725 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2726 keyfunc)])
2727
2728 @classmethod
2729 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2730 """Choose which nodes should talk to which other nodes.
2731
2732 We will make nodes contact all nodes in their group, and one node from
2733 every other group.
2734
2735 @warning: This algorithm has a known issue if one node group is much
2736 smaller than others (e.g. just one node). In such a case all other
2737 nodes will talk to the single node.
2738
2739 """
2740 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2741 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2742
2743 return (online_nodes,
2744 dict((name, sorted([i.next() for i in sel]))
2745 for name in online_nodes))
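# Illustrative example (not from the source): with node groups A = {a1, a2, a3}
# and B = {b1, b2}, verifying group A would return something like
#   (["a1", "a2", "a3"], {"a1": ["b1"], "a2": ["b2"], "a3": ["b1"]}),
# i.e. each online node of A is assigned one cycled pick from every other group.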
2746
2747 - def BuildHooksEnv(self):
2748 """Build hooks env.
2749
2750 Cluster-Verify hooks run only in the post phase; a hook failure is
2751 logged in the verify output and makes the verification fail.
2752
2753 """
2754 env = {
2755 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
2756 }
2757
2758 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2759 for node in self.my_node_info.values())
2760
2761 return env
2762
2763 - def BuildHooksNodes(self):
2764 """Build hooks nodes.
2765
2766 """
2767 return ([], list(self.my_node_info.keys()))
2768
2769 - def Exec(self, feedback_fn):
2770 """Verify integrity of the node group, performing various tests on nodes.
2771
2772 """
2773
2774 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2775
2776 if not self.my_node_uuids:
2777
2778 feedback_fn("* Empty node group, skipping verification")
2779 return True
2780
2781 self.bad = False
2782 verbose = self.op.verbose
2783 self._feedback_fn = feedback_fn
2784
2785 vg_name = self.cfg.GetVGName()
2786 drbd_helper = self.cfg.GetDRBDHelper()
2787 cluster = self.cfg.GetClusterInfo()
2788 hypervisors = cluster.enabled_hypervisors
2789 node_data_list = self.my_node_info.values()
2790
2791 i_non_redundant = []
2792 i_non_a_balanced = []
2793 i_offline = 0
2794 n_offline = 0
2795 n_drained = 0
2796 node_vol_should = {}
2797
2798
2799
2800
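# compute the lists of ancillary files whose presence and checksums will be
# verified across nodes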
2801 filemap = ComputeAncillaryFiles(cluster, False)
2802
2803
2804 master_node_uuid = self.master_node = self.cfg.GetMasterNode()
2805 master_ip = self.cfg.GetMasterIP()
2806
2807 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
2808
2809 user_scripts = []
2810 if self.cfg.GetUseExternalMipScript():
2811 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
2812
2813 node_verify_param = {
2814 constants.NV_FILELIST:
2815 map(vcluster.MakeVirtualPath,
2816 utils.UniqueSequence(filename
2817 for files in filemap
2818 for filename in files)),
2819 constants.NV_NODELIST:
2820 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2821 self.all_node_info.values()),
2822 constants.NV_HYPERVISOR: hypervisors,
2823 constants.NV_HVPARAMS:
2824 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2825 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2826 for node in node_data_list
2827 if not node.offline],
2828 constants.NV_INSTANCELIST: hypervisors,
2829 constants.NV_VERSION: None,
2830 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2831 constants.NV_NODESETUP: None,
2832 constants.NV_TIME: None,
2833 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
2834 constants.NV_OSLIST: None,
2835 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2836 constants.NV_USERSCRIPTS: user_scripts,
2837 }
2838
2839 if vg_name is not None:
2840 node_verify_param[constants.NV_VGLIST] = None
2841 node_verify_param[constants.NV_LVLIST] = vg_name
2842 node_verify_param[constants.NV_PVLIST] = [vg_name]
2843
2844 if drbd_helper:
2845 node_verify_param[constants.NV_DRBDVERSION] = None
2846 node_verify_param[constants.NV_DRBDLIST] = None
2847 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2848
2849 if cluster.IsFileStorageEnabled() or \
2850 cluster.IsSharedFileStorageEnabled():
2851
2852 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
2853 self.cfg.GetMasterNodeName()
2854 if cluster.IsFileStorageEnabled():
2855 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
2856 cluster.file_storage_dir
2857
2858
2859
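# collect every bridge referenced by the default NIC parameters or by any
# instance NIC, so the nodes can check that they exist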
2860 bridges = set()
2861 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2862 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2863 bridges.add(default_nicpp[constants.NIC_LINK])
2864 for instance in self.my_inst_info.values():
2865 for nic in instance.nics:
2866 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2867 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2868 bridges.add(full_nic[constants.NIC_LINK])
2869
2870 if bridges:
2871 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2872
2873
2874 node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
2875 uuid=node.uuid,
2876 vm_capable=node.vm_capable))
2877 for node in node_data_list)
2878
2879
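# gather the out-of-band helper paths configured for the nodes so they can
# be verified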
2880 oob_paths = []
2881 for node in self.all_node_info.values():
2882 path = SupportsOob(self.cfg, node)
2883 if path and path not in oob_paths:
2884 oob_paths.append(path)
2885
2886 if oob_paths:
2887 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2888
2889 for inst_uuid in self.my_inst_uuids:
2890 instance = self.my_inst_info[inst_uuid]
2891 if instance.admin_state == constants.ADMINST_OFFLINE:
2892 i_offline += 1
2893
2894 for nuuid in instance.all_nodes:
2895 if nuuid not in node_image:
2896 gnode = self.NodeImage(uuid=nuuid)
2897 gnode.ghost = (nuuid not in self.all_node_info)
2898 node_image[nuuid] = gnode
2899
2900 instance.MapLVsByNode(node_vol_should)
2901
2902 pnode = instance.primary_node
2903 node_image[pnode].pinst.append(instance.uuid)
2904
2905 for snode in instance.secondary_nodes:
2906 nimg = node_image[snode]
2907 nimg.sinst.append(instance.uuid)
2908 if pnode not in nimg.sbp:
2909 nimg.sbp[pnode] = []
2910 nimg.sbp[pnode].append(instance.uuid)
2911
2912 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
2913 self.my_node_info.keys())
2914
2915
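# remember whether exclusive storage is enabled anywhere in the group and,
# if so, request the additional physical volume checks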
2916 self._exclusive_storage = compat.any(es_flags.values())
2917 if self._exclusive_storage:
2918 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
2919
2920
2921
2922
2923
2924
2925
2926
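# issue the main node-verify RPC to all nodes of the group; the start and
# end timestamps are used later to check the nodes' clocks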
2927 nvinfo_starttime = time.time()
2928 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
2929 node_verify_param,
2930 self.cfg.GetClusterName(),
2931 self.cfg.GetClusterInfo().hvparams)
2932 nvinfo_endtime = time.time()
2933
2934 if self.extra_lv_nodes and vg_name is not None:
2935 extra_lv_nvinfo = \
2936 self.rpc.call_node_verify(self.extra_lv_nodes,
2937 {constants.NV_LVLIST: vg_name},
2938 self.cfg.GetClusterName(),
2939 self.cfg.GetClusterInfo().hvparams)
2940 else:
2941 extra_lv_nvinfo = {}
2942
2943 all_drbd_map = self.cfg.ComputeDRBDMap()
2944
2945 feedback_fn("* Gathering disk information (%s nodes)" %
2946 len(self.my_node_uuids))
2947 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
2948 self.my_inst_info)
2949
2950 feedback_fn("* Verifying configuration file consistency")
2951
2952
2953
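# the file checks also need data from nodes outside this group: the master
# node and at most one other vm-capable, online node are queried as
# additional references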
2954 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
2955 if absent_node_uuids:
2956 vf_nvinfo = all_nvinfo.copy()
2957 vf_node_info = list(self.my_node_info.values())
2958 additional_node_uuids = []
2959 if master_node_uuid not in self.my_node_info:
2960 additional_node_uuids.append(master_node_uuid)
2961 vf_node_info.append(self.all_node_info[master_node_uuid])
2962
2963
2964 for node_uuid in absent_node_uuids:
2965 nodeinfo = self.all_node_info[node_uuid]
2966 if (nodeinfo.vm_capable and not nodeinfo.offline and
2967 node_uuid != master_node_uuid):
2968 additional_node_uuids.append(node_uuid)
2969 vf_node_info.append(self.all_node_info[node_uuid])
2970 break
2971 key = constants.NV_FILELIST
2972 vf_nvinfo.update(self.rpc.call_node_verify(
2973 additional_node_uuids, {key: node_verify_param[key]},
2974 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams))
2975 else:
2976 vf_nvinfo = all_nvinfo
2977 vf_node_info = self.my_node_info.values()
2978
2979 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
2980
2981 feedback_fn("* Verifying node status")
2982
2983 refos_img = None
2984
2985 for node_i in node_data_list:
2986 nimg = node_image[node_i.uuid]
2987
2988 if node_i.offline:
2989 if verbose:
2990 feedback_fn("* Skipping offline node %s" % (node_i.name,))
2991 n_offline += 1
2992 continue
2993
2994 if node_i.uuid == master_node_uuid:
2995 ntype = "master"
2996 elif node_i.master_candidate:
2997 ntype = "master candidate"
2998 elif node_i.drained:
2999 ntype = "drained"
3000 n_drained += 1
3001 else:
3002 ntype = "regular"
3003 if verbose:
3004 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))
3005
3006 msg = all_nvinfo[node_i.uuid].fail_msg
3007 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
3008 "while contacting node: %s", msg)
3009 if msg:
3010 nimg.rpc_fail = True
3011 continue
3012
3013 nresult = all_nvinfo[node_i.uuid].payload
3014
3015 nimg.call_ok = self._VerifyNode(node_i, nresult)
3016 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3017 self._VerifyNodeNetwork(node_i, nresult)
3018 self._VerifyNodeUserScripts(node_i, nresult)
3019 self._VerifyOob(node_i, nresult)
3020 self._VerifyAcceptedFileStoragePaths(node_i, nresult,
3021 node_i.uuid == master_node_uuid)
3022 self._VerifyFileStoragePaths(node_i, nresult)
3023 self._VerifySharedFileStoragePaths(node_i, nresult)
3024
3025 if nimg.vm_capable:
3026 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3027 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3028 all_drbd_map)
3029
3030 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3031 self._UpdateNodeInstances(node_i, nresult, nimg)
3032 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3033 self._UpdateNodeOS(node_i, nresult, nimg)
3034
3035 if not nimg.os_fail:
3036 if refos_img is None:
3037 refos_img = nimg
3038 self._VerifyNodeOS(node_i, nimg, refos_img)
3039 self._VerifyNodeBridges(node_i, nresult, bridges)
3040
3041
3042
3043
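# instances reported as running on this node even though it is not their
# primary: they either belong on another node or are unknown to the
# configuration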
3044 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)
3045
3046 for inst_uuid in non_primary_inst_uuids:
3047 test = inst_uuid in self.all_inst_info
3048 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
3049 self.cfg.GetInstanceName(inst_uuid),
3050 "instance should not run on node %s", node_i.name)
3051 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3052 "node is running unknown instance %s", inst_uuid)
3053
3054 self._VerifyGroupDRBDVersion(all_nvinfo)
3055 self._VerifyGroupLVM(node_image, vg_name)
3056
3057 for node_uuid, result in extra_lv_nvinfo.items():
3058 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
3059 node_image[node_uuid], vg_name)
3060
3061 feedback_fn("* Verifying instance status")
3062 for inst_uuid in self.my_inst_uuids:
3063 instance = self.my_inst_info[inst_uuid]
3064 if verbose:
3065 feedback_fn("* Verifying instance %s" % instance.name)
3066 self._VerifyInstance(instance, node_image, instdisk[inst_uuid])
3067
3068
3069
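# collect non-redundant and non-auto-balanced instances for the summary
# notes printed at the end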
3070 if instance.disk_template not in constants.DTS_MIRRORED:
3071 i_non_redundant.append(instance)
3072
3073 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
3074 i_non_a_balanced.append(instance)
3075
3076 feedback_fn("* Verifying orphan volumes")
3077 reserved = utils.FieldSet(*cluster.reserved_lvs)
3078
3079
3080
3081
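# instances from other groups may keep LVs on this group's nodes via their
# secondaries; include those volumes so they are not reported as orphans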
3082 for instance in self.all_inst_info.values():
3083 for secondary in instance.secondary_nodes:
3084 if (secondary in self.my_node_info
3085 and instance.name not in self.my_inst_info):
3086 instance.MapLVsByNode(node_vol_should)
3087 break
3088
3089 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3090
3091 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3092 feedback_fn("* Verifying N+1 Memory redundancy")
3093 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3094
3095 feedback_fn("* Other Notes")
3096 if i_non_redundant:
3097 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3098 % len(i_non_redundant))
3099
3100 if i_non_a_balanced:
3101 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3102 % len(i_non_a_balanced))
3103
3104 if i_offline:
3105 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3106
3107 if n_offline:
3108 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3109
3110 if n_drained:
3111 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3112
3113 return not self.bad
3114
3115 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3116 """Analyze the post-hooks' result
3117
3118 This method analyses the hook result, handles it, and sends some
3119 nicely-formatted feedback back to the user.
3120
3121 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3122 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3123 @param hooks_results: the results of the multi-node hooks rpc call
3124 @param feedback_fn: function used to send feedback back to the caller
3125 @param lu_result: previous Exec result
3126 @return: the new Exec result, based on the previous result
3127 and hook results
3128
3129 """
3130
3131
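# nothing to do for an empty node group; otherwise only the post-phase hook
# results are analysed here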
3132 if not self.my_node_uuids:
3133
3134 pass
3135 elif phase == constants.HOOKS_PHASE_POST:
3136
3137 feedback_fn("* Hooks Results")
3138 assert hooks_results, "invalid result from hooks"
3139
3140 for node_name in hooks_results:
3141 res = hooks_results[node_name]
3142 msg = res.fail_msg
3143 test = msg and not res.offline
3144 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3145 "Communication failure in hooks execution: %s", msg)
3146 if res.offline or msg:
3147
3148
3149 continue
3150 for script, hkr, output in res.payload:
3151 test = hkr == constants.HKR_FAIL
3152 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3153 "Script %s failed, output:", script)
3154 if test:
3155 output = self._HOOKS_INDENT_RE.sub(" ", output)
3156 feedback_fn("%s" % output)
3157 lu_result = False
3158
3159 return lu_result
3160
3161
3162 -class LUClusterVerifyDisks(NoHooksLU):
3163 """Verifies the cluster disks status.
3164
3165 """
3166 REQ_BGL = False
3167
3173
3174 - def Exec(self, feedback_fn):
3180