22 """Logical units dealing with the cluster."""
23
24 import OpenSSL
25
26 import copy
27 import itertools
28 import logging
29 import operator
30 import os
31 import re
32 import time
33
34 from ganeti import compat
35 from ganeti import constants
36 from ganeti import errors
37 from ganeti import hypervisor
38 from ganeti import locking
39 from ganeti import masterd
40 from ganeti import netutils
41 from ganeti import objects
42 from ganeti import opcodes
43 from ganeti import pathutils
44 from ganeti import query
45 from ganeti import rpc
46 from ganeti import runtime
47 from ganeti import ssh
48 from ganeti import uidpool
49 from ganeti import utils
50 from ganeti import vcluster
51
52 from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
53 ResultWithJobs
54 from ganeti.cmdlib.common import ShareAll, RunPostHook, \
55 ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
56 GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
57 GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
58 CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
59 ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
60 CheckIpolicyVsDiskTemplates
61
62 import ganeti.masterd.instance
66 """Activate the master IP on the master node.
67
68 """
69 def Exec(self, feedback_fn):
78
81 """Deactivate the master IP on the master node.
82
83 """
84 def Exec(self, feedback_fn):
93
96 """Return configuration values.
97
98 """
99 REQ_BGL = False
100
102 self.cq = ClusterQuery(None, self.op.output_fields, False)
103
106
109
110 def Exec(self, feedback_fn):
111 result = self.cq.OldStyleQuery(self)
112
113 assert len(result) == 1
114
115 return result[0]
116
119 """Logical unit for destroying the cluster.
120
121 """
122 HPATH = "cluster-destroy"
123 HTYPE = constants.HTYPE_CLUSTER
124
126 """Build hooks env.
127
128 """
129 return {
130 "OP_TARGET": self.cfg.GetClusterName(),
131 }
132
134 """Build hooks nodes.
135
136 """
137 return ([], [])
138
140 """Check prerequisites.
141
142 This checks whether the cluster is empty.
143
144 Any errors are signaled by raising errors.OpPrereqError.
145
146 """
147 master = self.cfg.GetMasterNode()
148
149 nodelist = self.cfg.GetNodeList()
150 if len(nodelist) != 1 or nodelist[0] != master:
151 raise errors.OpPrereqError("There are still %d node(s) in"
152 " this cluster." % (len(nodelist) - 1),
153 errors.ECODE_INVAL)
154 instancelist = self.cfg.GetInstanceList()
155 if instancelist:
156 raise errors.OpPrereqError("There are still %d instance(s) in"
157 " this cluster." % len(instancelist),
158 errors.ECODE_INVAL)
159
160 def Exec(self, feedback_fn):
174
175
176 class LUClusterPostInit(LogicalUnit):
177 """Logical unit for running hooks after cluster initialization.
178
179 """
180 HPATH = "cluster-init"
181 HTYPE = constants.HTYPE_CLUSTER
182
183 def BuildHooksEnv(self):
184 """Build hooks env.
185
186 """
187 return {
188 "OP_TARGET": self.cfg.GetClusterName(),
189 }
190
191 def BuildHooksNodes(self):
192 """Build hooks nodes.
193
194 """
195 return ([], [self.cfg.GetMasterNode()])
196
197 def Exec(self, feedback_fn):
198 """Nothing to do.
199
200 """
201 return True
202
258
261 """Query cluster configuration.
262
263 """
264 REQ_BGL = False
265
267 self.needed_locks = {}
268
269 def Exec(self, feedback_fn):
270 """Return cluster config.
271
272 """
273 cluster = self.cfg.GetClusterInfo()
274 os_hvp = {}
275
276
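# Include per-OS hypervisor overrides only for hypervisors that are currently
# enabled on the cluster.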
277 for os_name, hv_dict in cluster.os_hvp.items():
278 os_hvp[os_name] = {}
279 for hv_name, hv_params in hv_dict.items():
280 if hv_name in cluster.enabled_hypervisors:
281 os_hvp[os_name][hv_name] = hv_params
282
283
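# Report the IP version (4 or 6) that corresponds to the configured primary
# address family.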
284 primary_ip_version = constants.IP4_VERSION
285 if cluster.primary_ip_family == netutils.IP6Address.family:
286 primary_ip_version = constants.IP6_VERSION
287
288 result = {
289 "software_version": constants.RELEASE_VERSION,
290 "protocol_version": constants.PROTOCOL_VERSION,
291 "config_version": constants.CONFIG_VERSION,
292 "os_api_version": max(constants.OS_API_VERSIONS),
293 "export_version": constants.EXPORT_VERSION,
294 "vcs_version": constants.VCS_VERSION,
295 "architecture": runtime.GetArchInfo(),
296 "name": cluster.cluster_name,
297 "master": self.cfg.GetMasterNodeName(),
298 "default_hypervisor": cluster.primary_hypervisor,
299 "enabled_hypervisors": cluster.enabled_hypervisors,
300 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
301 for hypervisor_name in cluster.enabled_hypervisors]),
302 "os_hvp": os_hvp,
303 "beparams": cluster.beparams,
304 "osparams": cluster.osparams,
305 "ipolicy": cluster.ipolicy,
306 "nicparams": cluster.nicparams,
307 "ndparams": cluster.ndparams,
308 "diskparams": cluster.diskparams,
309 "candidate_pool_size": cluster.candidate_pool_size,
310 "master_netdev": cluster.master_netdev,
311 "master_netmask": cluster.master_netmask,
312 "use_external_mip_script": cluster.use_external_mip_script,
313 "volume_group_name": cluster.volume_group_name,
314 "drbd_usermode_helper": cluster.drbd_usermode_helper,
315 "file_storage_dir": cluster.file_storage_dir,
316 "shared_file_storage_dir": cluster.shared_file_storage_dir,
317 "maintain_node_health": cluster.maintain_node_health,
318 "ctime": cluster.ctime,
319 "mtime": cluster.mtime,
320 "uuid": cluster.uuid,
321 "tags": list(cluster.GetTags()),
322 "uid_pool": cluster.uid_pool,
323 "default_iallocator": cluster.default_iallocator,
324 "reserved_lvs": cluster.reserved_lvs,
325 "primary_ip_version": primary_ip_version,
326 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
327 "hidden_os": cluster.hidden_os,
328 "blacklisted_os": cluster.blacklisted_os,
329 "enabled_disk_templates": cluster.enabled_disk_templates,
330 }
331
332 return result
333
336 """Force the redistribution of cluster configuration.
337
338 This is a very simple LU.
339
340 """
341 REQ_BGL = False
342
349
350 def Exec(self, feedback_fn):
356
359 """Rename the cluster.
360
361 """
362 HPATH = "cluster-rename"
363 HTYPE = constants.HTYPE_CLUSTER
364
366 """Build hooks env.
367
368 """
369 return {
370 "OP_TARGET": self.cfg.GetClusterName(),
371 "NEW_NAME": self.op.name,
372 }
373
379
402
403 def Exec(self, feedback_fn):
404 """Rename the cluster.
405
406 """
407 clustername = self.op.name
408 new_ip = self.ip
409
410
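# The master IP is taken down while the name/IP change is applied; the
# "finally" clause below brings it up again even if the update fails.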
411 master_params = self.cfg.GetMasterNetworkParameters()
412 ems = self.cfg.GetUseExternalMipScript()
413 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
414 master_params, ems)
415 result.Raise("Could not disable the master role")
416
417 try:
418 cluster = self.cfg.GetClusterInfo()
419 cluster.cluster_name = clustername
420 cluster.master_ip = new_ip
421 self.cfg.Update(cluster, feedback_fn)
422
423
424 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
425 node_list = self.cfg.GetOnlineNodeList()
426 try:
427 node_list.remove(master_params.uuid)
428 except ValueError:
429 pass
430 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
431 finally:
432 master_params.ip = new_ip
433 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
434 master_params, ems)
435 result.Warn("Could not re-enable the master role on the master,"
436 " please restart manually", self.LogWarning)
437
438 return clustername
439
442 """Verifies the cluster disks sizes.
443
444 """
445 REQ_BGL = False
446
448 if self.op.instances:
449 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
450
451
452 self.needed_locks = {
453 locking.LEVEL_NODE_RES: [],
454 locking.LEVEL_INSTANCE: self.wanted_names,
455 }
456 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
457 else:
458 self.wanted_names = None
459 self.needed_locks = {
460 locking.LEVEL_NODE_RES: locking.ALL_SET,
461 locking.LEVEL_INSTANCE: locking.ALL_SET,
462
463
464 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
465 }
466
467 self.share_locks = {
468 locking.LEVEL_NODE_RES: 1,
469 locking.LEVEL_INSTANCE: 0,
470 locking.LEVEL_NODE_ALLOC: 1,
471 }
472
474 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
475 self._LockInstancesNodes(primary_only=True, level=level)
476
478 """Check prerequisites.
479
480 This only checks the optional instance list against the existing names.
481
482 """
483 if self.wanted_names is None:
484 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
485
486 self.wanted_instances = \
487 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
488
490 """Ensure children of the disk have the needed disk size.
491
492 This is valid mainly for DRBD8 and fixes an issue where the
493 children have smaller disk size.
494
495 @param disk: an L{ganeti.objects.Disk} object
496
497 """
498 if disk.dev_type == constants.DT_DRBD8:
499 assert disk.children, "Empty children for DRBD8?"
500 fchild = disk.children[0]
501 mismatch = fchild.size < disk.size
502 if mismatch:
503 self.LogInfo("Child disk has size %d, parent %d, fixing",
504 fchild.size, disk.size)
505 fchild.size = disk.size
506
507
508 return self._EnsureChildSizes(fchild) or mismatch
509 else:
510 return False
511
512 def Exec(self, feedback_fn):
513 """Verify the size of cluster disks.
514
515 """
516
517
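# Group the disks of the wanted instances by primary node, so that each node
# is queried only once.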
518 per_node_disks = {}
519 for instance in self.wanted_instances:
520 pnode = instance.primary_node
521 if pnode not in per_node_disks:
522 per_node_disks[pnode] = []
523 for idx, disk in enumerate(instance.disks):
524 per_node_disks[pnode].append((instance, idx, disk))
525
526 assert not (frozenset(per_node_disks.keys()) -
527 self.owned_locks(locking.LEVEL_NODE_RES)), \
528 "Not owning correct locks"
529 assert not self.owned_locks(locking.LEVEL_NODE)
530
531 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
532 per_node_disks.keys())
533
534 changed = []
535 for node_uuid, dskl in per_node_disks.items():
536 newl = [v[2].Copy() for v in dskl]
537 for dsk in newl:
538 self.cfg.SetDiskID(dsk, node_uuid)
539 node_name = self.cfg.GetNodeName(node_uuid)
540 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
541 if result.fail_msg:
542 self.LogWarning("Failure in blockdev_getdimensions call to node"
543 " %s, ignoring", node_name)
544 continue
545 if len(result.payload) != len(dskl):
546 logging.warning("Invalid result from node %s: len(dksl)=%d,"
547 " result.payload=%s", node_name, len(dskl),
548 result.payload)
549 self.LogWarning("Invalid result from node %s, ignoring node results",
550 node_name)
551 continue
552 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
553 if dimensions is None:
554 self.LogWarning("Disk %d of instance %s did not return size"
555 " information, ignoring", idx, instance.name)
556 continue
557 if not isinstance(dimensions, (tuple, list)):
558 self.LogWarning("Disk %d of instance %s did not return valid"
559 " dimension information, ignoring", idx,
560 instance.name)
561 continue
562 (size, spindles) = dimensions
563 if not isinstance(size, (int, long)):
564 self.LogWarning("Disk %d of instance %s did not return valid"
565 " size information, ignoring", idx, instance.name)
566 continue
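# The node reports the size in bytes; ">> 20" converts it to MiB, the unit
# used for disk.size in the configuration.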
567 size = size >> 20
568 if size != disk.size:
569 self.LogInfo("Disk %d of instance %s has mismatched size,"
570 " correcting: recorded %d, actual %d", idx,
571 instance.name, disk.size, size)
572 disk.size = size
573 self.cfg.Update(instance, feedback_fn)
574 changed.append((instance.name, idx, "size", size))
575 if es_flags[node_uuid]:
576 if spindles is None:
577 self.LogWarning("Disk %d of instance %s did not return valid"
578 " spindles information, ignoring", idx,
579 instance.name)
580 elif disk.spindles is None or disk.spindles != spindles:
581 self.LogInfo("Disk %d of instance %s has mismatched spindles,"
582 " correcting: recorded %s, actual %s",
583 idx, instance.name, disk.spindles, spindles)
584 disk.spindles = spindles
585 self.cfg.Update(instance, feedback_fn)
586 changed.append((instance.name, idx, "spindles", disk.spindles))
587 if self._EnsureChildSizes(disk):
588 self.cfg.Update(instance, feedback_fn)
589 changed.append((instance.name, idx, "size", disk.size))
590 return changed
591
612
617 """Checks whether the given file-based storage directory is acceptable.
618
619 Note: This function is public, because it is also used in bootstrap.py.
620
621 @type logging_warn_fn: function
622 @param logging_warn_fn: function which accepts a string and logs it
623 @type file_storage_dir: string
624 @param file_storage_dir: the directory to be used for file-based instances
625 @type enabled_disk_templates: list of string
626 @param enabled_disk_templates: the list of enabled disk templates
627 @type file_disk_template: string
628 @param file_disk_template: the file-based disk template for which the
629 path should be checked
630
631 """
632 assert (file_disk_template in
633 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
634 file_storage_enabled = file_disk_template in enabled_disk_templates
635 if file_storage_dir is not None:
636 if file_storage_dir == "":
637 if file_storage_enabled:
638 raise errors.OpPrereqError(
639 "Unsetting the '%s' storage directory while having '%s' storage"
640 " enabled is not permitted." %
641 (file_disk_template, file_disk_template))
642 else:
643 if not file_storage_enabled:
644 logging_warn_fn(
645 "Specified a %s storage directory, although %s storage is not"
646 " enabled." % (file_disk_template, file_disk_template))
647 else:
648 raise errors.ProgrammerError("Received %s storage dir with value"
649 " 'None'." % file_disk_template)
650
662
674
677 """Change the parameters of the cluster.
678
679 """
680 HPATH = "cluster-modify"
681 HTYPE = constants.HTYPE_CLUSTER
682 REQ_BGL = False
683
708
721
723 """Build hooks env.
724
725 """
726 return {
727 "OP_TARGET": self.cfg.GetClusterName(),
728 "NEW_VG_NAME": self.op.vg_name,
729 }
730
732 """Build hooks nodes.
733
734 """
735 mn = self.cfg.GetMasterNode()
736 return ([mn], [mn])
737
738 def _CheckVgName(self, node_uuids, enabled_disk_templates,
739 new_enabled_disk_templates):
740 """Check the consistency of the vg name on all nodes and in case it gets
741 unset whether there are instances still using it.
742
743 """
744 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates)
745 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates,
746 new_enabled_disk_templates)
747 current_vg_name = self.cfg.GetVGName()
748
749 if self.op.vg_name == '':
750 if lvm_is_enabled:
751 raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
752 " disk templates are or get enabled.")
753
754 if self.op.vg_name is None:
755 if current_vg_name is None and lvm_is_enabled:
756 raise errors.OpPrereqError("Please specify a volume group when"
757 " enabling lvm-based disk-templates.")
758
759 if self.op.vg_name is not None and not self.op.vg_name:
760 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN):
761 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
762 " instances exist", errors.ECODE_INVAL)
763
764 if (self.op.vg_name is not None and lvm_is_enabled) or \
765 (self.cfg.GetVGName() is not None and lvm_gets_enabled):
766 self._CheckVgNameOnNodes(node_uuids)
767
788
789 @staticmethod
792 """Determines the enabled disk templates and the subset of disk templates
793 that are newly enabled by this operation.
794
795 """
796 enabled_disk_templates = None
797 new_enabled_disk_templates = []
798 if op_enabled_disk_templates:
799 enabled_disk_templates = op_enabled_disk_templates
800 new_enabled_disk_templates = \
801 list(set(enabled_disk_templates)
802 - set(old_enabled_disk_templates))
803 else:
804 enabled_disk_templates = old_enabled_disk_templates
805 return (enabled_disk_templates, new_enabled_disk_templates)
806
808 """Determines the enabled disk templates and the subset of disk templates
809 that are newly enabled by this operation.
810
811 """
812 return self._GetEnabledDiskTemplatesInner(self.op.enabled_disk_templates,
813 cluster.enabled_disk_templates)
814
816 """Checks the ipolicy.
817
818 @type cluster: C{objects.Cluster}
819 @param cluster: the cluster's configuration
820 @type enabled_disk_templates: list of string
821 @param enabled_disk_templates: list of (possibly newly) enabled disk
822 templates
823
824 """
825
826 if self.op.ipolicy:
827 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
828 group_policy=False)
829
830 CheckIpolicyVsDiskTemplates(self.new_ipolicy,
831 enabled_disk_templates)
832
833 all_instances = self.cfg.GetAllInstancesInfo().values()
834 violations = set()
835 for group in self.cfg.GetAllNodeGroupsInfo().values():
836 instances = frozenset([inst for inst in all_instances
837 if compat.any(nuuid in group.members
838 for nuuid in inst.all_nodes)])
839 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
840 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
841 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
842 self.cfg)
843 if new:
844 violations.update(new)
845
846 if violations:
847 self.LogWarning("After the ipolicy change the following instances"
848 " violate them: %s",
849 utils.CommaJoin(utils.NiceSort(violations)))
850 else:
851 CheckIpolicyVsDiskTemplates(cluster.ipolicy,
852 enabled_disk_templates)
853
855 """Check prerequisites.
856
857 This checks whether the given params don't conflict and
858 if the given volume group is valid.
859
860 """
861 if self.op.drbd_helper is not None and not self.op.drbd_helper:
862 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8):
863 raise errors.OpPrereqError("Cannot disable drbd helper while"
864 " drbd-based instances exist",
865 errors.ECODE_INVAL)
866
867 node_uuids = self.owned_locks(locking.LEVEL_NODE)
868 self.cluster = cluster = self.cfg.GetClusterInfo()
869
870 vm_capable_node_uuids = [node.uuid
871 for node in self.cfg.GetAllNodesInfo().values()
872 if node.uuid in node_uuids and node.vm_capable]
873
874 (enabled_disk_templates, new_enabled_disk_templates) = \
875 self._GetEnabledDiskTemplates(cluster)
876
877 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
878 new_enabled_disk_templates)
879
880 if self.op.file_storage_dir is not None:
881 CheckFileStoragePathVsEnabledDiskTemplates(
882 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates)
883
884 if self.op.shared_file_storage_dir is not None:
885 CheckSharedFileStoragePathVsEnabledDiskTemplates(
886 self.LogWarning, self.op.shared_file_storage_dir,
887 enabled_disk_templates)
888
889 if self.op.drbd_helper:
890
891 helpers = self.rpc.call_drbd_helper(node_uuids)
892 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
893 if ninfo.offline:
894 self.LogInfo("Not checking drbd helper on offline node %s",
895 ninfo.name)
896 continue
897 msg = helpers[ninfo.uuid].fail_msg
898 if msg:
899 raise errors.OpPrereqError("Error checking drbd helper on node"
900 " '%s': %s" % (ninfo.name, msg),
901 errors.ECODE_ENVIRON)
902 node_helper = helpers[ninfo.uuid].payload
903 if node_helper != self.op.drbd_helper:
904 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
905 (ninfo.name, node_helper),
906 errors.ECODE_ENVIRON)
907
908
909 if self.op.beparams:
910 objects.UpgradeBeParams(self.op.beparams)
911 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
912 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
913
914 if self.op.ndparams:
915 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
916 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
917
918
919
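# An empty string for oob_program resets it to the cluster-wide default.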
920 if self.new_ndparams["oob_program"] == "":
921 self.new_ndparams["oob_program"] = \
922 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
923
924 if self.op.hv_state:
925 new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
926 self.cluster.hv_state_static)
927 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
928 for hv, values in new_hv_state.items())
929
930 if self.op.disk_state:
931 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
932 self.cluster.disk_state_static)
933 self.new_disk_state = \
934 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
935 for name, values in svalues.items()))
936 for storage, svalues in new_disk_state.items())
937
938 self._CheckIpolicy(cluster, enabled_disk_templates)
939
940 if self.op.nicparams:
941 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
942 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
943 objects.NIC.CheckParameterSyntax(self.new_nicparams)
944 nic_errors = []
945
946
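# Check that every existing NIC still validates against the new cluster-level
# NIC parameters.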
947 for instance in self.cfg.GetAllInstancesInfo().values():
948 for nic_idx, nic in enumerate(instance.nics):
949 params_copy = copy.deepcopy(nic.nicparams)
950 params_filled = objects.FillDict(self.new_nicparams, params_copy)
951
952
953 try:
954 objects.NIC.CheckParameterSyntax(params_filled)
955 except errors.ConfigurationError, err:
956 nic_errors.append("Instance %s, nic/%d: %s" %
957 (instance.name, nic_idx, err))
958
959
960 target_mode = params_filled[constants.NIC_MODE]
961 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
962 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
963 " address" % (instance.name, nic_idx))
964 if nic_errors:
965 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
966 "\n".join(nic_errors), errors.ECODE_INVAL)
967
968
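# Start from a copy of the current cluster hvparams and layer the requested
# changes on top.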
969 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
970 if self.op.hvparams:
971 for hv_name, hv_dict in self.op.hvparams.items():
972 if hv_name not in self.new_hvparams:
973 self.new_hvparams[hv_name] = hv_dict
974 else:
975 self.new_hvparams[hv_name].update(hv_dict)
976
977
978 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
979 if self.op.diskparams:
980 for dt_name, dt_params in self.op.diskparams.items():
981 if dt_name not in self.new_diskparams:
982 self.new_diskparams[dt_name] = dt_params
983 else:
984 self.new_diskparams[dt_name].update(dt_params)
985
986
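# Same approach for the per-OS hypervisor parameter overrides.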
987 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
988 if self.op.os_hvp:
989 for os_name, hvs in self.op.os_hvp.items():
990 if os_name not in self.new_os_hvp:
991 self.new_os_hvp[os_name] = hvs
992 else:
993 for hv_name, hv_dict in hvs.items():
994 if hv_dict is None:
995
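# A value of None asks for the existing per-OS override of this hypervisor
# to be removed.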
996 self.new_os_hvp[os_name].pop(hv_name, None)
997 elif hv_name not in self.new_os_hvp[os_name]:
998 self.new_os_hvp[os_name][hv_name] = hv_dict
999 else:
1000 self.new_os_hvp[os_name][hv_name].update(hv_dict)
1001
1002
1003 self.new_osp = objects.FillDict(cluster.osparams, {})
1004 if self.op.osparams:
1005 for os_name, osp in self.op.osparams.items():
1006 if os_name not in self.new_osp:
1007 self.new_osp[os_name] = {}
1008
1009 self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp,
1010 use_none=True)
1011
1012 if not self.new_osp[os_name]:
1013
1014 del self.new_osp[os_name]
1015 else:
1016
1017 CheckOSParams(self, False, [self.cfg.GetMasterNode()],
1018 os_name, self.new_osp[os_name])
1019
1020
1021 if self.op.enabled_hypervisors is not None:
1022 self.hv_list = self.op.enabled_hypervisors
1023 for hv in self.hv_list:
1024
1025
1026
1027
1028
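# Make sure every enabled hypervisor has a parameter dictionary, filled up
# with the built-in defaults.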
1029 if hv not in new_hvp:
1030 new_hvp[hv] = {}
1031 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
1032 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
1033 else:
1034 self.hv_list = cluster.enabled_hypervisors
1035
1036 if self.op.hvparams or self.op.enabled_hypervisors is not None:
1037
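# Re-verify syntax and validity of all hypervisor parameters affected by
# this change.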
1038 for hv_name, hv_params in self.new_hvparams.items():
1039 if ((self.op.hvparams and hv_name in self.op.hvparams) or
1040 (self.op.enabled_hypervisors and
1041 hv_name in self.op.enabled_hypervisors)):
1042
1043 hv_class = hypervisor.GetHypervisorClass(hv_name)
1044 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1045 hv_class.CheckParameterSyntax(hv_params)
1046 CheckHVParams(self, node_uuids, hv_name, hv_params)
1047
1048 self._CheckDiskTemplateConsistency()
1049
1050 if self.op.os_hvp:
1051
1052
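# Also verify the per-OS overrides, layered on top of the new cluster-level
# defaults.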
1053 for os_name, os_hvp in self.new_os_hvp.items():
1054 for hv_name, hv_params in os_hvp.items():
1055 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1056
1057 cluster_defaults = self.new_hvparams.get(hv_name, {})
1058 new_osp = objects.FillDict(cluster_defaults, hv_params)
1059 hv_class = hypervisor.GetHypervisorClass(hv_name)
1060 hv_class.CheckParameterSyntax(new_osp)
1061 CheckHVParams(self, node_uuids, hv_name, new_osp)
1062
1063 if self.op.default_iallocator:
1064 alloc_script = utils.FindFile(self.op.default_iallocator,
1065 constants.IALLOCATOR_SEARCH_PATH,
1066 os.path.isfile)
1067 if alloc_script is None:
1068 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
1069 " specified" % self.op.default_iallocator,
1070 errors.ECODE_INVAL)
1071
1073 """Check whether the disk templates that are going to be disabled
1074 are still in use by some instances.
1075
1076 """
1077 if self.op.enabled_disk_templates:
1078 cluster = self.cfg.GetClusterInfo()
1079 instances = self.cfg.GetAllInstancesInfo()
1080
1081 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
1082 - set(self.op.enabled_disk_templates)
1083 for instance in instances.itervalues():
1084 if instance.disk_template in disk_templates_to_remove:
1085 raise errors.OpPrereqError("Cannot disable disk template '%s',"
1086 " because instance '%s' is using it." %
1087 (instance.disk_template, instance.name))
1088
1090 """Determines and sets the new volume group name.
1091
1092 """
1093 if self.op.vg_name is not None:
1094 new_volume = self.op.vg_name
1095 if not new_volume:
1096 new_volume = None
1097 if new_volume != self.cfg.GetVGName():
1098 self.cfg.SetVGName(new_volume)
1099 else:
1100 feedback_fn("Cluster LVM configuration already in desired"
1101 " state, not changing")
1102
1104 """Set the file storage directory.
1105
1106 """
1107 if self.op.file_storage_dir is not None:
1108 if self.cluster.file_storage_dir == self.op.file_storage_dir:
1109 feedback_fn("Global file storage dir already set to value '%s'"
1110 % self.cluster.file_storage_dir)
1111 else:
1112 self.cluster.file_storage_dir = self.op.file_storage_dir
1113
1114 def Exec(self, feedback_fn):
1115 """Change the parameters of the cluster.
1116
1117 """
1118 if self.op.enabled_disk_templates:
1119 self.cluster.enabled_disk_templates = \
1120 list(set(self.op.enabled_disk_templates))
1121
1122 self._SetVgName(feedback_fn)
1123 self._SetFileStorageDir(feedback_fn)
1124
1125 if self.op.drbd_helper is not None:
1126 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
1127 feedback_fn("Note that you specified a drbd user helper, but did"
1128 " enabled the drbd disk template.")
1129 new_helper = self.op.drbd_helper
1130 if not new_helper:
1131 new_helper = None
1132 if new_helper != self.cfg.GetDRBDHelper():
1133 self.cfg.SetDRBDHelper(new_helper)
1134 else:
1135 feedback_fn("Cluster DRBD helper already in desired state,"
1136 " not changing")
1137 if self.op.hvparams:
1138 self.cluster.hvparams = self.new_hvparams
1139 if self.op.os_hvp:
1140 self.cluster.os_hvp = self.new_os_hvp
1141 if self.op.enabled_hypervisors is not None:
1142 self.cluster.hvparams = self.new_hvparams
1143 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1144 if self.op.beparams:
1145 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1146 if self.op.nicparams:
1147 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1148 if self.op.ipolicy:
1149 self.cluster.ipolicy = self.new_ipolicy
1150 if self.op.osparams:
1151 self.cluster.osparams = self.new_osp
1152 if self.op.ndparams:
1153 self.cluster.ndparams = self.new_ndparams
1154 if self.op.diskparams:
1155 self.cluster.diskparams = self.new_diskparams
1156 if self.op.hv_state:
1157 self.cluster.hv_state_static = self.new_hv_state
1158 if self.op.disk_state:
1159 self.cluster.disk_state_static = self.new_disk_state
1160
1161 if self.op.candidate_pool_size is not None:
1162 self.cluster.candidate_pool_size = self.op.candidate_pool_size
1163
1164 AdjustCandidatePool(self, [])
1165
1166 if self.op.maintain_node_health is not None:
1167 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
1168 feedback_fn("Note: CONFD was disabled at build time, node health"
1169 " maintenance is not useful (still enabling it)")
1170 self.cluster.maintain_node_health = self.op.maintain_node_health
1171
1172 if self.op.modify_etc_hosts is not None:
1173 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts
1174
1175 if self.op.prealloc_wipe_disks is not None:
1176 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
1177
1178 if self.op.add_uids is not None:
1179 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
1180
1181 if self.op.remove_uids is not None:
1182 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
1183
1184 if self.op.uid_pool is not None:
1185 self.cluster.uid_pool = self.op.uid_pool
1186
1187 if self.op.default_iallocator is not None:
1188 self.cluster.default_iallocator = self.op.default_iallocator
1189
1190 if self.op.reserved_lvs is not None:
1191 self.cluster.reserved_lvs = self.op.reserved_lvs
1192
1193 if self.op.use_external_mip_script is not None:
1194 self.cluster.use_external_mip_script = self.op.use_external_mip_script
1195
1196 def helper_os(aname, mods, desc):
1197 desc += " OS list"
1198 lst = getattr(self.cluster, aname)
1199 for key, val in mods:
1200 if key == constants.DDM_ADD:
1201 if val in lst:
1202 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
1203 else:
1204 lst.append(val)
1205 elif key == constants.DDM_REMOVE:
1206 if val in lst:
1207 lst.remove(val)
1208 else:
1209 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
1210 else:
1211 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1212
1213 if self.op.hidden_os:
1214 helper_os("hidden_os", self.op.hidden_os, "hidden")
1215
1216 if self.op.blacklisted_os:
1217 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
1218
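# Changing the master netdev means the master IP has to be shut down on the
# old device first; it is started again on the new device further below,
# after the config update.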
1219 if self.op.master_netdev:
1220 master_params = self.cfg.GetMasterNetworkParameters()
1221 ems = self.cfg.GetUseExternalMipScript()
1222 feedback_fn("Shutting down master ip on the current netdev (%s)" %
1223 self.cluster.master_netdev)
1224 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
1225 master_params, ems)
1226 if not self.op.force:
1227 result.Raise("Could not disable the master ip")
1228 else:
1229 if result.fail_msg:
1230 msg = ("Could not disable the master ip (continuing anyway): %s" %
1231 result.fail_msg)
1232 feedback_fn(msg)
1233 feedback_fn("Changing master_netdev from %s to %s" %
1234 (master_params.netdev, self.op.master_netdev))
1235 self.cluster.master_netdev = self.op.master_netdev
1236
1237 if self.op.master_netmask:
1238 master_params = self.cfg.GetMasterNetworkParameters()
1239 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
1240 result = self.rpc.call_node_change_master_netmask(
1241 master_params.uuid, master_params.netmask,
1242 self.op.master_netmask, master_params.ip,
1243 master_params.netdev)
1244 result.Warn("Could not change the master IP netmask", feedback_fn)
1245 self.cluster.master_netmask = self.op.master_netmask
1246
1247 self.cfg.Update(self.cluster, feedback_fn)
1248
1249 if self.op.master_netdev:
1250 master_params = self.cfg.GetMasterNetworkParameters()
1251 feedback_fn("Starting the master ip on the new master netdev (%s)" %
1252 self.op.master_netdev)
1253 ems = self.cfg.GetUseExternalMipScript()
1254 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
1255 master_params, ems)
1256 result.Warn("Could not re-enable the master ip on the master,"
1257 " please restart manually", self.LogWarning)
1258
1261 """Submits all jobs necessary to verify the cluster.
1262
1263 """
1264 REQ_BGL = False
1265
1267 self.needed_locks = {}
1268
1269 def Exec(self, feedback_fn):
1270 jobs = []
1271
1272 if self.op.group_name:
1273 groups = [self.op.group_name]
1274 depends_fn = lambda: None
1275 else:
1276 groups = self.cfg.GetNodeGroupList()
1277
1278
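# Verify the cluster-wide configuration first; the per-group jobs created
# below depend on this job.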
1279 jobs.append([
1280 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1281 ])
1282
1283
1284 depends_fn = lambda: [(-len(jobs), [])]
1285
1286 jobs.extend(
1287 [opcodes.OpClusterVerifyGroup(group_name=group,
1288 ignore_errors=self.op.ignore_errors,
1289 depends=depends_fn())]
1290 for group in groups)
1291
1292
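# Propagate the common verification options to every generated opcode.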
1293 for op in itertools.chain(*jobs):
1294 op.debug_simulate_errors = self.op.debug_simulate_errors
1295 op.verbose = self.op.verbose
1296 op.error_codes = self.op.error_codes
1297 try:
1298 op.skip_checks = self.op.skip_checks
1299 except AttributeError:
1300 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1301
1302 return ResultWithJobs(jobs)
1303
1306 """Mix-in for cluster/group verify LUs.
1307
1308 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1309 self.op and self._feedback_fn to be available.)
1310
1311 """
1312
1313 ETYPE_FIELD = "code"
1314 ETYPE_ERROR = "ERROR"
1315 ETYPE_WARNING = "WARNING"
1316
1317 def _Error(self, ecode, item, msg, *args, **kwargs):
1318 """Format an error message.
1319
1320 Based on the opcode's error_codes parameter, either format a
1321 parseable error code, or a simpler error string.
1322
1323 This must be called only from Exec and functions called from Exec.
1324
1325 """
1326 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1327 itype, etxt, _ = ecode
1328
1329
1330 if etxt in self.op.ignore_errors:
1331 ltype = self.ETYPE_WARNING
1332
1333 if args:
1334 msg = msg % args
1335
1336 if self.op.error_codes:
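# Machine-parseable format: "<severity>:<error-code>:<item-type>:<item>:<message>"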
1337 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1338 else:
1339 if item:
1340 item = " " + item
1341 else:
1342 item = ""
1343 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1344
1345 self._feedback_fn(" - %s" % msg)
1346
1347 if ltype == self.ETYPE_ERROR:
1348 self.bad = True
1349
1350 def _ErrorIf(self, cond, *args, **kwargs):
1351 """Log an error message if the passed condition is True.
1352
1353 """
1354 if (bool(cond)
1355 or self.op.debug_simulate_errors):
1356 self._Error(*args, **kwargs)
1357
1360 """Verifies a certificate for L{LUClusterVerifyConfig}.
1361
1362 @type filename: string
1363 @param filename: Path to PEM file
1364
1365 """
1366 try:
1367 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1368 utils.ReadFile(filename))
1369 except Exception, err:
1370 return (LUClusterVerifyConfig.ETYPE_ERROR,
1371 "Failed to load X509 certificate %s: %s" % (filename, err))
1372
1373 (errcode, msg) = \
1374 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1375 constants.SSL_CERT_EXPIRATION_ERROR)
1376
1377 if msg:
1378 fnamemsg = "While verifying %s: %s" % (filename, msg)
1379 else:
1380 fnamemsg = None
1381
1382 if errcode is None:
1383 return (None, fnamemsg)
1384 elif errcode == utils.CERT_WARNING:
1385 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1386 elif errcode == utils.CERT_ERROR:
1387 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1388
1389 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1390
1393 """Compute the set of all hypervisor parameters.
1394
1395 @type cluster: L{objects.Cluster}
1396 @param cluster: the cluster object
1397 @type instances: list of L{objects.Instance}
1398 @param instances: additional instances from which to obtain parameters
1399 @rtype: list of (origin, hypervisor, parameters)
1400 @return: a list with all parameters found, indicating the hypervisor they
1401 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1402
1403 """
1404 hvp_data = []
1405
1406 for hv_name in cluster.enabled_hypervisors:
1407 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1408
1409 for os_name, os_hvp in cluster.os_hvp.items():
1410 for hv_name, hv_params in os_hvp.items():
1411 if hv_params:
1412 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1413 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1414
1415
1416 for instance in instances:
1417 if instance.hvparams:
1418 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1419 cluster.FillHV(instance)))
1420
1421 return hvp_data
1422
1425 """Verifies the cluster config.
1426
1427 """
1428 REQ_BGL = False
1429
1443
1447
1456
1457 def Exec(self, feedback_fn):
1458 """Verify integrity of cluster, performing various test on nodes.
1459
1460 """
1461 self.bad = False
1462 self._feedback_fn = feedback_fn
1463
1464 feedback_fn("* Verifying cluster config")
1465
1466 for msg in self.cfg.VerifyConfig():
1467 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1468
1469 feedback_fn("* Verifying cluster certificate files")
1470
1471 for cert_filename in pathutils.ALL_CERT_FILES:
1472 (errcode, msg) = _VerifyCertificate(cert_filename)
1473 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1474
1475 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
1476 pathutils.NODED_CERT_FILE),
1477 constants.CV_ECLUSTERCERT,
1478 None,
1479 pathutils.NODED_CERT_FILE + " must be accessible by the " +
1480 constants.LUXID_USER + " user")
1481
1482 feedback_fn("* Verifying hypervisor parameters")
1483
1484 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1485 self.all_inst_info.values()))
1486
1487 feedback_fn("* Verifying all nodes belong to an existing group")
1488
1489
1490
1491
1492
1493 dangling_nodes = set(node for node in self.all_node_info.values()
1494 if node.group not in self.all_group_info)
1495
1496 dangling_instances = {}
1497 no_node_instances = []
1498
1499 for inst in self.all_inst_info.values():
1500 if inst.primary_node in [node.uuid for node in dangling_nodes]:
1501 dangling_instances.setdefault(inst.primary_node, []).append(inst)
1502 elif inst.primary_node not in self.all_node_info:
1503 no_node_instances.append(inst)
1504
1505 pretty_dangling = [
1506 "%s (%s)" %
1507 (node.name,
1508 utils.CommaJoin(inst.name for
1509 inst in dangling_instances.get(node.uuid, [])))
1510 for node in dangling_nodes]
1511
1512 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1513 None,
1514 "the following nodes (and their instances) belong to a non"
1515 " existing group: %s", utils.CommaJoin(pretty_dangling))
1516
1517 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1518 None,
1519 "the following instances have a non-existing primary-node:"
1520 " %s", utils.CommaJoin(inst.name for
1521 inst in no_node_instances))
1522
1523 return not self.bad
1524
1527 """Verifies the status of a node group.
1528
1529 """
1530 HPATH = "cluster-verify"
1531 HTYPE = constants.HTYPE_CLUSTER
1532 REQ_BGL = False
1533
1534 _HOOKS_INDENT_RE = re.compile("^", re.M)
1535
1537 """A class representing the logical and physical status of a node.
1538
1539 @type uuid: string
1540 @ivar uuid: the node UUID to which this object refers
1541 @ivar volumes: a structure as returned from
1542 L{ganeti.backend.GetVolumeList} (runtime)
1543 @ivar instances: a list of running instances (runtime)
1544 @ivar pinst: list of configured primary instances (config)
1545 @ivar sinst: list of configured secondary instances (config)
1546 @ivar sbp: dictionary of {primary-node: list of instances} for all
1547 instances for which this node is secondary (config)
1548 @ivar mfree: free memory, as reported by hypervisor (runtime)
1549 @ivar dfree: free disk, as reported by the node (runtime)
1550 @ivar offline: the offline status (config)
1551 @type rpc_fail: boolean
1552 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1553 not whether the individual keys were correct) (runtime)
1554 @type lvm_fail: boolean
1555 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1556 @type hyp_fail: boolean
1557 @ivar hyp_fail: whether the RPC call didn't return the instance list
1558 @type ghost: boolean
1559 @ivar ghost: whether this is a known node or not (config)
1560 @type os_fail: boolean
1561 @ivar os_fail: whether the RPC call didn't return valid OS data
1562 @type oslist: list
1563 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1564 @type vm_capable: boolean
1565 @ivar vm_capable: whether the node can host instances
1566 @type pv_min: float
1567 @ivar pv_min: size in MiB of the smallest PVs
1568 @type pv_max: float
1569 @ivar pv_max: size in MiB of the biggest PVs
1570
1571 """
1572 def __init__(self, offline=False, uuid=None, vm_capable=True):
1573 self.uuid = uuid
1574 self.volumes = {}
1575 self.instances = []
1576 self.pinst = []
1577 self.sinst = []
1578 self.sbp = {}
1579 self.mfree = 0
1580 self.dfree = 0
1581 self.offline = offline
1582 self.vm_capable = vm_capable
1583 self.rpc_fail = False
1584 self.lvm_fail = False
1585 self.hyp_fail = False
1586 self.ghost = False
1587 self.os_fail = False
1588 self.oslist = {}
1589 self.pv_min = None
1590 self.pv_max = None
1591
1612
1630
1632 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1633 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1634
1635 group_node_uuids = set(self.group_info.members)
1636 group_inst_uuids = \
1637 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1638
1639 unlocked_node_uuids = \
1640 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
1641
1642 unlocked_inst_uuids = \
1643 group_inst_uuids.difference(
1644 [self.cfg.GetInstanceInfoByName(name).uuid
1645 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
1646
1647 if unlocked_node_uuids:
1648 raise errors.OpPrereqError(
1649 "Missing lock for nodes: %s" %
1650 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
1651 errors.ECODE_STATE)
1652
1653 if unlocked_inst_uuids:
1654 raise errors.OpPrereqError(
1655 "Missing lock for instances: %s" %
1656 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
1657 errors.ECODE_STATE)
1658
1659 self.all_node_info = self.cfg.GetAllNodesInfo()
1660 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1661
1662 self.my_node_uuids = group_node_uuids
1663 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
1664 for node_uuid in group_node_uuids)
1665
1666 self.my_inst_uuids = group_inst_uuids
1667 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
1668 for inst_uuid in group_inst_uuids)
1669
1670
1671
1672 extra_lv_nodes = set()
1673
1674 for inst in self.my_inst_info.values():
1675 if inst.disk_template in constants.DTS_INT_MIRROR:
1676 for nuuid in inst.all_nodes:
1677 if self.all_node_info[nuuid].group != self.group_uuid:
1678 extra_lv_nodes.add(nuuid)
1679
1680 unlocked_lv_nodes = \
1681 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1682
1683 if unlocked_lv_nodes:
1684 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1685 utils.CommaJoin(unlocked_lv_nodes),
1686 errors.ECODE_STATE)
1687 self.extra_lv_nodes = list(extra_lv_nodes)
1688
1690 """Perform some basic validation on data returned from a node.
1691
1692 - check the result data structure is well formed and has all the
1693 mandatory fields
1694 - check ganeti version
1695
1696 @type ninfo: L{objects.Node}
1697 @param ninfo: the node to check
1698 @param nresult: the results from the node
1699 @rtype: boolean
1700 @return: whether overall this call was successful (and we can expect
1701 reasonable values in the response)
1702
1703 """
1704
1705 test = not nresult or not isinstance(nresult, dict)
1706 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1707 "unable to verify node: no data returned")
1708 if test:
1709 return False
1710
1711
1712 local_version = constants.PROTOCOL_VERSION
1713 remote_version = nresult.get("version", None)
1714 test = not (remote_version and
1715 isinstance(remote_version, (list, tuple)) and
1716 len(remote_version) == 2)
1717 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
1718 "connection to node returned invalid data")
1719 if test:
1720 return False
1721
1722 test = local_version != remote_version[0]
1723 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
1724 "incompatible protocol versions: master %s,"
1725 " node %s", local_version, remote_version[0])
1726 if test:
1727 return False
1728
1729
1730
1731
1732 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1733 constants.CV_ENODEVERSION, ninfo.name,
1734 "software version mismatch: master %s, node %s",
1735 constants.RELEASE_VERSION, remote_version[1],
1736 code=self.ETYPE_WARNING)
1737
1738 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1739 if ninfo.vm_capable and isinstance(hyp_result, dict):
1740 for hv_name, hv_result in hyp_result.iteritems():
1741 test = hv_result is not None
1742 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1743 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1744
1745 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1746 if ninfo.vm_capable and isinstance(hvp_result, list):
1747 for item, hv_name, hv_result in hvp_result:
1748 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
1749 "hypervisor %s parameter verify failure (source %s): %s",
1750 hv_name, item, hv_result)
1751
1752 test = nresult.get(constants.NV_NODESETUP,
1753 ["Missing NODESETUP results"])
1754 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
1755 "node setup error: %s", "; ".join(test))
1756
1757 return True
1758
1759 def _VerifyNodeTime(self, ninfo, nresult,
1760 nvinfo_starttime, nvinfo_endtime):
1761 """Check the node time.
1762
1763 @type ninfo: L{objects.Node}
1764 @param ninfo: the node to check
1765 @param nresult: the remote results for the node
1766 @param nvinfo_starttime: the start time of the RPC call
1767 @param nvinfo_endtime: the end time of the RPC call
1768
1769 """
1770 ntime = nresult.get(constants.NV_TIME, None)
1771 try:
1772 ntime_merged = utils.MergeTime(ntime)
1773 except (ValueError, TypeError):
1774 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
1775 "Node returned invalid time")
1776 return
1777
1778 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1779 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1780 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1781 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1782 else:
1783 ntime_diff = None
1784
1785 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
1786 "Node time diverges by at least %s from master node time",
1787 ntime_diff)
1788
1790 """Check the node LVM results and update info for cross-node checks.
1791
1792 @type ninfo: L{objects.Node}
1793 @param ninfo: the node to check
1794 @param nresult: the remote results for the node
1795 @param vg_name: the configured VG name
1796 @type nimg: L{NodeImage}
1797 @param nimg: node image
1798
1799 """
1800 if vg_name is None:
1801 return
1802
1803
1804 vglist = nresult.get(constants.NV_VGLIST, None)
1805 test = not vglist
1806 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
1807 "unable to check volume groups")
1808 if not test:
1809 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1810 constants.MIN_VG_SIZE)
1811 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
1812
1813
1814 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
1815 for em in errmsgs:
1816 self._Error(constants.CV_ENODELVM, ninfo.name, em)
1817 if pvminmax is not None:
1818 (nimg.pv_min, nimg.pv_max) = pvminmax
1819
1821 """Check cross-node DRBD version consistency.
1822
1823 @type node_verify_infos: dict
1824 @param node_verify_infos: infos about nodes as returned from the
1825 node_verify call.
1826
1827 """
1828 node_versions = {}
1829 for node_uuid, ndata in node_verify_infos.items():
1830 nresult = ndata.payload
1831 if nresult:
1832 version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version")
1833 node_versions[node_uuid] = version
1834
1835 if len(set(node_versions.values())) > 1:
1836 for node_uuid, version in sorted(node_versions.items()):
1837 msg = "DRBD version mismatch: %s" % version
1838 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
1839 code=self.ETYPE_WARNING)
1840
1842 """Check cross-node consistency in LVM.
1843
1844 @type node_image: dict
1845 @param node_image: info about nodes, mapping from node to names to
1846 L{NodeImage} objects
1847 @param vg_name: the configured VG name
1848
1849 """
1850 if vg_name is None:
1851 return
1852
1853
1854 if not self._exclusive_storage:
1855 return
1856
1857
1858
1859
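# With exclusive storage, PV sizes should be roughly uniform within the
# group; compare the smallest and the largest PV reported.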
1860 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
1861 if not vals:
1862 return
1863 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
1864 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
1865 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
1866 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
1867 "PV sizes differ too much in the group; smallest (%s MB) is"
1868 " on %s, biggest (%s MB) is on %s",
1869 pvmin, self.cfg.GetNodeName(minnode_uuid),
1870 pvmax, self.cfg.GetNodeName(maxnode_uuid))
1871
1873 """Check the node bridges.
1874
1875 @type ninfo: L{objects.Node}
1876 @param ninfo: the node to check
1877 @param nresult: the remote results for the node
1878 @param bridges: the expected list of bridges
1879
1880 """
1881 if not bridges:
1882 return
1883
1884 missing = nresult.get(constants.NV_BRIDGES, None)
1885 test = not isinstance(missing, list)
1886 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1887 "did not return valid bridge information")
1888 if not test:
1889 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
1890 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1891
1909
1911 """Check the node network connectivity results.
1912
1913 @type ninfo: L{objects.Node}
1914 @param ninfo: the node to check
1915 @param nresult: the remote results for the node
1916
1917 """
1918 test = constants.NV_NODELIST not in nresult
1919 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
1920 "node hasn't returned node ssh connectivity data")
1921 if not test:
1922 if nresult[constants.NV_NODELIST]:
1923 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1924 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
1925 "ssh communication with node '%s': %s", a_node, a_msg)
1926
1927 test = constants.NV_NODENETTEST not in nresult
1928 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1929 "node hasn't returned node tcp connectivity data")
1930 if not test:
1931 if nresult[constants.NV_NODENETTEST]:
1932 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1933 for anode in nlist:
1934 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
1935 "tcp communication with node '%s': %s",
1936 anode, nresult[constants.NV_NODENETTEST][anode])
1937
1938 test = constants.NV_MASTERIP not in nresult
1939 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
1940 "node hasn't returned node master IP reachability data")
1941 if not test:
1942 if not nresult[constants.NV_MASTERIP]:
1943 if ninfo.uuid == self.master_node:
1944 msg = "the master node cannot reach the master IP (not configured?)"
1945 else:
1946 msg = "cannot reach the master IP"
1947 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
1948
1950 """Verify an instance.
1951
1952 This function checks to see if the required block devices are
1953 available on the instance's node, and that the nodes are in the correct
1954 state.
1955
1956 """
1957 pnode_uuid = instance.primary_node
1958 pnode_img = node_image[pnode_uuid]
1959 groupinfo = self.cfg.GetAllNodeGroupsInfo()
1960
1961 node_vol_should = {}
1962 instance.MapLVsByNode(node_vol_should)
1963
1964 cluster = self.cfg.GetClusterInfo()
1965 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
1966 self.group_info)
1967 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
1968 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
1969 utils.CommaJoin(err), code=self.ETYPE_WARNING)
1970
1971 for node_uuid in node_vol_should:
1972 n_img = node_image[node_uuid]
1973 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1974
1975 continue
1976 for volume in node_vol_should[node_uuid]:
1977 test = volume not in n_img.volumes
1978 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
1979 "volume %s missing on node %s", volume,
1980 self.cfg.GetNodeName(node_uuid))
1981
1982 if instance.admin_state == constants.ADMINST_UP:
1983 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
1984 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
1985 "instance not running on its primary node %s",
1986 self.cfg.GetNodeName(pnode_uuid))
1987 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
1988 instance.name, "instance is marked as running and lives on"
1989 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
1990
1991 diskdata = [(nname, success, status, idx)
1992 for (nname, disks) in diskstatus.items()
1993 for idx, (success, status) in enumerate(disks)]
1994
1995 for nname, success, bdev_status, idx in diskdata:
1996
1997
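# Status failures on ghost or offline nodes cannot be verified reliably and
# are therefore not reported as faulty disks.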
1998 snode = node_image[nname]
1999 bad_snode = snode.ghost or snode.offline
2000 self._ErrorIf(instance.disks_active and
2001 not success and not bad_snode,
2002 constants.CV_EINSTANCEFAULTYDISK, instance.name,
2003 "couldn't retrieve status for disk/%s on %s: %s",
2004 idx, self.cfg.GetNodeName(nname), bdev_status)
2005
2006 if instance.disks_active and success and \
2007 (bdev_status.is_degraded or
2008 bdev_status.ldisk_status != constants.LDS_OKAY):
2009 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
2010 if bdev_status.is_degraded:
2011 msg += " is degraded"
2012 if bdev_status.ldisk_status != constants.LDS_OKAY:
2013 msg += "; state is '%s'" % \
2014 constants.LDS_NAMES[bdev_status.ldisk_status]
2015
2016 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
2017
2018 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2019 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
2020 "instance %s, connection to primary node failed",
2021 instance.name)
2022
2023 self._ErrorIf(len(instance.secondary_nodes) > 1,
2024 constants.CV_EINSTANCELAYOUT, instance.name,
2025 "instance has multiple secondary nodes: %s",
2026 utils.CommaJoin(instance.secondary_nodes),
2027 code=self.ETYPE_WARNING)
2028
2029 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes)
2030 if any(es_flags.values()):
2031 if instance.disk_template not in constants.DTS_EXCL_STORAGE:
2032
2033
2034 es_nodes = [n
2035 for (n, es) in es_flags.items()
2036 if es]
2037 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
2038 "instance has template %s, which is not supported on nodes"
2039 " that have exclusive storage set: %s",
2040 instance.disk_template,
2041 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
2042 for (idx, disk) in enumerate(instance.disks):
2043 self._ErrorIf(disk.spindles is None,
2044 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
2045 "number of spindles not configured for disk %s while"
2046 " exclusive storage is enabled, try running"
2047 " gnt-cluster repair-disk-sizes", idx)
2048
2049 if instance.disk_template in constants.DTS_INT_MIRROR:
2050 instance_nodes = utils.NiceSort(instance.all_nodes)
2051 instance_groups = {}
2052
2053 for node_uuid in instance_nodes:
2054 instance_groups.setdefault(self.all_node_info[node_uuid].group,
2055 []).append(node_uuid)
2056
2057 pretty_list = [
2058 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
2059 groupinfo[group].name)
2060
2061 for group, nodes in sorted(instance_groups.items(),
2062 key=lambda (_, nodes): pnode_uuid in nodes,
2063 reverse=True)]
2064
2065 self._ErrorIf(len(instance_groups) > 1,
2066 constants.CV_EINSTANCESPLITGROUPS,
2067 instance.name, "instance has primary and secondary nodes in"
2068 " different groups: %s", utils.CommaJoin(pretty_list),
2069 code=self.ETYPE_WARNING)
2070
2071 inst_nodes_offline = []
2072 for snode in instance.secondary_nodes:
2073 s_img = node_image[snode]
2074 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2075 self.cfg.GetNodeName(snode),
2076 "instance %s, connection to secondary node failed",
2077 instance.name)
2078
2079 if s_img.offline:
2080 inst_nodes_offline.append(snode)
2081
2082
2083 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
2084 instance.name, "instance has offline secondary node(s) %s",
2085 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
2086
2087 for node_uuid in instance.all_nodes:
2088 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
2089 instance.name, "instance lives on ghost node %s",
2090 self.cfg.GetNodeName(node_uuid))
2091 self._ErrorIf(not node_image[node_uuid].vm_capable,
2092 constants.CV_EINSTANCEBADNODE, instance.name,
2093 "instance lives on non-vm_capable node %s",
2094 self.cfg.GetNodeName(node_uuid))
2095
2097 """Verify if there are any unknown volumes in the cluster.
2098
2099 The .os, .swap and backup volumes are ignored. All other volumes are
2100 reported as unknown.
2101
2102 @type reserved: L{ganeti.utils.FieldSet}
2103 @param reserved: a FieldSet of reserved volume names
2104
2105 """
2106 for node_uuid, n_img in node_image.items():
2107 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2108 self.all_node_info[node_uuid].group != self.group_uuid):
2109
2110 continue
2111 for volume in n_img.volumes:
2112 test = ((node_uuid not in node_vol_should or
2113 volume not in node_vol_should[node_uuid]) and
2114 not reserved.Matches(volume))
2115 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
2116 self.cfg.GetNodeName(node_uuid),
2117 "volume %s is unknown", volume)
2118
2120 """Verify N+1 Memory Resilience.
2121
2122 Check that if one single node dies we can still start all the
2123 instances it was primary for.
2124
2125 """
2126 cluster_info = self.cfg.GetClusterInfo()
2127 for node_uuid, n_img in node_image.items():
2128
2129
2130
2131
2132
2133
2134
2135
2136 if n_img.offline or \
2137 self.all_node_info[node_uuid].group != self.group_uuid:
2138
2139
2140
2141
2142 continue
2143
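# n_img.sbp maps each primary node to the instances that use this node as
# secondary; sum the minimum memory of the auto-balanced ones and check that
# this node could host them all should that primary node fail.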
2144 for prinode, inst_uuids in n_img.sbp.items():
2145 needed_mem = 0
2146 for inst_uuid in inst_uuids:
2147 bep = cluster_info.FillBE(all_insts[inst_uuid])
2148 if bep[constants.BE_AUTO_BALANCE]:
2149 needed_mem += bep[constants.BE_MINMEM]
2150 test = n_img.mfree < needed_mem
2151 self._ErrorIf(test, constants.CV_ENODEN1,
2152 self.cfg.GetNodeName(node_uuid),
2153 "not enough memory to accomodate instance failovers"
2154 " should node %s fail (%dMiB needed, %dMiB available)",
2155 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2156
2157 def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
2158 (files_all, files_opt, files_mc, files_vm)):
2159 """Verifies file checksums collected from all nodes.
2160
2161 @param nodes: List of L{objects.Node} objects
2162 @param master_node_uuid: UUID of master node
2163 @param all_nvinfo: RPC results
2164
2165 """
2166
2167 files2nodefn = [
2168 (files_all, None),
2169 (files_mc, lambda node: (node.master_candidate or
2170 node.uuid == master_node_uuid)),
2171 (files_vm, lambda node: node.vm_capable),
2172 ]
2173
2174
2175 nodefiles = {}
2176 for (files, fn) in files2nodefn:
2177 if fn is None:
2178 filenodes = nodes
2179 else:
2180 filenodes = filter(fn, nodes)
2181 nodefiles.update((filename,
2182 frozenset(map(operator.attrgetter("uuid"), filenodes)))
2183 for filename in files)
2184
2185 assert set(nodefiles) == (files_all | files_mc | files_vm)
2186
2187 fileinfo = dict((filename, {}) for filename in nodefiles)
2188 ignore_nodes = set()
2189
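# First pass: record, per file and per checksum, which nodes reported that
# checksum; offline nodes and nodes that returned no usable file data are
# remembered in ignore_nodes and skipped.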
2190 for node in nodes:
2191 if node.offline:
2192 ignore_nodes.add(node.uuid)
2193 continue
2194
2195 nresult = all_nvinfo[node.uuid]
2196
2197 if nresult.fail_msg or not nresult.payload:
2198 node_files = None
2199 else:
2200 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2201 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2202 for (key, value) in fingerprints.items())
2203 del fingerprints
2204
2205 test = not (node_files and isinstance(node_files, dict))
2206 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
2207 "Node did not return file checksum data")
2208 if test:
2209 ignore_nodes.add(node.uuid)
2210 continue
2211
2212
2213 for (filename, checksum) in node_files.items():
2214 assert filename in nodefiles
2215 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
2216
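# Second pass: for every distributed file compare the nodes that actually
# have it with the nodes that should, flagging missing copies, unexpected
# copies and checksum mismatches.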
2217 for (filename, checksums) in fileinfo.items():
2218 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2219
2220
2221 with_file = frozenset(node_uuid
2222 for node_uuids in fileinfo[filename].values()
2223 for node_uuid in node_uuids) - ignore_nodes
2224
2225 expected_nodes = nodefiles[filename] - ignore_nodes
2226
2227
2228 missing_file = expected_nodes - with_file
2229
2230 if filename in files_opt:
2231
2232 self._ErrorIf(missing_file and missing_file != expected_nodes,
2233 constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s is optional, but it must exist on all or no"
2235 " nodes (not found on %s)",
2236 filename,
2237 utils.CommaJoin(
2238 utils.NiceSort(
2239 map(self.cfg.GetNodeName, missing_file))))
2240 else:
2241 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2242 "File %s is missing from node(s) %s", filename,
2243 utils.CommaJoin(
2244 utils.NiceSort(
2245 map(self.cfg.GetNodeName, missing_file))))
2246
2247
2248 unexpected = with_file - expected_nodes
2249 self._ErrorIf(unexpected,
2250 constants.CV_ECLUSTERFILECHECK, None,
2251 "File %s should not exist on node(s) %s",
2252 filename, utils.CommaJoin(
2253 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
2254
2255
2256 test = len(checksums) > 1
2257 if test:
2258 variants = ["variant %s on %s" %
2259 (idx + 1,
2260 utils.CommaJoin(utils.NiceSort(
2261 map(self.cfg.GetNodeName, node_uuids))))
2262 for (idx, (checksum, node_uuids)) in
2263 enumerate(sorted(checksums.items()))]
2264 else:
2265 variants = []
2266
2267 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
2268 "File %s found with %s different checksums (%s)",
2269 filename, len(checksums), "; ".join(variants))
2270
2271 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2272 drbd_map):
2273 """Verifies the node DRBD status.
2274
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2278 @param instanceinfo: the dict of instances
2279 @param drbd_helper: the configured DRBD usermode helper
2280 @param drbd_map: the DRBD map as returned by
2281 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2282
2283 """
2284 if drbd_helper:
2285 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2286 test = (helper_result is None)
2287 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2288 "no drbd usermode helper returned")
2289 if helper_result:
2290 status, payload = helper_result
2291 test = not status
2292 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2293 "drbd usermode helper check unsuccessful: %s", payload)
2294 test = status and (payload != drbd_helper)
2295 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2296 "wrong drbd usermode helper: %s", payload)
2297
2298
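# Build the expected minor map for this node from the cluster-wide DRBD map:
# minor -> (instance UUID, whether that instance's disks should be active).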
2299 node_drbd = {}
2300 for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2301 test = inst_uuid not in instanceinfo
2302 self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2303 "ghost instance '%s' in temporary DRBD map", inst_uuid)
2304
2305
2306
2307 if test:
2308 node_drbd[minor] = (inst_uuid, False)
2309 else:
2310 instance = instanceinfo[inst_uuid]
2311 node_drbd[minor] = (inst_uuid, instance.disks_active)
2312
2313
2314 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2315 test = not isinstance(used_minors, (tuple, list))
2316 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2317 "cannot parse drbd status file: %s", str(used_minors))
2318 if test:
2319
2320 return
2321
2322 for minor, (inst_uuid, must_exist) in node_drbd.items():
2323 test = minor not in used_minors and must_exist
2324 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2325 "drbd minor %d of instance %s is not active", minor,
2326 self.cfg.GetInstanceName(inst_uuid))
2327 for minor in used_minors:
2328 test = minor not in node_drbd
2329 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2330 "unallocated drbd minor %d is in use", minor)
2331
2333 """Builds the node OS structures.
2334
2335 @type ninfo: L{objects.Node}
2336 @param ninfo: the node to check
2337 @param nresult: the remote results for the node
2338 @param nimg: the node image object
2339
2340 """
2341 remote_os = nresult.get(constants.NV_OSLIST, None)
2342 test = (not isinstance(remote_os, list) or
2343 not compat.all(isinstance(v, list) and len(v) == 7
2344 for v in remote_os))
2345
2346 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2347 "node hasn't returned valid OS data")
2348
2349 nimg.os_fail = test
2350
2351 if test:
2352 return
2353
2354 os_dict = {}
2355
2356 for (name, os_path, status, diagnose,
2357 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2358
2359 if name not in os_dict:
2360 os_dict[name] = []
2361
2362
2363
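# OS parameters arrive as lists over the RPC layer; turn them into hashable
# tuples so they can be stored in a set.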
2364 parameters = [tuple(v) for v in parameters]
2365 os_dict[name].append((os_path, status, diagnose,
2366 set(variants), set(parameters), set(api_ver)))
2367
2368 nimg.oslist = os_dict
2369
2371 """Verifies the node OS list.
2372
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nimg: the node image object
2376 @param base: the 'template' node we match against (e.g. from the master)
2377
2378 """
2379 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2380
2381 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2382 for os_name, os_data in nimg.oslist.items():
2383 assert os_data, "Empty OS status for OS %s?!" % os_name
2384 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2385 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
2386 "Invalid OS %s (located at %s): %s",
2387 os_name, f_path, f_diag)
2388 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
2389 "OS '%s' has multiple entries"
2390 " (first one shadows the rest): %s",
2391 os_name, utils.CommaJoin([v[0] for v in os_data]))
2392
2393 test = os_name not in base.oslist
2394 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2395 "Extra OS %s not present on reference node (%s)",
2396 os_name, self.cfg.GetNodeName(base.uuid))
2397 if test:
2398 continue
2399 assert base.oslist[os_name], "Base node has empty OS status?"
2400 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2401 if not b_status:
2402
2403 continue
2404 for kind, a, b in [("API version", f_api, b_api),
2405 ("variants list", f_var, b_var),
2406 ("parameters", beautify_params(f_param),
2407 beautify_params(b_param))]:
2408 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
2409 "OS %s for %s differs from reference node %s:"
2410 " [%s] vs. [%s]", kind, os_name,
2411 self.cfg.GetNodeName(base.uuid),
2412 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2413
2414
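# Finally, report OSes that the reference node knows about but this node did
# not return at all.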
2415 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2416 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
2417 "OSes present on reference node %s"
2418 " but missing on this node: %s",
2419 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
2420
2422 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2423
2424 @type ninfo: L{objects.Node}
2425 @param ninfo: the node to check
2426 @param nresult: the remote results for the node
2427 @type is_master: bool
2428 @param is_master: Whether node is the master node
2429
2430 """
2431 cluster = self.cfg.GetClusterInfo()
2432 if (is_master and
2433 (cluster.IsFileStorageEnabled() or
2434 cluster.IsSharedFileStorageEnabled())):
2435 try:
2436 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
2437 except KeyError:
2438
2439 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2440 "Node did not return forbidden file storage paths")
2441 else:
2442 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2443 "Found forbidden file storage paths: %s",
2444 utils.CommaJoin(fspaths))
2445 else:
2446 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
2447 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
2448 "Node should not have returned forbidden file storage"
2449 " paths")
2450
2451 def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
2452 verify_key, error_key):
2453 """Verifies (file) storage paths.
2454
2455 @type ninfo: L{objects.Node}
2456 @param ninfo: the node to check
2457 @param nresult: the remote results for the node
2458 @type file_disk_template: string
2459 @param file_disk_template: file-based disk template, whose directory
2460 is supposed to be verified
2461 @type verify_key: string
2462 @param verify_key: key for the verification map of this file
2463 verification step
2464 @param error_key: error key to be added to the verification results
2465 in case something goes wrong in this verification step
2466
2467 """
2468 assert (file_disk_template in
2469 utils.storage.GetDiskTemplatesOfStorageType(constants.ST_FILE))
2470 cluster = self.cfg.GetClusterInfo()
2471 if cluster.IsDiskTemplateEnabled(file_disk_template):
2472 self._ErrorIf(
2473 verify_key in nresult,
2474 error_key, ninfo.name,
2475 "The configured %s storage path is unusable: %s" %
2476 (file_disk_template, nresult.get(verify_key)))
2477
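# A typical caller of this helper (a sketch; the exact constant names for the
# file-based template are assumed here, not taken from this file) would be:
#
#   self._VerifyStoragePaths(ninfo, nresult, constants.DT_FILE,
#                            constants.NV_FILE_STORAGE_PATH,
#                            constants.CV_ENODEFILESTORAGEPATHUNUSABLE)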
2488
2499
2501 """Verifies out of band functionality of a node.
2502
2503 @type ninfo: L{objects.Node}
2504 @param ninfo: the node to check
2505 @param nresult: the remote results for the node
2506
2507 """
2508
2509
2510 if ((ninfo.master_candidate or ninfo.master_capable) and
2511 constants.NV_OOB_PATHS in nresult):
2512 for path_result in nresult[constants.NV_OOB_PATHS]:
2513 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
2514 ninfo.name, path_result)
2515
2517 """Verifies and updates the node volume data.
2518
2519 This function will update a L{NodeImage}'s internal structures
2520 with data from the remote call.
2521
2522 @type ninfo: L{objects.Node}
2523 @param ninfo: the node to check
2524 @param nresult: the remote results for the node
2525 @param nimg: the node image object
2526 @param vg_name: the configured VG name
2527
2528 """
2529 nimg.lvm_fail = True
2530 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2531 if vg_name is None:
2532 pass
2533 elif isinstance(lvdata, basestring):
2534 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2535 "LVM problem on node: %s", utils.SafeEncode(lvdata))
2536 elif not isinstance(lvdata, dict):
2537 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
2538 "rpc call to node failed (lvlist)")
2539 else:
2540 nimg.volumes = lvdata
2541 nimg.lvm_fail = False
2542
2544 """Verifies and updates the node instance list.
2545
2546 If the listing was successful, then updates this node's instance
2547 list. Otherwise, it marks the RPC call as failed for the instance
2548 list key.
2549
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554
2555 """
2556 idata = nresult.get(constants.NV_INSTANCELIST, None)
2557 test = not isinstance(idata, list)
2558 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2559 "rpc call to node failed (instancelist): %s",
2560 utils.SafeEncode(str(idata)))
2561 if test:
2562 nimg.hyp_fail = True
2563 else:
2564 nimg.instances = [inst.uuid for (_, inst) in
2565 self.cfg.GetMultiInstanceInfoByName(idata)]
2566
2568 """Verifies and computes a node information map
2569
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @param nimg: the node image object
2574 @param vg_name: the configured VG name
2575
2576 """
2577
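# Free memory as reported by the hypervisor (NV_HVINFO).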
2578 hv_info = nresult.get(constants.NV_HVINFO, None)
2579 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2580 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2581 "rpc call to node failed (hvinfo)")
2582 if not test:
2583 try:
2584 nimg.mfree = int(hv_info["memory_free"])
2585 except (ValueError, TypeError):
2586 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2587 "node returned invalid nodeinfo, check hypervisor")
2588
2589
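# Free disk space in the configured volume group, if LVM is used.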
2590 if vg_name is not None:
2591 test = (constants.NV_VGLIST not in nresult or
2592 vg_name not in nresult[constants.NV_VGLIST])
2593 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
2594 "node didn't return data for the volume group '%s'"
2595 " - it is either missing or broken", vg_name)
2596 if not test:
2597 try:
2598 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2599 except (ValueError, TypeError):
2600 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
2601 "node returned invalid LVM info, check LVM status")
2602
2604 """Gets per-disk status information for all instances.
2605
2606 @type node_uuids: list of strings
2607 @param node_uuids: Node UUIDs
2608 @type node_image: dict of (UUID, L{objects.Node})
2609 @param node_image: Node objects
2610 @type instanceinfo: dict of (UUID, L{objects.Instance})
2611 @param instanceinfo: Instance objects
2612 @rtype: {instance: {node: [(success, payload)]}}
2613 @return: a dictionary of per-instance dictionaries with nodes as
2614 keys and disk information as values; the disk information is a
2615 list of tuples (success, payload)
2616
2617 """
2618 node_disks = {}
2619 node_disks_devonly = {}
2620 diskless_instances = set()
2621 nodisk_instances = set()
2622 diskless = constants.DT_DISKLESS
2623
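# Group the disks of all instances by the node that has to report on them;
# diskless instances and instances whose nodes expose no disks are tracked
# separately.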
2624 for nuuid in node_uuids:
2625 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
2626 node_image[nuuid].sinst))
2627 diskless_instances.update(uuid for uuid in node_inst_uuids
2628 if instanceinfo[uuid].disk_template == diskless)
2629 disks = [(inst_uuid, disk)
2630 for inst_uuid in node_inst_uuids
2631 for disk in instanceinfo[inst_uuid].disks]
2632
2633 if not disks:
2634 nodisk_instances.update(uuid for uuid in node_inst_uuids
2635 if instanceinfo[uuid].disk_template != diskless)
2636
2637 continue
2638
2639 node_disks[nuuid] = disks
2640
2641
2642 devonly = []
2643 for (inst_uuid, dev) in disks:
2644 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
2645 self.cfg)
2646 self.cfg.SetDiskID(anno_disk, nuuid)
2647 devonly.append(anno_disk)
2648
2649 node_disks_devonly[nuuid] = devonly
2650
2651 assert len(node_disks) == len(node_disks_devonly)
2652
2653
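# Query the mirror status of all collected disks with a single multi-node
# RPC call.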
2654 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2655 node_disks_devonly)
2656
2657 assert len(result) == len(node_disks)
2658
2659 instdisk = {}
2660
2661 for (nuuid, nres) in result.items():
2662 node = self.cfg.