30 """Logical units for cluster verification."""
31
32 import itertools
33 import logging
34 import operator
35 import re
36 import time
37 import ganeti.masterd.instance
38 import ganeti.rpc.node as rpc
39
40 from ganeti import compat
41 from ganeti import constants
42 from ganeti import errors
43 from ganeti import locking
44 from ganeti import pathutils
45 from ganeti import utils
46 from ganeti import vcluster
47 from ganeti import hypervisor
48 from ganeti import opcodes
49
50 from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, ResultWithJobs
51 from ganeti.cmdlib.common import ShareAll, ComputeAncillaryFiles, \
52 CheckNodePVs, ComputeIPolicyInstanceViolation, AnnotateDiskParams, \
53 SupportsOob
57 """Compute the set of all hypervisor parameters.
58
59 @type cluster: L{objects.Cluster}
60 @param cluster: the cluster object
61 @param instances: list of L{objects.Instance}
62 @param instances: additional instances from which to obtain parameters
63 @rtype: list of (origin, hypervisor, parameters)
64 @return: a list with all parameters found, indicating the hypervisor they
65 apply to, and the origin (can be "cluster", "os X", or "instance Y")
66
67 """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

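  # Instance-level overrides, with cluster and OS defaults filled in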
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
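
# Illustrative shape of the return value (hypothetical names and values):
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...}),
#    ("instance web1.example.com", "kvm", {...})]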


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = constants.CV_ERROR
  ETYPE_WARNING = constants.CV_WARNING

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode

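    # If the error code is in the list of ignored errors, demote it to
    # a warning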
    if etxt in self.op.ignore_errors:
      ltype = self.ETYPE_WARNING

    if args:
      msg = msg % args

    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)

    self._feedback_fn(" - %s" % msg)

    if ltype == self.ETYPE_ERROR:
      self.bad = True

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    if (bool(cond)
        or self.op.debug_simulate_errors):
      self._Error(*args, **kwargs)

143 """Submits all jobs necessary to verify the cluster.
144
145 """
146 REQ_BGL = False
147
149 self.needed_locks = {}
150
  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

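      # Verify global configuration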
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

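      # Always depend on global verification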
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn(),
                                    verify_clutter=self.op.verify_clutter)]
      for group in groups)

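    # Fix up all parameters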
    for op in itertools.chain(*jobs):
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)

189 """Verifies the cluster disks status.
190
191 """
192 REQ_BGL = False
193
199
200 - def Exec(self, feedback_fn):
206
209 """Verifies the cluster config.
210
211 """
212 REQ_BGL = False
213
227
231
240
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

253 feedback_fn("* Verifying cluster certificate files")
254
255 for cert_filename in pathutils.ALL_CERT_FILES:
256 (errcode, msg) = utils.VerifyCertificate(cert_filename)
257 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
258
259 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
260 pathutils.NODED_CERT_FILE),
261 constants.CV_ECLUSTERCERT,
262 None,
263 pathutils.NODED_CERT_FILE + " must be accessible by the " +
264 constants.LUXID_USER + " user")
265
266 feedback_fn("* Verifying hypervisor parameters")
267
268 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
269 self.all_inst_info.values()))
270
271 feedback_fn("* Verifying all nodes belong to an existing group")
272
273
274
275
276
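    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.
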
    dangling_nodes = set(node for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in [node.uuid for node in dangling_nodes]:
        dangling_instances.setdefault(inst.primary_node, []).append(inst)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst)

    pretty_dangling = [
      "%s (%s)" %
      (node.name,
       utils.CommaJoin(inst.name for
                       inst in dangling_instances.get(node.uuid, [])))
      for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(inst.name for
                                         inst in no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)
319
321 """A class representing the logical and physical status of a node.
322
323 @type uuid: string
324 @ivar uuid: the node UUID to which this object refers
325 @ivar volumes: a structure as returned from
326 L{ganeti.backend.GetVolumeList} (runtime)
327 @ivar instances: a list of running instances (runtime)
328 @ivar pinst: list of configured primary instances (config)
329 @ivar sinst: list of configured secondary instances (config)
330 @ivar sbp: dictionary of {primary-node: list of instances} for all
331 instances for which this node is secondary (config)
332 @ivar mfree: free memory, as reported by hypervisor (runtime)
333 @ivar dfree: free disk, as reported by the node (runtime)
334 @ivar offline: the offline status (config)
335 @type rpc_fail: boolean
336 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
337 not whether the individual keys were correct) (runtime)
338 @type lvm_fail: boolean
339 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
340 @type hyp_fail: boolean
341 @ivar hyp_fail: whether the RPC call didn't return the instance list
342 @type ghost: boolean
343 @ivar ghost: whether this is a known node or not (config)
344 @type os_fail: boolean
345 @ivar os_fail: whether the RPC call didn't return valid OS data
346 @type oslist: list
347 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
348 @type vm_capable: boolean
349 @ivar vm_capable: whether the node can host instances
350 @type pv_min: float
351 @ivar pv_min: size in MiB of the smallest PVs
352 @type pv_max: float
353 @ivar pv_max: size in MiB of the biggest PVs
354
355 """
    def __init__(self, offline=False, uuid=None, vm_capable=True):
      self.uuid = uuid
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
      self.pv_min = None
      self.pv_max = None

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_node_uuids = set(self.group_info.members)
    group_inst_uuids = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_node_uuids = \
      group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_inst_uuids = \
      group_inst_uuids.difference(
        [self.cfg.GetInstanceInfoByName(name).uuid
         for name in self.owned_locks(locking.LEVEL_INSTANCE)])

    if unlocked_node_uuids:
      raise errors.OpPrereqError(
        "Missing lock for nodes: %s" %
        utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
        errors.ECODE_STATE)

    if unlocked_inst_uuids:
      raise errors.OpPrereqError(
        "Missing lock for instances: %s" %
        utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
        errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.all_disks_info = self.cfg.GetAllDisksInfo()

    self.my_node_uuids = group_node_uuids
    self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
                             for node_uuid in group_node_uuids)

    self.my_inst_uuids = group_inst_uuids
    self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
                             for inst_uuid in group_inst_uuids)

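    # We detect here the nodes that will need the extra RPC calls for
    # verifying split LV volumes; they should be locked.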
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      disks = self.cfg.GetInstanceDisks(inst.uuid)
      if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
        inst_nodes = self.cfg.GetInstanceNodes(inst.uuid)
        for nuuid in inst_nodes:
          if self.all_node_info[nuuid].group != self.group_uuid:
            extra_lv_nodes.add(nuuid)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
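    # main result, nresult should be a non-empty dict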
    test = not nresult or not isinstance(nresult, dict)
    self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
                  "unable to verify node: no data returned")
    if test:
      return False

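    # compares ganeti version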
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
                  "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
                  "incompatible protocol versions: master %s,"
                  " node %s", local_version, remote_version[0])
    if test:
      return False

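    # node seems compatible, we can actually try to look into its results

    # full package version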
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, ninfo.name,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                      "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
                      "hypervisor %s parameter verify failure (source %s): %s",
                      hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
                  "node setup error: %s", "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
                    "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
                  "Node time diverges by at least %s from master node time",
                  ntime_diff)
573 """Check the node LVM results and update info for cross-node checks.
574
575 @type ninfo: L{objects.Node}
576 @param ninfo: the node to check
577 @param nresult: the remote results for the node
578 @param vg_name: the configured VG name
579 @type nimg: L{NodeImage}
580 @param nimg: node image
581
582 """
583 if vg_name is None:
584 return
585
586
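    # checks vg existence and size > 20G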
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
                  "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)

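    # Check PVs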
    (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
    for em in errmsgs:
      self._Error(constants.CV_ENODELVM, ninfo.name, em)
    if pvminmax is not None:
      (nimg.pv_min, nimg.pv_max) = pvminmax

604 """Check cross-node DRBD version consistency.
605
606 @type node_verify_infos: dict
607 @param node_verify_infos: infos about nodes as returned from the
608 node_verify call.
609
610 """
611 node_versions = {}
612 for node_uuid, ndata in node_verify_infos.items():
613 nresult = ndata.payload
614 if nresult:
615 version = nresult.get(constants.NV_DRBDVERSION, None)
616 if version:
617 node_versions[node_uuid] = version
618
619 if len(set(node_versions.values())) > 1:
620 for node_uuid, version in sorted(node_versions.items()):
621 msg = "DRBD version mismatch: %s" % version
622 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
623 code=self.ETYPE_WARNING)
624
626 """Check cross-node consistency in LVM.
627
628 @type node_image: dict
629 @param node_image: info about nodes, mapping from node to names to
630 L{NodeImage} objects
631 @param vg_name: the configured VG name
632
633 """
634 if vg_name is None:
635 return
636
637
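    # Only exclusive storage needs this kind of checks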
    if not self._exclusive_storage:
      return

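    # exclusive_storage wants all PVs to have the same size (approximately),
    # if the smallest and the biggest ones are okay, everything is fine.
    # pv_min is None iff pv_max is None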
    vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
    if not vals:
      return
    (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
    (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
    bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
    self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
                  "PV sizes differ too much in the group; smallest (%s MB) is"
                  " on %s, biggest (%s MB) is on %s",
                  pvmin, self.cfg.GetNodeName(minnode_uuid),
                  pvmax, self.cfg.GetNodeName(maxnode_uuid))

657 """Check the node bridges.
658
659 @type ninfo: L{objects.Node}
660 @param ninfo: the node to check
661 @param nresult: the remote results for the node
662 @param bridges: the expected list of bridges
663
664 """
665 if not bridges:
666 return
667
668 missing = nresult.get(constants.NV_BRIDGES, None)
669 test = not isinstance(missing, list)
670 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
671 "did not return valid bridge information")
672 if not test:
673 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
674 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
675
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

695 """Check the node network connectivity results.
696
697 @type ninfo: L{objects.Node}
698 @param ninfo: the node to check
699 @param nresult: the remote results for the node
700
701 """
702 test = constants.NV_NODELIST not in nresult
703 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
704 "node hasn't returned node ssh connectivity data")
705 if not test:
706 if nresult[constants.NV_NODELIST]:
707 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
708 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
709 "ssh communication with node '%s': %s", a_node, a_msg)
710
711 test = constants.NV_NODENETTEST not in nresult
712 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
713 "node hasn't returned node tcp connectivity data")
714 if not test:
715 if nresult[constants.NV_NODENETTEST]:
716 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
717 for anode in nlist:
718 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
719 "tcp communication with node '%s': %s",
720 anode, nresult[constants.NV_NODENETTEST][anode])
721
722 test = constants.NV_MASTERIP not in nresult
723 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
724 "node hasn't returned node master IP reachability data")
725 if not test:
726 if not nresult[constants.NV_MASTERIP]:
727 if ninfo.uuid == self.master_node:
728 msg = "the master node cannot reach the master IP (not configured?)"
729 else:
730 msg = "cannot reach the master IP"
731 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)

  def _VerifyInstance(self, instance, node_image, diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node, and that the nodes are in the correct
    state.

    """
    pnode_uuid = instance.primary_node
    pnode_img = node_image[pnode_uuid]
    groupinfo = self.cfg.GetAllNodeGroupsInfo()

    node_vol_should = {}
    self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)

    cluster = self.cfg.GetClusterInfo()
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            self.group_info)
    err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
    self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
                  utils.CommaJoin(err), code=self.ETYPE_WARNING)

    for node_uuid in node_vol_should:
      n_img = node_image[node_uuid]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
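        # ignore missing volumes on offline or broken nodes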
        continue
      for volume in node_vol_should[node_uuid]:
        test = volume not in n_img.volumes
        self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
                      "volume %s missing on node %s", volume,
                      self.cfg.GetNodeName(node_uuid))

    if instance.admin_state == constants.ADMINST_UP:
      test = instance.uuid not in pnode_img.instances and not pnode_img.offline
      self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
                    "instance not running on its primary node %s",
                    self.cfg.GetNodeName(pnode_uuid))
      self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
                    instance.name, "instance is marked as running and lives on"
                    " offline node %s", self.cfg.GetNodeName(pnode_uuid))

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
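      # the 'ghost node' construction in Exec() ensures that we have a
      # node here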
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      self._ErrorIf(instance.disks_active and
                    not success and not bad_snode,
                    constants.CV_EINSTANCEFAULTYDISK, instance.name,
                    "couldn't retrieve status for disk/%s on %s: %s",
                    idx, self.cfg.GetNodeName(nname), bdev_status)

      if instance.disks_active and success and bdev_status.is_degraded:
        msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))

        code = self.ETYPE_ERROR
        accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]

        if bdev_status.ldisk_status in accepted_lds:
          code = self.ETYPE_WARNING

        msg += "; local disk state is '%s'" % \
               constants.LDS_NAMES[bdev_status.ldisk_status]

        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
                    code=code)

    self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
                  constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
                  "instance %s, connection to primary node failed",
                  instance.name)

    secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid)
    self._ErrorIf(len(secondary_nodes) > 1,
                  constants.CV_EINSTANCELAYOUT, instance.name,
                  "instance has multiple secondary nodes: %s",
                  utils.CommaJoin(secondary_nodes),
                  code=self.ETYPE_WARNING)

    inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes)
    disks = self.cfg.GetInstanceDisks(instance.uuid)
    if any(es_flags.values()):
      if not utils.AllDiskOfType(disks, constants.DTS_EXCL_STORAGE):
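        # Disk template not compatible with exclusive_storage: no instance
        # node should have the flag set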
        es_nodes = [n
                    for (n, es) in es_flags.items()
                    if es]
        unsupported = [d.dev_type for d in disks
                       if d.dev_type not in constants.DTS_EXCL_STORAGE]
        self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
                    "instance uses disk types %s, which are not supported on"
                    " nodes that have exclusive storage set: %s",
                    utils.CommaJoin(unsupported),
                    utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
      for (idx, disk) in enumerate(disks):
        self._ErrorIf(disk.spindles is None,
                      constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
                      "number of spindles not configured for disk %s while"
                      " exclusive storage is enabled, try running"
                      " gnt-cluster repair-disk-sizes", idx)

    if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
      instance_nodes = utils.NiceSort(inst_nodes)
      instance_groups = {}

      for node_uuid in instance_nodes:
        instance_groups.setdefault(self.all_node_info[node_uuid].group,
                                   []).append(node_uuid)

      pretty_list = [
        "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
                           groupinfo[group].name)
        for group, nodes in sorted(instance_groups.items(),
                                   key=lambda (_, nodes): pnode_uuid in nodes,
                                   reverse=True)]

      self._ErrorIf(len(instance_groups) > 1,
                    constants.CV_EINSTANCESPLITGROUPS,
                    instance.name, "instance has primary and secondary nodes"
                    " in different groups: %s", utils.CommaJoin(pretty_list),
                    code=self.ETYPE_WARNING)

    inst_nodes_offline = []
    for snode in secondary_nodes:
      s_img = node_image[snode]
      self._ErrorIf(s_img.rpc_fail and not s_img.offline,
                    constants.CV_ENODERPC, self.cfg.GetNodeName(snode),
                    "instance %s, connection to secondary node failed",
                    instance.name)

      if s_img.offline:
        inst_nodes_offline.append(snode)

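    # warn that the instance lives on offline nodes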
    self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
                  instance.name, "instance has offline secondary node(s) %s",
                  utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))

    for node_uuid in inst_nodes:
      self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
                    instance.name, "instance lives on ghost node %s",
                    self.cfg.GetNodeName(node_uuid))
      self._ErrorIf(not node_image[node_uuid].vm_capable,
                    constants.CV_EINSTANCEBADNODE, instance.name,
                    "instance lives on non-vm_capable node %s",
                    self.cfg.GetNodeName(node_uuid))

  def _VerifyOrphanVolumes(self, vg_name, node_vol_should, node_image,
                           reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type vg_name: string
    @param vg_name: the name of the Ganeti-administered volume group
    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node_uuid, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node_uuid].group != self.group_uuid):
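        # skip non-healthy nodes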
        continue
      for volume in n_img.volumes:
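        # skip volumes not belonging to the configured VG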
        if volume.split('/')[0] != vg_name:
          continue

        test = ((node_uuid not in node_vol_should or
                 volume not in node_vol_should[node_uuid]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV,
                      self.cfg.GetNodeName(node_uuid),
                      "volume %s is unknown", volume,
                      code=_VerifyErrors.ETYPE_WARNING)

  def _VerifyNPlusOneMemory(self, node_image, all_insts):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node_uuid, n_img in node_image.items():
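      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.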
      if n_img.offline or \
         self.all_node_info[node_uuid].group != self.group_uuid:
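        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning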
        continue

      for prinode, inst_uuids in n_img.sbp.items():
        needed_mem = 0
        for inst_uuid in inst_uuids:
          bep = cluster_info.FillBE(all_insts[inst_uuid])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1,
                      self.cfg.GetNodeName(node_uuid),
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)

  def _VerifyClientCertificates(self, nodes, all_nvinfo):
    """Verifies the consistency of the client certificates.

    This includes several aspects:
      - the individual validation of all nodes' certificates
      - the consistency of the master candidate certificate map
      - the consistency of the master candidate certificate map with the
        certificates that the master candidates are actually using.

    @param nodes: the list of nodes to consider in this verification
    @param all_nvinfo: the map of results of the verify_node call to
        all nodes

    """
    candidate_certs = self.cfg.GetClusterInfo().candidate_certs
    if candidate_certs is None or len(candidate_certs) == 0:
      self._ErrorIf(
        True, constants.CV_ECLUSTERCLIENTCERT, None,
        "The cluster's list of master candidate certificates is empty."
        " If you just updated the cluster, please run"
        " 'gnt-cluster renew-crypto --new-node-certificates'.")
      return

    self._ErrorIf(
      len(candidate_certs) != len(set(candidate_certs.values())),
      constants.CV_ECLUSTERCLIENTCERT, None,
      "There are at least two master candidates configured to use the same"
      " certificate.")

    for node in nodes:
      if node.offline:
        continue

      nresult = all_nvinfo[node.uuid]
      if nresult.fail_msg or not nresult.payload:
        continue

      (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None)

      self._ErrorIf(
        errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None,
        "Client certificate of node '%s' failed validation: %s (code '%s')",
        node.uuid, msg, errcode)

      if not errcode:
        digest = msg
        if node.master_candidate:
          if node.uuid in candidate_certs:
            self._ErrorIf(
              digest != candidate_certs[node.uuid],
              constants.CV_ECLUSTERCLIENTCERT, None,
              "Client certificate digest of master candidate '%s' does not"
              " match its entry in the cluster's map of master candidate"
              " certificates. Expected: %s Got: %s", node.uuid,
              digest, candidate_certs[node.uuid])
          else:
            self._ErrorIf(
              True, constants.CV_ECLUSTERCLIENTCERT, None,
              "The master candidate '%s' does not have an entry in the"
              " map of candidate certificates.", node.uuid)
            self._ErrorIf(
              digest in candidate_certs.values(),
              constants.CV_ECLUSTERCLIENTCERT, None,
              "Master candidate '%s' is using a certificate of another node.",
              node.uuid)
        else:
          self._ErrorIf(
            node.uuid in candidate_certs,
            constants.CV_ECLUSTERCLIENTCERT, None,
            "Node '%s' is not a master candidate, but still listed in the"
            " map of master candidate certificates.", node.uuid)
          self._ErrorIf(
            (node.uuid not in candidate_certs) and
            (digest in candidate_certs.values()),
            constants.CV_ECLUSTERCLIENTCERT, None,
            "Node '%s' is not a master candidate and is incorrectly using a"
            " certificate of another node which is master candidate.",
            node.uuid)

  def _VerifySshSetup(self, nodes, all_nvinfo):
    """Evaluates the verification results of the SSH setup and clutter test.

    @param nodes: List of L{objects.Node} objects
    @param all_nvinfo: RPC results

    """
    for node in nodes:
      if not node.offline:
        nresult = all_nvinfo[node.uuid]
        if nresult.fail_msg or not nresult.payload:
          self._ErrorIf(True, constants.CV_ENODESSH, node.name,
                        "Could not verify the SSH setup of this node.")
          return
        for ssh_test in [constants.NV_SSH_SETUP, constants.NV_SSH_CLUTTER]:
          result = nresult.payload.get(ssh_test, None)
          error_msg = ""
          if isinstance(result, list):
            error_msg = " ".join(result)
          self._ErrorIf(result,
                        constants.CV_ENODESSH, None, error_msg)

  def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param nodes: List of L{objects.Node} objects
    @param master_node_uuid: UUID of master node
    @param all_nvinfo: RPC results

    """
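    # Define functions determining which nodes to consider for a file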
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.uuid == master_node_uuid)),
      (files_vm, lambda node: node.vm_capable),
      ]

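    # Build mapping of filename to list of nodes which should have the file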
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodes
      else:
        filenodes = filter(fn, nodes)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("uuid"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodes:
      if node.offline:
        ignore_nodes.add(node.uuid)
        continue

      nresult = all_nvinfo[node.uuid]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        fingerprints = nresult.payload.get(constants.NV_FILELIST, {})
        node_files = dict((vcluster.LocalizeVirtualPath(key), value)
                          for (key, value) in fingerprints.items())
        del fingerprints

      test = not (node_files and isinstance(node_files, dict))
      self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
                    "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.uuid)
        continue

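      # Build per-checksum mapping from filename to nodes having it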
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.uuid)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

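      # Nodes having the file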
      with_file = frozenset(node_uuid
                            for node_uuids in fileinfo[filename].values()
                            for node_uuid in node_uuids) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

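      # Nodes missing file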
      missing_file = expected_nodes - with_file

      if filename in files_opt:
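        # All or no nodes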
        self._ErrorIf(missing_file and missing_file != expected_nodes,
                      constants.CV_ECLUSTERFILECHECK, None,
                      "File %s is optional, but it must exist on all or no"
                      " nodes (not found on %s)",
                      filename,
                      utils.CommaJoin(
                        utils.NiceSort(
                          map(self.cfg.GetNodeName, missing_file))))
      else:
        self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                      "File %s is missing from node(s) %s", filename,
                      utils.CommaJoin(
                        utils.NiceSort(
                          map(self.cfg.GetNodeName, missing_file))))

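      # Warn if a node has a file it should not have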
      unexpected = with_file - expected_nodes
      self._ErrorIf(unexpected,
                    constants.CV_ECLUSTERFILECHECK, None,
                    "File %s should not exist on node(s) %s",
                    filename, utils.CommaJoin(
                      utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))

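      # See if there are multiple versions of the file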
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1,
                     utils.CommaJoin(utils.NiceSort(
                       map(self.cfg.GetNodeName, node_uuids))))
                    for (idx, (checksum, node_uuids)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
                    "File %s found with %s different checksums (%s)",
                    filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbdHelper(self, ninfo, nresult, drbd_helper):
    """Verify the drbd helper.

    """
    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
                    "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
                      "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
                      "wrong drbd usermode helper: %s", payload)

  @staticmethod
  def _ComputeDrbdMinors(ninfo, instanceinfo, disks_info,
                         drbd_map, error_if):
    """Gives the DRBD information in a map for a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param instanceinfo: the dict of instances
    @param disks_info: the dict of disks
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    @type error_if: callable like L{_ErrorIf}
    @param error_if: The error reporting function
    @return: dict from minor number to (disk_uuid, instance_uuid, active)

    """
    node_drbd = {}
    for minor, disk_uuid in drbd_map[ninfo.uuid].items():
      test = disk_uuid not in disks_info
      error_if(test, constants.CV_ECLUSTERCFG, None,
               "ghost disk '%s' in temporary DRBD map", disk_uuid)
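      # ghost disk should not be active, but otherwise we
      # don't give double warnings (both ghost disk and
      # unallocated minor in use)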
      if test:
        node_drbd[minor] = (disk_uuid, None, False)
      else:
        disk_active = False
        disk_instance = None
        for (inst_uuid, inst) in instanceinfo.items():
          if disk_uuid in inst.disks:
            disk_active = inst.disks_active
            disk_instance = inst_uuid
            break
        node_drbd[minor] = (disk_uuid, disk_instance, disk_active)
    return node_drbd

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, disks_info,
                      drbd_helper, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param disks_info: the dict of disks
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)

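    # compute the DRBD minors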
    node_drbd = self._ComputeDrbdMinors(ninfo, instanceinfo, disks_info,
                                        drbd_map, self._ErrorIf)

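    # and now check them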
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
                  "cannot parse drbd status file: %s", str(used_minors))
    if test:
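      # we cannot check drbd status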
      return

    for minor, (disk_uuid, inst_uuid, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      if inst_uuid is not None:
        attached = "(attached in instance '%s')" % \
                   self.cfg.GetInstanceName(inst_uuid)
      else:
        attached = "(detached)"
      self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
                    "drbd minor %d of disk %s %s is not active",
                    minor, disk_uuid, attached)
    for minor in used_minors:
      test = minor not in node_drbd
      self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
                    "unallocated drbd minor %d is in use", minor)

1272 """Builds the node OS structures.
1273
1274 @type ninfo: L{objects.Node}
1275 @param ninfo: the node to check
1276 @param nresult: the remote results for the node
1277 @param nimg: the node image object
1278
1279 """
1280 remote_os = nresult.get(constants.NV_OSLIST, None)
1281 test = (not isinstance(remote_os, list) or
1282 not compat.all(isinstance(v, list) and len(v) == 8
1283 for v in remote_os))
1284
1285 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1286 "node hasn't returned valid OS data")
1287
1288 nimg.os_fail = test
1289
1290 if test:
1291 return
1292
1293 os_dict = {}
1294
1295 for (name, os_path, status, diagnose,
1296 variants, parameters, api_ver,
1297 trusted) in nresult[constants.NV_OSLIST]:
1298
1299 if name not in os_dict:
1300 os_dict[name] = []
1301
1302
1303
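      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it: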
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver),
                            trusted))

    nimg.oslist = os_dict

1312 """Verifies the node OS list.
1313
1314 @type ninfo: L{objects.Node}
1315 @param ninfo: the node to check
1316 @param nimg: the node image object
1317 @param base: the 'template' node we match against (e.g. from the master)
1318
1319 """
1320 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1321
1322 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1323 for os_name, os_data in nimg.oslist.items():
1324 assert os_data, "Empty OS status for OS %s?!" % os_name
1325 f_path, f_status, f_diag, f_var, f_param, f_api, f_trusted = os_data[0]
1326 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
1327 "Invalid OS %s (located at %s): %s",
1328 os_name, f_path, f_diag)
1329 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
1330 "OS '%s' has multiple entries"
1331 " (first one shadows the rest): %s",
1332 os_name, utils.CommaJoin([v[0] for v in os_data]))
1333
1334 test = os_name not in base.oslist
1335 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1336 "Extra OS %s not present on reference node (%s)",
1337 os_name, self.cfg.GetNodeName(base.uuid))
1338 if test:
1339 continue
1340 assert base.oslist[os_name], "Base node has empty OS status?"
1341 _, b_status, _, b_var, b_param, b_api, b_trusted = base.oslist[os_name][0]
1342 if not b_status:
1343
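        # base OS is invalid, skipping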
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
                      "OS %s for %s differs from reference node %s:"
                      " [%s] vs. [%s]", kind, os_name,
                      self.cfg.GetNodeName(base.uuid),
                      utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
      for kind, a, b in [("trusted", f_trusted, b_trusted)]:
        self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
                      "OS %s for %s differs from reference node %s:"
                      " %s vs. %s", kind, os_name,
                      self.cfg.GetNodeName(base.uuid), a, b)

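    # check any missing OSes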
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
                  "OSes present on reference node %s"
                  " but missing on this node: %s",
                  self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))

1368 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
1369
1370 @type ninfo: L{objects.Node}
1371 @param ninfo: the node to check
1372 @param nresult: the remote results for the node
1373 @type is_master: bool
1374 @param is_master: Whether node is the master node
1375
1376 """
1377 cluster = self.cfg.GetClusterInfo()
1378 if (is_master and
1379 (cluster.IsFileStorageEnabled() or
1380 cluster.IsSharedFileStorageEnabled())):
1381 try:
1382 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
1383 except KeyError:
1384
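        # This should never happen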
        self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
                      "Node did not return forbidden file storage paths")
      else:
        self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
                      "Found forbidden file storage paths: %s",
                      utils.CommaJoin(fspaths))
    else:
      self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
                    constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
                    "Node should not have returned forbidden file storage"
                    " paths")

1397 - def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
1398 verify_key, error_key):
1399 """Verifies (file) storage paths.
1400
1401 @type ninfo: L{objects.Node}
1402 @param ninfo: the node to check
1403 @param nresult: the remote results for the node
1404 @type file_disk_template: string
1405 @param file_disk_template: file-based disk template, whose directory
1406 is supposed to be verified
1407 @type verify_key: string
1408 @param verify_key: key for the verification map of this file
1409 verification step
1410 @param error_key: error key to be added to the verification results
1411 in case something goes wrong in this verification step
1412
1413 """
1414 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
1415 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
1416 ))
1417
1418 cluster = self.cfg.GetClusterInfo()
1419 if cluster.IsDiskTemplateEnabled(file_disk_template):
1420 self._ErrorIf(
1421 verify_key in nresult,
1422 error_key, ninfo.name,
1423 "The configured %s storage path is unusable: %s" %
1424 (file_disk_template, nresult.get(verify_key)))

  def _VerifyOob(self, ninfo, nresult):
1460 """Verifies out of band functionality of a node.
1461
1462 @type ninfo: L{objects.Node}
1463 @param ninfo: the node to check
1464 @param nresult: the remote results for the node
1465
1466 """
1467
1468
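    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master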
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
                      ninfo.name, path_result)

1476 """Verifies and updates the node volume data.
1477
1478 This function will update a L{NodeImage}'s internal structures
1479 with data from the remote call.
1480
1481 @type ninfo: L{objects.Node}
1482 @param ninfo: the node to check
1483 @param nresult: the remote results for the node
1484 @param nimg: the node image object
1485 @param vg_name: the configured VG name
1486
1487 """
1488 nimg.lvm_fail = True
1489 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1490 if vg_name is None:
1491 pass
1492 elif isinstance(lvdata, basestring):
1493 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1494 "LVM problem on node: %s", utils.SafeEncode(lvdata))
1495 elif not isinstance(lvdata, dict):
1496 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1497 "rpc call to node failed (lvlist)")
1498 else:
1499 nimg.volumes = lvdata
1500 nimg.lvm_fail = False
1501
1503 """Verifies and updates the node instance list.
1504
1505 If the listing was successful, then updates this node's instance
1506 list. Otherwise, it marks the RPC call as failed for the instance
1507 list key.
1508
1509 @type ninfo: L{objects.Node}
1510 @param ninfo: the node to check
1511 @param nresult: the remote results for the node
1512 @param nimg: the node image object
1513
1514 """
1515 idata = nresult.get(constants.NV_INSTANCELIST, None)
1516 test = not isinstance(idata, list)
1517 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1518 "rpc call to node failed (instancelist): %s",
1519 utils.SafeEncode(str(idata)))
1520 if test:
1521 nimg.hyp_fail = True
1522 else:
1523 nimg.instances = [uuid for (uuid, _) in
1524 self.cfg.GetMultiInstanceInfoByName(idata)]
1525
1527 """Verifies and computes a node information map
1528
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nimg: the node image object
1533 @param vg_name: the configured VG name
1534
1535 """
1536
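    # try to read free memory (from the hypervisor)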
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
                      "node returned invalid nodeinfo, check hypervisor")

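    # try to read free disk space (from the configured volume group)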
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
                    "node didn't return data for the volume group '%s'"
                    " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
                        "node returned invalid LVM info, check LVM status")

1563 """Gets per-disk status information for all instances.
1564
1565 @type node_uuids: list of strings
1566 @param node_uuids: Node UUIDs
1567 @type node_image: dict of (UUID, L{objects.Node})
1568 @param node_image: Node objects
1569 @type instanceinfo: dict of (UUID, L{objects.Instance})
1570 @param instanceinfo: Instance objects
1571 @rtype: {instance: {node: [(succes, payload)]}}
1572 @return: a dictionary of per-instance dictionaries with nodes as
1573 keys and disk information as values; the disk information is a
1574 list of tuples (success, payload)
1575
1576 """
    node_disks = {}
    node_disks_dev_inst_only = {}
    diskless_instances = set()
    nodisk_instances = set()

    for nuuid in node_uuids:
      node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
                                             node_image[nuuid].sinst))
      diskless_instances.update(uuid for uuid in node_inst_uuids
                                if not instanceinfo[uuid].disks)
      disks = [(inst_uuid, disk)
               for inst_uuid in node_inst_uuids
               for disk in self.cfg.GetInstanceDisks(inst_uuid)]

      if not disks:
        nodisk_instances.update(uuid for uuid in node_inst_uuids
                                if instanceinfo[uuid].disks)
        continue

      node_disks[nuuid] = disks

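      # Annotate each disk with the parameters of its instance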
      dev_inst_only = []
      for (inst_uuid, dev) in disks:
        (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
                                          self.cfg)
        dev_inst_only.append((anno_disk, instanceinfo[inst_uuid]))

      node_disks_dev_inst_only[nuuid] = dev_inst_only

    assert len(node_disks) == len(node_disks_dev_inst_only)

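    # Query mirror status for all disks on all relevant nodes in one RPC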
    result = self.rpc.call_blockdev_getmirrorstatus_multi(
      node_disks.keys(), node_disks_dev_inst_only)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nuuid, nres) in result.items():
      node = self.cfg.GetNodeInfo(nuuid)
      disks = node_disks[node.uuid]

      if nres.offline:
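        # No data from this node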
1624 data = len(disks) * [(False, "node offline")]
1625 else:
1626 msg = nres.fail_msg
1627 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
1628 "while getting disk information: %s", msg)
1629 if msg:
1630
1631 data = len(disks) * [(False, msg)]
1632 else:
1633 data = []
1634 for idx, i in enumerate(nres.payload):
1635 if isinstance(i, (tuple, list)) and len(i) == 2:
1636 data.append(i)
1637 else:
1638 logging.warning("Invalid result from node %s, entry %d: %s",
1639 node.name, idx, i)
1640 data.append((False, "Invalid result from the remote node"))
1641
1642 for ((inst_uuid, _), status) in zip(disks, data):
1643 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
1644 .append(status)
1645
1646
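    # Add empty entries for diskless instances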
    for inst_uuid in diskless_instances:
      assert inst_uuid not in instdisk
      instdisk[inst_uuid] = {}

    for inst_uuid in nodisk_instances:
      assert inst_uuid not in instdisk
      instdisk[inst_uuid] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nuuids) <= len(
                        self.cfg.GetInstanceNodes(instanceinfo[inst].uuid)) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nuuids in instdisk.items()
                      for nuuid, statuses in nuuids.items())
    if __debug__:
      instdisk_keys = set(instdisk)
      instanceinfo_keys = set(instanceinfo)
      assert instdisk_keys == instanceinfo_keys, \
        ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
         (instdisk_keys, instanceinfo_keys))

    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @rtype: tuple of (list of strings, dict of strings to lists of strings,
        list of strings)
    @return: a tuple containing the list of all online nodes, a dictionary
        mapping node names to additional nodes of other node groups to which
        connectivity should be tested, and a list of all online master
        candidates

    @warning: This algorithm has a known issue if one node group is much
        smaller than others (e.g. just one node). In such a case all other
        nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes
                          if not node.offline)
    online_mcs = sorted(node.name for node in group_nodes
                        if (node.master_candidate and not node.offline))
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes),
            online_mcs)

  def _PrepareSshSetupCheck(self):
    """Prepare the input data for the SSH setup verification.

    """
    all_nodes_info = self.cfg.GetAllNodesInfo()
    potential_master_candidates = self.cfg.GetPotentialMasterCandidates()
    node_status = [
      (uuid, node_info.name, node_info.master_candidate,
       node_info.name in potential_master_candidates, not node_info.offline)
      for (uuid, node_info) in all_nodes_info.items()]
    return node_status

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], list(self.my_node_info.keys()))

  @staticmethod
  def _VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
                        i_offline, n_offline, n_drained):
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_uuids:
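      # empty node group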
1778 feedback_fn("* Empty node group, skipping verification")
1779 return True
1780
1781 self.bad = False
1782 verbose = self.op.verbose
1783 self._feedback_fn = feedback_fn
1784
1785 vg_name = self.cfg.GetVGName()
1786 drbd_helper = self.cfg.GetDRBDHelper()
1787 cluster = self.cfg.GetClusterInfo()
1788 hypervisors = cluster.enabled_hypervisors
1789 node_data_list = self.my_node_info.values()
1790
1791 i_non_redundant = []
1792 i_non_a_balanced = []
1793 i_offline = 0
1794 n_offline = 0
1795 n_drained = 0
1796 node_vol_should = {}
1797
1798
1799
1800
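    # File verification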
    filemap = ComputeAncillaryFiles(cluster, False)

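    # do local checksums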
    master_node_uuid = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        map(vcluster.MakeVirtualPath,
            utils.UniqueSequence(filename
                                 for files in filemap
                                 for filename in files)),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
      constants.NV_OSLIST: None,
      constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(),
      constants.NV_USERSCRIPTS: user_scripts,
      constants.NV_CLIENT_CERT: None,
      }

    if self.cfg.GetClusterInfo().modify_ssh_setup:
      node_verify_param[constants.NV_SSH_SETUP] = self._PrepareSshSetupCheck()
      if self.op.verify_clutter:
        node_verify_param[constants.NV_SSH_CLUTTER] = True

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]

    if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8):
      if drbd_helper:
        node_verify_param[constants.NV_DRBDVERSION] = None
        node_verify_param[constants.NV_DRBDLIST] = None
        node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    if cluster.IsFileStorageEnabled() or \
       cluster.IsSharedFileStorageEnabled():
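      # Load file storage paths only from master node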
      node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
        self.cfg.GetMasterNodeName()
      if cluster.IsFileStorageEnabled():
        node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
          cluster.file_storage_dir
      if cluster.IsSharedFileStorageEnabled():
        node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \
          cluster.shared_file_storage_dir

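    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide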
1870 bridges = set()
1871 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
1872 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
1873 bridges.add(default_nicpp[constants.NIC_LINK])
1874 for inst_uuid in self.my_inst_info.values():
1875 for nic in inst_uuid.nics:
1876 full_nic = cluster.SimpleFillNIC(nic.nicparams)
1877 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
1878 bridges.add(full_nic[constants.NIC_LINK])
1879
1880 if bridges:
1881 node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
                                                 uuid=node.uuid,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)
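
    # node_image maps node UUID -> NodeImage; its pinst/sinst/sbp fields are
    # filled in by the instance loop below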

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
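
    # Each collected out-of-band helper path is then checked for existence
    # and sane permissions on every node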

    for inst_uuid in self.my_inst_uuids:
      instance = self.my_inst_info[inst_uuid]
      if instance.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
      for nuuid in inst_nodes:
        if nuuid not in node_image:
          gnode = self.NodeImage(uuid=nuuid)
          # ghost node: referenced by an instance, but absent from the
          # cluster configuration
          gnode.ghost = (nuuid not in self.all_node_info)
          node_image[nuuid] = gnode

      self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)

      pnode = instance.primary_node
      node_image[pnode].pinst.append(instance.uuid)

      for snode in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
        nimg = node_image[snode]
        nimg.sinst.append(instance.uuid)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance.uuid)
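
    # Every NodeImage now knows its primary instances (pinst), its secondary
    # instances (sinst) and, keyed by primary node, the instances for which
    # it holds secondaries (sbp)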

    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
                                               self.my_node_info.keys())
    # The value of exclusive_storage should be the same across the group,
    # so if it's True for at least one node, we act as if it were set for
    # all the nodes
    self._exclusive_storage = compat.any(es_flags.values())
    if self._exclusive_storage:
      node_verify_param[constants.NV_EXCLUSIVEPVS] = True

    node_group_uuids = dict(map(lambda n: (n.name, n.group),
                                self.cfg.GetAllNodesInfo().values()))
    groups_config = self.cfg.GetAllNodeGroupsInfoDict()

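    # Hold the config lock shared while the verification RPCs run, so the
    # configuration the nodes are checked against cannot change underneath us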
    with self.cfg.GetConfigManager(shared=True, forcelock=True):
      feedback_fn("* Gathering information about nodes (%s nodes)" %
                  len(self.my_node_uuids))

      # Force the configuration to be fully distributed before the tests
      self.cfg.FlushConfig()

      # Due to the way our RPC system works, exact response times cannot be
      # guaranteed (e.g. a broken node could run into a timeout). By keeping
      # the time before and after executing the request, we can at least have
      # a time window.
      nvinfo_starttime = time.time()

      # Get the status from all nodes
      cluster_name = self.cfg.GetClusterName()
      hvparams = self.cfg.GetClusterInfo().hvparams
      all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
                                             node_verify_param,
                                             cluster_name,
                                             hvparams,
                                             node_group_uuids,
                                             groups_config)
      nvinfo_endtime = time.time()

      if self.extra_lv_nodes and vg_name is not None:
        feedback_fn("* Gathering information about extra nodes (%s nodes)" %
                    len(self.extra_lv_nodes))
        extra_lv_nvinfo = \
            self.rpc.call_node_verify(self.extra_lv_nodes,
                                      {constants.NV_LVLIST: vg_name},
                                      self.cfg.GetClusterName(),
                                      self.cfg.GetClusterInfo().hvparams,
                                      node_group_uuids,
                                      groups_config)
      else:
        extra_lv_nvinfo = {}
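
      # extra_lv_nvinfo only carries LV lists; it is merged into the node
      # images by _UpdateNodeVolumes further down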

      # If not all nodes are being checked, we need to make sure the master
      # node and a non-checked vm_capable node are in the list.
      absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
      if absent_node_uuids:
        vf_nvinfo = all_nvinfo.copy()
        vf_node_info = list(self.my_node_info.values())
        additional_node_uuids = []
        if master_node_uuid not in self.my_node_info:
          additional_node_uuids.append(master_node_uuid)
          vf_node_info.append(self.all_node_info[master_node_uuid])

        # Add a single vm_capable, online node outside the group, so file
        # consistency can also be checked against a node we do not verify
        for node_uuid in absent_node_uuids:
          nodeinfo = self.all_node_info[node_uuid]
          if (nodeinfo.vm_capable and not nodeinfo.offline and
              node_uuid != master_node_uuid):
            additional_node_uuids.append(node_uuid)
            vf_node_info.append(self.all_node_info[node_uuid])
            break
        key = constants.NV_FILELIST

        feedback_fn("* Gathering information about the master node")
        vf_nvinfo.update(self.rpc.call_node_verify(
            additional_node_uuids, {key: node_verify_param[key]},
            self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams,
            node_group_uuids,
            groups_config))
      else:
        vf_nvinfo = all_nvinfo
        vf_node_info = self.my_node_info.values()
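
      # From here on vf_node_info/vf_nvinfo cover every node whose files must
      # be consistent, whether or not it belongs to this group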

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_uuids))
    instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
                                     self.my_inst_info)
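
    # instdisk is a nested mapping of instance UUID -> node UUID -> disk
    # status, as assembled by _CollectDiskInfo from the per-node results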

    feedback_fn("* Verifying configuration file consistency")

    self._VerifyClientCertificates(self.my_node_info.values(), all_nvinfo)
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      self._VerifySshSetup(self.my_node_info.values(), all_nvinfo)
    self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
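
    # _VerifyFiles cross-checks the checksums that the nodes in vf_node_info
    # reported for the ancillary files computed in filemap above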

    feedback_fn("* Verifying node status")

    refos_img = None  # reference OS image, from the first node with valid OS

    for node_i in node_data_list:
      nimg = node_image[node_i.uuid]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node_i.name,))
        n_offline += 1
        continue

      if node_i.uuid == master_node_uuid:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))

      msg = all_nvinfo[node_i.uuid].fail_msg
      self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
                    "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node_i.uuid].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyAcceptedFileStoragePaths(node_i, nresult,
                                           node_i.uuid == master_node_uuid)
      self._VerifyFileStoragePaths(node_i, nresult)
      self._VerifySharedFileStoragePaths(node_i, nresult)
      self._VerifyGlusterStoragePaths(node_i, nresult)

      if nimg.vm_capable:
        self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
        if constants.DT_DRBD8 in cluster.enabled_disk_templates:
          self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info,
                               self.all_disks_info, drbd_helper, all_drbd_map)

        if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \
            (constants.DT_DRBD8 in cluster.enabled_disk_templates):
          self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

      # Check whether all running instances are primary for the node. (This
      # can no longer be done from _VerifyInstance below, since some of the
      # wrong instances could be from other node groups.)
      non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)

      for inst_uuid in non_primary_inst_uuids:
        test = inst_uuid in self.all_inst_info
        self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
                      self.cfg.GetInstanceName(inst_uuid),
                      "instance should not run on node %s", node_i.name)
        self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                      "node is running unknown instance %s", inst_uuid)

    self._VerifyGroupDRBDVersion(all_nvinfo)
    self._VerifyGroupLVM(node_image, vg_name)

    for node_uuid, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
                              node_image[node_uuid], vg_name)

    feedback_fn("* Verifying instance status")
    for inst_uuid in self.my_inst_uuids:
      instance = self.my_inst_info[inst_uuid]
      if verbose:
        feedback_fn("* Verifying instance %s" % instance.name)
      self._VerifyInstance(instance, node_image, instdisk[inst_uuid])

      # If the instance is not fully redundant we cannot survive losing its
      # primary node, so we are not N+1 compliant.
      inst_disks = self.cfg.GetInstanceDisks(instance.uuid)
      if not utils.AllDiskOfType(inst_disks, constants.DTS_MIRRORED):
        i_non_redundant.append(instance)

      if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for instance in self.all_inst_info.values():
      for secondary in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
        if (secondary in self.my_node_info
            and instance.uuid not in self.my_inst_info):
          self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
          break

    self._VerifyOrphanVolumes(vg_name, node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    self._VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
                           i_offline, n_offline, n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyzes the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_uuids:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          lu_result = False
          continue
        if res.offline:
          # No need to investigate payload if node is offline
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub(" ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result