30 """Logical units for cluster verification."""
31
32 import itertools
33 import logging
34 import operator
35 import re
36 import time
37 import ganeti.masterd.instance
38 import ganeti.rpc.node as rpc
39
40 from ganeti import compat
41 from ganeti import constants
42 from ganeti import errors
43 from ganeti import locking
44 from ganeti import pathutils
45 from ganeti import utils
46 from ganeti import vcluster
47 from ganeti import hypervisor
48 from ganeti import opcodes
49
50 from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, ResultWithJobs
51 from ganeti.cmdlib.common import ShareAll, ComputeAncillaryFiles, \
52 CheckNodePVs, ComputeIPolicyInstanceViolation, AnnotateDiskParams, \
53 SupportsOob
57 """Compute the set of all hypervisor parameters.
58
59 @type cluster: L{objects.Cluster}
60 @param cluster: the cluster object
61 @type instances: list of L{objects.Instance}
62 @param instances: additional instances from which to obtain parameters
63 @rtype: list of (origin, hypervisor, parameters)
64 @return: a list with all parameters found, indicating the hypervisor they
65 apply to, and the origin (can be "cluster", "os X", or "instance Y")
66
67 """
68 hvp_data = []
69
70 for hv_name in cluster.enabled_hypervisors:
71 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
72
73 for os_name, os_hvp in cluster.os_hvp.items():
74 for hv_name, hv_params in os_hvp.items():
75 if hv_params:
76 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
77 hvp_data.append(("os %s" % os_name, hv_name, full_params))
78
79
80 for instance in instances:
81 if instance.hvparams:
82 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
83 cluster.FillHV(instance)))
84
85 return hvp_data
86
89 """Mix-in for cluster/group verify LUs.
90
91 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
92 self.op and self._feedback_fn to be available.)
93
94 """
95
96 ETYPE_FIELD = "code"
97 ETYPE_ERROR = constants.CV_ERROR
98 ETYPE_WARNING = constants.CV_WARNING
99
100 def _Error(self, ecode, item, msg, *args, **kwargs):
101 """Format an error message.
102
103 Based on the opcode's error_codes parameter, either format a
104 parseable error code, or a simpler error string.
105
106 This must be called only from Exec and functions called from Exec.
107
108 """
109 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
110 itype, etxt, _ = ecode
111
112
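# Errors whose code appears in the opcode's ignore_errors list are demoted to warnings.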
113 if etxt in self.op.ignore_errors:
114 ltype = self.ETYPE_WARNING
115
116 if args:
117 msg = msg % args
118
119 if self.op.error_codes:
120 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
121 else:
122 if item:
123 item = " " + item
124 else:
125 item = ""
126 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
127
128 self._feedback_fn(" - %s" % msg)
129
130 if ltype == self.ETYPE_ERROR:
131 self.bad = True
132
133 def _ErrorIf(self, cond, *args, **kwargs):
134 """Log an error message if the passed condition is True.
135
136 """
137 if (bool(cond)
138 or self.op.debug_simulate_errors):
139 self._Error(*args, **kwargs)
140
143 """Submits all jobs necessary to verify the cluster.
144
145 """
146 REQ_BGL = False
147
148 def ExpandNames(self):
149 self.needed_locks = {}
150
151 def Exec(self, feedback_fn):
152 jobs = []
153
154 if self.op.group_name:
155 groups = [self.op.group_name]
156 depends_fn = lambda: None
157 else:
158 groups = self.cfg.GetNodeGroupList()
159
160
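# Verify the cluster-wide configuration first; the group jobs added below depend on it.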
161 jobs.append([
162 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
163 ])
164
165
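# -len(jobs) is a relative job dependency: each group-verify job waits for the config-verify job appended above.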
166 depends_fn = lambda: [(-len(jobs), [])]
167
168 jobs.extend(
169 [opcodes.OpClusterVerifyGroup(group_name=group,
170 ignore_errors=self.op.ignore_errors,
171 depends=depends_fn(),
172 verify_clutter=self.op.verify_clutter)]
173 for group in groups)
174
175
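# Propagate the shared verification options to every submitted opcode.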
176 for op in itertools.chain(*jobs):
177 op.debug_simulate_errors = self.op.debug_simulate_errors
178 op.verbose = self.op.verbose
179 op.error_codes = self.op.error_codes
180 try:
181 op.skip_checks = self.op.skip_checks
182 except AttributeError:
183 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
184
185 return ResultWithJobs(jobs)
186
189 """Verifies the cluster disks status.
190
191 """
192 REQ_BGL = False
193
194 def ExpandNames(self):
195 self.share_locks = ShareAll()
196 if self.op.group_name:
197 self.needed_locks = {
198 locking.LEVEL_NODEGROUP: [self.cfg.LookupNodeGroup(self.op.group_name)]
199 }
200 else:
201 self.needed_locks = {
202 locking.LEVEL_NODEGROUP: locking.ALL_SET,
203 }
204
205 def Exec(self, feedback_fn):
206 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
207
208 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
209 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
210 for group in group_names])
211
214 """Verifies the cluster config.
215
216 """
217 REQ_BGL = False
218
232
236
245
246 def Exec(self, feedback_fn):
247 """Verify integrity of cluster, performing various tests on nodes.
248
249 """
250 self.bad = False
251 self._feedback_fn = feedback_fn
252
253 feedback_fn("* Verifying cluster config")
254
255 for msg in self.cfg.VerifyConfig():
256 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
257
258 feedback_fn("* Verifying cluster certificate files")
259
260 for cert_filename in pathutils.ALL_CERT_FILES:
261 (errcode, msg) = utils.VerifyCertificate(cert_filename)
262 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
263
264 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
265 pathutils.NODED_CERT_FILE),
266 constants.CV_ECLUSTERCERT,
267 None,
268 pathutils.NODED_CERT_FILE + " must be accessible by the " +
269 constants.LUXID_USER + " user")
270
271 feedback_fn("* Verifying hypervisor parameters")
272
273 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
274 self.all_inst_info.values()))
275
276 feedback_fn("* Verifying all nodes belong to an existing group")
277
278
279
280
281
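# Nodes whose group no longer exists would never be reached by LUClusterVerifyGroup, which only acts on existing groups, so they are caught here.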
282 dangling_nodes = set(node for node in self.all_node_info.values()
283 if node.group not in self.all_group_info)
284
285 dangling_instances = {}
286 no_node_instances = []
287
288 for inst in self.all_inst_info.values():
289 if inst.primary_node in [node.uuid for node in dangling_nodes]:
290 dangling_instances.setdefault(inst.primary_node, []).append(inst)
291 elif inst.primary_node not in self.all_node_info:
292 no_node_instances.append(inst)
293
294 pretty_dangling = [
295 "%s (%s)" %
296 (node.name,
297 utils.CommaJoin(inst.name for
298 inst in dangling_instances.get(node.uuid, [])))
299 for node in dangling_nodes]
300
301 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
302 None,
303 "the following nodes (and their instances) belong to a non"
304 " existing group: %s", utils.CommaJoin(pretty_dangling))
305
306 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
307 None,
308 "the following instances have a non-existing primary-node:"
309 " %s", utils.CommaJoin(inst.name for
310 inst in no_node_instances))
311
312 return not self.bad
313
316 """Verifies the status of a node group.
317
318 """
319 HPATH = "cluster-verify"
320 HTYPE = constants.HTYPE_CLUSTER
321 REQ_BGL = False
322
323 _HOOKS_INDENT_RE = re.compile("^", re.M)
324
326 """A class representing the logical and physical status of a node.
327
328 @type uuid: string
329 @ivar uuid: the node UUID to which this object refers
330 @ivar volumes: a structure as returned from
331 L{ganeti.backend.GetVolumeList} (runtime)
332 @ivar instances: a list of running instances (runtime)
333 @ivar pinst: list of configured primary instances (config)
334 @ivar sinst: list of configured secondary instances (config)
335 @ivar sbp: dictionary of {primary-node: list of instances} for all
336 instances for which this node is secondary (config)
337 @ivar mfree: free memory, as reported by hypervisor (runtime)
338 @ivar mtotal: total memory, as reported by hypervisor (runtime)
339 @ivar mdom0: domain0 memory, as reported by hypervisor (runtime)
340 @ivar dfree: free disk, as reported by the node (runtime)
341 @ivar offline: the offline status (config)
342 @type rpc_fail: boolean
343 @ivar rpc_fail: whether the RPC verify call failed (overall,
344 not whether the individual keys were correct) (runtime)
345 @type lvm_fail: boolean
346 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
347 @type hyp_fail: boolean
348 @ivar hyp_fail: whether the RPC call didn't return the instance list
349 @type ghost: boolean
350 @ivar ghost: whether this is a known node or not (config)
351 @type os_fail: boolean
352 @ivar os_fail: whether the RPC call didn't return valid OS data
353 @type oslist: list
354 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
355 @type vm_capable: boolean
356 @ivar vm_capable: whether the node can host instances
357 @type pv_min: float
358 @ivar pv_min: size in MiB of the smallest PVs
359 @type pv_max: float
360 @ivar pv_max: size in MiB of the biggest PVs
361
362 """
363 def __init__(self, offline=False, uuid=None, vm_capable=True):
364 self.uuid = uuid
365 self.volumes = {}
366 self.instances = []
367 self.pinst = []
368 self.sinst = []
369 self.sbp = {}
370 self.mfree = 0
371 self.mtotal = 0
372 self.mdom0 = 0
373 self.dfree = 0
374 self.offline = offline
375 self.vm_capable = vm_capable
376 self.rpc_fail = False
377 self.lvm_fail = False
378 self.hyp_fail = False
379 self.ghost = False
380 self.os_fail = False
381 self.oslist = {}
382 self.pv_min = None
383 self.pv_max = None
384
400
419
420 def CheckPrereq(self):
421 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
422 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
423
424 group_node_uuids = set(self.group_info.members)
425 group_inst_uuids = \
426 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
427
428 unlocked_node_uuids = \
429 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
430
431 unlocked_inst_uuids = \
432 group_inst_uuids.difference(
433 [self.cfg.GetInstanceInfoByName(name).uuid
434 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
435
436 if unlocked_node_uuids:
437 raise errors.OpPrereqError(
438 "Missing lock for nodes: %s" %
439 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
440 errors.ECODE_STATE)
441
442 if unlocked_inst_uuids:
443 raise errors.OpPrereqError(
444 "Missing lock for instances: %s" %
445 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
446 errors.ECODE_STATE)
447
448 self.all_node_info = self.cfg.GetAllNodesInfo()
449 self.all_inst_info = self.cfg.GetAllInstancesInfo()
450 self.all_disks_info = self.cfg.GetAllDisksInfo()
451
452 self.my_node_uuids = group_node_uuids
453 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
454 for node_uuid in group_node_uuids)
455
456 self.my_inst_uuids = group_inst_uuids
457 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
458 for inst_uuid in group_inst_uuids)
459
460
461
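# Nodes outside this group may still hold LVs of the group's internally mirrored instances; collect them so their locks can be verified too.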
462 extra_lv_nodes = set()
463
464 for inst in self.my_inst_info.values():
465 disks = self.cfg.GetInstanceDisks(inst.uuid)
466 if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
467 inst_nodes = self.cfg.GetInstanceNodes(inst.uuid)
468 for nuuid in inst_nodes:
469 if self.all_node_info[nuuid].group != self.group_uuid:
470 extra_lv_nodes.add(nuuid)
471
472 unlocked_lv_nodes = \
473 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
474
475 if unlocked_lv_nodes:
476 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
477 utils.CommaJoin(unlocked_lv_nodes),
478 errors.ECODE_STATE)
479 self.extra_lv_nodes = list(extra_lv_nodes)
480
482 """Perform some basic validation on data returned from a node.
483
484 - check the result data structure is well formed and has all the
485 mandatory fields
486 - check ganeti version
487
488 @type ninfo: L{objects.Node}
489 @param ninfo: the node to check
490 @param nresult: the results from the node
491 @rtype: boolean
492 @return: whether overall this call was successful (and we can expect
493 reasonable values in the response)
494
495 """
496
497 test = not nresult or not isinstance(nresult, dict)
498 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
499 "unable to verify node: no data returned")
500 if test:
501 return False
502
503
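# Compare the master's protocol version against what the node reported.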
504 local_version = constants.PROTOCOL_VERSION
505 remote_version = nresult.get("version", None)
506 test = not (remote_version and
507 isinstance(remote_version, (list, tuple)) and
508 len(remote_version) == 2)
509 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
510 "connection to node returned invalid data")
511 if test:
512 return False
513
514 test = local_version != remote_version[0]
515 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
516 "incompatible protocol versions: master %s,"
517 " node %s", local_version, remote_version[0])
518 if test:
519 return False
520
521
522
523
524 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
525 constants.CV_ENODEVERSION, ninfo.name,
526 "software version mismatch: master %s, node %s",
527 constants.RELEASE_VERSION, remote_version[1],
528 code=self.ETYPE_WARNING)
529
530 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
531 if ninfo.vm_capable and isinstance(hyp_result, dict):
532 for hv_name, hv_result in hyp_result.items():
533 test = hv_result is not None
534 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
535 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
536
537 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
538 if ninfo.vm_capable and isinstance(hvp_result, list):
539 for item, hv_name, hv_result in hvp_result:
540 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
541 "hypervisor %s parameter verify failure (source %s): %s",
542 hv_name, item, hv_result)
543
544 test = nresult.get(constants.NV_NODESETUP,
545 ["Missing NODESETUP results"])
546 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
547 "node setup error: %s", "; ".join(test))
548
549 return True
550
551 def _VerifyNodeTime(self, ninfo, nresult,
552 nvinfo_starttime, nvinfo_endtime):
553 """Check the node time.
554
555 @type ninfo: L{objects.Node}
556 @param ninfo: the node to check
557 @param nresult: the remote results for the node
558 @param nvinfo_starttime: the start time of the RPC call
559 @param nvinfo_endtime: the end time of the RPC call
560
561 """
562 ntime = nresult.get(constants.NV_TIME, None)
563 try:
564 ntime_merged = utils.MergeTime(ntime)
565 except (ValueError, TypeError):
566 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
567 "Node returned invalid time")
568 return
569
570 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
571 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
572 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
573 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
574 else:
575 ntime_diff = None
576
577 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
578 "Node time diverges by at least %s from master node time",
579 ntime_diff)
580
582 """Check the node LVM results and update info for cross-node checks.
583
584 @type ninfo: L{objects.Node}
585 @param ninfo: the node to check
586 @param nresult: the remote results for the node
587 @param vg_name: the configured VG name
588 @type nimg: L{NodeImage}
589 @param nimg: node image
590
591 """
592 if vg_name is None:
593 return
594
595
596 vglist = nresult.get(constants.NV_VGLIST, None)
597 test = not vglist
598 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
599 "unable to check volume groups")
600 if not test:
601 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
602 constants.MIN_VG_SIZE)
603 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
604
605
606 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
607 for em in errmsgs:
608 self._Error(constants.CV_ENODELVM, ninfo.name, em)
609 if pvminmax is not None:
610 (nimg.pv_min, nimg.pv_max) = pvminmax
611
613 """Check cross-node DRBD version consistency.
614
615 @type node_verify_infos: dict
616 @param node_verify_infos: infos about nodes as returned from the
617 node_verify call.
618
619 """
620 node_versions = {}
621 for node_uuid, ndata in node_verify_infos.items():
622 nresult = ndata.payload
623 if nresult:
624 version = nresult.get(constants.NV_DRBDVERSION, None)
625 if version:
626 node_versions[node_uuid] = version
627
628 if len(set(node_versions.values())) > 1:
629 for node_uuid, version in sorted(node_versions.items()):
630 msg = "DRBD version mismatch: %s" % version
631 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
632 code=self.ETYPE_WARNING)
633
635 """Check cross-node consistency in LVM.
636
637 @type node_image: dict
638 @param node_image: info about nodes, mapping from node to names to
639 L{NodeImage} objects
640 @param vg_name: the configured VG name
641
642 """
643 if vg_name is None:
644 return
645
646
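# The PV size consistency check below only matters when exclusive storage is in use.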
647 if not self._exclusive_storage:
648 return
649
650
651
652
653 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
654 if not vals:
655 return
656 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
657 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
658 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
659 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
660 "PV sizes differ too much in the group; smallest (%s MB) is"
661 " on %s, biggest (%s MB) is on %s",
662 pvmin, self.cfg.GetNodeName(minnode_uuid),
663 pvmax, self.cfg.GetNodeName(maxnode_uuid))
664
666 """Check the node bridges.
667
668 @type ninfo: L{objects.Node}
669 @param ninfo: the node to check
670 @param nresult: the remote results for the node
671 @param bridges: the expected list of bridges
672
673 """
674 if not bridges:
675 return
676
677 missing = nresult.get(constants.NV_BRIDGES, None)
678 test = not isinstance(missing, list)
679 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
680 "did not return valid bridge information")
681 if not test:
682 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
683 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
684
702
704 """Check the node network connectivity results.
705
706 @type ninfo: L{objects.Node}
707 @param ninfo: the node to check
708 @param nresult: the remote results for the node
709
710 """
711 test = constants.NV_NODELIST not in nresult
712 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
713 "node hasn't returned node ssh connectivity data")
714 if not test:
715 if nresult[constants.NV_NODELIST]:
716 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
717 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
718 "ssh communication with node '%s': %s", a_node, a_msg)
719
720 test = constants.NV_NODENETTEST not in nresult
721 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
722 "node hasn't returned node tcp connectivity data")
723 if not test:
724 if nresult[constants.NV_NODENETTEST]:
725 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
726 for anode in nlist:
727 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
728 "tcp communication with node '%s': %s",
729 anode, nresult[constants.NV_NODENETTEST][anode])
730
731 test = constants.NV_MASTERIP not in nresult
732 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
733 "node hasn't returned node master IP reachability data")
734 if not test:
735 if not nresult[constants.NV_MASTERIP]:
736 if ninfo.uuid == self.master_node:
737 msg = "the master node cannot reach the master IP (not configured?)"
738 else:
739 msg = "cannot reach the master IP"
740 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
741
743 """Verify an instance.
744
745 This function checks to see if the required block devices are
746 available on the instance's node, and that the nodes are in the correct
747 state.
748
749 """
750 pnode_uuid = instance.primary_node
751 pnode_img = node_image[pnode_uuid]
752 groupinfo = self.cfg.GetAllNodeGroupsInfo()
753
754 node_vol_should = {}
755 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
756
757 cluster = self.cfg.GetClusterInfo()
758 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
759 self.group_info)
760 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
761 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
762 utils.CommaJoin(err), code=self.ETYPE_WARNING)
763
764 for node_uuid in node_vol_should:
765 n_img = node_image[node_uuid]
766 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
767
768 continue
769 for volume in node_vol_should[node_uuid]:
770 test = volume not in n_img.volumes
771 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
772 "volume %s missing on node %s", volume,
773 self.cfg.GetNodeName(node_uuid))
774
775 if instance.admin_state == constants.ADMINST_UP:
776 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
777 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
778 "instance not running on its primary node %s",
779 self.cfg.GetNodeName(pnode_uuid))
780 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
781 instance.name, "instance is marked as running and lives on"
782 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
783
784 diskdata = [(nname, success, status, idx)
785 for (nname, disks) in diskstatus.items()
786 for idx, (success, status) in enumerate(disks)]
787
788 for nname, success, bdev_status, idx in diskdata:
789
790
791 snode = node_image[nname]
792 bad_snode = snode.ghost or snode.offline
793 self._ErrorIf(instance.disks_active and
794 not success and not bad_snode,
795 constants.CV_EINSTANCEFAULTYDISK, instance.name,
796 "couldn't retrieve status for disk/%s on %s: %s",
797 idx, self.cfg.GetNodeName(nname), bdev_status)
798
799 if instance.disks_active and success and bdev_status.is_degraded:
800 msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))
801
802 code = self.ETYPE_ERROR
803 accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]
804
805 if bdev_status.ldisk_status in accepted_lds:
806 code = self.ETYPE_WARNING
807
808 msg += "; local disk state is '%s'" % \
809 constants.LDS_NAMES[bdev_status.ldisk_status]
810
811 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
812 code=code)
813
814 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
815 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
816 "instance %s, connection to primary node failed",
817 instance.name)
818
819 secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid)
820 self._ErrorIf(len(secondary_nodes) > 1,
821 constants.CV_EINSTANCELAYOUT, instance.name,
822 "instance has multiple secondary nodes: %s",
823 utils.CommaJoin(secondary_nodes),
824 code=self.ETYPE_WARNING)
825
826 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
827 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes)
828 disks = self.cfg.GetInstanceDisks(instance.uuid)
829 if any(es_flags.values()):
830 if not utils.AllDiskOfType(disks, constants.DTS_EXCL_STORAGE):
831
832
833 es_nodes = [n
834 for (n, es) in es_flags.items()
835 if es]
836 unsupported = [d.dev_type for d in disks
837 if d.dev_type not in constants.DTS_EXCL_STORAGE]
838 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
839 "instance uses disk types %s, which are not supported on"
840 " nodes that have exclusive storage set: %s",
841 utils.CommaJoin(unsupported),
842 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
843 for (idx, disk) in enumerate(disks):
844 self._ErrorIf(disk.spindles is None,
845 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
846 "number of spindles not configured for disk %s while"
847 " exclusive storage is enabled, try running"
848 " gnt-cluster repair-disk-sizes", idx)
849
850 if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
851 instance_nodes = utils.NiceSort(inst_nodes)
852 instance_groups = {}
853
854 for node_uuid in instance_nodes:
855 instance_groups.setdefault(self.all_node_info[node_uuid].group,
856 []).append(node_uuid)
857
858 pretty_list = [
859 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
860 groupinfo[group].name)
861
862 for group, nodes in sorted(instance_groups.items(),
863 key=lambda (_, nodes): pnode_uuid in nodes,
864 reverse=True)]
865
866 self._ErrorIf(len(instance_groups) > 1,
867 constants.CV_EINSTANCESPLITGROUPS,
868 instance.name, "instance has primary and secondary nodes in"
869 " different groups: %s", utils.CommaJoin(pretty_list),
870 code=self.ETYPE_WARNING)
871
872 inst_nodes_offline = []
873 for snode in secondary_nodes:
874 s_img = node_image[snode]
875 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
876 self.cfg.GetNodeName(snode),
877 "instance %s, connection to secondary node failed",
878 instance.name)
879
880 if s_img.offline:
881 inst_nodes_offline.append(snode)
882
883
884 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
885 instance.name, "instance has offline secondary node(s) %s",
886 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
887
888 for node_uuid in inst_nodes:
889 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
890 instance.name, "instance lives on ghost node %s",
891 self.cfg.GetNodeName(node_uuid))
892 self._ErrorIf(not node_image[node_uuid].vm_capable,
893 constants.CV_EINSTANCEBADNODE, instance.name,
894 "instance lives on non-vm_capable node %s",
895 self.cfg.GetNodeName(node_uuid))
896
899 """Verify if there are any unknown volumes in the cluster.
900
901 The .os, .swap and backup volumes are ignored. All other volumes are
902 reported as unknown.
903
904 @type vg_name: string
905 @param vg_name: the name of the Ganeti-administered volume group
906 @type reserved: L{ganeti.utils.FieldSet}
907 @param reserved: a FieldSet of reserved volume names
908
909 """
910 for node_uuid, n_img in node_image.items():
911 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
912 self.all_node_info[node_uuid].group != self.group_uuid):
913
914 continue
915 for volume in n_img.volumes:
916
917 if volume.split('/')[0] != vg_name:
918 continue
919
920 test = ((node_uuid not in node_vol_should or
921 volume not in node_vol_should[node_uuid]) and
922 not reserved.Matches(volume))
923 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
924 self.cfg.GetNodeName(node_uuid),
925 "volume %s is unknown", volume,
926 code=_VerifyErrors.ETYPE_WARNING)
927
929 """Verify N+1 Memory Resilience.
930
931 Check that if one single node dies we can still start all the
932 instances it was primary for.
933
934 """
935 cluster_info = self.cfg.GetClusterInfo()
936 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster_info,
937 self.group_info)
938 memory_ratio = ipolicy[constants.IPOLICY_MEMORY_RATIO]
939
940 for node_uuid, n_img in node_image.items():
941
942
943
944
945
946
947
948
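# Check that this node, as a secondary, has enough free memory to start all instances it backs up should any single primary node fail; offline nodes and nodes from other groups are skipped because their memory data is unreliable.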
949 node_cfg = self.all_node_info[node_uuid]
950 if n_img.offline or \
951 node_cfg.group != self.group_uuid:
952
953
954
955
956 continue
957
958 for prinode, inst_uuids in n_img.sbp.items():
959 needed_mem = 0
960 for inst_uuid in inst_uuids:
961 bep = cluster_info.FillBE(all_insts[inst_uuid])
962 if bep[constants.BE_AUTO_BALANCE]:
963 needed_mem += bep[constants.BE_MINMEM]
964 mnode = n_img.mdom0
965 (hv, hv_state) = self.cfg.GetFilledHvStateParams(node_cfg).items()[0]
966 if hv != constants.HT_XEN_PVM and hv != constants.HT_XEN_HVM:
967 mnode = hv_state["mem_node"]
968
969 mem_threshold = (n_img.mtotal - mnode) * (memory_ratio - 1)
970 test = n_img.mfree - needed_mem < mem_threshold
971 self._ErrorIf(test, constants.CV_ENODEN1,
972 self.cfg.GetNodeName(node_uuid),
973 "not enough memory to accomodate instance failovers"
974 " should node %s fail (%dMiB needed, %dMiB available)",
975 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
976
978 """Verifies the consistency of the client certificates.
979
980 This includes several aspects:
981 - the individual validation of all nodes' certificates
982 - the consistency of the master candidate certificate map
983 - the consistency of the master candidate certificate map with the
984 certificates that the master candidates are actually using.
985
986 @param nodes: the list of nodes to consider in this verification
987 @param all_nvinfo: the map of results of the verify_node call to
988 all nodes
989
990 """
991 candidate_certs = self.cfg.GetClusterInfo().candidate_certs
992 if candidate_certs is None or len(candidate_certs) == 0:
993 self._ErrorIf(
994 True, constants.CV_ECLUSTERCLIENTCERT, None,
995 "The cluster's list of master candidate certificates is empty."
996 " If you just updated the cluster, please run"
997 " 'gnt-cluster renew-crypto --new-node-certificates'.")
998 return
999
1000 self._ErrorIf(
1001 len(candidate_certs) != len(set(candidate_certs.values())),
1002 constants.CV_ECLUSTERCLIENTCERT, None,
1003 "There are at least two master candidates configured to use the same"
1004 " certificate.")
1005
1006
1007 for node in nodes:
1008 if node.offline:
1009 continue
1010
1011 nresult = all_nvinfo[node.uuid]
1012 if nresult.fail_msg or not nresult.payload:
1013 continue
1014
1015 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None)
1016
1017 self._ErrorIf(
1018 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None,
1019 "Client certificate of node '%s' failed validation: %s (code '%s')",
1020 node.uuid, msg, errcode)
1021
1022 if not errcode:
1023 digest = msg
1024 if node.master_candidate:
1025 if node.uuid in candidate_certs:
1026 self._ErrorIf(
1027 digest != candidate_certs[node.uuid],
1028 constants.CV_ECLUSTERCLIENTCERT, None,
1029 "Client certificate digest of master candidate '%s' does not"
1030 " match its entry in the cluster's map of master candidate"
1031 " certificates. Expected: %s Got: %s", node.uuid,
1032 digest, candidate_certs[node.uuid])
1033 else:
1034 self._ErrorIf(
1035 True, constants.CV_ECLUSTERCLIENTCERT, None,
1036 "The master candidate '%s' does not have an entry in the"
1037 " map of candidate certificates.", node.uuid)
1038 self._ErrorIf(
1039 digest in candidate_certs.values(),
1040 constants.CV_ECLUSTERCLIENTCERT, None,
1041 "Master candidate '%s' is using a certificate of another node.",
1042 node.uuid)
1043 else:
1044 self._ErrorIf(
1045 node.uuid in candidate_certs,
1046 constants.CV_ECLUSTERCLIENTCERT, None,
1047 "Node '%s' is not a master candidate, but still listed in the"
1048 " map of master candidate certificates.", node.uuid)
1049 self._ErrorIf(
1050 (node.uuid not in candidate_certs) and
1051 (digest in candidate_certs.values()),
1052 constants.CV_ECLUSTERCLIENTCERT, None,
1053 "Node '%s' is not a master candidate and is incorrectly using a"
1054 " certificate of another node which is master candidate.",
1055 node.uuid)
1056
1058 """Evaluates the verification results of the SSH setup and clutter test.
1059
1060 @param nodes: List of L{objects.Node} objects
1061 @param all_nvinfo: RPC results
1062
1063 """
1064 for node in nodes:
1065 if not node.offline:
1066 nresult = all_nvinfo[node.uuid]
1067 if nresult.fail_msg or not nresult.payload:
1068 self._ErrorIf(True, constants.CV_ENODESSH, node.name,
1069 "Could not verify the SSH setup of this node.")
1070 return
1071 for ssh_test in [constants.NV_SSH_SETUP, constants.NV_SSH_CLUTTER]:
1072 result = nresult.payload.get(ssh_test, None)
1073 error_msg = ""
1074 if isinstance(result, list):
1075 error_msg = " ".join(result)
1076 self._ErrorIf(result,
1077 constants.CV_ENODESSH, None, error_msg)
1078
1079 def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
1080 (files_all, files_opt, files_mc, files_vm)):
1081 """Verifies file checksums collected from all nodes.
1082
1083 @param nodes: List of L{objects.Node} objects
1084 @param master_node_uuid: UUID of master node
1085 @param all_nvinfo: RPC results
1086
1087 """
1088
1089 files2nodefn = [
1090 (files_all, None),
1091 (files_mc, lambda node: (node.master_candidate or
1092 node.uuid == master_node_uuid)),
1093 (files_vm, lambda node: node.vm_capable),
1094 ]
1095
1096
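# Build a map of filename to the set of UUIDs of nodes expected to have that file.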
1097 nodefiles = {}
1098 for (files, fn) in files2nodefn:
1099 if fn is None:
1100 filenodes = nodes
1101 else:
1102 filenodes = filter(fn, nodes)
1103 nodefiles.update((filename,
1104 frozenset(map(operator.attrgetter("uuid"), filenodes)))
1105 for filename in files)
1106
1107 assert set(nodefiles) == (files_all | files_mc | files_vm)
1108
1109 fileinfo = dict((filename, {}) for filename in nodefiles)
1110 ignore_nodes = set()
1111
1112 for node in nodes:
1113 if node.offline:
1114 ignore_nodes.add(node.uuid)
1115 continue
1116
1117 nresult = all_nvinfo[node.uuid]
1118
1119 if nresult.fail_msg or not nresult.payload:
1120 node_files = None
1121 else:
1122 fingerprints = nresult.payload.get(constants.NV_FILELIST, {})
1123 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
1124 for (key, value) in fingerprints.items())
1125 del fingerprints
1126
1127 test = not (node_files and isinstance(node_files, dict))
1128 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
1129 "Node did not return file checksum data")
1130 if test:
1131 ignore_nodes.add(node.uuid)
1132 continue
1133
1134
1135 for (filename, checksum) in node_files.items():
1136 assert filename in nodefiles
1137 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
1138
1139 for (filename, checksums) in fileinfo.items():
1140 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1141
1142
1143 with_file = frozenset(node_uuid
1144 for node_uuids in fileinfo[filename].values()
1145 for node_uuid in node_uuids) - ignore_nodes
1146
1147 expected_nodes = nodefiles[filename] - ignore_nodes
1148
1149
1150 missing_file = expected_nodes - with_file
1151
1152 if filename in files_opt:
1153
1154 self._ErrorIf(missing_file and missing_file != expected_nodes,
1155 constants.CV_ECLUSTERFILECHECK, None,
1156 "File %s is optional, but it must exist on all or no"
1157 " nodes (not found on %s)",
1158 filename,
1159 utils.CommaJoin(
1160 utils.NiceSort(
1161 map(self.cfg.GetNodeName, missing_file))))
1162 else:
1163 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
1164 "File %s is missing from node(s) %s", filename,
1165 utils.CommaJoin(
1166 utils.NiceSort(
1167 map(self.cfg.GetNodeName, missing_file))))
1168
1169
1170 unexpected = with_file - expected_nodes
1171 self._ErrorIf(unexpected,
1172 constants.CV_ECLUSTERFILECHECK, None,
1173 "File %s should not exist on node(s) %s",
1174 filename, utils.CommaJoin(
1175 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
1176
1177
1178 test = len(checksums) > 1
1179 if test:
1180 variants = ["variant %s on %s" %
1181 (idx + 1,
1182 utils.CommaJoin(utils.NiceSort(
1183 map(self.cfg.GetNodeName, node_uuids))))
1184 for (idx, (checksum, node_uuids)) in
1185 enumerate(sorted(checksums.items()))]
1186 else:
1187 variants = []
1188
1189 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
1190 "File %s found with %s different checksums (%s)",
1191 filename, len(checksums), "; ".join(variants))
1192
1194 """Verify the drbd helper.
1195
1196 """
1197 if drbd_helper:
1198 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1199 test = (helper_result is None)
1200 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1201 "no drbd usermode helper returned")
1202 if helper_result:
1203 status, payload = helper_result
1204 test = not status
1205 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1206 "drbd usermode helper check unsuccessful: %s", payload)
1207 test = status and (payload != drbd_helper)
1208 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1209 "wrong drbd usermode helper: %s", payload)
1210
1211 @staticmethod
1213 """Gives the DRBD information in a map for a node.
1214
1215 @type ninfo: L{objects.Node}
1216 @param ninfo: the node to check
1217 @param instanceinfo: the dict of instances
1218 @param disks_info: the dict of disks
1219 @param drbd_map: the DRBD map as returned by
1220 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1221 @type error_if: callable like L{_ErrorIf}
1222 @param error_if: The error reporting function
1223 @return: dict from minor number to (disk_uuid, instance_uuid, active)
1224
1225 """
1226 node_drbd = {}
1227 for minor, disk_uuid in drbd_map[ninfo.uuid].items():
1228 test = disk_uuid not in disks_info
1229 error_if(test, constants.CV_ECLUSTERCFG, None,
1230 "ghost disk '%s' in temporary DRBD map", disk_uuid)
1231
1232
1233
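# A ghost disk must not be reported as active; marking it inactive also avoids a second warning below about its in-use minor.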
1234 if test:
1235 node_drbd[minor] = (disk_uuid, None, False)
1236 else:
1237 disk_active = False
1238 disk_instance = None
1239 for (inst_uuid, inst) in instanceinfo.items():
1240 if disk_uuid in inst.disks:
1241 disk_active = inst.disks_active
1242 disk_instance = inst_uuid
1243 break
1244 node_drbd[minor] = (disk_uuid, disk_instance, disk_active)
1245 return node_drbd
1246
1247 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, disks_info,
1248 drbd_helper, drbd_map):
1249 """Verifies and the node DRBD status.
1250
1251 @type ninfo: L{objects.Node}
1252 @param ninfo: the node to check
1253 @param nresult: the remote results for the node
1254 @param instanceinfo: the dict of instances
1255 @param disks_info: the dict of disks
1256 @param drbd_helper: the configured DRBD usermode helper
1257 @param drbd_map: the DRBD map as returned by
1258 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1259
1260 """
1261 self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
1262
1263
1264 node_drbd = self._ComputeDrbdMinors(ninfo, instanceinfo, disks_info,
1265 drbd_map, self._ErrorIf)
1266
1267
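# The node reports the DRBD minors it actually has in use; compare them with the configuration.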
1268 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1269 test = not isinstance(used_minors, (tuple, list))
1270 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1271 "cannot parse drbd status file: %s", str(used_minors))
1272 if test:
1273
1274 return
1275
1276 for minor, (disk_uuid, inst_uuid, must_exist) in node_drbd.items():
1277 test = minor not in used_minors and must_exist
1278 if inst_uuid is not None:
1279 attached = "(attached in instance '%s')" % \
1280 self.cfg.GetInstanceName(inst_uuid)
1281 else:
1282 attached = "(detached)"
1283 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1284 "drbd minor %d of disk %s %s is not active",
1285 minor, disk_uuid, attached)
1286 for minor in used_minors:
1287 test = minor not in node_drbd
1288 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1289 "unallocated drbd minor %d is in use", minor)
1290
1292 """Builds the node OS structures.
1293
1294 @type ninfo: L{objects.Node}
1295 @param ninfo: the node to check
1296 @param nresult: the remote results for the node
1297 @param nimg: the node image object
1298
1299 """
1300 remote_os = nresult.get(constants.NV_OSLIST, None)
1301 test = (not isinstance(remote_os, list) or
1302 not compat.all(isinstance(v, list) and len(v) == 8
1303 for v in remote_os))
1304
1305 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1306 "node hasn't returned valid OS data")
1307
1308 nimg.os_fail = test
1309
1310 if test:
1311 return
1312
1313 os_dict = {}
1314
1315 for (name, os_path, status, diagnose,
1316 variants, parameters, api_ver,
1317 trusted) in nresult[constants.NV_OSLIST]:
1318
1319 if name not in os_dict:
1320 os_dict[name] = []
1321
1322
1323
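# JSON has no tuple type, so parameters arrive as lists of lists; convert them to tuples so they can be placed in a set.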
1324 parameters = [tuple(v) for v in parameters]
1325 os_dict[name].append((os_path, status, diagnose,
1326 set(variants), set(parameters), set(api_ver),
1327 trusted))
1328
1329 nimg.oslist = os_dict
1330
1332 """Verifies the node OS list.
1333
1334 @type ninfo: L{objects.Node}
1335 @param ninfo: the node to check
1336 @param nimg: the node image object
1337 @param base: the 'template' node we match against (e.g. from the master)
1338
1339 """
1340 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1341
1342 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1343 for os_name, os_data in nimg.oslist.items():
1344 assert os_data, "Empty OS status for OS %s?!" % os_name
1345 f_path, f_status, f_diag, f_var, f_param, f_api, f_trusted = os_data[0]
1346 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
1347 "Invalid OS %s (located at %s): %s",
1348 os_name, f_path, f_diag)
1349 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
1350 "OS '%s' has multiple entries"
1351 " (first one shadows the rest): %s",
1352 os_name, utils.CommaJoin([v[0] for v in os_data]))
1353
1354 test = os_name not in base.oslist
1355 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1356 "Extra OS %s not present on reference node (%s)",
1357 os_name, self.cfg.GetNodeName(base.uuid))
1358 if test:
1359 continue
1360 assert base.oslist[os_name], "Base node has empty OS status?"
1361 _, b_status, _, b_var, b_param, b_api, b_trusted = base.oslist[os_name][0]
1362 if not b_status:
1363
1364 continue
1365 for kind, a, b in [("API version", f_api, b_api),
1366 ("variants list", f_var, b_var),
1367 ("parameters", beautify_params(f_param),
1368 beautify_params(b_param))]:
1369 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
1370 "OS %s for %s differs from reference node %s:"
1371 " [%s] vs. [%s]", kind, os_name,
1372 self.cfg.GetNodeName(base.uuid),
1373 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1374 for kind, a, b in [("trusted", f_trusted, b_trusted)]:
1375 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
1376 "OS %s for %s differs from reference node %s:"
1377 " %s vs. %s", kind, os_name,
1378 self.cfg.GetNodeName(base.uuid), a, b)
1379
1380
1381 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1382 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
1383 "OSes present on reference node %s"
1384 " but missing on this node: %s",
1385 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
1386
1388 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
1389
1390 @type ninfo: L{objects.Node}
1391 @param ninfo: the node to check
1392 @param nresult: the remote results for the node
1393 @type is_master: bool
1394 @param is_master: Whether node is the master node
1395
1396 """
1397 cluster = self.cfg.GetClusterInfo()
1398 if (is_master and
1399 (cluster.IsFileStorageEnabled() or
1400 cluster.IsSharedFileStorageEnabled())):
1401 try:
1402 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
1403 except KeyError:
1404
1405 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1406 "Node did not return forbidden file storage paths")
1407 else:
1408 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1409 "Found forbidden file storage paths: %s",
1410 utils.CommaJoin(fspaths))
1411 else:
1412 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
1413 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1414 "Node should not have returned forbidden file storage"
1415 " paths")
1416
1417 def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
1418 verify_key, error_key):
1419 """Verifies (file) storage paths.
1420
1421 @type ninfo: L{objects.Node}
1422 @param ninfo: the node to check
1423 @param nresult: the remote results for the node
1424 @type file_disk_template: string
1425 @param file_disk_template: file-based disk template, whose directory
1426 is supposed to be verified
1427 @type verify_key: string
1428 @param verify_key: key for the verification map of this file
1429 verification step
1430 @param error_key: error key to be added to the verification results
1431 in case something goes wrong in this verification step
1432
1433 """
1434 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
1435 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
1436 ))
1437
1438 cluster = self.cfg.GetClusterInfo()
1439 if cluster.IsDiskTemplateEnabled(file_disk_template):
1440 self._ErrorIf(
1441 verify_key in nresult,
1442 error_key, ninfo.name,
1443 "The configured %s storage path is unusable: %s" %
1444 (file_disk_template, nresult.get(verify_key)))
1445
1456
1467
1478
1480 """Verifies out of band functionality of a node.
1481
1482 @type ninfo: L{objects.Node}
1483 @param ninfo: the node to check
1484 @param nresult: the remote results for the node
1485
1486 """
1487
1488
1489 if ((ninfo.master_candidate or ninfo.master_capable) and
1490 constants.NV_OOB_PATHS in nresult):
1491 for path_result in nresult[constants.NV_OOB_PATHS]:
1492 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
1493 ninfo.name, path_result)
1494
1496 """Verifies and updates the node volume data.
1497
1498 This function will update a L{NodeImage}'s internal structures
1499 with data from the remote call.
1500
1501 @type ninfo: L{objects.Node}
1502 @param ninfo: the node to check
1503 @param nresult: the remote results for the node
1504 @param nimg: the node image object
1505 @param vg_name: the configured VG name
1506
1507 """
1508 nimg.lvm_fail = True
1509 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1510 if vg_name is None:
1511 pass
1512 elif isinstance(lvdata, basestring):
1513 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1514 "LVM problem on node: %s", utils.SafeEncode(lvdata))
1515 elif not isinstance(lvdata, dict):
1516 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1517 "rpc call to node failed (lvlist)")
1518 else:
1519 nimg.volumes = lvdata
1520 nimg.lvm_fail = False
1521
1523 """Verifies and updates the node instance list.
1524
1525 If the listing was successful, then updates this node's instance
1526 list. Otherwise, it marks the RPC call as failed for the instance
1527 list key.
1528
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nimg: the node image object
1533
1534 """
1535 idata = nresult.get(constants.NV_INSTANCELIST, None)
1536 test = not isinstance(idata, list)
1537 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1538 "rpc call to node failed (instancelist): %s",
1539 utils.SafeEncode(str(idata)))
1540 if test:
1541 nimg.hyp_fail = True
1542 else:
1543 nimg.instances = [uuid for (uuid, _) in
1544 self.cfg.GetMultiInstanceInfoByName(idata)]
1545
1547 """Verifies and computes a node information map
1548
1549 @type ninfo: L{objects.Node}
1550 @param ninfo: the node to check
1551 @param nresult: the remote results for the node
1552 @param nimg: the node image object
1553 @param vg_name: the configured VG name
1554
1555 """
1556
1557 hv_info = nresult.get(constants.NV_HVINFO, None)
1558 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info \
1559 or "memory_total" not in hv_info \
1560 or "memory_dom0" not in hv_info
1561 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1562 "rpc call to node failed (hvinfo)")
1563 if not test:
1564 try:
1565 nimg.mfree = int(hv_info["memory_free"])
1566 nimg.mtotal = int(hv_info["memory_total"])
1567 nimg.mdom0 = int(hv_info["memory_dom0"])
1568 except (ValueError, TypeError):
1569 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
1570 "node returned invalid nodeinfo, check hypervisor")
1571
1572
1573 if vg_name is not None:
1574 test = (constants.NV_VGLIST not in nresult or
1575 vg_name not in nresult[constants.NV_VGLIST])
1576 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
1577 "node didn't return data for the volume group '%s'"
1578 " - it is either missing or broken", vg_name)
1579 if not test:
1580 try:
1581 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1582 except (ValueError, TypeError):
1583 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
1584 "node returned invalid LVM info, check LVM status")
1585
1587 """Gets per-disk status information for all instances.
1588
1589 @type node_uuids: list of strings
1590 @param node_uuids: Node UUIDs
1591 @type node_image: dict of (UUID, L{objects.Node})
1592 @param node_image: Node objects
1593 @type instanceinfo: dict of (UUID, L{objects.Instance})
1594 @param instanceinfo: Instance objects
1595 @rtype: {instance: {node: [(success, payload)]}}
1596 @return: a dictionary of per-instance dictionaries with nodes as
1597 keys and disk information as values; the disk information is a
1598 list of tuples (success, payload)
1599
1600 """
1601 node_disks = {}
1602 node_disks_dev_inst_only = {}
1603 diskless_instances = set()
1604 nodisk_instances = set()
1605
1606 for nuuid in node_uuids:
1607 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
1608 node_image[nuuid].sinst))
1609 diskless_instances.update(uuid for uuid in node_inst_uuids
1610 if not instanceinfo[uuid].disks)
1611 disks = [(inst_uuid, disk)
1612 for inst_uuid in node_inst_uuids
1613 for disk in self.cfg.GetInstanceDisks(inst_uuid)]
1614
1615 if not disks:
1616 nodisk_instances.update(uuid for uuid in node_inst_uuids
1617 if instanceinfo[uuid].disks)
1618
1619 continue
1620
1621 node_disks[nuuid] = disks
1622
1623
1624 dev_inst_only = []
1625 for (inst_uuid, dev) in disks:
1626 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
1627 self.cfg)
1628 dev_inst_only.append((anno_disk, instanceinfo[inst_uuid]))
1629
1630 node_disks_dev_inst_only[nuuid] = dev_inst_only
1631
1632 assert len(node_disks) == len(node_disks_dev_inst_only)
1633
1634
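# Query the mirror status of all collected disks from all nodes in a single RPC.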
1635 result = self.rpc.call_blockdev_getmirrorstatus_multi(
1636 node_disks.keys(), node_disks_dev_inst_only)
1637
1638 assert len(result) == len(node_disks)
1639
1640 instdisk = {}
1641
1642 for (nuuid, nres) in result.items():
1643 node = self.cfg.GetNodeInfo(nuuid)
1644 disks = node_disks[node.uuid]
1645
1646 if nres.offline:
1647
1648 data = len(disks) * [(False, "node offline")]
1649 else:
1650 msg = nres.fail_msg
1651 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
1652 "while getting disk information: %s", msg)
1653 if msg:
1654
1655 data = len(disks) * [(False, msg)]
1656 else:
1657 data = []
1658 for idx, i in enumerate(nres.payload):
1659 if isinstance(i, (tuple, list)) and len(i) == 2:
1660 data.append(i)
1661 else:
1662 logging.warning("Invalid result from node %s, entry %d: %s",
1663 node.name, idx, i)
1664 data.append((False, "Invalid result from the remote node"))
1665
1666 for ((inst_uuid, _), status) in zip(disks, data):
1667 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
1668 .append(status)
1669
1670
1671 for inst_uuid in diskless_instances:
1672 assert inst_uuid not in instdisk
1673 instdisk[inst_uuid] = {}
1674
1675 for inst_uuid in nodisk_instances:
1676 assert inst_uuid not in instdisk
1677 instdisk[inst_uuid] = {}
1678
1679 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1680 len(nuuids) <= len(
1681 self.cfg.GetInstanceNodes(instanceinfo[inst].uuid)) and
1682 compat.all(isinstance(s, (tuple, list)) and
1683 len(s) == 2 for s in statuses)
1684 for inst, nuuids in instdisk.items()
1685 for nuuid, statuses in nuuids.items())
1686 if __debug__:
1687 instdisk_keys = set(instdisk)
1688 instanceinfo_keys = set(instanceinfo)
1689 assert instdisk_keys == instanceinfo_keys, \
1690 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
1691 (instdisk_keys, instanceinfo_keys))
1692
1693 return instdisk
1694
1695 @staticmethod
1697 """Create endless iterators for all potential SSH check hosts.
1698
1699 """
1700 nodes = [node for node in all_nodes
1701 if (node.group != group_uuid and
1702 not node.offline)]
1703 keyfunc = operator.attrgetter("group")
1704
1705 return map(itertools.cycle,
1706 [sorted(map(operator.attrgetter("name"), names))
1707 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
1708 keyfunc)])
1709
1710 @classmethod
1712 """Choose which nodes should talk to which other nodes.
1713
1714 We will make nodes contact all nodes in their group, and one node from
1715 every other group.
1716
1717 @rtype: tuple of (string, dict of strings to list of strings, string)
1718 @return: a tuple containing the list of all online nodes, a dictionary
1719 mapping node names to additional nodes of other node groups to which
1720 connectivity should be tested, and a list of all online master
1721 candidates
1722
1723 @warning: This algorithm has a known issue if one node group is much
1724 smaller than others (e.g. just one node). In such a case all other
1725 nodes will talk to the single node.
1726
1727 """
1728 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
1729 online_mcs = sorted(node.name for node in group_nodes
1730 if (node.master_candidate and not node.offline))
1731 sel = cls._SshNodeSelector(group_uuid, all_nodes)
1732
1733 return (online_nodes,
1734 dict((name, sorted([i.next() for i in sel]))
1735 for name in online_nodes),
1736 online_mcs)
1737
1739 """Prepare the input data for the SSH setup verification.
1740
1741 """
1742 all_nodes_info = self.cfg.GetAllNodesInfo()
1743 potential_master_candidates = self.cfg.GetPotentialMasterCandidates()
1744 node_status = [
1745 (uuid, node_info.name, node_info.master_candidate,
1746 node_info.name in potential_master_candidates, not node_info.offline)
1747 for (uuid, node_info) in all_nodes_info.items()]
1748 return node_status
1749
1751 """Build hooks env.
1752
1753 Cluster-Verify hooks run only in the post phase; their failure is logged
1754 in the verify output and makes the verification fail.
1755
1756 """
1757 env = {
1758 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
1759 }
1760
1761 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
1762 for node in self.my_node_info.values())
1763
1764 return env
1765
1767 """Build hooks nodes.
1768
1769 """
1770 return ([], list(self.my_node_info.keys()))
1771
1772 @staticmethod
1773 def _VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
1774 i_offline, n_offline, n_drained):
1775 feedback_fn("* Other Notes")
1776 if i_non_redundant:
1777 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1778 % len(i_non_redundant))
1779
1780 if i_non_a_balanced:
1781 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1782 % len(i_non_a_balanced))
1783
1784 if i_offline:
1785 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
1786
1787 if n_offline:
1788 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1789
1790 if n_drained:
1791 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1792
1822
1823 def Exec(self, feedback_fn):
1824 """Verify integrity of the node group, performing various tests on nodes.
1825
1826 """
1827
1828 feedback_fn("* Verifying group '%s'" % self.group_info.name)
1829
1830 if not self.my_node_uuids:
1831
1832 feedback_fn("* Empty node group, skipping verification")
1833 return True
1834
1835 self.bad = False
1836 verbose = self.op.verbose
1837 self._feedback_fn = feedback_fn
1838
1839 vg_name = self.cfg.GetVGName()
1840 drbd_helper = self.cfg.GetDRBDHelper()
1841 cluster = self.cfg.GetClusterInfo()
1842 hypervisors = cluster.enabled_hypervisors
1843 node_data_list = self.my_node_info.values()
1844
1845 i_non_redundant = []
1846 i_non_a_balanced = []
1847 i_offline = 0
1848 n_offline = 0
1849 n_drained = 0
1850 node_vol_should = {}
1851
1852
1853
1854
1855 filemap = ComputeAncillaryFiles(cluster, False)
1856
1857
1858 master_node_uuid = self.master_node = self.cfg.GetMasterNode()
1859 master_ip = self.cfg.GetMasterIP()
1860
1861 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
1862
1863 user_scripts = []
1864 if self.cfg.GetUseExternalMipScript():
1865 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
1866
1867 node_verify_param = {
1868 constants.NV_FILELIST:
1869 map(vcluster.MakeVirtualPath,
1870 utils.UniqueSequence(filename
1871 for files in filemap
1872 for filename in files)),
1873 constants.NV_NODELIST:
1874 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
1875 self.all_node_info.values()),
1876 constants.NV_HYPERVISOR: hypervisors,
1877 constants.NV_HVPARAMS:
1878 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
1879 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
1880 for node in node_data_list
1881 if not node.offline],
1882 constants.NV_INSTANCELIST: hypervisors,
1883 constants.NV_VERSION: None,
1884 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1885 constants.NV_NODESETUP: None,
1886 constants.NV_TIME: None,
1887 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
1888 constants.NV_OSLIST: None,
1889 constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(),
1890 constants.NV_USERSCRIPTS: user_scripts,
1891 constants.NV_CLIENT_CERT: None,
1892 }

    if self.cfg.GetClusterInfo().modify_ssh_setup:
      node_verify_param[constants.NV_SSH_SETUP] = \
        (self._PrepareSshSetupCheck(), self.cfg.GetClusterInfo().ssh_key_type)
      if self.op.verify_clutter:
        node_verify_param[constants.NV_SSH_CLUTTER] = True

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]

    if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8):
      if drbd_helper:
        node_verify_param[constants.NV_DRBDVERSION] = None
        node_verify_param[constants.NV_DRBDLIST] = None
        node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    if cluster.IsFileStorageEnabled() or \
        cluster.IsSharedFileStorageEnabled():
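      # Load file storage paths only from master node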
      node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
        self.cfg.GetMasterNodeName()
      if cluster.IsFileStorageEnabled():
        node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
          cluster.file_storage_dir
      if cluster.IsSharedFileStorageEnabled():
        node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \
          cluster.shared_file_storage_dir

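    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide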
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

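    # Build our expected cluster state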
    node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
                                                 uuid=node.uuid,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

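    # Gather OOB paths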
    oob_paths = []
    for node in self.all_node_info.values():
      path = SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for inst_uuid in self.my_inst_uuids:
      instance = self.my_inst_info[inst_uuid]
      if instance.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
      for nuuid in inst_nodes:
        if nuuid not in node_image:
          gnode = self.NodeImage(uuid=nuuid)
          gnode.ghost = (nuuid not in self.all_node_info)
          node_image[nuuid] = gnode

      self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)

      pnode = instance.primary_node
      node_image[pnode].pinst.append(instance.uuid)

      for snode in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
        nimg = node_image[snode]
        nimg.sinst.append(instance.uuid)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance.uuid)

    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
                                               self.my_node_info.keys())
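    # If exclusive storage is enabled on at least one node, PV exclusivity
    # must be verified on all of them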
    self._exclusive_storage = compat.any(es_flags.values())
    if self._exclusive_storage:
      node_verify_param[constants.NV_EXCLUSIVEPVS] = True

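    # At this point we have gathered all the verification parameters; lock
    # the configuration in shared mode for the duration of the RPC calls
    # below, so that the data we check stays consistent, and push the
    # group's pending configuration changes out to its nodes first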
    with self.cfg.GetConfigManager(shared=True, forcelock=True):
      feedback_fn("* Gathering information about nodes (%s nodes)" %
                  len(self.my_node_uuids))

      self.cfg.FlushConfigGroup(self.group_uuid)

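      # Due to the way our RPC system works, exact response times cannot be
      # guaranteed (e.g. a broken node could run into a timeout). By keeping
      # the time before and after executing the request, we can at least have
      # a time window.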
      nvinfo_starttime = time.time()

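      # Collect the cluster-wide data passed along with the per-node checks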
      cluster_name = self.cfg.GetClusterName()
      hvparams = self.cfg.GetClusterInfo().hvparams
      all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
                                             node_verify_param,
                                             cluster_name,
                                             hvparams)
      nvinfo_endtime = time.time()

      if self.extra_lv_nodes and vg_name is not None:
        feedback_fn("* Gathering information about extra nodes (%s nodes)" %
                    len(self.extra_lv_nodes))
        extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName(),
                                    self.cfg.GetClusterInfo().hvparams)
      else:
        extra_lv_nvinfo = {}

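      # If not all nodes are being checked, we need to make sure the master
      # node and at least one non-checked vm_capable node are in the list.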
      absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
      if absent_node_uuids:
        vf_nvinfo = all_nvinfo.copy()
        vf_node_info = list(self.my_node_info.values())
        additional_node_uuids = []
        if master_node_uuid not in self.my_node_info:
          additional_node_uuids.append(master_node_uuid)
          vf_node_info.append(self.all_node_info[master_node_uuid])
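        # One additional vm_capable, online node is enough for the
        # cross-check, hence the break below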
        for node_uuid in absent_node_uuids:
          nodeinfo = self.all_node_info[node_uuid]
          if (nodeinfo.vm_capable and not nodeinfo.offline and
              node_uuid != master_node_uuid):
            additional_node_uuids.append(node_uuid)
            vf_node_info.append(self.all_node_info[node_uuid])
            break
        key = constants.NV_FILELIST

        feedback_fn("* Gathering information about the master node")
        vf_nvinfo.update(self.rpc.call_node_verify(
          additional_node_uuids, {key: node_verify_param[key]},
          self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams))
      else:
        vf_nvinfo = all_nvinfo
        vf_node_info = self.my_node_info.values()

      all_drbd_map = self.cfg.ComputeDRBDMap()

      feedback_fn("* Gathering disk information (%s nodes)" %
                  len(self.my_node_uuids))
      instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
                                       self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    self._VerifyClientCertificates(self.my_node_info.values(), all_nvinfo)
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      self._VerifySshSetup(self.my_node_info.values(), all_nvinfo)
    self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      nimg = node_image[node_i.uuid]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node_i.name,))
        n_offline += 1
        continue

      if node_i.uuid == master_node_uuid:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))

      msg = all_nvinfo[node_i.uuid].fail_msg
      self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
                    "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node_i.uuid].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyAcceptedFileStoragePaths(node_i, nresult,
                                           node_i.uuid == master_node_uuid)
      self._VerifyFileStoragePaths(node_i, nresult)
      self._VerifySharedFileStoragePaths(node_i, nresult)
      self._VerifyGlusterStoragePaths(node_i, nresult)

      if nimg.vm_capable:
        self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
        if constants.DT_DRBD8 in cluster.enabled_disk_templates:
          self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info,
                               self.all_disks_info, drbd_helper, all_drbd_map)

        if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \
            (constants.DT_DRBD8 in cluster.enabled_disk_templates):
          self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

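        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)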
        non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)

        for inst_uuid in non_primary_inst_uuids:
          test = inst_uuid in self.all_inst_info
          self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
                        self.cfg.GetInstanceName(inst_uuid),
                        "instance should not run on node %s", node_i.name)
          self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE,
                        node_i.name,
                        "node is running unknown instance %s", inst_uuid)

      self._VerifyExclusionTags(node_i.name, nimg.pinst, cluster.tags)

    self._VerifyGroupDRBDVersion(all_nvinfo)
    self._VerifyGroupLVM(node_image, vg_name)

    for node_uuid, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
                              node_image[node_uuid], vg_name)

    feedback_fn("* Verifying instance status")
    for inst_uuid in self.my_inst_uuids:
      instance = self.my_inst_info[inst_uuid]
      if verbose:
        feedback_fn("* Verifying instance %s" % instance.name)
      self._VerifyInstance(instance, node_image, instdisk[inst_uuid])

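      # If the instance is not fully redundant we cannot survive losing its
      # primary node, so record it as non-redundant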
      inst_disks = self.cfg.GetInstanceDisks(instance.uuid)
      if not utils.AllDiskOfType(inst_disks, constants.DTS_MIRRORED):
        i_non_redundant.append(instance)

      if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

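    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.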
    for instance in self.all_inst_info.values():
      for secondary in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
        if (secondary in self.my_node_info
            and instance.uuid not in self.my_inst_info):
          self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
          break

    self._VerifyOrphanVolumes(vg_name, node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    self._VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
                           i_offline, n_offline, n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyzes the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
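    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results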
    if not self.my_node_uuids:
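      # empty node group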
      pass
    elif phase == constants.HOOKS_PHASE_POST:
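      # Report the post-hook results per node, re-indenting script output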
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          lu_result = False
          continue
        if res.offline:
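          # No need to investigate payload if node is offline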
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result