30 """Logical units for cluster verification."""
31
32 import itertools
33 import logging
34 import operator
35 import re
36 import time
37 import ganeti.masterd.instance
38 import ganeti.rpc.node as rpc
39
40 from ganeti import compat
41 from ganeti import constants
42 from ganeti import errors
43 from ganeti import locking
44 from ganeti import pathutils
45 from ganeti import utils
46 from ganeti import vcluster
47 from ganeti import hypervisor
48 from ganeti import opcodes
49
50 from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, ResultWithJobs
51 from ganeti.cmdlib.common import ShareAll, ComputeAncillaryFiles, \
52 CheckNodePVs, ComputeIPolicyInstanceViolation, AnnotateDiskParams, \
53 SupportsOob
56 def _GetAllHypervisorParameters(cluster, instances):
57 """Compute the set of all hypervisor parameters.
58
59 @type cluster: L{objects.Cluster}
60 @param cluster: the cluster object
61 @type instances: list of L{objects.Instance}
62 @param instances: additional instances from which to obtain parameters
63 @rtype: list of (origin, hypervisor, parameters)
64 @return: a list with all parameters found, indicating the hypervisor they
65 apply to, and the origin (can be "cluster", "os X", or "instance Y")
66
67 """
68 hvp_data = []
69
70 for hv_name in cluster.enabled_hypervisors:
71 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
72
73 for os_name, os_hvp in cluster.os_hvp.items():
74 for hv_name, hv_params in os_hvp.items():
75 if hv_params:
76 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
77 hvp_data.append(("os %s" % os_name, hv_name, full_params))
78
79
80 for instance in instances:
81 if instance.hvparams:
82 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
83 cluster.FillHV(instance)))
84
85 return hvp_data
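# Illustrative sketch (not part of the original module): the list built above
# mixes cluster-, OS- and instance-level parameter sources; for a hypothetical
# KVM cluster with one OS override and one instance override it could look like
#
#   [("cluster", "kvm", {"kernel_path": "/boot/vmlinuz", ...}),
#    ("os debian-example", "kvm", {"kernel_path": "/boot/vmlinuz-os", ...}),
#    ("instance inst1.example.com", "kvm", {"vnc_bind_address": "0.0.0.0", ...})]
#
# Each (origin, hypervisor, parameters) triple is later passed to hypervisor
# parameter validation so that problems can be reported per origin.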
86
88 class _VerifyErrors(object):
89 """Mix-in for cluster/group verify LUs.
90
91 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
92 self.op and self._feedback_fn to be available.)
93
94 """
95
96 ETYPE_ERROR = constants.CV_ERROR
97 ETYPE_WARNING = constants.CV_WARNING
98
99 def _ErrorMsgList(self, error_descriptor, object_name, message_list,
100 log_type=ETYPE_ERROR):
101 """Format multiple error messages.
102
103 Based on the opcode's error_codes parameter, either format a
104 parseable error code, or a simpler error string.
105
106 This must be called only from Exec and functions called from Exec.
107
108
109 @type error_descriptor: tuple (string, string, string)
110 @param error_descriptor: triplet describing the error (object_type,
111 code, description)
112 @type object_name: string
113 @param object_name: name of object (instance, node ..) the error relates to
114 @type message_list: list of strings
115 @param message_list: body of error messages
116 @type log_type: string
117 @param log_type: log message type (WARNING, ERROR ..)
118 """
119
120 if not message_list:
121 return
122
123 object_type, error_code, _ = error_descriptor
124
125
126 if error_code in self.op.ignore_errors:
127 log_type = self.ETYPE_WARNING
128
129 prefixed_list = []
130 if self.op.error_codes:
131 for msg in message_list:
132 prefixed_list.append(" - %s:%s:%s:%s:%s" % (
133 log_type, error_code, object_type, object_name, msg))
134 else:
135 if not object_name:
136 object_name = ""
137 for msg in message_list:
138 prefixed_list.append(" - %s: %s %s: %s" % (
139 log_type, object_type, object_name, msg))
140
141
142
143 self._feedback_fn(constants.ELOG_MESSAGE_LIST, prefixed_list)
144
145
146
147 if log_type == self.ETYPE_ERROR:
148 self.bad = True
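# Illustrative sketch (hypothetical node name and message): assuming
# constants.CV_ENODESSH expands to a descriptor like ("node", "ENODESSH", ...),
# the parseable form produced when op.error_codes is set looks like
#   " - ERROR:ENODESSH:node:node1.example.com:ssh communication failed"
# whereas the default human-readable form is
#   " - ERROR: node node1.example.com: ssh communication failed"
# Error codes listed in op.ignore_errors are downgraded to warnings and do not
# set self.bad.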
149
152 """Log a single error message.
153
154 """
155 self._ErrorMsgList(error_descriptor, object_name, [message], log_type)
156
157
158
159 def _ErrorIf(self, cond, *args, **kwargs):
160 """Log an error message if the passed condition is True.
161
162 """
163 if (bool(cond)
164 or self.op.debug_simulate_errors):
165 self._Error(*args, **kwargs)
166
167
168 def _Error(self, ecode, item, message, *args, **kwargs):
177
179 class LUClusterVerify(NoHooksLU):
180 """Submits all jobs necessary to verify the cluster.
181
182 """
183 REQ_BGL = False
184
186 self.needed_locks = {}
187
188 def Exec(self, feedback_fn):
189 jobs = []
190
191 if self.op.group_name:
192 groups = [self.op.group_name]
193 depends_fn = lambda: None
194 else:
195 groups = self.cfg.GetNodeGroupList()
196
197
198 jobs.append([
199 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
200 ])
201
202
203 depends_fn = lambda: [(-len(jobs), [])]
204
205 jobs.extend(
206 [opcodes.OpClusterVerifyGroup(group_name=group,
207 ignore_errors=self.op.ignore_errors,
208 depends=depends_fn(),
209 verify_clutter=self.op.verify_clutter)]
210 for group in groups)
211
212
213 for op in itertools.chain(*jobs):
214 op.debug_simulate_errors = self.op.debug_simulate_errors
215 op.verbose = self.op.verbose
216 op.error_codes = self.op.error_codes
217 try:
218 op.skip_checks = self.op.skip_checks
219 except AttributeError:
220 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
221
222 return ResultWithJobs(jobs)
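# Illustrative sketch (hypothetical group names): without an explicit
# group_name the submitted jobs look roughly like
#
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])], ...)]]
#
# The relative dependency (-len(jobs), []) produced by depends_fn points each
# per-group job back at the config-verification job submitted in the same
# batch, so group verification only starts once the config check has run.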
223
225 class LUClusterVerifyDisks(NoHooksLU):
226 """Verifies the cluster disks status.
227
228 """
229 REQ_BGL = False
230
236
237 def Exec(self, feedback_fn):
243
245 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
246 """Verifies the cluster config.
247
248 """
249 REQ_BGL = False
250
264
268
277
278 def Exec(self, feedback_fn):
279 """Verify integrity of cluster, performing various tests on nodes.
280
281 """
282 self.bad = False
283 self._feedback_fn = feedback_fn
284
285 feedback_fn("* Verifying cluster config")
286
287 msg_list = self.cfg.VerifyConfig()
288 self._ErrorMsgList(constants.CV_ECLUSTERCFG, None, msg_list)
289
290 feedback_fn("* Verifying cluster certificate files")
291
292 for cert_filename in pathutils.ALL_CERT_FILES:
293 (errcode, msg) = utils.VerifyCertificate(cert_filename)
294 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
295
296 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
297 pathutils.NODED_CERT_FILE),
298 constants.CV_ECLUSTERCERT,
299 None,
300 pathutils.NODED_CERT_FILE + " must be accessible by the " +
301 constants.LUXID_USER + " user")
302
303 feedback_fn("* Verifying hypervisor parameters")
304
305 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
306 self.all_inst_info.values()))
307
308 feedback_fn("* Verifying all nodes belong to an existing group")
309
310
311
312
313
314 dangling_nodes = set(node for node in self.all_node_info.values()
315 if node.group not in self.all_group_info)
316
317 dangling_instances = {}
318 no_node_instances = []
319
320 for inst in self.all_inst_info.values():
321 if inst.primary_node in [node.uuid for node in dangling_nodes]:
322 dangling_instances.setdefault(inst.primary_node, []).append(inst)
323 elif inst.primary_node not in self.all_node_info:
324 no_node_instances.append(inst)
325
326 pretty_dangling = [
327 "%s (%s)" %
328 (node.name,
329 utils.CommaJoin(inst.name for
330 inst in dangling_instances.get(node.uuid, [])))
331 for node in dangling_nodes]
332
333 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
334 None,
335 "the following nodes (and their instances) belong to a non"
336 " existing group: %s", utils.CommaJoin(pretty_dangling))
337
338 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
339 None,
340 "the following instances have a non-existing primary-node:"
341 " %s", utils.CommaJoin(inst.name for
342 inst in no_node_instances))
343
344 return not self.bad
345
347 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
348 """Verifies the status of a node group.
349
350 """
351 HPATH = "cluster-verify"
352 HTYPE = constants.HTYPE_CLUSTER
353 REQ_BGL = False
354
355 _HOOKS_INDENT_RE = re.compile("^", re.M)
356
357 class NodeImage(object):
358 """A class representing the logical and physical status of a node.
359
360 @type uuid: string
361 @ivar uuid: the node UUID to which this object refers
362 @ivar volumes: a structure as returned from
363 L{ganeti.backend.GetVolumeList} (runtime)
364 @ivar instances: a list of running instances (runtime)
365 @ivar pinst: list of configured primary instances (config)
366 @ivar sinst: list of configured secondary instances (config)
367 @ivar sbp: dictionary of {primary-node: list of instances} for all
368 instances for which this node is secondary (config)
369 @ivar mfree: free memory, as reported by hypervisor (runtime)
370 @ivar dfree: free disk, as reported by the node (runtime)
371 @ivar offline: the offline status (config)
372 @type rpc_fail: boolean
373 @ivar rpc_fail: whether the RPC verify call was successful (overall,
374 not whether the individual keys were correct) (runtime)
375 @type lvm_fail: boolean
376 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
377 @type hyp_fail: boolean
378 @ivar hyp_fail: whether the RPC call didn't return the instance list
379 @type ghost: boolean
380 @ivar ghost: whether this is a known node or not (config)
381 @type os_fail: boolean
382 @ivar os_fail: whether the RPC call didn't return valid OS data
383 @type oslist: list
384 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
385 @type vm_capable: boolean
386 @ivar vm_capable: whether the node can host instances
387 @type pv_min: float
388 @ivar pv_min: size in MiB of the smallest PVs
389 @type pv_max: float
390 @ivar pv_max: size in MiB of the biggest PVs
391
392 """
393 def __init__(self, offline=False, uuid=None, vm_capable=True):
394 self.uuid = uuid
395 self.volumes = {}
396 self.instances = []
397 self.pinst = []
398 self.sinst = []
399 self.sbp = {}
400 self.mfree = 0
401 self.dfree = 0
402 self.offline = offline
403 self.vm_capable = vm_capable
404 self.rpc_fail = False
405 self.lvm_fail = False
406 self.hyp_fail = False
407 self.ghost = False
408 self.os_fail = False
409 self.oslist = {}
410 self.pv_min = None
411 self.pv_max = None
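# Illustrative sketch: a NodeImage starts out mostly empty and is filled in two
# passes, first from the configuration (pinst/sinst/sbp/offline) and then from
# the node_verify RPC results (volumes, instances, mfree, dfree, pv_min/max).
# Hypothetical usage:
#
#   nimg = self.NodeImage(offline=False, uuid="node-uuid-1", vm_capable=True)
#   nimg.pinst.append(inst.uuid)   # configuration pass
#   nimg.mfree = 2048              # RPC pass, hypothetical value in MiB
#
# The rpc_fail/lvm_fail/hyp_fail/os_fail flags record which parts of the RPC
# result could not be trusted and must be skipped by later checks.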
412
428
447
449 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
450 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
451
452 group_node_uuids = set(self.group_info.members)
453 group_inst_uuids = \
454 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
455
456 unlocked_node_uuids = \
457 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
458
459 unlocked_inst_uuids = \
460 group_inst_uuids.difference(
461 [self.cfg.GetInstanceInfoByName(name).uuid
462 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
463
464 if unlocked_node_uuids:
465 raise errors.OpPrereqError(
466 "Missing lock for nodes: %s" %
467 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
468 errors.ECODE_STATE)
469
470 if unlocked_inst_uuids:
471 raise errors.OpPrereqError(
472 "Missing lock for instances: %s" %
473 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
474 errors.ECODE_STATE)
475
476 self.all_node_info = self.cfg.GetAllNodesInfo()
477 self.all_inst_info = self.cfg.GetAllInstancesInfo()
478 self.all_disks_info = self.cfg.GetAllDisksInfo()
479
480 self.my_node_uuids = group_node_uuids
481 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
482 for node_uuid in group_node_uuids)
483
484 self.my_inst_uuids = group_inst_uuids
485 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
486 for inst_uuid in group_inst_uuids)
487
488
489
490 extra_lv_nodes = set()
491
492 for inst in self.my_inst_info.values():
493 disks = self.cfg.GetInstanceDisks(inst.uuid)
494 if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
495 inst_nodes = self.cfg.GetInstanceNodes(inst.uuid)
496 for nuuid in inst_nodes:
497 if self.all_node_info[nuuid].group != self.group_uuid:
498 extra_lv_nodes.add(nuuid)
499
500 unlocked_lv_nodes = \
501 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
502
503 if unlocked_lv_nodes:
504 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
505 utils.CommaJoin(unlocked_lv_nodes),
506 errors.ECODE_STATE)
507 self.extra_lv_nodes = list(extra_lv_nodes)
508
510 """Perform some basic validation on data returned from a node.
511
512 - check the result data structure is well formed and has all the
513 mandatory fields
514 - check ganeti version
515
516 @type ninfo: L{objects.Node}
517 @param ninfo: the node to check
518 @param nresult: the results from the node
519 @rtype: boolean
520 @return: whether overall this call was successful (and we can expect
521 reasonable values in the response)
522
523 """
524
525 test = not nresult or not isinstance(nresult, dict)
526 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
527 "unable to verify node: no data returned")
528 if test:
529 return False
530
531
532 local_version = constants.PROTOCOL_VERSION
533 remote_version = nresult.get("version", None)
534 test = not (remote_version and
535 isinstance(remote_version, (list, tuple)) and
536 len(remote_version) == 2)
537 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
538 "connection to node returned invalid data")
539 if test:
540 return False
541
542 test = local_version != remote_version[0]
543 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
544 "incompatible protocol versions: master %s,"
545 " node %s", local_version, remote_version[0])
546 if test:
547 return False
548
549
550
551
552 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
553 constants.CV_ENODEVERSION, ninfo.name,
554 "software version mismatch: master %s, node %s",
555 constants.RELEASE_VERSION, remote_version[1],
556 code=self.ETYPE_WARNING)
557
558 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
559 if ninfo.vm_capable and isinstance(hyp_result, dict):
560 for hv_name, hv_result in hyp_result.iteritems():
561 test = hv_result is not None
562 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
563 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
564
565 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
566 if ninfo.vm_capable and isinstance(hvp_result, list):
567 for item, hv_name, hv_result in hvp_result:
568 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
569 "hypervisor %s parameter verify failure (source %s): %s",
570 hv_name, item, hv_result)
571
572 test = nresult.get(constants.NV_NODESETUP,
573 ["Missing NODESETUP results"])
574 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
575 "node setup error: %s", "; ".join(test))
576
577 return True
578
579 def _VerifyNodeTime(self, ninfo, nresult,
580 nvinfo_starttime, nvinfo_endtime):
581 """Check the node time.
582
583 @type ninfo: L{objects.Node}
584 @param ninfo: the node to check
585 @param nresult: the remote results for the node
586 @param nvinfo_starttime: the start time of the RPC call
587 @param nvinfo_endtime: the end time of the RPC call
588
589 """
590 ntime = nresult.get(constants.NV_TIME, None)
591 try:
592 ntime_merged = utils.MergeTime(ntime)
593 except (ValueError, TypeError):
594 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
595 "Node returned invalid time")
596 return
597
598 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
599 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
600 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
601 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
602 else:
603 ntime_diff = None
604
605 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
606 "Node time diverges by at least %s from master node time",
607 ntime_diff)
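# Illustrative sketch (hypothetical timestamps, assuming a clock-skew allowance
# of 150 seconds): if the RPC ran between 09:03:00 and 09:03:02 on the master
# while the node reports 09:00:00, the node time is 180s before the window
# start and the check reports a divergence of "180.0s"; any node time inside
# [start - skew, end + skew] is accepted silently.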
608
610 """Check the node LVM results and update info for cross-node checks.
611
612 @type ninfo: L{objects.Node}
613 @param ninfo: the node to check
614 @param nresult: the remote results for the node
615 @param vg_name: the configured VG name
616 @type nimg: L{NodeImage}
617 @param nimg: node image
618
619 """
620 if vg_name is None:
621 return
622
623
624 vglist = nresult.get(constants.NV_VGLIST, None)
625 test = not vglist
626 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
627 "unable to check volume groups")
628 if not test:
629 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
630 constants.MIN_VG_SIZE)
631 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
632
633
634 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
635 for em in errmsgs:
636 self._Error(constants.CV_ENODELVM, ninfo.name, em)
637 if pvminmax is not None:
638 (nimg.pv_min, nimg.pv_max) = pvminmax
639
641 """Check cross-node DRBD version consistency.
642
643 @type node_verify_infos: dict
644 @param node_verify_infos: infos about nodes as returned from the
645 node_verify call.
646
647 """
648 node_versions = {}
649 for node_uuid, ndata in node_verify_infos.items():
650 nresult = ndata.payload
651 if nresult:
652 version = nresult.get(constants.NV_DRBDVERSION, None)
653 if version:
654 node_versions[node_uuid] = version
655
656 if len(set(node_versions.values())) > 1:
657 for node_uuid, version in sorted(node_versions.items()):
658 msg = "DRBD version mismatch: %s" % version
659 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
660 code=self.ETYPE_WARNING)
661
663 """Check cross-node consistency in LVM.
664
665 @type node_image: dict
666 @param node_image: info about nodes, mapping from node to names to
667 L{NodeImage} objects
668 @param vg_name: the configured VG name
669
670 """
671 if vg_name is None:
672 return
673
674
675 if not self._exclusive_storage:
676 return
677
678
679
680
681 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
682 if not vals:
683 return
684 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
685 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
686 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
687 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
688 "PV sizes differ too much in the group; smallest (%s MB) is"
689 " on %s, biggest (%s MB) is on %s",
690 pvmin, self.cfg.GetNodeName(minnode_uuid),
691 pvmax, self.cfg.GetNodeName(maxnode_uuid))
692
694 """Check the node bridges.
695
696 @type ninfo: L{objects.Node}
697 @param ninfo: the node to check
698 @param nresult: the remote results for the node
699 @param bridges: the expected list of bridges
700
701 """
702 if not bridges:
703 return
704
705 missing = nresult.get(constants.NV_BRIDGES, None)
706 test = not isinstance(missing, list)
707 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
708 "did not return valid bridge information")
709 if not test:
710 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
711 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
712
730
732 """Check the node network connectivity results.
733
734 @type ninfo: L{objects.Node}
735 @param ninfo: the node to check
736 @param nresult: the remote results for the node
737
738 """
739 test = constants.NV_NODELIST not in nresult
740 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
741 "node hasn't returned node ssh connectivity data")
742 if not test:
743 if nresult[constants.NV_NODELIST]:
744 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
745 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
746 "ssh communication with node '%s': %s", a_node, a_msg)
747
748 test = constants.NV_NODENETTEST not in nresult
749 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
750 "node hasn't returned node tcp connectivity data")
751 if not test:
752 if nresult[constants.NV_NODENETTEST]:
753 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
754 for anode in nlist:
755 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
756 "tcp communication with node '%s': %s",
757 anode, nresult[constants.NV_NODENETTEST][anode])
758
759 test = constants.NV_MASTERIP not in nresult
760 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
761 "node hasn't returned node master IP reachability data")
762 if not test:
763 if not nresult[constants.NV_MASTERIP]:
764 if ninfo.uuid == self.master_node:
765 msg = "the master node cannot reach the master IP (not configured?)"
766 else:
767 msg = "cannot reach the master IP"
768 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
769
771 """Verify an instance.
772
773 This function checks to see if the required block devices are
774 available on the instance's node, and that the nodes are in the correct
775 state.
776
777 """
778 pnode_uuid = instance.primary_node
779 pnode_img = node_image[pnode_uuid]
780 groupinfo = self.cfg.GetAllNodeGroupsInfo()
781
782 node_vol_should = {}
783 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
784
785 cluster = self.cfg.GetClusterInfo()
786 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
787 self.group_info)
788 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
789 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
790 utils.CommaJoin(err), code=self.ETYPE_WARNING)
791
792 for node_uuid in node_vol_should:
793 n_img = node_image[node_uuid]
794 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
795
796 continue
797 for volume in node_vol_should[node_uuid]:
798 test = volume not in n_img.volumes
799 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
800 "volume %s missing on node %s", volume,
801 self.cfg.GetNodeName(node_uuid))
802
803 if instance.admin_state == constants.ADMINST_UP:
804 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
805 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
806 "instance not running on its primary node %s",
807 self.cfg.GetNodeName(pnode_uuid))
808 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
809 instance.name, "instance is marked as running and lives on"
810 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
811
812 diskdata = [(nname, success, status, idx)
813 for (nname, disks) in diskstatus.items()
814 for idx, (success, status) in enumerate(disks)]
815
816 for nname, success, bdev_status, idx in diskdata:
817
818
819 snode = node_image[nname]
820 bad_snode = snode.ghost or snode.offline
821 self._ErrorIf(instance.disks_active and
822 not success and not bad_snode,
823 constants.CV_EINSTANCEFAULTYDISK, instance.name,
824 "couldn't retrieve status for disk/%s on %s: %s",
825 idx, self.cfg.GetNodeName(nname), bdev_status)
826
827 if instance.disks_active and success and bdev_status.is_degraded:
828 msg = "disk/%s on %s is degraded" % (idx, self.cfg.GetNodeName(nname))
829
830 code = self.ETYPE_ERROR
831 accepted_lds = [constants.LDS_OKAY, constants.LDS_SYNC]
832
833 if bdev_status.ldisk_status in accepted_lds:
834 code = self.ETYPE_WARNING
835
836 msg += "; local disk state is '%s'" % \
837 constants.LDS_NAMES[bdev_status.ldisk_status]
838
839 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg,
840 code=code)
841
842 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
843 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
844 "instance %s, connection to primary node failed",
845 instance.name)
846
847 secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid)
848 self._ErrorIf(len(secondary_nodes) > 1,
849 constants.CV_EINSTANCELAYOUT, instance.name,
850 "instance has multiple secondary nodes: %s",
851 utils.CommaJoin(secondary_nodes),
852 code=self.ETYPE_WARNING)
853
854 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
855 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes)
856 disks = self.cfg.GetInstanceDisks(instance.uuid)
857 if any(es_flags.values()):
858 if not utils.AllDiskOfType(disks, constants.DTS_EXCL_STORAGE):
859
860
861 es_nodes = [n
862 for (n, es) in es_flags.items()
863 if es]
864 unsupported = [d.dev_type for d in disks
865 if d.dev_type not in constants.DTS_EXCL_STORAGE]
866 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
867 "instance uses disk types %s, which are not supported on"
868 " nodes that have exclusive storage set: %s",
869 utils.CommaJoin(unsupported),
870 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
871 for (idx, disk) in enumerate(disks):
872 self._ErrorIf(disk.spindles is None,
873 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
874 "number of spindles not configured for disk %s while"
875 " exclusive storage is enabled, try running"
876 " gnt-cluster repair-disk-sizes", idx)
877
878 if utils.AnyDiskOfType(disks, constants.DTS_INT_MIRROR):
879 instance_nodes = utils.NiceSort(inst_nodes)
880 instance_groups = {}
881
882 for node_uuid in instance_nodes:
883 instance_groups.setdefault(self.all_node_info[node_uuid].group,
884 []).append(node_uuid)
885
886 pretty_list = [
887 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
888 groupinfo[group].name)
889
890 for group, nodes in sorted(instance_groups.items(),
891 key=lambda (_, nodes): pnode_uuid in nodes,
892 reverse=True)]
893
894 self._ErrorIf(len(instance_groups) > 1,
895 constants.CV_EINSTANCESPLITGROUPS,
896 instance.name, "instance has primary and secondary nodes in"
897 " different groups: %s", utils.CommaJoin(pretty_list),
898 code=self.ETYPE_WARNING)
899
900 inst_nodes_offline = []
901 for snode in secondary_nodes:
902 s_img = node_image[snode]
903 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
904 self.cfg.GetNodeName(snode),
905 "instance %s, connection to secondary node failed",
906 instance.name)
907
908 if s_img.offline:
909 inst_nodes_offline.append(snode)
910
911
912 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
913 instance.name, "instance has offline secondary node(s) %s",
914 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
915
916 for node_uuid in inst_nodes:
917 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
918 instance.name, "instance lives on ghost node %s",
919 self.cfg.GetNodeName(node_uuid))
920 self._ErrorIf(not node_image[node_uuid].vm_capable,
921 constants.CV_EINSTANCEBADNODE, instance.name,
922 "instance lives on non-vm_capable node %s",
923 self.cfg.GetNodeName(node_uuid))
924
927 """Verify if there are any unknown volumes in the cluster.
928
929 The .os, .swap and backup volumes are ignored. All other volumes are
930 reported as unknown.
931
932 @type vg_name: string
933 @param vg_name: the name of the Ganeti-administered volume group
934 @type reserved: L{ganeti.utils.FieldSet}
935 @param reserved: a FieldSet of reserved volume names
936
937 """
938 for node_uuid, n_img in node_image.items():
939 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
940 self.all_node_info[node_uuid].group != self.group_uuid):
941
942 continue
943 for volume in n_img.volumes:
944
945 if volume.split('/')[0] != vg_name:
946 continue
947
948 test = ((node_uuid not in node_vol_should or
949 volume not in node_vol_should[node_uuid]) and
950 not reserved.Matches(volume))
951 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
952 self.cfg.GetNodeName(node_uuid),
953 "volume %s is unknown", volume,
954 code=_VerifyErrors.ETYPE_WARNING)
955
957 """Verify N+1 Memory Resilience.
958
959 Check that if one single node dies we can still start all the
960 instances it was primary for.
961
962 """
963 cluster_info = self.cfg.GetClusterInfo()
964 for node_uuid, n_img in node_image.items():
965
966
967
968
969
970
971
972
973 if n_img.offline or \
974 self.all_node_info[node_uuid].group != self.group_uuid:
975
976
977
978
979 continue
980
981 for prinode, inst_uuids in n_img.sbp.items():
982 needed_mem = 0
983 for inst_uuid in inst_uuids:
984 bep = cluster_info.FillBE(all_insts[inst_uuid])
985 if bep[constants.BE_AUTO_BALANCE]:
986 needed_mem += bep[constants.BE_MINMEM]
987 test = n_img.mfree < needed_mem
988 self._ErrorIf(test, constants.CV_ENODEN1,
989 self.cfg.GetNodeName(node_uuid),
990 "not enough memory to accommodate instance failovers"
991 " should node %s fail (%dMiB needed, %dMiB available)",
992 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
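# Illustrative sketch (hypothetical sizes): if this node is secondary for two
# auto-balanced instances whose primary is node B, with BE_MINMEM of 1024 and
# 2048 MiB, then needed_mem for the (B -> this node) pair is 3072 MiB and the
# error fires when the node reports mfree below that, i.e. it could not host
# B's instances after a failover of B.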
993
995 """Verifies the consistency of the client certificates.
996
997 This includes several aspects:
998 - the individual validation of all nodes' certificates
999 - the consistency of the master candidate certificate map
1000 - the consistency of the master candidate certificate map with the
1001 certificates that the master candidates are actually using.
1002
1003 @param nodes: the list of nodes to consider in this verification
1004 @param all_nvinfo: the map of results of the verify_node call to
1005 all nodes
1006
1007 """
1008 candidate_certs = self.cfg.GetClusterInfo().candidate_certs
1009 if candidate_certs is None or len(candidate_certs) == 0:
1010 self._ErrorIf(
1011 True, constants.CV_ECLUSTERCLIENTCERT, None,
1012 "The cluster's list of master candidate certificates is empty."
1013 " If you just updated the cluster, please run"
1014 " 'gnt-cluster renew-crypto --new-node-certificates'.")
1015 return
1016
1017 self._ErrorIf(
1018 len(candidate_certs) != len(set(candidate_certs.values())),
1019 constants.CV_ECLUSTERCLIENTCERT, None,
1020 "There are at least two master candidates configured to use the same"
1021 " certificate.")
1022
1023
1024 for node in nodes:
1025 if node.offline:
1026 continue
1027
1028 nresult = all_nvinfo[node.uuid]
1029 if nresult.fail_msg or not nresult.payload:
1030 continue
1031
1032 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None)
1033
1034 self._ErrorIf(
1035 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None,
1036 "Client certificate of node '%s' failed validation: %s (code '%s')",
1037 node.uuid, msg, errcode)
1038
1039 if not errcode:
1040 digest = msg
1041 if node.master_candidate:
1042 if node.uuid in candidate_certs:
1043 self._ErrorIf(
1044 digest != candidate_certs[node.uuid],
1045 constants.CV_ECLUSTERCLIENTCERT, None,
1046 "Client certificate digest of master candidate '%s' does not"
1047 " match its entry in the cluster's map of master candidate"
1048 " certificates. Expected: %s Got: %s", node.uuid,
1049 digest, candidate_certs[node.uuid])
1050 else:
1051 self._ErrorIf(
1052 True, constants.CV_ECLUSTERCLIENTCERT, None,
1053 "The master candidate '%s' does not have an entry in the"
1054 " map of candidate certificates.", node.uuid)
1055 self._ErrorIf(
1056 digest in candidate_certs.values(),
1057 constants.CV_ECLUSTERCLIENTCERT, None,
1058 "Master candidate '%s' is using a certificate of another node.",
1059 node.uuid)
1060 else:
1061 self._ErrorIf(
1062 node.uuid in candidate_certs,
1063 constants.CV_ECLUSTERCLIENTCERT, None,
1064 "Node '%s' is not a master candidate, but still listed in the"
1065 " map of master candidate certificates.", node.uuid)
1066 self._ErrorIf(
1067 (node.uuid not in candidate_certs) and
1068 (digest in candidate_certs.values()),
1069 constants.CV_ECLUSTERCLIENTCERT, None,
1070 "Node '%s' is not a master candidate and is incorrectly using a"
1071 " certificate of another node which is master candidate.",
1072 node.uuid)
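# Illustrative sketch (hypothetical digests): candidate_certs is the
# cluster-wide map of master-candidate UUIDs to client-certificate digests,
# e.g.
#
#   {"mc-uuid-1": "99:af:...:3b", "mc-uuid-2": "b1:04:...:7e"}
#
# Every online master candidate must report exactly the digest stored for it,
# no two candidates may share a digest, and non-candidates must not appear in
# (or reuse a digest from) the map.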
1073
1075 """Evaluates the verification results of the SSH setup and clutter test.
1076
1077 @param nodes: List of L{objects.Node} objects
1078 @param all_nvinfo: RPC results
1079
1080 """
1081 for node in nodes:
1082 if not node.offline:
1083 nresult = all_nvinfo[node.uuid]
1084 if nresult.fail_msg or not nresult.payload:
1085 self._ErrorIf(True, constants.CV_ENODESSH, node.name,
1086 "Could not verify the SSH setup of this node.")
1087 return
1088 for ssh_test in [constants.NV_SSH_SETUP, constants.NV_SSH_CLUTTER]:
1089 result = nresult.payload.get(ssh_test, None)
1090 error_msg = ""
1091 if isinstance(result, list):
1092 error_msg = " ".join(result)
1093 self._ErrorIf(result,
1094 constants.CV_ENODESSH, None, error_msg)
1095
1096 def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
1097 (files_all, files_opt, files_mc, files_vm)):
1098 """Verifies file checksums collected from all nodes.
1099
1100 @param nodes: List of L{objects.Node} objects
1101 @param master_node_uuid: UUID of master node
1102 @param all_nvinfo: RPC results
1103
1104 """
1105
1106 files2nodefn = [
1107 (files_all, None),
1108 (files_mc, lambda node: (node.master_candidate or
1109 node.uuid == master_node_uuid)),
1110 (files_vm, lambda node: node.vm_capable),
1111 ]
1112
1113
1114 nodefiles = {}
1115 for (files, fn) in files2nodefn:
1116 if fn is None:
1117 filenodes = nodes
1118 else:
1119 filenodes = filter(fn, nodes)
1120 nodefiles.update((filename,
1121 frozenset(map(operator.attrgetter("uuid"), filenodes)))
1122 for filename in files)
1123
1124 assert set(nodefiles) == (files_all | files_mc | files_vm)
1125
1126 fileinfo = dict((filename, {}) for filename in nodefiles)
1127 ignore_nodes = set()
1128
1129 for node in nodes:
1130 if node.offline:
1131 ignore_nodes.add(node.uuid)
1132 continue
1133
1134 nresult = all_nvinfo[node.uuid]
1135
1136 if nresult.fail_msg or not nresult.payload:
1137 node_files = None
1138 else:
1139 fingerprints = nresult.payload.get(constants.NV_FILELIST, {})
1140 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
1141 for (key, value) in fingerprints.items())
1142 del fingerprints
1143
1144 test = not (node_files and isinstance(node_files, dict))
1145 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
1146 "Node did not return file checksum data")
1147 if test:
1148 ignore_nodes.add(node.uuid)
1149 continue
1150
1151
1152 for (filename, checksum) in node_files.items():
1153 assert filename in nodefiles
1154 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
1155
1156 for (filename, checksums) in fileinfo.items():
1157 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1158
1159
1160 with_file = frozenset(node_uuid
1161 for node_uuids in fileinfo[filename].values()
1162 for node_uuid in node_uuids) - ignore_nodes
1163
1164 expected_nodes = nodefiles[filename] - ignore_nodes
1165
1166
1167 missing_file = expected_nodes - with_file
1168
1169 if filename in files_opt:
1170
1171 self._ErrorIf(missing_file and missing_file != expected_nodes,
1172 constants.CV_ECLUSTERFILECHECK, None,
1173 "File %s is optional, but it must exist on all or no"
1174 " nodes (not found on %s)",
1175 filename,
1176 utils.CommaJoin(
1177 utils.NiceSort(
1178 map(self.cfg.GetNodeName, missing_file))))
1179 else:
1180 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
1181 "File %s is missing from node(s) %s", filename,
1182 utils.CommaJoin(
1183 utils.NiceSort(
1184 map(self.cfg.GetNodeName, missing_file))))
1185
1186
1187 unexpected = with_file - expected_nodes
1188 self._ErrorIf(unexpected,
1189 constants.CV_ECLUSTERFILECHECK, None,
1190 "File %s should not exist on node(s) %s",
1191 filename, utils.CommaJoin(
1192 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
1193
1194
1195 test = len(checksums) > 1
1196 if test:
1197 variants = ["variant %s on %s" %
1198 (idx + 1,
1199 utils.CommaJoin(utils.NiceSort(
1200 map(self.cfg.GetNodeName, node_uuids))))
1201 for (idx, (checksum, node_uuids)) in
1202 enumerate(sorted(checksums.items()))]
1203 else:
1204 variants = []
1205
1206 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
1207 "File %s found with %s different checksums (%s)",
1208 filename, len(checksums), "; ".join(variants))
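# Illustrative sketch (hypothetical path and digests): after collection,
# fileinfo maps each distributed file to the checksums seen and the node UUIDs
# reporting them, e.g.
#
#   fileinfo["/var/lib/ganeti/known_hosts"] = {
#     "1ab62fe83...": set(["node-uuid-1", "node-uuid-2"]),
#     "9cd41ab10...": set(["node-uuid-3"]),
#   }
#
# More than one digest for the same file triggers the "different checksums"
# error above; missing or unexpected entries are reported separately.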
1209
1211 """Verify the drbd helper.
1212
1213 """
1214 if drbd_helper:
1215 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1216 test = (helper_result is None)
1217 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1218 "no drbd usermode helper returned")
1219 if helper_result:
1220 status, payload = helper_result
1221 test = not status
1222 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1223 "drbd usermode helper check unsuccessful: %s", payload)
1224 test = status and (payload != drbd_helper)
1225 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
1226 "wrong drbd usermode helper: %s", payload)
1227
1228 @staticmethod
1229 def _ComputeDrbdMinors(ninfo, instanceinfo, disks_info, drbd_map, error_if):
1230 """Gives the DRBD information in a map for a node.
1231
1232 @type ninfo: L{objects.Node}
1233 @param ninfo: the node to check
1234 @param instanceinfo: the dict of instances
1235 @param disks_info: the dict of disks
1236 @param drbd_map: the DRBD map as returned by
1237 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1238 @type error_if: callable like L{_ErrorIf}
1239 @param error_if: The error reporting function
1240 @return: dict from minor number to (disk_uuid, instance_uuid, active)
1241
1242 """
1243 node_drbd = {}
1244 for minor, disk_uuid in drbd_map[ninfo.uuid].items():
1245 test = disk_uuid not in disks_info
1246 error_if(test, constants.CV_ECLUSTERCFG, None,
1247 "ghost disk '%s' in temporary DRBD map", disk_uuid)
1248
1249
1250
1251 if test:
1252 node_drbd[minor] = (disk_uuid, None, False)
1253 else:
1254 disk_active = False
1255 disk_instance = None
1256 for (inst_uuid, inst) in instanceinfo.items():
1257 if disk_uuid in inst.disks:
1258 disk_active = inst.disks_active
1259 disk_instance = inst_uuid
1260 break
1261 node_drbd[minor] = (disk_uuid, disk_instance, disk_active)
1262 return node_drbd
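# Illustrative sketch (hypothetical UUIDs): for a node whose drbd_map entry is
# {0: "disk-uuid-a", 1: "disk-uuid-b"}, the helper returns something like
#
#   {0: ("disk-uuid-a", "inst-uuid-1", True),   # known disk, instance active
#    1: ("disk-uuid-b", None, False)}           # ghost disk, reported above
#
# _VerifyNodeDrbd then compares this expectation against the minors the node
# actually has in use.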
1263
1264 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, disks_info,
1265 drbd_helper, drbd_map):
1266 """Verifies the node DRBD status.
1267
1268 @type ninfo: L{objects.Node}
1269 @param ninfo: the node to check
1270 @param nresult: the remote results for the node
1271 @param instanceinfo: the dict of instances
1272 @param disks_info: the dict of disks
1273 @param drbd_helper: the configured DRBD usermode helper
1274 @param drbd_map: the DRBD map as returned by
1275 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1276
1277 """
1278 self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
1279
1280
1281 node_drbd = self._ComputeDrbdMinors(ninfo, instanceinfo, disks_info,
1282 drbd_map, self._ErrorIf)
1283
1284
1285 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1286 test = not isinstance(used_minors, (tuple, list))
1287 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1288 "cannot parse drbd status file: %s", str(used_minors))
1289 if test:
1290
1291 return
1292
1293 for minor, (disk_uuid, inst_uuid, must_exist) in node_drbd.items():
1294 test = minor not in used_minors and must_exist
1295 if inst_uuid is not None:
1296 attached = "(attached in instance '%s')" % \
1297 self.cfg.GetInstanceName(inst_uuid)
1298 else:
1299 attached = "(detached)"
1300 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1301 "drbd minor %d of disk %s %s is not active",
1302 minor, disk_uuid, attached)
1303 for minor in used_minors:
1304 test = minor not in node_drbd
1305 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
1306 "unallocated drbd minor %d is in use", minor)
1307
1309 """Builds the node OS structures.
1310
1311 @type ninfo: L{objects.Node}
1312 @param ninfo: the node to check
1313 @param nresult: the remote results for the node
1314 @param nimg: the node image object
1315
1316 """
1317 remote_os = nresult.get(constants.NV_OSLIST, None)
1318 test = (not isinstance(remote_os, list) or
1319 not compat.all(isinstance(v, list) and len(v) == 8
1320 for v in remote_os))
1321
1322 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1323 "node hasn't returned valid OS data")
1324
1325 nimg.os_fail = test
1326
1327 if test:
1328 return
1329
1330 os_dict = {}
1331
1332 for (name, os_path, status, diagnose,
1333 variants, parameters, api_ver,
1334 trusted) in nresult[constants.NV_OSLIST]:
1335
1336 if name not in os_dict:
1337 os_dict[name] = []
1338
1339
1340
1341 parameters = [tuple(v) for v in parameters]
1342 os_dict[name].append((os_path, status, diagnose,
1343 set(variants), set(parameters), set(api_ver),
1344 trusted))
1345
1346 nimg.oslist = os_dict
1347
1349 """Verifies the node OS list.
1350
1351 @type ninfo: L{objects.Node}
1352 @param ninfo: the node to check
1353 @param nimg: the node image object
1354 @param base: the 'template' node we match against (e.g. from the master)
1355
1356 """
1357 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1358
1359 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1360 for os_name, os_data in nimg.oslist.items():
1361 assert os_data, "Empty OS status for OS %s?!" % os_name
1362 f_path, f_status, f_diag, f_var, f_param, f_api, f_trusted = os_data[0]
1363 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
1364 "Invalid OS %s (located at %s): %s",
1365 os_name, f_path, f_diag)
1366 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
1367 "OS '%s' has multiple entries"
1368 " (first one shadows the rest): %s",
1369 os_name, utils.CommaJoin([v[0] for v in os_data]))
1370
1371 test = os_name not in base.oslist
1372 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
1373 "Extra OS %s not present on reference node (%s)",
1374 os_name, self.cfg.GetNodeName(base.uuid))
1375 if test:
1376 continue
1377 assert base.oslist[os_name], "Base node has empty OS status?"
1378 _, b_status, _, b_var, b_param, b_api, b_trusted = base.oslist[os_name][0]
1379 if not b_status:
1380
1381 continue
1382 for kind, a, b in [("API version", f_api, b_api),
1383 ("variants list", f_var, b_var),
1384 ("parameters", beautify_params(f_param),
1385 beautify_params(b_param))]:
1386 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
1387 "OS %s for %s differs from reference node %s:"
1388 " [%s] vs. [%s]", kind, os_name,
1389 self.cfg.GetNodeName(base.uuid),
1390 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1391 for kind, a, b in [("trusted", f_trusted, b_trusted)]:
1392 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
1393 "OS %s for %s differs from reference node %s:"
1394 " %s vs. %s", kind, os_name,
1395 self.cfg.GetNodeName(base.uuid), a, b)
1396
1397
1398 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1399 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
1400 "OSes present on reference node %s"
1401 " but missing on this node: %s",
1402 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
1403
1405 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
1406
1407 @type ninfo: L{objects.Node}
1408 @param ninfo: the node to check
1409 @param nresult: the remote results for the node
1410 @type is_master: bool
1411 @param is_master: Whether node is the master node
1412
1413 """
1414 cluster = self.cfg.GetClusterInfo()
1415 if (is_master and
1416 (cluster.IsFileStorageEnabled() or
1417 cluster.IsSharedFileStorageEnabled())):
1418 try:
1419 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
1420 except KeyError:
1421
1422 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1423 "Node did not return forbidden file storage paths")
1424 else:
1425 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1426 "Found forbidden file storage paths: %s",
1427 utils.CommaJoin(fspaths))
1428 else:
1429 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
1430 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
1431 "Node should not have returned forbidden file storage"
1432 " paths")
1433
1434 def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
1435 verify_key, error_key):
1436 """Verifies (file) storage paths.
1437
1438 @type ninfo: L{objects.Node}
1439 @param ninfo: the node to check
1440 @param nresult: the remote results for the node
1441 @type file_disk_template: string
1442 @param file_disk_template: file-based disk template, whose directory
1443 is supposed to be verified
1444 @type verify_key: string
1445 @param verify_key: key for the verification map of this file
1446 verification step
1447 @param error_key: error key to be added to the verification results
1448 in case something goes wrong in this verification step
1449
1450 """
1451 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
1452 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
1453 ))
1454
1455 cluster = self.cfg.GetClusterInfo()
1456 if cluster.IsDiskTemplateEnabled(file_disk_template):
1457 self._ErrorIf(
1458 verify_key in nresult,
1459 error_key, ninfo.name,
1460 "The configured %s storage path is unusable: %s" %
1461 (file_disk_template, nresult.get(verify_key)))
1462
1473
1484
1495
1497 """Verifies out of band functionality of a node.
1498
1499 @type ninfo: L{objects.Node}
1500 @param ninfo: the node to check
1501 @param nresult: the remote results for the node
1502
1503 """
1504
1505
1506 if ((ninfo.master_candidate or ninfo.master_capable) and
1507 constants.NV_OOB_PATHS in nresult):
1508 for path_result in nresult[constants.NV_OOB_PATHS]:
1509 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH,
1510 ninfo.name, path_result)
1511
1513 """Verifies and updates the node volume data.
1514
1515 This function will update a L{NodeImage}'s internal structures
1516 with data from the remote call.
1517
1518 @type ninfo: L{objects.Node}
1519 @param ninfo: the node to check
1520 @param nresult: the remote results for the node
1521 @param nimg: the node image object
1522 @param vg_name: the configured VG name
1523
1524 """
1525 nimg.lvm_fail = True
1526 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1527 if vg_name is None:
1528 pass
1529 elif isinstance(lvdata, basestring):
1530 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1531 "LVM problem on node: %s", utils.SafeEncode(lvdata))
1532 elif not isinstance(lvdata, dict):
1533 self._ErrorIf(True, constants.CV_ENODELVM, ninfo.name,
1534 "rpc call to node failed (lvlist)")
1535 else:
1536 nimg.volumes = lvdata
1537 nimg.lvm_fail = False
1538
1540 """Verifies and updates the node instance list.
1541
1542 If the listing was successful, then updates this node's instance
1543 list. Otherwise, it marks the RPC call as failed for the instance
1544 list key.
1545
1546 @type ninfo: L{objects.Node}
1547 @param ninfo: the node to check
1548 @param nresult: the remote results for the node
1549 @param nimg: the node image object
1550
1551 """
1552 idata = nresult.get(constants.NV_INSTANCELIST, None)
1553 test = not isinstance(idata, list)
1554 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1555 "rpc call to node failed (instancelist): %s",
1556 utils.SafeEncode(str(idata)))
1557 if test:
1558 nimg.hyp_fail = True
1559 else:
1560 nimg.instances = [uuid for (uuid, _) in
1561 self.cfg.GetMultiInstanceInfoByName(idata)]
1562
1564 """Verifies and computes a node information map
1565
1566 @type ninfo: L{objects.Node}
1567 @param ninfo: the node to check
1568 @param nresult: the remote results for the node
1569 @param nimg: the node image object
1570 @param vg_name: the configured VG name
1571
1572 """
1573
1574 hv_info = nresult.get(constants.NV_HVINFO, None)
1575 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1576 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
1577 "rpc call to node failed (hvinfo)")
1578 if not test:
1579 try:
1580 nimg.mfree = int(hv_info["memory_free"])
1581 except (ValueError, TypeError):
1582 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
1583 "node returned invalid nodeinfo, check hypervisor")
1584
1585
1586 if vg_name is not None:
1587 test = (constants.NV_VGLIST not in nresult or
1588 vg_name not in nresult[constants.NV_VGLIST])
1589 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
1590 "node didn't return data for the volume group '%s'"
1591 " - it is either missing or broken", vg_name)
1592 if not test:
1593 try:
1594 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1595 except (ValueError, TypeError):
1596 self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
1597 "node returned invalid LVM info, check LVM status")
1598
1600 """Gets per-disk status information for all instances.
1601
1602 @type node_uuids: list of strings
1603 @param node_uuids: Node UUIDs
1604 @type node_image: dict of (UUID, L{objects.Node})
1605 @param node_image: Node objects
1606 @type instanceinfo: dict of (UUID, L{objects.Instance})
1607 @param instanceinfo: Instance objects
1608 @rtype: {instance: {node: [(success, payload)]}}
1609 @return: a dictionary of per-instance dictionaries with nodes as
1610 keys and disk information as values; the disk information is a
1611 list of tuples (success, payload)
1612
1613 """
1614 node_disks = {}
1615 node_disks_dev_inst_only = {}
1616 diskless_instances = set()
1617 nodisk_instances = set()
1618
1619 for nuuid in node_uuids:
1620 node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
1621 node_image[nuuid].sinst))
1622 diskless_instances.update(uuid for uuid in node_inst_uuids
1623 if not instanceinfo[uuid].disks)
1624 disks = [(inst_uuid, disk)
1625 for inst_uuid in node_inst_uuids
1626 for disk in self.cfg.GetInstanceDisks(inst_uuid)]
1627
1628 if not disks:
1629 nodisk_instances.update(uuid for uuid in node_inst_uuids
1630 if instanceinfo[uuid].disks)
1631
1632 continue
1633
1634 node_disks[nuuid] = disks
1635
1636
1637 dev_inst_only = []
1638 for (inst_uuid, dev) in disks:
1639 (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
1640 self.cfg)
1641 dev_inst_only.append((anno_disk, instanceinfo[inst_uuid]))
1642
1643 node_disks_dev_inst_only[nuuid] = dev_inst_only
1644
1645 assert len(node_disks) == len(node_disks_dev_inst_only)
1646
1647
1648 result = self.rpc.call_blockdev_getmirrorstatus_multi(
1649 node_disks.keys(), node_disks_dev_inst_only)
1650
1651 assert len(result) == len(node_disks)
1652
1653 instdisk = {}
1654
1655 for (nuuid, nres) in result.items():
1656 node = self.cfg.GetNodeInfo(nuuid)
1657 disks = node_disks[node.uuid]
1658
1659 if nres.offline:
1660
1661 data = len(disks) * [(False, "node offline")]
1662 else:
1663 msg = nres.fail_msg
1664 self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
1665 "while getting disk information: %s", msg)
1666 if msg:
1667
1668 data = len(disks) * [(False, msg)]
1669 else:
1670 data = []
1671 for idx, i in enumerate(nres.payload):
1672 if isinstance(i, (tuple, list)) and len(i) == 2:
1673 data.append(i)
1674 else:
1675 logging.warning("Invalid result from node %s, entry %d: %s",
1676 node.name, idx, i)
1677 data.append((False, "Invalid result from the remote node"))
1678
1679 for ((inst_uuid, _), status) in zip(disks, data):
1680 instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
1681 .append(status)
1682
1683
1684 for inst_uuid in diskless_instances:
1685 assert inst_uuid not in instdisk
1686 instdisk[inst_uuid] = {}
1687
1688 for inst_uuid in nodisk_instances:
1689 assert inst_uuid not in instdisk
1690 instdisk[inst_uuid] = {}
1691
1692 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1693 len(nuuids) <= len(
1694 self.cfg.GetInstanceNodes(instanceinfo[inst].uuid)) and
1695 compat.all(isinstance(s, (tuple, list)) and
1696 len(s) == 2 for s in statuses)
1697 for inst, nuuids in instdisk.items()
1698 for nuuid, statuses in nuuids.items())
1699 if __debug__:
1700 instdisk_keys = set(instdisk)
1701 instanceinfo_keys = set(instanceinfo)
1702 assert instdisk_keys == instanceinfo_keys, \
1703 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
1704 (instdisk_keys, instanceinfo_keys))
1705
1706 return instdisk
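# Illustrative sketch (hypothetical UUIDs): the returned mapping has one entry
# per instance, keyed by node UUID, with one (success, payload) pair per disk,
# e.g.
#
#   instdisk["inst-uuid-1"] = {
#     "node-uuid-a": [(True, status0), (True, status1)],
#     "node-uuid-b": [(False, "node offline"), (False, "node offline")],
#   }
#
# Diskless instances are represented by an empty inner dictionary.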
1707
1708 @staticmethod
1709 def _SshNodeSelector(group_uuid, all_nodes):
1710 """Create endless iterators for all potential SSH check hosts.
1711
1712 """
1713 nodes = [node for node in all_nodes
1714 if (node.group != group_uuid and
1715 not node.offline)]
1716 keyfunc = operator.attrgetter("group")
1717
1718 return map(itertools.cycle,
1719 [sorted(map(operator.attrgetter("name"), names))
1720 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
1721 keyfunc)])
1722
1723 @classmethod
1724 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
1725 """Choose which nodes should talk to which other nodes.
1726
1727 We will make nodes contact all nodes in their group, and one node from
1728 every other group.
1729
1730 @rtype: tuple of (list of strings, dict of string to list of strings, list of strings)
1731 @return: a tuple containing the list of all online nodes, a dictionary
1732 mapping node names to additional nodes of other node groups to which
1733 connectivity should be tested, and a list of all online master
1734 candidates
1735
1736 @warning: This algorithm has a known issue if one node group is much
1737 smaller than others (e.g. just one node). In such a case all other
1738 nodes will talk to the single node.
1739
1740 """
1741 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
1742 online_mcs = sorted(node.name for node in group_nodes
1743 if (node.master_candidate and not node.offline))
1744 sel = cls._SshNodeSelector(group_uuid, all_nodes)
1745
1746 return (online_nodes,
1747 dict((name, sorted([i.next() for i in sel]))
1748 for name in online_nodes),
1749 online_mcs)
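# Illustrative sketch (hypothetical node names): with the verified group
# G1 = {n1, n2} and another group G2 = {m1, m2}, all online nodes of G1 are
# checked and the per-node extra SSH targets cycle through the other group,
# roughly
#
#   {"n1": ["m1"], "n2": ["m2"]}
#
# so cross-group connectivity is probed without every node contacting every
# node of every other group.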
1750
1751 def _PrepareSshSetupCheck(self):
1752 """Prepare the input data for the SSH setup verification.
1753
1754 """
1755 all_nodes_info = self.cfg.GetAllNodesInfo()
1756 potential_master_candidates = self.cfg.GetPotentialMasterCandidates()
1757 node_status = [
1758 (uuid, node_info.name, node_info.master_candidate,
1759 node_info.name in potential_master_candidates, not node_info.offline)
1760 for (uuid, node_info) in all_nodes_info.items()]
1761 return node_status
1762
1764 """Build hooks env.
1765
1766 Cluster-Verify hooks are run only in the post phase; their output is
1767 logged in the verify output, and a hook failure makes the verification fail.
1768
1769 """
1770 env = {
1771 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
1772 }
1773
1774 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
1775 for node in self.my_node_info.values())
1776
1777 return env
1778
1780 """Build hooks nodes.
1781
1782 """
1783 return ([], list(self.my_node_info.keys()))
1784
1785 @staticmethod
1786 def _VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
1787 i_offline, n_offline, n_drained):
1788 feedback_fn("* Other Notes")
1789 if i_non_redundant:
1790 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1791 % len(i_non_redundant))
1792
1793 if i_non_a_balanced:
1794 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1795 % len(i_non_a_balanced))
1796
1797 if i_offline:
1798 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
1799
1800 if n_offline:
1801 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1802
1803 if n_drained:
1804 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1805
1806 def Exec(self, feedback_fn):
1807 """Verify integrity of the node group, performing various tests on nodes.
1808
1809 """
1810
1811 feedback_fn("* Verifying group '%s'" % self.group_info.name)
1812
1813 if not self.my_node_uuids:
1814
1815 feedback_fn("* Empty node group, skipping verification")
1816 return True
1817
1818 self.bad = False
1819 verbose = self.op.verbose
1820 self._feedback_fn = feedback_fn
1821
1822 vg_name = self.cfg.GetVGName()
1823 drbd_helper = self.cfg.GetDRBDHelper()
1824 cluster = self.cfg.GetClusterInfo()
1825 hypervisors = cluster.enabled_hypervisors
1826 node_data_list = self.my_node_info.values()
1827
1828 i_non_redundant = []
1829 i_non_a_balanced = []
1830 i_offline = 0
1831 n_offline = 0
1832 n_drained = 0
1833 node_vol_should = {}
1834
1835
1836
1837
1838 filemap = ComputeAncillaryFiles(cluster, False)
1839
1840
1841 master_node_uuid = self.master_node = self.cfg.GetMasterNode()
1842 master_ip = self.cfg.GetMasterIP()
1843
1844 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
1845
1846 user_scripts = []
1847 if self.cfg.GetUseExternalMipScript():
1848 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
1849
1850 node_verify_param = {
1851 constants.NV_FILELIST:
1852 map(vcluster.MakeVirtualPath,
1853 utils.UniqueSequence(filename
1854 for files in filemap
1855 for filename in files)),
1856 constants.NV_NODELIST:
1857 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
1858 self.all_node_info.values()),
1859 constants.NV_HYPERVISOR: hypervisors,
1860 constants.NV_HVPARAMS:
1861 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
1862 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
1863 for node in node_data_list
1864 if not node.offline],
1865 constants.NV_INSTANCELIST: hypervisors,
1866 constants.NV_VERSION: None,
1867 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1868 constants.NV_NODESETUP: None,
1869 constants.NV_TIME: None,
1870 constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
1871 constants.NV_OSLIST: None,
1872 constants.NV_NONVMNODES: self.cfg.GetNonVmCapableNodeNameList(),
1873 constants.NV_USERSCRIPTS: user_scripts,
1874 constants.NV_CLIENT_CERT: None,
1875 }
1876
1877 if self.cfg.GetClusterInfo().modify_ssh_setup:
1878 node_verify_param[constants.NV_SSH_SETUP] = self._PrepareSshSetupCheck()
1879 if self.op.verify_clutter:
1880 node_verify_param[constants.NV_SSH_CLUTTER] = True
1881
1882 if vg_name is not None:
1883 node_verify_param[constants.NV_VGLIST] = None
1884 node_verify_param[constants.NV_LVLIST] = vg_name
1885 node_verify_param[constants.NV_PVLIST] = [vg_name]
1886
1887 if cluster.IsDiskTemplateEnabled(constants.DT_DRBD8):
1888 if drbd_helper:
1889 node_verify_param[constants.NV_DRBDVERSION] = None
1890 node_verify_param[constants.NV_DRBDLIST] = None
1891 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
1892
1893 if cluster.IsFileStorageEnabled() or \
1894 cluster.IsSharedFileStorageEnabled():
1895
1896 node_verify_param[constants.NV_ACCEPTED_STORAGE_PATHS] = \
1897 self.cfg.GetMasterNodeName()
1898 if cluster.IsFileStorageEnabled():
1899 node_verify_param[constants.NV_FILE_STORAGE_PATH] = \
1900 cluster.file_storage_dir
1901 if cluster.IsSharedFileStorageEnabled():
1902 node_verify_param[constants.NV_SHARED_FILE_STORAGE_PATH] = \
1903 cluster.shared_file_storage_dir
1904
1905
1906
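# Collect every bridge referenced by the default NIC parameters or by an
# instance NIC, so the nodes can check that those bridges exist.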
1907 bridges = set()
1908 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
1909 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
1910 bridges.add(default_nicpp[constants.NIC_LINK])
1911 for instance in self.my_inst_info.values():
1912 for nic in instance.nics:
1913 full_nic = cluster.SimpleFillNIC(nic.nicparams)
1914 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
1915 bridges.add(full_nic[constants.NIC_LINK])
1916
1917 if bridges:
1918 node_verify_param[constants.NV_BRIDGES] = list(bridges)
1919
1920
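# One NodeImage per node in the group, used to accumulate per-node results.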
1921 node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
1922 uuid=node.uuid,
1923 vm_capable=node.vm_capable))
1924 for node in node_data_list)
1925
1926
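# All distinct out-of-band helper programs configured on any node.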
1927 oob_paths = []
1928 for node in self.all_node_info.values():
1929 path = SupportsOob(self.cfg, node)
1930 if path and path not in oob_paths:
1931 oob_paths.append(path)
1932
1933 if oob_paths:
1934 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
1935
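# Walk the group's instances: count offline ones, add ghost images for nodes
# outside the node list, and record expected volumes plus primary/secondary
# instance membership per node.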
1936 for inst_uuid in self.my_inst_uuids:
1937 instance = self.my_inst_info[inst_uuid]
1938 if instance.admin_state == constants.ADMINST_OFFLINE:
1939 i_offline += 1
1940
1941 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
1942 for nuuid in inst_nodes:
1943 if nuuid not in node_image:
1944 gnode = self.NodeImage(uuid=nuuid)
1945 gnode.ghost = (nuuid not in self.all_node_info)
1946 node_image[nuuid] = gnode
1947
1948 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
1949
1950 pnode = instance.primary_node
1951 node_image[pnode].pinst.append(instance.uuid)
1952
1953 for snode in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
1954 nimg = node_image[snode]
1955 nimg.sinst.append(instance.uuid)
1956 if pnode not in nimg.sbp:
1957 nimg.sbp[pnode] = []
1958 nimg.sbp[pnode].append(instance.uuid)
1959
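# Exclusive-storage PV checks are requested if any node in the group has the
# flag set.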
1960 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
1961 self.my_node_info.keys())
1962
1963
1964 self._exclusive_storage = compat.any(es_flags.values())
1965 if self._exclusive_storage:
1966 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
1967
1968 node_group_uuids = dict(map(lambda n: (n.name, n.group),
1969 self.cfg.GetAllNodesInfo().values()))
1970 groups_config = self.cfg.GetAllNodeGroupsInfoDict()
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
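# Flush the configuration to disk under a shared config lock before querying
# the nodes, so the file checks below run against an up-to-date config.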
1985 with self.cfg.GetConfigManager(shared=True, forcelock=True):
1986 feedback_fn("* Gathering information about nodes (%s nodes)" %
1987 len(self.my_node_uuids))
1988
1989 self.cfg.FlushConfig()
1990
1991
1992
1993
1994 nvinfo_starttime = time.time()
1995
1996
1997
1998
1999
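# Fan out the main node_verify RPC to all nodes in the group.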
2000 cluster_name = self.cfg.GetClusterName()
2001 hvparams = self.cfg.GetClusterInfo().hvparams
2002 all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
2003 node_verify_param,
2004 cluster_name,
2005 hvparams,
2006 node_group_uuids,
2007 groups_config)
2008 nvinfo_endtime = time.time()
2009
2010 if self.extra_lv_nodes and vg_name is not None:
2011 feedback_fn("* Gathering information about extra nodes (%s nodes)" %
2012 len(self.extra_lv_nodes))
2013 extra_lv_nvinfo = \
2014 self.rpc.call_node_verify(self.extra_lv_nodes,
2015 {constants.NV_LVLIST: vg_name},
2016 self.cfg.GetClusterName(),
2017 self.cfg.GetClusterInfo().hvparams,
2018 node_group_uuids,
2019 groups_config)
2020 else:
2021 extra_lv_nvinfo = {}
2022
2023
2024
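# If only part of the cluster is being verified, also fetch the file list
# from the master node and from one vm-capable node outside this group for
# the file consistency check.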
2025 absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
2026 if absent_node_uuids:
2027 vf_nvinfo = all_nvinfo.copy()
2028 vf_node_info = list(self.my_node_info.values())
2029 additional_node_uuids = []
2030 if master_node_uuid not in self.my_node_info:
2031 additional_node_uuids.append(master_node_uuid)
2032 vf_node_info.append(self.all_node_info[master_node_uuid])
2033
2034
2035 for node_uuid in absent_node_uuids:
2036 nodeinfo = self.all_node_info[node_uuid]
2037 if (nodeinfo.vm_capable and not nodeinfo.offline and
2038 node_uuid != master_node_uuid):
2039 additional_node_uuids.append(node_uuid)
2040 vf_node_info.append(self.all_node_info[node_uuid])
2041 break
2042 key = constants.NV_FILELIST
2043
2044 feedback_fn("* Gathering information about the master node")
2045 vf_nvinfo.update(self.rpc.call_node_verify(
2046 additional_node_uuids, {key: node_verify_param[key]},
2047 self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams,
2048 node_group_uuids,
2049 groups_config))
2050 else:
2051 vf_nvinfo = all_nvinfo
2052 vf_node_info = self.my_node_info.values()
2053
2054 all_drbd_map = self.cfg.ComputeDRBDMap()
2055
2056 feedback_fn("* Gathering disk information (%s nodes)" %
2057 len(self.my_node_uuids))
2058 instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
2059 self.my_inst_info)
2060
2061 feedback_fn("* Verifying configuration file consistency")
2062
2063 self._VerifyClientCertificates(self.my_node_info.values(), all_nvinfo)
2064 if self.cfg.GetClusterInfo().modify_ssh_setup:
2065 self._VerifySshSetup(self.my_node_info.values(), all_nvinfo)
2066 self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
2067
2068 feedback_fn("* Verifying node status")
2069
2070 refos_img = None
2071
2072 for node_i in node_data_list:
2073 nimg = node_image[node_i.uuid]
2074
2075 if node_i.offline:
2076 if verbose:
2077 feedback_fn("* Skipping offline node %s" % (node_i.name,))
2078 n_offline += 1
2079 continue
2080
2081 if node_i.uuid == master_node_uuid:
2082 ntype = "master"
2083 elif node_i.master_candidate:
2084 ntype = "master candidate"
2085 elif node_i.drained:
2086 ntype = "drained"
2087 n_drained += 1
2088 else:
2089 ntype = "regular"
2090 if verbose:
2091 feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))
2092
2093 msg = all_nvinfo[node_i.uuid].fail_msg
2094 self._ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
2095 "while contacting node: %s", msg)
2096 if msg:
2097 nimg.rpc_fail = True
2098 continue
2099
2100 nresult = all_nvinfo[node_i.uuid].payload
2101
2102 nimg.call_ok = self._VerifyNode(node_i, nresult)
2103 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2104 self._VerifyNodeNetwork(node_i, nresult)
2105 self._VerifyNodeUserScripts(node_i, nresult)
2106 self._VerifyOob(node_i, nresult)
2107 self._VerifyAcceptedFileStoragePaths(node_i, nresult,
2108 node_i.uuid == master_node_uuid)
2109 self._VerifyFileStoragePaths(node_i, nresult)
2110 self._VerifySharedFileStoragePaths(node_i, nresult)
2111 self._VerifyGlusterStoragePaths(node_i, nresult)
2112
2113 if nimg.vm_capable:
2114 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
2115 if constants.DT_DRBD8 in cluster.enabled_disk_templates:
2116 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info,
2117 self.all_disks_info, drbd_helper, all_drbd_map)
2118
2119 if (constants.DT_PLAIN in cluster.enabled_disk_templates) or \
2120 (constants.DT_DRBD8 in cluster.enabled_disk_templates):
2121 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2122 self._UpdateNodeInstances(node_i, nresult, nimg)
2123 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2124 self._UpdateNodeOS(node_i, nresult, nimg)
2125
2126 if not nimg.os_fail:
2127 if refos_img is None:
2128 refos_img = nimg
2129 self._VerifyNodeOS(node_i, nimg, refos_img)
2130 self._VerifyNodeBridges(node_i, nresult, bridges)
2131
2132
2133
2134
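# Instances the node reports but is not primary for: known instances are on
# the wrong node, unknown ones are orphaned.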
2135 non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)
2136
2137 for inst_uuid in non_primary_inst_uuids:
2138 test = inst_uuid in self.all_inst_info
2139 self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
2140 self.cfg.GetInstanceName(inst_uuid),
2141 "instance should not run on node %s", node_i.name)
2142 self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2143 "node is running unknown instance %s", inst_uuid)
2144
2145 self._VerifyGroupDRBDVersion(all_nvinfo)
2146 self._VerifyGroupLVM(node_image, vg_name)
2147
2148 for node_uuid, result in extra_lv_nvinfo.items():
2149 self._UpdateNodeVolumes(self.all_node_info[node_uuid], result.payload,
2150 node_image[node_uuid], vg_name)
2151
2152 feedback_fn("* Verifying instance status")
2153 for inst_uuid in self.my_inst_uuids:
2154 instance = self.my_inst_info[inst_uuid]
2155 if verbose:
2156 feedback_fn("* Verifying instance %s" % instance.name)
2157 self._VerifyInstance(instance, node_image, instdisk[inst_uuid])
2158
2159
2160
2161 inst_disks = self.cfg.GetInstanceDisks(instance.uuid)
2162 if not utils.AllDiskOfType(inst_disks, constants.DTS_MIRRORED):
2163 i_non_redundant.append(instance)
2164
2165 if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
2166 i_non_a_balanced.append(instance)
2167
2168 feedback_fn("* Verifying orphan volumes")
2169 reserved = utils.FieldSet(*cluster.reserved_lvs)
2170
2171
2172
2173
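# Also expect volumes of instances outside this group whose secondary node is
# in the group, so they are not reported as orphans.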
2174 for instance in self.all_inst_info.values():
2175 for secondary in self.cfg.GetInstanceSecondaryNodes(instance.uuid):
2176 if (secondary in self.my_node_info
2177 and instance.uuid not in self.my_inst_info):
2178 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
2179 break
2180
2181 self._VerifyOrphanVolumes(vg_name, node_vol_should, node_image, reserved)
2182
2183 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2184 feedback_fn("* Verifying N+1 Memory redundancy")
2185 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2186
2187 self._VerifyOtherNotes(feedback_fn, i_non_redundant, i_non_a_balanced,
2188 i_offline, n_offline, n_drained)
2189
2190 return not self.bad
2191
2192 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2193 """Analyze the post-hooks' result
2194
2195 This method analyses the hook result, handles it, and sends some
2196 nicely-formatted feedback back to the user.
2197
2198 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2199 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2200 @param hooks_results: the results of the multi-node hooks rpc call
2201 @param feedback_fn: function used to send feedback back to the caller
2202 @param lu_result: previous Exec result
2203 @return: the new Exec result, based on the previous result
2204 and hook results
2205
2206 """
2207
2208
2209 if not self.my_node_uuids:
2210
2211 pass
2212 elif phase == constants.HOOKS_PHASE_POST:
2213
2214 feedback_fn("* Hooks Results")
2215 assert hooks_results, "invalid result from hooks"
2216
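# Report per-node hook results; RPC failures on online nodes make the run
# fail, offline nodes are skipped.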
2217 for node_name in hooks_results:
2218 res = hooks_results[node_name]
2219 msg = res.fail_msg
2220 test = msg and not res.offline
2221 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
2222 "Communication failure in hooks execution: %s", msg)
2223 if test:
2224 lu_result = False
2225 continue
2226 if res.offline:
2227
2228 continue
2229 for script, hkr, output in res.payload:
2230 test = hkr == constants.HKR_FAIL
2231 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
2232 "Script %s failed, output:", script)
2233 if test:
2234 output = self._HOOKS_INDENT_RE.sub(" ", output)
2235 feedback_fn("%s" % output)
2236 lu_result = False
2237
2238 return lu_result
2239