1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module implementing the master-side code."""
23
24
25
26
27
28
29
30
31 import os
32 import os.path
33 import time
34 import re
35 import platform
36 import logging
37 import copy
38 import OpenSSL
39 import socket
40 import tempfile
41 import shutil
42 import itertools
43 import operator
44
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
61
62 import ganeti.masterd.instance
66 """Tells if node supports OOB.
67
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
71 @param node: The node
72 @return: The OOB script if supported or an empty string otherwise
73
74 """
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76
80 """Logical Unit base class.
81
82 Subclasses must follow these rules:
83 - implement ExpandNames
84 - implement CheckPrereq (except when tasklets are used)
85 - implement Exec (except when tasklets are used)
86 - implement BuildHooksEnv
87 - redefine HPATH and HTYPE
88 - optionally redefine their run requirements:
89 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
90
91 Note that all commands require root permissions.
92
93 @ivar dry_run_result: the value (if any) that will be returned to the caller
94 in dry-run mode (signalled by opcode dry_run parameter)
95
96 """
97 HPATH = None
98 HTYPE = None
99 REQ_BGL = True
100
101 def __init__(self, processor, op, context, rpc):
102 """Constructor for LogicalUnit.
103
104 This needs to be overridden in derived classes in order to check op
105 validity.
106
107 """
108 self.proc = processor
109 self.op = op
110 self.cfg = context.cfg
111 self.context = context
112 self.rpc = rpc
113
114 self.needed_locks = None
115 self.acquired_locks = {}
116 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
117 self.add_locks = {}
118 self.remove_locks = {}
119
120 self.recalculate_locks = {}
121 self.__ssh = None
122
123 self.Log = processor.Log
124 self.LogWarning = processor.LogWarning
125 self.LogInfo = processor.LogInfo
126 self.LogStep = processor.LogStep
127
128 self.dry_run_result = None
129
130 if (not hasattr(self.op, "debug_level") or
131 not isinstance(self.op.debug_level, int)):
132 self.op.debug_level = 0
133
134
135 self.tasklets = None
136
137
138 self.op.Validate(True)
139
140 self.CheckArguments()
141
143 """Returns the SshRunner object
144
145 """
146 if not self.__ssh:
147 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
148 return self.__ssh
149
150 ssh = property(fget=__GetSSH)
151
153 """Check syntactic validity for the opcode arguments.
154
155 This method is for doing a simple syntactic check and ensure
156 validity of opcode parameters, without any cluster-related
157 checks. While the same can be accomplished in ExpandNames and/or
158 CheckPrereq, doing these separately is better because:
159
160 - ExpandNames is left as purely a lock-related function
161 - CheckPrereq is run after we have acquired locks (and possibly
162 waited for them)
163
164 The function is allowed to change the self.op attribute so that
165 later methods no longer need to worry about missing parameters.
166
167 """
168 pass
169
171 """Expand names for this LU.
172
173 This method is called before starting to execute the opcode, and it should
174 update all the parameters of the opcode to their canonical form (e.g. a
175 short node name must be fully expanded after this method has successfully
176 completed). This way locking, hooks, logging, etc. can work correctly.
177
178 LUs which implement this method must also populate the self.needed_locks
179 member, as a dict with lock levels as keys, and a list of needed lock names
180 as values. Rules:
181
182 - use an empty dict if you don't need any lock
183 - if you don't need any lock at a particular level omit that level
184 - don't put anything for the BGL level
185 - if you want all locks at a level use locking.ALL_SET as a value
186
187 If you need to share locks (rather than acquire them exclusively) at one
188 level you can modify self.share_locks, setting a true value (usually 1) for
189 that level. By default locks are not shared.
190
191 This function can also define a list of tasklets, which then will be
192 executed in order instead of the usual LU-level CheckPrereq and Exec
193 functions, if those are not defined by the LU.
194
195 Examples::
196
197 # Acquire all nodes and one instance
198 self.needed_locks = {
199 locking.LEVEL_NODE: locking.ALL_SET,
200 locking.LEVEL_INSTANCE: ['instance1.example.com'],
201 }
202 # Acquire just two nodes
203 self.needed_locks = {
204 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
205 }
206 # Acquire no locks
207 self.needed_locks = {} # No, you can't leave it to the default value None
208
209 """
210
211
212
213 if self.REQ_BGL:
214 self.needed_locks = {}
215 else:
216 raise NotImplementedError
217
219 """Declare LU locking needs for a level
220
221 While most LUs can just declare their locking needs at ExpandNames time,
222 sometimes there's the need to calculate some locks after having acquired
223 the ones before. This function is called just before acquiring locks at a
224 particular level, but after acquiring the ones at lower levels, and permits
225 such calculations. It can be used to modify self.needed_locks, and by
226 default it does nothing.
227
228 This function is only called if you have something already set in
229 self.needed_locks for the level.
230
231 @param level: Locking level which is going to be locked
232 @type level: member of ganeti.locking.LEVELS
233
234 """
235
237 """Check prerequisites for this LU.
238
239 This method should check that the prerequisites for the execution
240 of this LU are fulfilled. It can do internode communication, but
241 it should be idempotent - no cluster or system changes are
242 allowed.
243
244 The method should raise errors.OpPrereqError in case something is
245 not fulfilled. Its return value is ignored.
246
247 This method should also update all the parameters of the opcode to
248 their canonical form if it hasn't been done by ExpandNames before.
249
250 """
251 if self.tasklets is not None:
252 for (idx, tl) in enumerate(self.tasklets):
253 logging.debug("Checking prerequisites for tasklet %s/%s",
254 idx + 1, len(self.tasklets))
255 tl.CheckPrereq()
256 else:
257 pass
258
259 def Exec(self, feedback_fn):
260 """Execute the LU.
261
262 This method should implement the actual work. It should raise
263 errors.OpExecError for failures that are somewhat dealt with in
264 code, or expected.
265
266 """
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
270 tl.Exec(feedback_fn)
271 else:
272 raise NotImplementedError
273
275 """Build hooks environment for this LU.
276
277 This method should return a three-element tuple consisting of: a dict
278 containing the environment that will be used for running the
279 specific hook for this LU, a list of node names on which the hook
280 should run before the execution, and a list of node names on which
281 the hook should run after the execution.
282
283 The keys of the dict must not be prefixed with 'GANETI_', as this will
284 be handled by the hooks runner. Also note that additional keys will be
285 added by the hooks runner. If the LU doesn't define any
286 environment, an empty dict (and not None) should be returned.
287
288 If there are no nodes, an empty list should be returned (and not None).
289
290 Note that if the HPATH for a LU class is None, this function will
291 not be called.
292
293 """
294 raise NotImplementedError
295
296 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
297 """Notify the LU about the results of its hooks.
298
299 This method is called every time a hooks phase is executed, and notifies
300 the Logical Unit about the hooks' result. The LU can then use it to alter
301 its result based on the hooks. By default the method does nothing and the
302 previous result is passed back unchanged but any LU can define it if it
303 wants to use the local cluster hook-scripts somehow.
304
305 @param phase: one of L{constants.HOOKS_PHASE_POST} or
306 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
307 @param hook_results: the results of the multi-node hooks rpc call
308 @param feedback_fn: function used to send feedback back to the caller
309 @param lu_result: the previous Exec result this LU had, or None
310 in the PRE phase
311 @return: the new Exec result, based on the previous result
312 and hook results
313
314 """
315
316
317
318 return lu_result
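# A minimal sketch (hypothetical, for illustration) of how an LU could
# override HooksCallBack to report post-phase hook failures; the per-node
# results are the multi-node hooks rpc results described in the docstring:
#
#   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
#     if phase == constants.HOOKS_PHASE_POST:
#       for node, res in hook_results.items():
#         if res.fail_msg:
#           feedback_fn("Post hook failed on %s: %s" % (node, res.fail_msg))
#     return lu_result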
319
321 """Helper function to expand and lock an instance.
322
323 Many LUs that work on an instance take its name in self.op.instance_name
324 and need to expand it and then declare the expanded name for locking. This
325 function does it, and then updates self.op.instance_name to the expanded
326 name. It also initializes needed_locks as a dict, if this hasn't been done
327 before.
328
329 """
330 if self.needed_locks is None:
331 self.needed_locks = {}
332 else:
333 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
334 "_ExpandAndLockInstance called with instance-level locks set"
335 self.op.instance_name = _ExpandInstanceName(self.cfg,
336 self.op.instance_name)
337 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338
340 """Helper function to declare instances' nodes for locking.
341
342 This function should be called after locking one or more instances to lock
343 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
344 with all primary or secondary nodes for instances already locked and
345 present in self.needed_locks[locking.LEVEL_INSTANCE].
346
347 It should be called from DeclareLocks, and for safety only works if
348 self.recalculate_locks[locking.LEVEL_NODE] is set.
349
350 In the future it may grow parameters to just lock some instance's nodes, or
351 to just lock primaries or secondary nodes, if needed.
352
353 It should be called in DeclareLocks in a way similar to::
354
355 if level == locking.LEVEL_NODE:
356 self._LockInstancesNodes()
357
358 @type primary_only: boolean
359 @param primary_only: only lock primary nodes of locked instances
360
361 """
362 assert locking.LEVEL_NODE in self.recalculate_locks, \
363 "_LockInstancesNodes helper function called with no nodes to recalculate"
364
365
366
367
368
369
370 wanted_nodes = []
371 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
372 instance = self.context.cfg.GetInstanceInfo(instance_name)
373 wanted_nodes.append(instance.primary_node)
374 if not primary_only:
375 wanted_nodes.extend(instance.secondary_nodes)
376
377 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
378 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
379 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
380 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
381
382 del self.recalculate_locks[locking.LEVEL_NODE]
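# A minimal sketch of a LogicalUnit subclass following the rules documented in
# the class docstring above; the class name and the "node_name" opcode slot are
# hypothetical and only illustrate the ExpandNames/CheckPrereq/Exec split.
class _ExampleNodeNoopLU(LogicalUnit):
  """Example LU: lock one node, check that it is online, do nothing."""
  HPATH = None
  HTYPE = None
  REQ_BGL = False

  def ExpandNames(self):
    # Canonicalise the node name and declare the single lock we need
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}

  def CheckPrereq(self):
    # Locks are held at this point; checks must stay idempotent
    _CheckNodeOnline(self, self.op.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do on node %s" % self.op.node_name)
    return True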
383
386 """Simple LU which runs no hooks.
387
388 This LU is intended as a parent for other LogicalUnits which will
389 run no hooks, in order to reduce duplicate code.
390
391 """
392 HPATH = None
393 HTYPE = None
394
396 """Empty BuildHooksEnv for NoHooksLu.
397
398 This just raises an error.
399
400 """
401 assert False, "BuildHooksEnv called for NoHooksLUs"
402
405 """Tasklet base class.
406
407 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
408 they can mix legacy code with tasklets. Locking needs to be done in the LU,
409 tasklets know nothing about locks.
410
411 Subclasses must follow these rules:
412 - Implement CheckPrereq
413 - Implement Exec
414
415 """
417 self.lu = lu
418
419
420 self.cfg = lu.cfg
421 self.rpc = lu.rpc
422
424 """Check prerequisites for this tasklets.
425
426 This method should check whether the prerequisites for the execution of
427 this tasklet are fulfilled. It can do internode communication, but it
428 should be idempotent - no cluster or system changes are allowed.
429
430 The method should raise errors.OpPrereqError in case something is not
431 fulfilled. Its return value is ignored.
432
433 This method should also update all parameters to their canonical form if it
434 hasn't been done before.
435
436 """
437 pass
438
439 def Exec(self, feedback_fn):
440 """Execute the tasklet.
441
442 This method should implement the actual work. It should raise
443 errors.OpExecError for failures that are somewhat dealt with in code, or
444 expected.
445
446 """
447 raise NotImplementedError
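# A minimal tasklet sketch (the class name is hypothetical and the base class
# is the Tasklet base class defined above); all locking remains the
# responsibility of the LU that owns the tasklet, as noted in the docstring.
class _ExampleNodeCheckTasklet(Tasklet):
  """Example tasklet: verify that an already-locked node is not drained."""
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    _CheckNodeNotDrained(self.lu, self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Node %s is not drained" % self.node_name)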
448
451 """Base for query utility classes.
452
453 """
454
455 FIELDS = None
456
457 def __init__(self, names, fields, use_locking):
458 """Initializes this class.
459
460 """
461 self.names = names
462 self.use_locking = use_locking
463
464 self.query = query.Query(self.FIELDS, fields)
465 self.requested_data = self.query.RequestedData()
466
467 self.do_locking = None
468 self.wanted = None
469
470 def _GetNames(self, lu, all_names, lock_level):
471 """Helper function to determine names asked for in the query.
472
473 """
474 if self.do_locking:
475 names = lu.acquired_locks[lock_level]
476 else:
477 names = all_names
478
479 if self.wanted == locking.ALL_SET:
480 assert not self.names
481
482 return utils.NiceSort(names)
483
484
485 assert self.names
486 assert not self.do_locking or lu.acquired_locks[lock_level]
487
488 missing = set(self.wanted).difference(names)
489 if missing:
490 raise errors.OpExecError("Some items were removed before retrieving"
491 " their data: %s" % missing)
492
493
494 return self.wanted
495
496 @classmethod
498 """Returns list of available fields.
499
500 @return: List of L{objects.QueryFieldDefinition}
501
502 """
503 return query.QueryFields(cls.FIELDS, fields)
504
506 """Expand names for this query.
507
508 See L{LogicalUnit.ExpandNames}.
509
510 """
511 raise NotImplementedError()
512
514 """Declare locks for this query.
515
516 See L{LogicalUnit.DeclareLocks}.
517
518 """
519 raise NotImplementedError()
520
522 """Collects all data for this query.
523
524 @return: Query data object
525
526 """
527 raise NotImplementedError()
528
534
540
543 """Returns list of checked and expanded node names.
544
545 @type lu: L{LogicalUnit}
546 @param lu: the logical unit on whose behalf we execute
547 @type nodes: list
548 @param nodes: list of node names or None for all nodes
549 @rtype: list
550 @return: the list of nodes, sorted
551 @raise errors.ProgrammerError: if the nodes parameter is wrong type
552
553 """
554 if nodes:
555 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
556
557 return utils.NiceSort(lu.cfg.GetNodeList())
558
561 """Returns list of checked and expanded instance names.
562
563 @type lu: L{LogicalUnit}
564 @param lu: the logical unit on whose behalf we execute
565 @type instances: list
566 @param instances: list of instance names or None for all instances
567 @rtype: list
568 @return: the list of instances, sorted
569 @raise errors.OpPrereqError: if the instances parameter is wrong type
570 @raise errors.OpPrereqError: if any of the passed instances is not found
571
572 """
573 if instances:
574 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
575 else:
576 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
577 return wanted
578
579
580 def _GetUpdatedParams(old_params, update_dict,
581 use_default=True, use_none=False):
582 """Return the new version of a parameter dictionary.
583
584 @type old_params: dict
585 @param old_params: old parameters
586 @type update_dict: dict
587 @param update_dict: dict containing new parameter values, or
588 constants.VALUE_DEFAULT to reset the parameter to its default
589 value
590 @type use_default: boolean
591 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
592 values as 'to be deleted' values
593 @type use_none: boolean
594 @param use_none: whether to recognise C{None} values as 'to be
595 deleted' values
596 @rtype: dict
597 @return: the new parameter dictionary
598
599 """
600 params_copy = copy.deepcopy(old_params)
601 for key, val in update_dict.iteritems():
602 if ((use_default and val == constants.VALUE_DEFAULT) or
603 (use_none and val is None)):
604 try:
605 del params_copy[key]
606 except KeyError:
607 pass
608 else:
609 params_copy[key] = val
610 return params_copy
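# Illustrative usage of _GetUpdatedParams (all values are made up):
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
#   new = _GetUpdatedParams(old, {"root_path": constants.VALUE_DEFAULT,
#                                 "serial_console": True})
#   # new == {"kernel_path": "/vmlinuz", "serial_console": True}: "root_path"
#   # was removed, i.e. reset to its default, while "serial_console" was added.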
611
614 """Checks whether all selected fields are valid.
615
616 @type static: L{utils.FieldSet}
617 @param static: static fields set
618 @type dynamic: L{utils.FieldSet}
619 @param dynamic: dynamic fields set
620
621 """
622 f = utils.FieldSet()
623 f.Extend(static)
624 f.Extend(dynamic)
625
626 delta = f.NonMatching(selected)
627 if delta:
628 raise errors.OpPrereqError("Unknown output fields selected: %s"
629 % ",".join(delta), errors.ECODE_INVAL)
630
633 """Validates that given hypervisor params are not global ones.
634
635 This will ensure that instances don't get customised versions of
636 global params.
637
638 """
639 used_globals = constants.HVC_GLOBALS.intersection(params)
640 if used_globals:
641 msg = ("The following hypervisor parameters are global and cannot"
642 " be customized at instance level, please modify them at"
643 " cluster level: %s" % utils.CommaJoin(used_globals))
644 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
645
648 """Ensure that a given node is online.
649
650 @param lu: the LU on behalf of which we make the check
651 @param node: the node to check
652 @param msg: if passed, should be a message to replace the default one
653 @raise errors.OpPrereqError: if the node is offline
654
655 """
656 if msg is None:
657 msg = "Can't use offline node"
658 if lu.cfg.GetNodeInfo(node).offline:
659 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
660
663 """Ensure that a given node is not drained.
664
665 @param lu: the LU on behalf of which we make the check
666 @param node: the node to check
667 @raise errors.OpPrereqError: if the node is drained
668
669 """
670 if lu.cfg.GetNodeInfo(node).drained:
671 raise errors.OpPrereqError("Can't use drained node %s" % node,
672 errors.ECODE_STATE)
673
676 """Ensure that a given node is vm capable.
677
678 @param lu: the LU on behalf of which we make the check
679 @param node: the node to check
680 @raise errors.OpPrereqError: if the node is not vm capable
681
682 """
683 if not lu.cfg.GetNodeInfo(node).vm_capable:
684 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
685 errors.ECODE_STATE)
686
689 """Ensure that a node supports a given OS.
690
691 @param lu: the LU on behalf of which we make the check
692 @param node: the node to check
693 @param os_name: the OS to query about
694 @param force_variant: whether to ignore variant errors
695 @raise errors.OpPrereqError: if the node is not supporting the OS
696
697 """
698 result = lu.rpc.call_os_get(node, os_name)
699 result.Raise("OS '%s' not in supported OS list for node %s" %
700 (os_name, node),
701 prereq=True, ecode=errors.ECODE_INVAL)
702 if not force_variant:
703 _CheckOSVariant(result.payload, os_name)
704
707 """Ensure that a node has the given secondary ip.
708
709 @type lu: L{LogicalUnit}
710 @param lu: the LU on behalf of which we make the check
711 @type node: string
712 @param node: the node to check
713 @type secondary_ip: string
714 @param secondary_ip: the ip to check
715 @type prereq: boolean
716 @param prereq: whether to throw a prerequisite or an execute error
717 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
718 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
719
720 """
721 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
722 result.Raise("Failure checking secondary ip on node %s" % node,
723 prereq=prereq, ecode=errors.ECODE_ENVIRON)
724 if not result.payload:
725 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
726 " please fix and re-run this command" % secondary_ip)
727 if prereq:
728 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
729 else:
730 raise errors.OpExecError(msg)
731
734 """Reads the cluster domain secret.
735
736 """
737 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
738 strict=True)
739
755
758 """Expand an item name.
759
760 @param fn: the function to use for expansion
761 @param name: requested item name
762 @param kind: text description ('Node' or 'Instance')
763 @return: the resolved (full) name
764 @raise errors.OpPrereqError: if the item is not found
765
766 """
767 full_name = fn(name)
768 if full_name is None:
769 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
770 errors.ECODE_NOENT)
771 return full_name
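# The lookups used elsewhere in this module, _ExpandNodeName and
# _ExpandInstanceName, are thin wrappers over _ExpandItemName; roughly (a
# sketch, assuming the usual ConfigWriter expansion methods):
#   _ExpandNodeName(cfg, name) == _ExpandItemName(cfg.ExpandNodeName,
#                                                 name, "Node")
#   _ExpandInstanceName(cfg, name) == _ExpandItemName(cfg.ExpandInstanceName,
#                                                     name, "Instance")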
772
777
782
783
784 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
785 memory, vcpus, nics, disk_template, disks,
786 bep, hvp, hypervisor_name):
787 """Builds instance related env variables for hooks
788
789 This builds the hook environment from individual variables.
790
791 @type name: string
792 @param name: the name of the instance
793 @type primary_node: string
794 @param primary_node: the name of the instance's primary node
795 @type secondary_nodes: list
796 @param secondary_nodes: list of secondary nodes as strings
797 @type os_type: string
798 @param os_type: the name of the instance's OS
799 @type status: boolean
800 @param status: the should_run status of the instance
801 @type memory: string
802 @param memory: the memory size of the instance
803 @type vcpus: string
804 @param vcpus: the count of VCPUs the instance has
805 @type nics: list
806 @param nics: list of tuples (ip, mac, mode, link) representing
807 the NICs the instance has
808 @type disk_template: string
809 @param disk_template: the disk template of the instance
810 @type disks: list
811 @param disks: the list of (size, mode) pairs
812 @type bep: dict
813 @param bep: the backend parameters for the instance
814 @type hvp: dict
815 @param hvp: the hypervisor parameters for the instance
816 @type hypervisor_name: string
817 @param hypervisor_name: the hypervisor for the instance
818 @rtype: dict
819 @return: the hook environment for this instance
820
821 """
822 if status:
823 str_status = "up"
824 else:
825 str_status = "down"
826 env = {
827 "OP_TARGET": name,
828 "INSTANCE_NAME": name,
829 "INSTANCE_PRIMARY": primary_node,
830 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
831 "INSTANCE_OS_TYPE": os_type,
832 "INSTANCE_STATUS": str_status,
833 "INSTANCE_MEMORY": memory,
834 "INSTANCE_VCPUS": vcpus,
835 "INSTANCE_DISK_TEMPLATE": disk_template,
836 "INSTANCE_HYPERVISOR": hypervisor_name,
837 }
838
839 if nics:
840 nic_count = len(nics)
841 for idx, (ip, mac, mode, link) in enumerate(nics):
842 if ip is None:
843 ip = ""
844 env["INSTANCE_NIC%d_IP" % idx] = ip
845 env["INSTANCE_NIC%d_MAC" % idx] = mac
846 env["INSTANCE_NIC%d_MODE" % idx] = mode
847 env["INSTANCE_NIC%d_LINK" % idx] = link
848 if mode == constants.NIC_MODE_BRIDGED:
849 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
850 else:
851 nic_count = 0
852
853 env["INSTANCE_NIC_COUNT"] = nic_count
854
855 if disks:
856 disk_count = len(disks)
857 for idx, (size, mode) in enumerate(disks):
858 env["INSTANCE_DISK%d_SIZE" % idx] = size
859 env["INSTANCE_DISK%d_MODE" % idx] = mode
860 else:
861 disk_count = 0
862
863 env["INSTANCE_DISK_COUNT"] = disk_count
864
865 for source, kind in [(bep, "BE"), (hvp, "HV")]:
866 for key, value in source.items():
867 env["INSTANCE_%s_%s" % (kind, key)] = value
868
869 return env
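# Illustrative sketch of the environment built above for an instance with one
# bridged NIC and one disk (all values are made up; the hooks runner later adds
# the GANETI_ prefix to every key):
#   INSTANCE_NAME=inst1.example.com     INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                  INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged          INSTANCE_NIC0_BRIDGE=br0
#   INSTANCE_DISK_COUNT=1               INSTANCE_DISK0_SIZE=10240
#   INSTANCE_DISK0_MODE=rw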
870
873 """Build a list of nic information tuples.
874
875 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
876 value in LUInstanceQueryData.
877
878 @type lu: L{LogicalUnit}
879 @param lu: the logical unit on whose behalf we execute
880 @type nics: list of L{objects.NIC}
881 @param nics: list of nics to convert to hooks tuples
882
883 """
884 hooks_nics = []
885 cluster = lu.cfg.GetClusterInfo()
886 for nic in nics:
887 ip = nic.ip
888 mac = nic.mac
889 filled_params = cluster.SimpleFillNIC(nic.nicparams)
890 mode = filled_params[constants.NIC_MODE]
891 link = filled_params[constants.NIC_LINK]
892 hooks_nics.append((ip, mac, mode, link))
893 return hooks_nics
894
897 """Builds instance related env variables for hooks from an object.
898
899 @type lu: L{LogicalUnit}
900 @param lu: the logical unit on whose behalf we execute
901 @type instance: L{objects.Instance}
902 @param instance: the instance for which we should build the
903 environment
904 @type override: dict
905 @param override: dictionary with key/values that will override
906 our values
907 @rtype: dict
908 @return: the hook environment dictionary
909
910 """
911 cluster = lu.cfg.GetClusterInfo()
912 bep = cluster.FillBE(instance)
913 hvp = cluster.FillHV(instance)
914 args = {
915 'name': instance.name,
916 'primary_node': instance.primary_node,
917 'secondary_nodes': instance.secondary_nodes,
918 'os_type': instance.os,
919 'status': instance.admin_up,
920 'memory': bep[constants.BE_MEMORY],
921 'vcpus': bep[constants.BE_VCPUS],
922 'nics': _NICListToTuple(lu, instance.nics),
923 'disk_template': instance.disk_template,
924 'disks': [(disk.size, disk.mode) for disk in instance.disks],
925 'bep': bep,
926 'hvp': hvp,
927 'hypervisor_name': instance.hypervisor,
928 }
929 if override:
930 args.update(override)
931 return _BuildInstanceHookEnv(**args)
932
935 """Adjust the candidate pool after node operations.
936
937 """
938 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
939 if mod_list:
940 lu.LogInfo("Promoted nodes to master candidate role: %s",
941 utils.CommaJoin(node.name for node in mod_list))
942 for name in mod_list:
943 lu.context.ReaddNode(name)
944 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
945 if mc_now > mc_max:
946 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
947 (mc_now, mc_max))
948
959
973
982
985 """Check whether an OS name conforms to the os variants specification.
986
987 @type os_obj: L{objects.OS}
988 @param os_obj: OS object to check
989 @type name: string
990 @param name: OS name passed by the user, to check for validity
991
992 """
993 if not os_obj.supported_variants:
994 return
995 variant = objects.OS.GetVariant(name)
996 if not variant:
997 raise errors.OpPrereqError("OS name must include a variant",
998 errors.ECODE_INVAL)
999
1000 if variant not in os_obj.supported_variants:
1001 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1002
1006
1009 """Returns a list of all primary and secondary instances on a node.
1010
1011 """
1012
1013 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1014
1017 """Returns primary instances on a node.
1018
1019 """
1020 return _GetNodeInstancesInner(cfg,
1021 lambda inst: node_name == inst.primary_node)
1022
1030
1033 """Returns the arguments for a storage type.
1034
1035 """
1036
1037 if storage_type == constants.ST_FILE:
1038
1039 return [[cfg.GetFileStorageDir()]]
1040
1041 return []
1042
1045 faulty = []
1046
1047 for dev in instance.disks:
1048 cfg.SetDiskID(dev, node_name)
1049
1050 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1051 result.Raise("Failed to get disk status from node %s" % node_name,
1052 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1053
1054 for idx, bdev_status in enumerate(result.payload):
1055 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1056 faulty.append(idx)
1057
1058 return faulty
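# Illustrative usage (a sketch; the surrounding LU provides cfg and rpc):
#   faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
#                                     instance.primary_node, prereq=True)
#   # "faulty" is the list of disk indexes whose local disk status is
#   # LDS_FAULTY on that node.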
1059
1062 """Check the sanity of iallocator and node arguments and use the
1063 cluster-wide iallocator if appropriate.
1064
1065 Check that at most one of (iallocator, node) is specified. If none is
1066 specified, then the LU's opcode's iallocator slot is filled with the
1067 cluster-wide default iallocator.
1068
1069 @type iallocator_slot: string
1070 @param iallocator_slot: the name of the opcode iallocator slot
1071 @type node_slot: string
1072 @param node_slot: the name of the opcode target node slot
1073
1074 """
1075 node = getattr(lu.op, node_slot, None)
1076 iallocator = getattr(lu.op, iallocator_slot, None)
1077
1078 if node is not None and iallocator is not None:
1079 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1080 errors.ECODE_INVAL)
1081 elif node is None and iallocator is None:
1082 default_iallocator = lu.cfg.GetDefaultIAllocator()
1083 if default_iallocator:
1084 setattr(lu.op, iallocator_slot, default_iallocator)
1085 else:
1086 raise errors.OpPrereqError("No iallocator or node given and no"
1087 " cluster-wide default iallocator found."
1088 " Please specify either an iallocator or a"
1089 " node, or set a cluster-wide default"
1090 " iallocator.")
1091
1092
1093 class LUClusterPostInit(LogicalUnit):
1094 """Logical unit for running hooks after cluster initialization.
1095
1096 """
1097 HPATH = "cluster-init"
1098 HTYPE = constants.HTYPE_CLUSTER
1099
1100 def BuildHooksEnv(self):
1101 """Build hooks env.
1102
1103 """
1104 env = {"OP_TARGET": self.cfg.GetClusterName()}
1105 mn = self.cfg.GetMasterNode()
1106 return env, [], [mn]
1107
1108 def Exec(self, feedback_fn):
1109 """Nothing to do.
1110
1111 """
1112 return True
1113
1116 """Logical unit for destroying the cluster.
1117
1118 """
1119 HPATH = "cluster-destroy"
1120 HTYPE = constants.HTYPE_CLUSTER
1121
1123 """Build hooks env.
1124
1125 """
1126 env = {"OP_TARGET": self.cfg.GetClusterName()}
1127 return env, [], []
1128
1130 """Check prerequisites.
1131
1132 This checks whether the cluster is empty.
1133
1134 Any errors are signaled by raising errors.OpPrereqError.
1135
1136 """
1137 master = self.cfg.GetMasterNode()
1138
1139 nodelist = self.cfg.GetNodeList()
1140 if len(nodelist) != 1 or nodelist[0] != master:
1141 raise errors.OpPrereqError("There are still %d node(s) in"
1142 " this cluster." % (len(nodelist) - 1),
1143 errors.ECODE_INVAL)
1144 instancelist = self.cfg.GetInstanceList()
1145 if instancelist:
1146 raise errors.OpPrereqError("There are still %d instance(s) in"
1147 " this cluster." % len(instancelist),
1148 errors.ECODE_INVAL)
1149
1150 def Exec(self, feedback_fn):
1168
1171 """Verifies a certificate for LUClusterVerify.
1172
1173 @type filename: string
1174 @param filename: Path to PEM file
1175
1176 """
1177 try:
1178 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1179 utils.ReadFile(filename))
1180 except Exception, err:
1181 return (LUClusterVerify.ETYPE_ERROR,
1182 "Failed to load X509 certificate %s: %s" % (filename, err))
1183
1184 (errcode, msg) = \
1185 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1186 constants.SSL_CERT_EXPIRATION_ERROR)
1187
1188 if msg:
1189 fnamemsg = "While verifying %s: %s" % (filename, msg)
1190 else:
1191 fnamemsg = None
1192
1193 if errcode is None:
1194 return (None, fnamemsg)
1195 elif errcode == utils.CERT_WARNING:
1196 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1197 elif errcode == utils.CERT_ERROR:
1198 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1199
1200 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1201
1204 """Verifies the cluster status.
1205
1206 """
1207 HPATH = "cluster-verify"
1208 HTYPE = constants.HTYPE_CLUSTER
1209 REQ_BGL = False
1210
1211 TCLUSTER = "cluster"
1212 TNODE = "node"
1213 TINSTANCE = "instance"
1214
1215 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1216 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1217 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1218 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1219 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1220 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1221 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1222 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1223 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1224 ENODEDRBD = (TNODE, "ENODEDRBD")
1225 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1226 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1227 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1228 ENODEHV = (TNODE, "ENODEHV")
1229 ENODELVM = (TNODE, "ENODELVM")
1230 ENODEN1 = (TNODE, "ENODEN1")
1231 ENODENET = (TNODE, "ENODENET")
1232 ENODEOS = (TNODE, "ENODEOS")
1233 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1234 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1235 ENODERPC = (TNODE, "ENODERPC")
1236 ENODESSH = (TNODE, "ENODESSH")
1237 ENODEVERSION = (TNODE, "ENODEVERSION")
1238 ENODESETUP = (TNODE, "ENODESETUP")
1239 ENODETIME = (TNODE, "ENODETIME")
1240 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1241
1242 ETYPE_FIELD = "code"
1243 ETYPE_ERROR = "ERROR"
1244 ETYPE_WARNING = "WARNING"
1245
1246 _HOOKS_INDENT_RE = re.compile("^", re.M)
1247
1249 """A class representing the logical and physical status of a node.
1250
1251 @type name: string
1252 @ivar name: the node name to which this object refers
1253 @ivar volumes: a structure as returned from
1254 L{ganeti.backend.GetVolumeList} (runtime)
1255 @ivar instances: a list of running instances (runtime)
1256 @ivar pinst: list of configured primary instances (config)
1257 @ivar sinst: list of configured secondary instances (config)
1258 @ivar sbp: dictionary of {primary-node: list of instances} for all
1259 instances for which this node is secondary (config)
1260 @ivar mfree: free memory, as reported by hypervisor (runtime)
1261 @ivar dfree: free disk, as reported by the node (runtime)
1262 @ivar offline: the offline status (config)
1263 @type rpc_fail: boolean
1264 @ivar rpc_fail: whether the RPC verify call failed (overall,
1265 not whether the individual keys were correct) (runtime)
1266 @type lvm_fail: boolean
1267 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1268 @type hyp_fail: boolean
1269 @ivar hyp_fail: whether the RPC call didn't return the instance list
1270 @type ghost: boolean
1271 @ivar ghost: whether this is a known node or not (config)
1272 @type os_fail: boolean
1273 @ivar os_fail: whether the RPC call didn't return valid OS data
1274 @type oslist: list
1275 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1276 @type vm_capable: boolean
1277 @ivar vm_capable: whether the node can host instances
1278
1279 """
1280 def __init__(self, offline=False, name=None, vm_capable=True):
1281 self.name = name
1282 self.volumes = {}
1283 self.instances = []
1284 self.pinst = []
1285 self.sinst = []
1286 self.sbp = {}
1287 self.mfree = 0
1288 self.dfree = 0
1289 self.offline = offline
1290 self.vm_capable = vm_capable
1291 self.rpc_fail = False
1292 self.lvm_fail = False
1293 self.hyp_fail = False
1294 self.ghost = False
1295 self.os_fail = False
1296 self.oslist = {}
1297
1304
1305 def _Error(self, ecode, item, msg, *args, **kwargs):
1306 """Format an error message.
1307
1308 Based on the opcode's error_codes parameter, either format a
1309 parseable error code, or a simpler error string.
1310
1311 This must be called only from Exec and functions called from Exec.
1312
1313 """
1314 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1315 itype, etxt = ecode
1316
1317 if args:
1318 msg = msg % args
1319
1320 if self.op.error_codes:
1321 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1322 else:
1323 if item:
1324 item = " " + item
1325 else:
1326 item = ""
1327 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1328
1329 self._feedback_fn(" - %s" % msg)
1330
1331 def _ErrorIf(self, cond, *args, **kwargs):
1332 """Log an error message if the passed condition is True.
1333
1334 """
1335 cond = bool(cond) or self.op.debug_simulate_errors
1336 if cond:
1337 self._Error(*args, **kwargs)
1338
1339 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1340 self.bad = self.bad or cond
1341
1343 """Perform some basic validation on data returned from a node.
1344
1345 - check the result data structure is well formed and has all the
1346 mandatory fields
1347 - check ganeti version
1348
1349 @type ninfo: L{objects.Node}
1350 @param ninfo: the node to check
1351 @param nresult: the results from the node
1352 @rtype: boolean
1353 @return: whether overall this call was successful (and we can expect
1354 reasonable values in the response)
1355
1356 """
1357 node = ninfo.name
1358 _ErrorIf = self._ErrorIf
1359
1360
1361 test = not nresult or not isinstance(nresult, dict)
1362 _ErrorIf(test, self.ENODERPC, node,
1363 "unable to verify node: no data returned")
1364 if test:
1365 return False
1366
1367
1368 local_version = constants.PROTOCOL_VERSION
1369 remote_version = nresult.get("version", None)
1370 test = not (remote_version and
1371 isinstance(remote_version, (list, tuple)) and
1372 len(remote_version) == 2)
1373 _ErrorIf(test, self.ENODERPC, node,
1374 "connection to node returned invalid data")
1375 if test:
1376 return False
1377
1378 test = local_version != remote_version[0]
1379 _ErrorIf(test, self.ENODEVERSION, node,
1380 "incompatible protocol versions: master %s,"
1381 " node %s", local_version, remote_version[0])
1382 if test:
1383 return False
1384
1385
1386
1387
1388 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1389 self.ENODEVERSION, node,
1390 "software version mismatch: master %s, node %s",
1391 constants.RELEASE_VERSION, remote_version[1],
1392 code=self.ETYPE_WARNING)
1393
1394 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1395 if ninfo.vm_capable and isinstance(hyp_result, dict):
1396 for hv_name, hv_result in hyp_result.iteritems():
1397 test = hv_result is not None
1398 _ErrorIf(test, self.ENODEHV, node,
1399 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1400
1401 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1402 if ninfo.vm_capable and isinstance(hvp_result, list):
1403 for item, hv_name, hv_result in hvp_result:
1404 _ErrorIf(True, self.ENODEHV, node,
1405 "hypervisor %s parameter verify failure (source %s): %s",
1406 hv_name, item, hv_result)
1407
1408 test = nresult.get(constants.NV_NODESETUP,
1409 ["Missing NODESETUP results"])
1410 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1411 "; ".join(test))
1412
1413 return True
1414
1415 def _VerifyNodeTime(self, ninfo, nresult,
1416 nvinfo_starttime, nvinfo_endtime):
1417 """Check the node time.
1418
1419 @type ninfo: L{objects.Node}
1420 @param ninfo: the node to check
1421 @param nresult: the remote results for the node
1422 @param nvinfo_starttime: the start time of the RPC call
1423 @param nvinfo_endtime: the end time of the RPC call
1424
1425 """
1426 node = ninfo.name
1427 _ErrorIf = self._ErrorIf
1428
1429 ntime = nresult.get(constants.NV_TIME, None)
1430 try:
1431 ntime_merged = utils.MergeTime(ntime)
1432 except (ValueError, TypeError):
1433 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1434 return
1435
1436 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1437 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1438 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1439 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1440 else:
1441 ntime_diff = None
1442
1443 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1444 "Node time diverges by at least %s from master node time",
1445 ntime_diff)
1446
1448 """Check the node LVM results.
1449
1450 @type ninfo: L{objects.Node}
1451 @param ninfo: the node to check
1452 @param nresult: the remote results for the node
1453 @param vg_name: the configured VG name
1454
1455 """
1456 if vg_name is None:
1457 return
1458
1459 node = ninfo.name
1460 _ErrorIf = self._ErrorIf
1461
1462
1463 vglist = nresult.get(constants.NV_VGLIST, None)
1464 test = not vglist
1465 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1466 if not test:
1467 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1468 constants.MIN_VG_SIZE)
1469 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1470
1471
1472 pvlist = nresult.get(constants.NV_PVLIST, None)
1473 test = pvlist is None
1474 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1475 if not test:
1476
1477
1478
1479 for _, pvname, owner_vg in pvlist:
1480 test = ":" in pvname
1481 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1482 " '%s' of VG '%s'", pvname, owner_vg)
1483
1485 """Check the node bridges.
1486
1487 @type ninfo: L{objects.Node}
1488 @param ninfo: the node to check
1489 @param nresult: the remote results for the node
1490 @param bridges: the expected list of bridges
1491
1492 """
1493 if not bridges:
1494 return
1495
1496 node = ninfo.name
1497 _ErrorIf = self._ErrorIf
1498
1499 missing = nresult.get(constants.NV_BRIDGES, None)
1500 test = not isinstance(missing, list)
1501 _ErrorIf(test, self.ENODENET, node,
1502 "did not return valid bridge information")
1503 if not test:
1504 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1505 utils.CommaJoin(sorted(missing)))
1506
1508 """Check the node network connectivity results.
1509
1510 @type ninfo: L{objects.Node}
1511 @param ninfo: the node to check
1512 @param nresult: the remote results for the node
1513
1514 """
1515 node = ninfo.name
1516 _ErrorIf = self._ErrorIf
1517
1518 test = constants.NV_NODELIST not in nresult
1519 _ErrorIf(test, self.ENODESSH, node,
1520 "node hasn't returned node ssh connectivity data")
1521 if not test:
1522 if nresult[constants.NV_NODELIST]:
1523 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1524 _ErrorIf(True, self.ENODESSH, node,
1525 "ssh communication with node '%s': %s", a_node, a_msg)
1526
1527 test = constants.NV_NODENETTEST not in nresult
1528 _ErrorIf(test, self.ENODENET, node,
1529 "node hasn't returned node tcp connectivity data")
1530 if not test:
1531 if nresult[constants.NV_NODENETTEST]:
1532 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1533 for anode in nlist:
1534 _ErrorIf(True, self.ENODENET, node,
1535 "tcp communication with node '%s': %s",
1536 anode, nresult[constants.NV_NODENETTEST][anode])
1537
1538 test = constants.NV_MASTERIP not in nresult
1539 _ErrorIf(test, self.ENODENET, node,
1540 "node hasn't returned node master IP reachability data")
1541 if not test:
1542 if not nresult[constants.NV_MASTERIP]:
1543 if node == self.master_node:
1544 msg = "the master node cannot reach the master IP (not configured?)"
1545 else:
1546 msg = "cannot reach the master IP"
1547 _ErrorIf(True, self.ENODENET, node, msg)
1548
1549 def _VerifyInstance(self, instance, instanceconfig, node_image,
1550 diskstatus):
1551 """Verify an instance.
1552
1553 This function checks to see if the required block devices are
1554 available on the instance's node.
1555
1556 """
1557 _ErrorIf = self._ErrorIf
1558 node_current = instanceconfig.primary_node
1559
1560 node_vol_should = {}
1561 instanceconfig.MapLVsByNode(node_vol_should)
1562
1563 for node in node_vol_should:
1564 n_img = node_image[node]
1565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566
1567 continue
1568 for volume in node_vol_should[node]:
1569 test = volume not in n_img.volumes
1570 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1571 "volume %s missing on node %s", volume, node)
1572
1573 if instanceconfig.admin_up:
1574 pri_img = node_image[node_current]
1575 test = instance not in pri_img.instances and not pri_img.offline
1576 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1577 "instance not running on its primary node %s",
1578 node_current)
1579
1580 for node, n_img in node_image.items():
1581 if node != node_current:
1582 test = instance in n_img.instances
1583 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1584 "instance should not run on node %s", node)
1585
1586 diskdata = [(nname, success, status, idx)
1587 for (nname, disks) in diskstatus.items()
1588 for idx, (success, status) in enumerate(disks)]
1589
1590 for nname, success, bdev_status, idx in diskdata:
1591
1592
1593 snode = node_image[nname]
1594 bad_snode = snode.ghost or snode.offline
1595 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1596 self.EINSTANCEFAULTYDISK, instance,
1597 "couldn't retrieve status for disk/%s on %s: %s",
1598 idx, nname, bdev_status)
1599 _ErrorIf((instanceconfig.admin_up and success and
1600 bdev_status.ldisk_status == constants.LDS_FAULTY),
1601 self.EINSTANCEFAULTYDISK, instance,
1602 "disk/%s on %s is faulty", idx, nname)
1603
1605 """Verify if there are any unknown volumes in the cluster.
1606
1607 The .os, .swap and backup volumes are ignored. All other volumes are
1608 reported as unknown.
1609
1610 @type reserved: L{ganeti.utils.FieldSet}
1611 @param reserved: a FieldSet of reserved volume names
1612
1613 """
1614 for node, n_img in node_image.items():
1615 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1616
1617 continue
1618 for volume in n_img.volumes:
1619 test = ((node not in node_vol_should or
1620 volume not in node_vol_should[node]) and
1621 not reserved.Matches(volume))
1622 self._ErrorIf(test, self.ENODEORPHANLV, node,
1623 "volume %s is unknown", volume)
1624
1626 """Verify the list of running instances.
1627
1628 This checks what instances are running but unknown to the cluster.
1629
1630 """
1631 for node, n_img in node_image.items():
1632 for o_inst in n_img.instances:
1633 test = o_inst not in instancelist
1634 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1635 "instance %s on node %s should not exist", o_inst, node)
1636
1638 """Verify N+1 Memory Resilience.
1639
1640 Check that if one single node dies we can still start all the
1641 instances it was primary for.
1642
1643 """
1644 for node, n_img in node_image.items():
1645
1646
1647
1648
1649
1650
1651
1652
1653 if n_img.offline:
1654
1655
1656
1657
1658 continue
1659 for prinode, instances in n_img.sbp.items():
1660 needed_mem = 0
1661 for instance in instances:
1662 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1663 if bep[constants.BE_AUTO_BALANCE]:
1664 needed_mem += bep[constants.BE_MEMORY]
1665 test = n_img.mfree < needed_mem
1666 self._ErrorIf(test, self.ENODEN1, node,
1667 "not enough memory to accomodate instance failovers"
1668 " should node %s fail (%dMiB needed, %dMiB available)",
1669 prinode, needed_mem, n_img.mfree)
1670
1671 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1672 master_files):
1673 """Verifies and computes the node required file checksums.
1674
1675 @type ninfo: L{objects.Node}
1676 @param ninfo: the node to check
1677 @param nresult: the remote results for the node
1678 @param file_list: required list of files
1679 @param local_cksum: dictionary of local files and their checksums
1680 @param master_files: list of files that only masters should have
1681
1682 """
1683 node = ninfo.name
1684 _ErrorIf = self._ErrorIf
1685
1686 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1687 test = not isinstance(remote_cksum, dict)
1688 _ErrorIf(test, self.ENODEFILECHECK, node,
1689 "node hasn't returned file checksum data")
1690 if test:
1691 return
1692
1693 for file_name in file_list:
1694 node_is_mc = ninfo.master_candidate
1695 must_have = (file_name not in master_files) or node_is_mc
1696
1697 test1 = file_name not in remote_cksum
1698
1699 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1700
1701 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1702 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1703 "file '%s' missing", file_name)
1704 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1705 "file '%s' has wrong checksum", file_name)
1706
1707 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1708 "file '%s' should not exist on non master"
1709 " candidates (and the file is outdated)", file_name)
1710
1711 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1712 "file '%s' should not exist"
1713 " on non master candidates", file_name)
1714
1715 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1716 drbd_map):
1717 """Verifies and the node DRBD status.
1718
1719 @type ninfo: L{objects.Node}
1720 @param ninfo: the node to check
1721 @param nresult: the remote results for the node
1722 @param instanceinfo: the dict of instances
1723 @param drbd_helper: the configured DRBD usermode helper
1724 @param drbd_map: the DRBD map as returned by
1725 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1726
1727 """
1728 node = ninfo.name
1729 _ErrorIf = self._ErrorIf
1730
1731 if drbd_helper:
1732 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1733 test = (helper_result is None)
1734 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1735 "no drbd usermode helper returned")
1736 if helper_result:
1737 status, payload = helper_result
1738 test = not status
1739 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1740 "drbd usermode helper check unsuccessful: %s", payload)
1741 test = status and (payload != drbd_helper)
1742 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1743 "wrong drbd usermode helper: %s", payload)
1744
1745
1746 node_drbd = {}
1747 for minor, instance in drbd_map[node].items():
1748 test = instance not in instanceinfo
1749 _ErrorIf(test, self.ECLUSTERCFG, None,
1750 "ghost instance '%s' in temporary DRBD map", instance)
1751
1752
1753
1754 if test:
1755 node_drbd[minor] = (instance, False)
1756 else:
1757 instance = instanceinfo[instance]
1758 node_drbd[minor] = (instance.name, instance.admin_up)
1759
1760
1761 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1762 test = not isinstance(used_minors, (tuple, list))
1763 _ErrorIf(test, self.ENODEDRBD, node,
1764 "cannot parse drbd status file: %s", str(used_minors))
1765 if test:
1766
1767 return
1768
1769 for minor, (iname, must_exist) in node_drbd.items():
1770 test = minor not in used_minors and must_exist
1771 _ErrorIf(test, self.ENODEDRBD, node,
1772 "drbd minor %d of instance %s is not active", minor, iname)
1773 for minor in used_minors:
1774 test = minor not in node_drbd
1775 _ErrorIf(test, self.ENODEDRBD, node,
1776 "unallocated drbd minor %d is in use", minor)
1777
1779 """Builds the node OS structures.
1780
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param nimg: the node image object
1785
1786 """
1787 node = ninfo.name
1788 _ErrorIf = self._ErrorIf
1789
1790 remote_os = nresult.get(constants.NV_OSLIST, None)
1791 test = (not isinstance(remote_os, list) or
1792 not compat.all(isinstance(v, list) and len(v) == 7
1793 for v in remote_os))
1794
1795 _ErrorIf(test, self.ENODEOS, node,
1796 "node hasn't returned valid OS data")
1797
1798 nimg.os_fail = test
1799
1800 if test:
1801 return
1802
1803 os_dict = {}
1804
1805 for (name, os_path, status, diagnose,
1806 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1807
1808 if name not in os_dict:
1809 os_dict[name] = []
1810
1811
1812
1813 parameters = [tuple(v) for v in parameters]
1814 os_dict[name].append((os_path, status, diagnose,
1815 set(variants), set(parameters), set(api_ver)))
1816
1817 nimg.oslist = os_dict
1818
1820 """Verifies the node OS list.
1821
1822 @type ninfo: L{objects.Node}
1823 @param ninfo: the node to check
1824 @param nimg: the node image object
1825 @param base: the 'template' node we match against (e.g. from the master)
1826
1827 """
1828 node = ninfo.name
1829 _ErrorIf = self._ErrorIf
1830
1831 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1832
1833 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1834 for os_name, os_data in nimg.oslist.items():
1835 assert os_data, "Empty OS status for OS %s?!" % os_name
1836 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1837 _ErrorIf(not f_status, self.ENODEOS, node,
1838 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1839 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1840 "OS '%s' has multiple entries (first one shadows the rest): %s",
1841 os_name, utils.CommaJoin([v[0] for v in os_data]))
1842
1843 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1844 and not f_var, self.ENODEOS, node,
1845 "OS %s with API at least %d does not declare any variant",
1846 os_name, constants.OS_API_V15)
1847
1848 test = os_name not in base.oslist
1849 _ErrorIf(test, self.ENODEOS, node,
1850 "Extra OS %s not present on reference node (%s)",
1851 os_name, base.name)
1852 if test:
1853 continue
1854 assert base.oslist[os_name], "Base node has empty OS status?"
1855 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1856 if not b_status:
1857
1858 continue
1859 for kind, a, b in [("API version", f_api, b_api),
1860 ("variants list", f_var, b_var),
1861 ("parameters", beautify_params(f_param),
1862 beautify_params(b_param))]:
1863 _ErrorIf(a != b, self.ENODEOS, node,
1864 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1865 kind, os_name, base.name,
1866 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1867
1868
1869 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1870 _ErrorIf(missing, self.ENODEOS, node,
1871 "OSes present on reference node %s but missing on this node: %s",
1872 base.name, utils.CommaJoin(missing))
1873
1875 """Verifies out of band functionality of a node.
1876
1877 @type ninfo: L{objects.Node}
1878 @param ninfo: the node to check
1879 @param nresult: the remote results for the node
1880
1881 """
1882 node = ninfo.name
1883
1884
1885 if ((ninfo.master_candidate or ninfo.master_capable) and
1886 constants.NV_OOB_PATHS in nresult):
1887 for path_result in nresult[constants.NV_OOB_PATHS]:
1888 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1889
1891 """Verifies and updates the node volume data.
1892
1893 This function will update a L{NodeImage}'s internal structures
1894 with data from the remote call.
1895
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param nimg: the node image object
1900 @param vg_name: the configured VG name
1901
1902 """
1903 node = ninfo.name
1904 _ErrorIf = self._ErrorIf
1905
1906 nimg.lvm_fail = True
1907 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1908 if vg_name is None:
1909 pass
1910 elif isinstance(lvdata, basestring):
1911 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1912 utils.SafeEncode(lvdata))
1913 elif not isinstance(lvdata, dict):
1914 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1915 else:
1916 nimg.volumes = lvdata
1917 nimg.lvm_fail = False
1918
1920 """Verifies and updates the node instance list.
1921
1922 If the listing was successful, then updates this node's instance
1923 list. Otherwise, it marks the RPC call as failed for the instance
1924 list key.
1925
1926 @type ninfo: L{objects.Node}
1927 @param ninfo: the node to check
1928 @param nresult: the remote results for the node
1929 @param nimg: the node image object
1930
1931 """
1932 idata = nresult.get(constants.NV_INSTANCELIST, None)
1933 test = not isinstance(idata, list)
1934 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1935 " (instancelist): %s", utils.SafeEncode(str(idata)))
1936 if test:
1937 nimg.hyp_fail = True
1938 else:
1939 nimg.instances = idata
1940
1942 """Verifies and computes a node information map
1943
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param nimg: the node image object
1948 @param vg_name: the configured VG name
1949
1950 """
1951 node = ninfo.name
1952 _ErrorIf = self._ErrorIf
1953
1954
1955 hv_info = nresult.get(constants.NV_HVINFO, None)
1956 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1957 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1958 if not test:
1959 try:
1960 nimg.mfree = int(hv_info["memory_free"])
1961 except (ValueError, TypeError):
1962 _ErrorIf(True, self.ENODERPC, node,
1963 "node returned invalid nodeinfo, check hypervisor")
1964
1965
1966 if vg_name is not None:
1967 test = (constants.NV_VGLIST not in nresult or
1968 vg_name not in nresult[constants.NV_VGLIST])
1969 _ErrorIf(test, self.ENODELVM, node,
1970 "node didn't return data for the volume group '%s'"
1971 " - it is either missing or broken", vg_name)
1972 if not test:
1973 try:
1974 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1975 except (ValueError, TypeError):
1976 _ErrorIf(True, self.ENODERPC, node,
1977 "node returned invalid LVM info, check LVM status")
1978
1980 """Gets per-disk status information for all instances.
1981
1982 @type nodelist: list of strings
1983 @param nodelist: Node names
1984 @type node_image: dict of (name, L{NodeImage})
1985 @param node_image: Node image objects
1986 @type instanceinfo: dict of (name, L{objects.Instance})
1987 @param instanceinfo: Instance objects
1988 @rtype: {instance: {node: [(success, payload)]}}
1989 @return: a dictionary of per-instance dictionaries with nodes as
1990 keys and disk information as values; the disk information is a
1991 list of tuples (success, payload)
1992
1993 """
1994 _ErrorIf = self._ErrorIf
1995
1996 node_disks = {}
1997 node_disks_devonly = {}
1998 diskless_instances = set()
1999 diskless = constants.DT_DISKLESS
2000
2001 for nname in nodelist:
2002 node_instances = list(itertools.chain(node_image[nname].pinst,
2003 node_image[nname].sinst))
2004 diskless_instances.update(inst for inst in node_instances
2005 if instanceinfo[inst].disk_template == diskless)
2006 disks = [(inst, disk)
2007 for inst in node_instances
2008 for disk in instanceinfo[inst].disks]
2009
2010 if not disks:
2011
2012 continue
2013
2014 node_disks[nname] = disks
2015
2016
2017
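# Work on copies of the disk objects: SetDiskID modifies them in place
# and the configuration objects must stay untouched.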
2018 devonly = [dev.Copy() for (_, dev) in disks]
2019
2020 for dev in devonly:
2021 self.cfg.SetDiskID(dev, nname)
2022
2023 node_disks_devonly[nname] = devonly
2024
2025 assert len(node_disks) == len(node_disks_devonly)
2026
2027
2028 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2029 node_disks_devonly)
2030
2031 assert len(result) == len(node_disks)
2032
2033 instdisk = {}
2034
2035 for (nname, nres) in result.items():
2036 disks = node_disks[nname]
2037
2038 if nres.offline:
2039
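# Offline nodes return no data; record an explanatory failure for each disk instead.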
2040 data = len(disks) * [(False, "node offline")]
2041 else:
2042 msg = nres.fail_msg
2043 _ErrorIf(msg, self.ENODERPC, nname,
2044 "while getting disk information: %s", msg)
2045 if msg:
2046
2047 data = len(disks) * [(False, msg)]
2048 else:
2049 data = []
2050 for idx, i in enumerate(nres.payload):
2051 if isinstance(i, (tuple, list)) and len(i) == 2:
2052 data.append(i)
2053 else:
2054 logging.warning("Invalid result from node %s, entry %d: %s",
2055 nname, idx, i)
2056 data.append((False, "Invalid result from the remote node"))
2057
2058 for ((inst, _), status) in zip(disks, data):
2059 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2060
2061
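# Diskless instances never appear in the per-node results, so add explicit empty entries for them.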
2062 for inst in diskless_instances:
2063 assert inst not in instdisk
2064 instdisk[inst] = {}
2065
2066 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2067 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2068 compat.all(isinstance(s, (tuple, list)) and
2069 len(s) == 2 for s in statuses)
2070 for inst, nnames in instdisk.items()
2071 for nname, statuses in nnames.items())
2072 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2073
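# The result looks, for example, like
# {"inst1": {"nodeA": [(True, st0), (True, st1)]}, "diskless1": {}}
# (instance and node names here are purely illustrative).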
2074 return instdisk
2075
2089
2090
2092 """Build hooks env.
2093
2094 Cluster-Verify hooks are run only in the post phase; their failure is
2095 logged in the verify output and makes the verification fail.
2096
2097 """
2098 all_nodes = self.cfg.GetNodeList()
2099 env = {
2100 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2101 }
2102 for node in self.cfg.GetAllNodesInfo().values():
2103 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2104
2105 return env, [], all_nodes
2106
2107 - def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes.
2109
2110 """
2111
2112 self.bad = False
2113 _ErrorIf = self._ErrorIf
2114 verbose = self.op.verbose
2115 self._feedback_fn = feedback_fn
2116 feedback_fn("* Verifying global settings")
2117 for msg in self.cfg.VerifyConfig():
2118 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2119
2120
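# Check presence and validity of all cluster certificates.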
2121 for cert_filename in constants.ALL_CERT_FILES:
2122 (errcode, msg) = _VerifyCertificate(cert_filename)
2123 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2124
2125 vg_name = self.cfg.GetVGName()
2126 drbd_helper = self.cfg.GetDRBDHelper()
2127 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2128 cluster = self.cfg.GetClusterInfo()
2129 nodeinfo_byname = self.cfg.GetAllNodesInfo()
2130 nodelist = utils.NiceSort(nodeinfo_byname.keys())
2131 nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
2132 instanceinfo = self.cfg.GetAllInstancesInfo()
2133 instancelist = utils.NiceSort(instanceinfo.keys())
2134 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2135 i_non_redundant = []
2136 i_non_a_balanced = []
2137 n_offline = 0
2138 n_drained = 0
2139 node_vol_should = {}
2140
2141
2142
2143 master_files = [constants.CLUSTER_CONF_FILE]
2144 master_node = self.master_node = self.cfg.GetMasterNode()
2145 master_ip = self.cfg.GetMasterIP()
2146
2147 file_names = ssconf.SimpleStore().GetFileList()
2148 file_names.extend(constants.ALL_CERT_FILES)
2149 file_names.extend(master_files)
2150 if cluster.modify_etc_hosts:
2151 file_names.append(constants.ETC_HOSTS)
2152
2153 local_checksums = utils.FingerprintFiles(file_names)
2154
2155
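# Collect every hypervisor parameter set that needs validation:
# cluster-wide defaults, per-OS overrides and per-instance settings.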
2156 hvp_data = []
2157 for hv_name in hypervisors:
2158 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2159 for os_name, os_hvp in cluster.os_hvp.items():
2160 for hv_name, hv_params in os_hvp.items():
2161 if not hv_params:
2162 continue
2163 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2164 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2165
2166 for instance in instanceinfo.values():
2167 if not instance.hvparams:
2168 continue
2169 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2170 cluster.FillHV(instance)))
2171
2172 self._VerifyHVP(hvp_data)
2173
2174 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2175 node_verify_param = {
2176 constants.NV_FILELIST: file_names,
2177 constants.NV_NODELIST: [node.name for node in nodeinfo
2178 if not node.offline],
2179 constants.NV_HYPERVISOR: hypervisors,
2180 constants.NV_HVPARAMS: hvp_data,
2181 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2182 node.secondary_ip) for node in nodeinfo
2183 if not node.offline],
2184 constants.NV_INSTANCELIST: hypervisors,
2185 constants.NV_VERSION: None,
2186 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2187 constants.NV_NODESETUP: None,
2188 constants.NV_TIME: None,
2189 constants.NV_MASTERIP: (master_node, master_ip),
2190 constants.NV_OSLIST: None,
2191 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2192 }
2193
2194 if vg_name is not None:
2195 node_verify_param[constants.NV_VGLIST] = None
2196 node_verify_param[constants.NV_LVLIST] = vg_name
2197 node_verify_param[constants.NV_PVLIST] = [vg_name]
2198 node_verify_param[constants.NV_DRBDLIST] = None
2199
2200 if drbd_helper:
2201 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2202
2203
2204
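# Collect the bridges referenced by the default NIC parameters and by
# any instance NIC, so the nodes can verify that they exist.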
2205 bridges = set()
2206 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2207 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2208 bridges.add(default_nicpp[constants.NIC_LINK])
2209 for instance in instanceinfo.values():
2210 for nic in instance.nics:
2211 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2212 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2213 bridges.add(full_nic[constants.NIC_LINK])
2214
2215 if bridges:
2216 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2217
2218
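# Pre-build the per-node image objects that the verification steps below will fill in.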
2219 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2220 name=node.name,
2221 vm_capable=node.vm_capable))
2222 for node in nodeinfo)
2223
2224
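# Gather the distinct OOB helper paths configured on the nodes; they are checked remotely via NV_OOB_PATHS.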
2225 oob_paths = []
2226 for node in nodeinfo:
2227 path = _SupportsOob(self.cfg, node)
2228 if path and path not in oob_paths:
2229 oob_paths.append(path)
2230
2231 if oob_paths:
2232 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2233
2234 for instance in instancelist:
2235 inst_config = instanceinfo[instance]
2236
2237 for nname in inst_config.all_nodes:
2238 if nname not in node_image:
2239
2240 gnode = self.NodeImage(name=nname)
2241 gnode.ghost = True
2242 node_image[nname] = gnode
2243
2244 inst_config.MapLVsByNode(node_vol_should)
2245
2246 pnode = inst_config.primary_node
2247 node_image[pnode].pinst.append(instance)
2248
2249 for snode in inst_config.secondary_nodes:
2250 nimg = node_image[snode]
2251 nimg.sinst.append(instance)
2252 if pnode not in nimg.sbp:
2253 nimg.sbp[pnode] = []
2254 nimg.sbp[pnode].append(instance)
2255
2256
2257
2258
2259
2260
2261
2262
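# All requested checks are now in node_verify_param; run them with a single
# RPC per node, timing the call so node clock skew can be judged against it.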
2263 nvinfo_starttime = time.time()
2264 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2265 self.cfg.GetClusterName())
2266 nvinfo_endtime = time.time()
2267
2268 all_drbd_map = self.cfg.ComputeDRBDMap()
2269
2270 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2271 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2272
2273 feedback_fn("* Verifying node status")
2274
2275 refos_img = None
2276
2277 for node_i in nodeinfo:
2278 node = node_i.name
2279 nimg = node_image[node]
2280
2281 if node_i.offline:
2282 if verbose:
2283 feedback_fn("* Skipping offline node %s" % (node,))
2284 n_offline += 1
2285 continue
2286
2287 if node == master_node:
2288 ntype = "master"
2289 elif node_i.master_candidate:
2290 ntype = "master candidate"
2291 elif node_i.drained:
2292 ntype = "drained"
2293 n_drained += 1
2294 else:
2295 ntype = "regular"
2296 if verbose:
2297 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2298
2299 msg = all_nvinfo[node].fail_msg
2300 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2301 if msg:
2302 nimg.rpc_fail = True
2303 continue
2304
2305 nresult = all_nvinfo[node].payload
2306
2307 nimg.call_ok = self._VerifyNode(node_i, nresult)
2308 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2309 self._VerifyNodeNetwork(node_i, nresult)
2310 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2311 master_files)
2312
2313 self._VerifyOob(node_i, nresult)
2314
2315 if nimg.vm_capable:
2316 self._VerifyNodeLVM(node_i, nresult, vg_name)
2317 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2318 all_drbd_map)
2319
2320 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2321 self._UpdateNodeInstances(node_i, nresult, nimg)
2322 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2323 self._UpdateNodeOS(node_i, nresult, nimg)
2324 if not nimg.os_fail:
2325 if refos_img is None:
2326 refos_img = nimg
2327 self._VerifyNodeOS(node_i, nimg, refos_img)
2328 self._VerifyNodeBridges(node_i, nresult, bridges)
2329
2330 feedback_fn("* Verifying instance status")
2331 for instance in instancelist:
2332 if verbose:
2333 feedback_fn("* Verifying instance %s" % instance)
2334 inst_config = instanceinfo[instance]
2335 self._VerifyInstance(instance, inst_config, node_image,
2336 instdisk[instance])
2337 inst_nodes_offline = []
2338
2339 pnode = inst_config.primary_node
2340 pnode_img = node_image[pnode]
2341 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2342 self.ENODERPC, pnode, "instance %s, connection to"
2343 " primary node failed", instance)
2344
2345 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2346 "instance lives on offline node %s", inst_config.primary_node)
2347
2348
2349
2350
2351
2352
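# An instance without secondary nodes is not redundant; more than one
# secondary is an unexpected layout and only warned about.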
2353 if not inst_config.secondary_nodes:
2354 i_non_redundant.append(instance)
2355
2356 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2357 instance, "instance has multiple secondary nodes: %s",
2358 utils.CommaJoin(inst_config.secondary_nodes),
2359 code=self.ETYPE_WARNING)
2360
2361 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2362 pnode = inst_config.primary_node
2363 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2364 instance_groups = {}
2365
2366 for node in instance_nodes:
2367 instance_groups.setdefault(nodeinfo_byname[node].group,
2368 []).append(node)
2369
2370 pretty_list = [
2371 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2372
2373 for group, nodes in sorted(instance_groups.items(),
2374 key=lambda (_, nodes): pnode in nodes,
2375 reverse=True)]
2376
2377 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2378 instance, "instance has primary and secondary nodes in"
2379 " different groups: %s", utils.CommaJoin(pretty_list),
2380 code=self.ETYPE_WARNING)
2381
2382 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2383 i_non_a_balanced.append(instance)
2384
2385 for snode in inst_config.secondary_nodes:
2386 s_img = node_image[snode]
2387 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2388 "instance %s, connection to secondary node failed", instance)
2389
2390 if s_img.offline:
2391 inst_nodes_offline.append(snode)
2392
2393
2394 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2395 "instance has offline secondary node(s) %s",
2396 utils.CommaJoin(inst_nodes_offline))
2397
2398 for node in inst_config.all_nodes:
2399 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2400 "instance lives on ghost node %s", node)
2401 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2402 instance, "instance lives on non-vm_capable node %s", node)
2403
2404 feedback_fn("* Verifying orphan volumes")
2405 reserved = utils.FieldSet(*cluster.reserved_lvs)
2406 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2407
2408 feedback_fn("* Verifying orphan instances")
2409 self._VerifyOrphanInstances(instancelist, node_image)
2410
2411 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2412 feedback_fn("* Verifying N+1 Memory redundancy")
2413 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2414
2415 feedback_fn("* Other Notes")
2416 if i_non_redundant:
2417 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2418 % len(i_non_redundant))
2419
2420 if i_non_a_balanced:
2421 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2422 % len(i_non_a_balanced))
2423
2424 if n_offline:
2425 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2426
2427 if n_drained:
2428 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2429
2430 return not self.bad
2431
2432 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2433 """Analyze the post-hooks' result
2434
2435 This method analyses the hook result, handles it, and sends some
2436 nicely-formatted feedback back to the user.
2437
2438 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2439 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2440 @param hooks_results: the results of the multi-node hooks rpc call
2441 @param feedback_fn: function used to send feedback back to the caller
2442 @param lu_result: previous Exec result
2443 @return: the new Exec result, based on the previous result
2444 and hook results
2445
2446 """
2447
2448
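# Only the post-phase hooks are analysed; pre-phase results are ignored.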
2449 if phase == constants.HOOKS_PHASE_POST:
2450
2451 feedback_fn("* Hooks Results")
2452 assert hooks_results, "invalid result from hooks"
2453
2454 for node_name in hooks_results:
2455 res = hooks_results[node_name]
2456 msg = res.fail_msg
2457 test = msg and not res.offline
2458 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2459 "Communication failure in hooks execution: %s", msg)
2460 if res.offline or msg:
2461
2462
2463
2464 lu_result = 1
2465 continue
2466 for script, hkr, output in res.payload:
2467 test = hkr == constants.HKR_FAIL
2468 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2469 "Script %s failed, output:", script)
2470 if test:
2471 output = self._HOOKS_INDENT_RE.sub(' ', output)
2472 feedback_fn("%s" % output)
2473 lu_result = 0
2474
2475 return lu_result
2476
2479 """Verifies the cluster disks status.
2480
2481 """
2482 REQ_BGL = False
2483
2490
2491 - def Exec(self, feedback_fn):
2492 """Verify integrity of cluster disks.
2493
2494 @rtype: tuple of three items
2495 @return: a tuple of (dict of node-to-node_error, list of instances
2496 which need activate-disks, dict of instance: (node, volume) for
2497 missing volumes)
2498
2499 """
2500 result = res_nodes, res_instances, res_missing = {}, [], {}
2501
2502 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2503 instances = self.cfg.GetAllInstancesInfo().values()
2504
2505 nv_dict = {}
2506 for inst in instances:
2507 inst_lvs = {}
2508 if not inst.admin_up:
2509 continue
2510 inst.MapLVsByNode(inst_lvs)
2511
2512 for node, vol_list in inst_lvs.iteritems():
2513 for vol in vol_list:
2514 nv_dict[(node, vol)] = inst
2515
2516 if not nv_dict:
2517 return result
2518
2519 node_lvs = self.rpc.call_lv_list(nodes, [])
2520 for node, node_res in node_lvs.items():
2521 if node_res.offline:
2522 continue
2523 msg = node_res.fail_msg
2524 if msg:
2525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2526 res_nodes[node] = msg
2527 continue
2528
2529 lvs = node_res.payload
2530 for lv_name, (_, _, lv_online) in lvs.items():
2531 inst = nv_dict.pop((node, lv_name), None)
2532 if (not lv_online and inst is not None
2533 and inst.name not in res_instances):
2534 res_instances.append(inst.name)
2535
2536
2537
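# Whatever is still left in nv_dict was never reported by its node, i.e. the logical volume is missing.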
2538 for key, inst in nv_dict.iteritems():
2539 if inst.name not in res_missing:
2540 res_missing[inst.name] = []
2541 res_missing[inst.name].append(key)
2542
2543 return result
2544
2547 """Verifies the cluster disks sizes.
2548
2549 """
2550 REQ_BGL = False
2551
2570
2574
2576 """Check prerequisites.
2577
2578 This only checks the optional instance list against the existing names.
2579
2580 """
2581 if self.wanted_names is None:
2582 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2583
2584 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2585 in self.wanted_names]
2586
2588 """Ensure children of the disk have the needed disk size.
2589
2590 This is valid mainly for DRBD8 and fixes an issue where a child
2591 has a smaller disk size than its parent.
2592
2593 @param disk: an L{ganeti.objects.Disk} object
2594
2595 """
2596 if disk.dev_type == constants.LD_DRBD8:
2597 assert disk.children, "Empty children for DRBD8?"
2598 fchild = disk.children[0]
2599 mismatch = fchild.size < disk.size
2600 if mismatch:
2601 self.LogInfo("Child disk has size %d, parent %d, fixing",
2602 fchild.size, disk.size)
2603 fchild.size = disk.size
2604
2605
2606 return self._EnsureChildSizes(fchild) or mismatch
2607 else:
2608 return False
2609
2610 - def Exec(self, feedback_fn):
2611 """Verify the size of cluster disks.
2612
2613 """
2614
2615
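# Group the disks by primary node so that each node is queried only once.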
2616 per_node_disks = {}
2617 for instance in self.wanted_instances:
2618 pnode = instance.primary_node
2619 if pnode not in per_node_disks:
2620 per_node_disks[pnode] = []
2621 for idx, disk in enumerate(instance.disks):
2622 per_node_disks[pnode].append((instance, idx, disk))
2623
2624 changed = []
2625 for node, dskl in per_node_disks.items():
2626 newl = [v[2].Copy() for v in dskl]
2627 for dsk in newl:
2628 self.cfg.SetDiskID(dsk, node)
2629 result = self.rpc.call_blockdev_getsize(node, newl)
2630 if result.fail_msg:
2631 self.LogWarning("Failure in blockdev_getsize call to node"
2632 " %s, ignoring", node)
2633 continue
2634 if len(result.payload) != len(dskl):
2635 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2636 " result.payload=%s", node, len(dskl), result.payload)
2637 self.LogWarning("Invalid result from node %s, ignoring node results",
2638 node)
2639 continue
2640 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2641 if size is None:
2642 self.LogWarning("Disk %d of instance %s did not return size"
2643 " information, ignoring", idx, instance.name)
2644 continue
2645 if not isinstance(size, (int, long)):
2646 self.LogWarning("Disk %d of instance %s did not return valid"
2647 " size information, ignoring", idx, instance.name)
2648 continue
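# The node reports sizes in bytes; shift by 20 to get mebibytes, which is the unit disk.size is recorded in.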
2649 size = size >> 20
2650 if size != disk.size:
2651 self.LogInfo("Disk %d of instance %s has mismatched size,"
2652 " correcting: recorded %d, actual %d", idx,
2653 instance.name, disk.size, size)
2654 disk.size = size
2655 self.cfg.Update(instance, feedback_fn)
2656 changed.append((instance.name, idx, size))
2657 if self._EnsureChildSizes(disk):
2658 self.cfg.Update(instance, feedback_fn)
2659 changed.append((instance.name, idx, disk.size))
2660 return changed
2661
2739
2742 """Change the parameters of the cluster.
2743
2744 """
2745 HPATH = "cluster-modify"
2746 HTYPE = constants.HTYPE_CLUSTER
2747 REQ_BGL = False
2748
2761
2769
2771 """Build hooks env.
2772
2773 """
2774 env = {
2775 "OP_TARGET": self.cfg.GetClusterName(),
2776 "NEW_VG_NAME": self.op.vg_name,
2777 }
2778 mn = self.cfg.GetMasterNode()
2779 return env, [mn], [mn]
2780
2782 """Check prerequisites.
2783
2784 This checks whether the given parameters are consistent with each
2785 other and whether the given volume group is valid.
2786
2787 """
2788 if self.op.vg_name is not None and not self.op.vg_name:
2789 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2790 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2791 " instances exist", errors.ECODE_INVAL)
2792
2793 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2794 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2795 raise errors.OpPrereqError("Cannot disable drbd helper while"
2796 " drbd-based instances exist",
2797 errors.ECODE_INVAL)
2798
2799 node_list = self.acquired_locks[locking.LEVEL_NODE]
2800
2801
2802 if self.op.vg_name:
2803 vglist = self.rpc.call_vg_list(node_list)
2804 for node in node_list:
2805 msg = vglist[node].fail_msg
2806 if msg:
2807
2808 self.LogWarning("Error while gathering data on node %s"
2809 " (ignoring node): %s", node, msg)
2810 continue
2811 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2812 self.op.vg_name,
2813 constants.MIN_VG_SIZE)
2814 if vgstatus:
2815 raise errors.OpPrereqError("Error on node '%s': %s" %
2816 (node, vgstatus), errors.ECODE_ENVIRON)
2817
2818 if self.op.drbd_helper:
2819
2820 helpers = self.rpc.call_drbd_helper(node_list)
2821 for node in node_list:
2822 ninfo = self.cfg.GetNodeInfo(node)
2823 if ninfo.offline:
2824 self.LogInfo("Not checking drbd helper on offline node %s", node)
2825 continue
2826 msg = helpers[node].fail_msg
2827 if msg:
2828 raise errors.OpPrereqError("Error checking drbd helper on node"
2829 " '%s': %s" % (node, msg),
2830 errors.ECODE_ENVIRON)
2831 node_helper = helpers[node].payload
2832 if node_helper != self.op.drbd_helper:
2833 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2834 (node, node_helper), errors.ECODE_ENVIRON)
2835
2836 self.cluster = cluster = self.cfg.GetClusterInfo()
2837
2838 if self.op.beparams:
2839 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2840 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2841
2842 if self.op.ndparams:
2843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2844 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2845
2846
2847
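# An empty oob_program means "use the compile-time default", so substitute it here.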
2848 if self.new_ndparams["oob_program"] == "":
2849 self.new_ndparams["oob_program"] = \
2850 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2851
2852 if self.op.nicparams:
2853 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2854 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2855 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2856 nic_errors = []
2857
2858
2859 for instance in self.cfg.GetAllInstancesInfo().values():
2860 for nic_idx, nic in enumerate(instance.nics):
2861 params_copy = copy.deepcopy(nic.nicparams)
2862 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2863
2864
2865 try:
2866 objects.NIC.CheckParameterSyntax(params_filled)
2867 except errors.ConfigurationError, err:
2868 nic_errors.append("Instance %s, nic/%d: %s" %
2869 (instance.name, nic_idx, err))
2870
2871
2872 target_mode = params_filled[constants.NIC_MODE]
2873 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2874 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
2875 " address" % (instance.name, nic_idx))
2876 if nic_errors:
2877 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2878 "\n".join(nic_errors))
2879
2880
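# Start from the current cluster-level hypervisor parameters and layer the requested changes on top.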
2881 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2882 if self.op.hvparams:
2883 for hv_name, hv_dict in self.op.hvparams.items():
2884 if hv_name not in self.new_hvparams:
2885 self.new_hvparams[hv_name] = hv_dict
2886 else:
2887 self.new_hvparams[hv_name].update(hv_dict)
2888
2889
2890 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2891 if self.op.os_hvp:
2892 for os_name, hvs in self.op.os_hvp.items():
2893 if os_name not in self.new_os_hvp:
2894 self.new_os_hvp[os_name] = hvs
2895 else:
2896 for hv_name, hv_dict in hvs.items():
2897 if hv_name not in self.new_os_hvp[os_name]:
2898 self.new_os_hvp[os_name][hv_name] = hv_dict
2899 else:
2900 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2901
2902
2903 self.new_osp = objects.FillDict(cluster.osparams, {})
2904 if self.op.osparams:
2905 for os_name, osp in self.op.osparams.items():
2906 if os_name not in self.new_osp:
2907 self.new_osp[os_name] = {}
2908
2909 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2910 use_none=True)
2911
2912 if not self.new_osp[os_name]:
2913
2914 del self.new_osp[os_name]
2915 else:
2916
2917 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2918 os_name, self.new_osp[os_name])
2919
2920
2921 if self.op.enabled_hypervisors is not None:
2922 self.hv_list = self.op.enabled_hypervisors
2923 for hv in self.hv_list:
2924
2925
2926
2927
2928
2929 if hv not in new_hvp:
2930 new_hvp[hv] = {}
2931 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2932 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2933 else:
2934 self.hv_list = cluster.enabled_hypervisors
2935
2936 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2937
2938 for hv_name, hv_params in self.new_hvparams.items():
2939 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2940 (self.op.enabled_hypervisors and
2941 hv_name in self.op.enabled_hypervisors)):
2942
2943 hv_class = hypervisor.GetHypervisor(hv_name)
2944 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2945 hv_class.CheckParameterSyntax(hv_params)
2946 _CheckHVParams(self, node_list, hv_name, hv_params)
2947
2948 if self.op.os_hvp:
2949
2950
2951 for os_name, os_hvp in self.new_os_hvp.items():
2952 for hv_name, hv_params in os_hvp.items():
2953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2954
2955 cluster_defaults = self.new_hvparams.get(hv_name, {})
2956 new_osp = objects.FillDict(cluster_defaults, hv_params)
2957 hv_class = hypervisor.GetHypervisor(hv_name)
2958 hv_class.CheckParameterSyntax(new_osp)
2959 _CheckHVParams(self, node_list, hv_name, new_osp)
2960
2961 if self.op.default_iallocator:
2962 alloc_script = utils.FindFile(self.op.default_iallocator,
2963 constants.IALLOCATOR_SEARCH_PATH,
2964 os.path.isfile)
2965 if alloc_script is None:
2966 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2967 " specified" % self.op.default_iallocator,
2968 errors.ECODE_INVAL)
2969
2970 - def Exec(self, feedback_fn):
2971 """Change the parameters of the cluster.
2972
2973 """
2974 if self.op.vg_name is not None:
2975 new_volume = self.op.vg_name
2976 if not new_volume:
2977 new_volume = None
2978 if new_volume != self.cfg.GetVGName():
2979 self.cfg.SetVGName(new_volume)
2980 else:
2981 feedback_fn("Cluster LVM configuration already in desired"
2982 " state, not changing")
2983 if self.op.drbd_helper is not None:
2984 new_helper = self.op.drbd_helper
2985 if not new_helper:
2986 new_helper = None
2987 if new_helper != self.cfg.GetDRBDHelper():
2988 self.cfg.SetDRBDHelper(new_helper)
2989 else:
2990 feedback_fn("Cluster DRBD helper already in desired state,"
2991 " not changing")
2992 if self.op.hvparams:
2993 self.cluster.hvparams = self.new_hvparams
2994 if self.op.os_hvp:
2995 self.cluster.os_hvp = self.new_os_hvp
2996 if self.op.enabled_hypervisors is not None:
2997 self.cluster.hvparams = self.new_hvparams
2998 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2999 if self.op.beparams:
3000 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3001 if self.op.nicparams:
3002 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3003 if self.op.osparams:
3004 self.cluster.osparams = self.new_osp
3005 if self.op.ndparams:
3006 self.cluster.ndparams = self.new_ndparams
3007
3008 if self.op.candidate_pool_size is not None:
3009 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3010
3011 _AdjustCandidatePool(self, [])
3012
3013 if self.op.maintain_node_health is not None:
3014 self.cluster.maintain_node_health = self.op.maintain_node_health
3015
3016 if self.op.prealloc_wipe_disks is not None:
3017 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3018
3019 if self.op.add_uids is not None:
3020 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3021
3022 if self.op.remove_uids is not None:
3023 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3024
3025 if self.op.uid_pool is not None:
3026 self.cluster.uid_pool = self.op.uid_pool
3027
3028 if self.op.default_iallocator is not None:
3029 self.cluster.default_iallocator = self.op.default_iallocator
3030
3031 if self.op.reserved_lvs is not None:
3032 self.cluster.reserved_lvs = self.op.reserved_lvs
3033
3034 def helper_os(aname, mods, desc):
3035 desc += " OS list"
3036 lst = getattr(self.cluster, aname)
3037 for key, val in mods:
3038 if key == constants.DDM_ADD:
3039 if val in lst:
3040 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3041 else:
3042 lst.append(val)
3043 elif key == constants.DDM_REMOVE:
3044 if val in lst:
3045 lst.remove(val)
3046 else:
3047 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3048 else:
3049 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3050
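# For illustration (hypothetical value): self.op.hidden_os =
# [(constants.DDM_ADD, "lenny-image")] appends "lenny-image" to
# cluster.hidden_os, while a DDM_REMOVE pair removes it again.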
3051 if self.op.hidden_os:
3052 helper_os("hidden_os", self.op.hidden_os, "hidden")
3053
3054 if self.op.blacklisted_os:
3055 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3056
3057 if self.op.master_netdev:
3058 master = self.cfg.GetMasterNode()
3059 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3060 self.cluster.master_netdev)
3061 result = self.rpc.call_node_stop_master(master, False)
3062 result.Raise("Could not disable the master ip")
3063 feedback_fn("Changing master_netdev from %s to %s" %
3064 (self.cluster.master_netdev, self.op.master_netdev))
3065 self.cluster.master_netdev = self.op.master_netdev
3066
3067 self.cfg.Update(self.cluster, feedback_fn)
3068
3069 if self.op.master_netdev:
3070 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3071 self.op.master_netdev)
3072 result = self.rpc.call_node_start_master(master, False, False)
3073 if result.fail_msg:
3074 self.LogWarning("Could not re-enable the master ip on"
3075 " the master, please restart manually: %s",
3076 result.fail_msg)
3077
3080 """Helper for uploading a file and showing warnings.
3081
3082 """
3083 if os.path.exists(fname):
3084 result = lu.rpc.call_upload_file(nodes, fname)
3085 for to_node, to_result in result.items():
3086 msg = to_result.fail_msg
3087 if msg:
3088 msg = ("Copy of file %s to node %s failed: %s" %
3089 (fname, to_node, msg))
3090 lu.proc.LogWarning(msg)
3091
3094 """Distribute additional files which are part of the cluster configuration.
3095
3096 ConfigWriter takes care of distributing the config and ssconf files, but
3097 there are more files which should be distributed to all nodes. This function
3098 makes sure those are copied.
3099
3100 @param lu: calling logical unit
3101 @param additional_nodes: list of nodes not in the config to distribute to
3102 @type additional_vm: boolean
3103 @param additional_vm: whether the additional nodes are vm-capable or not
3104
3105 """
3106
3107 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3108 dist_nodes = lu.cfg.GetOnlineNodeList()
3109 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3110 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3111 if additional_nodes is not None:
3112 dist_nodes.extend(additional_nodes)
3113 if additional_vm:
3114 vm_nodes.extend(additional_nodes)
3115 if myself.name in dist_nodes:
3116 dist_nodes.remove(myself.name)
3117 if myself.name in vm_nodes:
3118 vm_nodes.remove(myself.name)
3119
3120
3121 dist_files = set([constants.ETC_HOSTS,
3122 constants.SSH_KNOWN_HOSTS_FILE,
3123 constants.RAPI_CERT_FILE,
3124 constants.RAPI_USERS_FILE,
3125 constants.CONFD_HMAC_KEY,
3126 constants.CLUSTER_DOMAIN_SECRET_FILE,
3127 ])
3128
3129 vm_files = set()
3130 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3131 for hv_name in enabled_hypervisors:
3132 hv_class = hypervisor.GetHypervisor(hv_name)
3133 vm_files.update(hv_class.GetAncillaryFiles())
3134
3135
3136 for fname in dist_files:
3137 _UploadHelper(lu, dist_nodes, fname)
3138 for fname in vm_files:
3139 _UploadHelper(lu, vm_nodes, fname)
3140
3143 """Force the redistribution of cluster configuration.
3144
3145 This is a very simple LU.
3146
3147 """
3148