"""Module implementing the master-side code."""

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht

import ganeti.masterd.instance
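

# Each opcode parameter below is described by a (name, default, check)
# triple: the attribute name on the opcode, the default value it gets if
# not already set (ht.NoDefault marks it as required), and an ht
# type-check callable that the value must satisfy.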
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))

_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

_PForce = ("force", False, ht.TBool)

_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

_PMigrationLive = ("live", None, ht.TMaybeBool)


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc

    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}

    self.recalculate_locks = {}
    self.__ssh = None

    self.Log = processor.Log
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep

    self.dry_run_result = None

    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
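
  # A minimal sketch of how a (hypothetical) subclass would declare its
  # opcode parameters with the triples validated above:
  #
  #   class LUExampleNoop(LogicalUnit):
  #     _OP_PARAMS = [_PInstanceName, ("force", False, ht.TBool)]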

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and for ensuring
    the validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    if self.REQ_BGL:
      self.needed_locks = {}
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    will be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, an empty list (and not None) should
    be used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
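
  # LUs that lock instances in ExpandNames typically compute the matching
  # node locks in DeclareLocks through the helper above, e.g.:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()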


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names whose names are to be expanded
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
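
# Example of the semantics above (hypothetical values): with
# use_default=True, updating {"a": 1, "b": 2} with
# {"b": constants.VALUE_DEFAULT, "c": 3} yields {"a": 1, "c": 3} - "b"
# reverts to its default by being deleted, while "c" is added.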


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary IP.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the IP to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the IP, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the IP, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
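
# For example, a running one-NIC, two-disk instance yields (among others)
# INSTANCE_STATUS=up, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC,
# INSTANCE_DISK_COUNT=2 and INSTANCE_DISK1_MODE; the hooks runner later
# prefixes every key with "GANETI_" (see LogicalUnit.BuildHooksEnv).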


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
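
# Variant OS names follow the "<os>+<variant>" convention (e.g. a
# hypothetical "myos+default"): objects.OS.GetVariant extracts the part
# after the "+", which must be one of the OS' supported variants.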


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  if storage_type == constants.ST_FILE:
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err:
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
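
  # Each error code above is an (object-type, code-string) pair; _Error
  # below combines the pair with the offending item into either a
  # machine-parseable "type:code:item" line or a plain human-readable
  # message, depending on the opcode's error_codes parameter.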

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode

    if args:
      msg = msg % args

    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)

    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)

    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf

    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
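      # test1: the file is missing on the node; test2: its checksum
      # differs from the master's copy; test3: it is present and matches
      # (only interesting where the file should not exist at all)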
      test1 = file_name not in remote_cksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

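      # after the JSON round-trip the OS parameters arrive as lists of
      # lists (JSON has no tuple type); convert them back to hashable
      # tuples so they can be stored in the sets built below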
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf

    node_disks = {}
    node_disks_devonly = {}

    for nname in nodelist:
      disks = [(inst, disk)
               for instlist in [node_image[nname].pinst,
                                node_image[nname].sinst]
               for inst in instlist
               for disk in instanceinfo[inst].disks]

      if not disks:
        continue

      node_disks[nname] = disks

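      # work on copies of the disk objects: SetDiskID modifies them in
      # place, and the originals belong to the configuration's cached
      # instance data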
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
1997 """Build hooks env.
1998
1999 Cluster-Verify hooks are run in the post phase only; their failure causes
2000 the output to be logged in the verify output and the verification to fail.
2001
2002 """
2003 all_nodes = self.cfg.GetNodeList()
2004 env = {
2005 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2006 }
2007 for node in self.cfg.GetAllNodesInfo().values():
2008 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2009
2010 return env, [], all_nodes
2011
2012 - def Exec(self, feedback_fn):
2013 """Verify integrity of cluster, performing various test on nodes.
2014
2015 """
2016 self.bad = False
2017 _ErrorIf = self._ErrorIf
2018 verbose = self.op.verbose
2019 self._feedback_fn = feedback_fn
2020 feedback_fn("* Verifying global settings")
2021 for msg in self.cfg.VerifyConfig():
2022 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2023
2024
2025 for cert_filename in constants.ALL_CERT_FILES:
2026 (errcode, msg) = _VerifyCertificate(cert_filename)
2027 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2028
2029 vg_name = self.cfg.GetVGName()
2030 drbd_helper = self.cfg.GetDRBDHelper()
2031 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2032 cluster = self.cfg.GetClusterInfo()
2033 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2034 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2035 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2036 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2037 for iname in instancelist)
2038 i_non_redundant = []
2039 i_non_a_balanced = []
2040 n_offline = 0
2041 n_drained = 0
2042 node_vol_should = {}
2043
2044
2045
2046 master_files = [constants.CLUSTER_CONF_FILE]
2047 master_node = self.master_node = self.cfg.GetMasterNode()
2048 master_ip = self.cfg.GetMasterIP()
2049
2050 file_names = ssconf.SimpleStore().GetFileList()
2051 file_names.extend(constants.ALL_CERT_FILES)
2052 file_names.extend(master_files)
2053 if cluster.modify_etc_hosts:
2054 file_names.append(constants.ETC_HOSTS)
2055
2056 local_checksums = utils.FingerprintFiles(file_names)
2057
2058 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2059 node_verify_param = {
2060 constants.NV_FILELIST: file_names,
2061 constants.NV_NODELIST: [node.name for node in nodeinfo
2062 if not node.offline],
2063 constants.NV_HYPERVISOR: hypervisors,
2064 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2065 node.secondary_ip) for node in nodeinfo
2066 if not node.offline],
2067 constants.NV_INSTANCELIST: hypervisors,
2068 constants.NV_VERSION: None,
2069 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2070 constants.NV_NODESETUP: None,
2071 constants.NV_TIME: None,
2072 constants.NV_MASTERIP: (master_node, master_ip),
2073 constants.NV_OSLIST: None,
2074 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2075 }
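# each NV_* key enables one remote check; the value carries that check's
# arguments (None where the check needs none)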
2076
2077 if vg_name is not None:
2078 node_verify_param[constants.NV_VGLIST] = None
2079 node_verify_param[constants.NV_LVLIST] = vg_name
2080 node_verify_param[constants.NV_PVLIST] = [vg_name]
2081 node_verify_param[constants.NV_DRBDLIST] = None
2082
2083 if drbd_helper:
2084 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2085
2086
2087 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2088 name=node.name,
2089 vm_capable=node.vm_capable))
2090 for node in nodeinfo)
2091
2092 for instance in instancelist:
2093 inst_config = instanceinfo[instance]
2094
2095 for nname in inst_config.all_nodes:
2096 if nname not in node_image:
2097
2098 gnode = self.NodeImage(name=nname)
2099 gnode.ghost = True
2100 node_image[nname] = gnode
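# nodes referenced by instances but absent from the configuration are
# tracked as "ghost" nodes and flagged as errors further down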
2101
2102 inst_config.MapLVsByNode(node_vol_should)
2103
2104 pnode = inst_config.primary_node
2105 node_image[pnode].pinst.append(instance)
2106
2107 for snode in inst_config.secondary_nodes:
2108 nimg = node_image[snode]
2109 nimg.sinst.append(instance)
2110 if pnode not in nimg.sbp:
2111 nimg.sbp[pnode] = []
2112 nimg.sbp[pnode].append(instance)
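# sbp ("secondary by primary"): on each secondary node, map the primary
# node's name to the instances mirrored from it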
2113
2114
2115
2116
2117
2118
2119
2120
2121 nvinfo_starttime = time.time()
2122 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2123 self.cfg.GetClusterName())
2124 nvinfo_endtime = time.time()
2125
2126 all_drbd_map = self.cfg.ComputeDRBDMap()
2127
2128 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2129 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2130
2131 feedback_fn("* Verifying node status")
2132
2133 refos_img = None
2134
2135 for node_i in nodeinfo:
2136 node = node_i.name
2137 nimg = node_image[node]
2138
2139 if node_i.offline:
2140 if verbose:
2141 feedback_fn("* Skipping offline node %s" % (node,))
2142 n_offline += 1
2143 continue
2144
2145 if node == master_node:
2146 ntype = "master"
2147 elif node_i.master_candidate:
2148 ntype = "master candidate"
2149 elif node_i.drained:
2150 ntype = "drained"
2151 n_drained += 1
2152 else:
2153 ntype = "regular"
2154 if verbose:
2155 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2156
2157 msg = all_nvinfo[node].fail_msg
2158 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2159 if msg:
2160 nimg.rpc_fail = True
2161 continue
2162
2163 nresult = all_nvinfo[node].payload
2164
2165 nimg.call_ok = self._VerifyNode(node_i, nresult)
2166 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2167 self._VerifyNodeNetwork(node_i, nresult)
2168 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2169 master_files)
2170
2171 if nimg.vm_capable:
2172 self._VerifyNodeLVM(node_i, nresult, vg_name)
2173 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2174 all_drbd_map)
2175
2176 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2177 self._UpdateNodeInstances(node_i, nresult, nimg)
2178 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2179 self._UpdateNodeOS(node_i, nresult, nimg)
2180 if not nimg.os_fail:
2181 if refos_img is None:
2182 refos_img = nimg
2183 self._VerifyNodeOS(node_i, nimg, refos_img)
2184
2185 feedback_fn("* Verifying instance status")
2186 for instance in instancelist:
2187 if verbose:
2188 feedback_fn("* Verifying instance %s" % instance)
2189 inst_config = instanceinfo[instance]
2190 self._VerifyInstance(instance, inst_config, node_image,
2191 instdisk[instance])
2192 inst_nodes_offline = []
2193
2194 pnode = inst_config.primary_node
2195 pnode_img = node_image[pnode]
2196 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2197 self.ENODERPC, pnode, "instance %s, connection to"
2198 " primary node failed", instance)
2199
2200 if pnode_img.offline:
2201 inst_nodes_offline.append(pnode)
2202
2203
2204
2205
2206
2207
2208 if not inst_config.secondary_nodes:
2209 i_non_redundant.append(instance)
2210 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2211 instance, "instance has multiple secondary nodes: %s",
2212 utils.CommaJoin(inst_config.secondary_nodes),
2213 code=self.ETYPE_WARNING)
2214
2215 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2216 i_non_a_balanced.append(instance)
2217
2218 for snode in inst_config.secondary_nodes:
2219 s_img = node_image[snode]
2220 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2221 "instance %s, connection to secondary node failed", instance)
2222
2223 if s_img.offline:
2224 inst_nodes_offline.append(snode)
2225
2226
2227 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2228 "instance lives on offline node(s) %s",
2229 utils.CommaJoin(inst_nodes_offline))
2230
2231 for node in inst_config.all_nodes:
2232 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2233 "instance lives on ghost node %s", node)
2234 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2235 instance, "instance lives on non-vm_capable node %s", node)
2236
2237 feedback_fn("* Verifying orphan volumes")
2238 reserved = utils.FieldSet(*cluster.reserved_lvs)
2239 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2240
2241 feedback_fn("* Verifying orphan instances")
2242 self._VerifyOrphanInstances(instancelist, node_image)
2243
2244 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2245 feedback_fn("* Verifying N+1 Memory redundancy")
2246 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2247
2248 feedback_fn("* Other Notes")
2249 if i_non_redundant:
2250 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2251 % len(i_non_redundant))
2252
2253 if i_non_a_balanced:
2254 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2255 % len(i_non_a_balanced))
2256
2257 if n_offline:
2258 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2259
2260 if n_drained:
2261 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2262
2263 return not self.bad
2264
2265 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2266 """Analyze the post-hooks' result
2267
2268 This method analyses the hook result, handles it, and sends some
2269 nicely-formatted feedback back to the user.
2270
2271 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2272 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2273 @param hooks_results: the results of the multi-node hooks rpc call
2274 @param feedback_fn: function used to send feedback back to the caller
2275 @param lu_result: previous Exec result
2276 @return: the new Exec result, based on the previous result
2277 and hook results
2278
2279 """
2280
2281
2282 if phase == constants.HOOKS_PHASE_POST:
2283
2284 indent_re = re.compile('^', re.M)
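# in multi-line mode '^' matches at every line start; used below to indent
# each line of a failed script's output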
2285 feedback_fn("* Hooks Results")
2286 assert hooks_results, "invalid result from hooks"
2287
2288 for node_name in hooks_results:
2289 res = hooks_results[node_name]
2290 msg = res.fail_msg
2291 test = msg and not res.offline
2292 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2293 "Communication failure in hooks execution: %s", msg)
2294 if res.offline or msg:
2295
2296
2297
2298 lu_result = 1
2299 continue
2300 for script, hkr, output in res.payload:
2301 test = hkr == constants.HKR_FAIL
2302 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2303 "Script %s failed, output:", script)
2304 if test:
2305 output = indent_re.sub(' ', output)
2306 feedback_fn("%s" % output)
2307 lu_result = 0
2308
2309 return lu_result
2310
2313 """Verifies the cluster disks status.
2314
2315 """
2316 REQ_BGL = False
2317
2324
2325 - def Exec(self, feedback_fn):
2326 """Verify integrity of cluster disks.
2327
2328 @rtype: tuple of three items
2329 @return: a tuple of (dict of node-to-node_error, list of instances
2330 which need activate-disks, dict of instance: (node, volume) for
2331 missing volumes)
2332
2333 """
2334 result = res_nodes, res_instances, res_missing = {}, [], {}
2335
2336 vg_name = self.cfg.GetVGName()
2337 nodes = utils.NiceSort(self.cfg.GetNodeList())
2338 instances = [self.cfg.GetInstanceInfo(name)
2339 for name in self.cfg.GetInstanceList()]
2340
2341 nv_dict = {}
2342 for inst in instances:
2343 inst_lvs = {}
2344 if (not inst.admin_up or
2345 inst.disk_template not in constants.DTS_NET_MIRROR):
2346 continue
2347 inst.MapLVsByNode(inst_lvs)
2348
2349 for node, vol_list in inst_lvs.iteritems():
2350 for vol in vol_list:
2351 nv_dict[(node, vol)] = inst
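# nv_dict maps (node_name, lv_name) to the owning instance for every LV
# that should exist for a running, mirrored instance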
2352
2353 if not nv_dict:
2354 return result
2355
2356 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2357
2358 for node in nodes:
2359
2360 node_res = node_lvs[node]
2361 if node_res.offline:
2362 continue
2363 msg = node_res.fail_msg
2364 if msg:
2365 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2366 res_nodes[node] = msg
2367 continue
2368
2369 lvs = node_res.payload
2370 for lv_name, (_, _, lv_online) in lvs.items():
2371 inst = nv_dict.pop((node, lv_name), None)
2372 if (not lv_online and inst is not None
2373 and inst.name not in res_instances):
2374 res_instances.append(inst.name)
2375
2376
2377
2378 for key, inst in nv_dict.iteritems():
2379 if inst.name not in res_missing:
2380 res_missing[inst.name] = []
2381 res_missing[inst.name].append(key)
2382
2383 return result
2384
2387 """Verifies the cluster disks sizes.
2388
2389 """
2390 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2391 REQ_BGL = False
2392
2411
2415
2417 """Check prerequisites.
2418
2419 This only checks the optional instance list against the existing names.
2420
2421 """
2422 if self.wanted_names is None:
2423 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2424
2425 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2426 in self.wanted_names]
2427
2429 """Ensure children of the disk have the needed disk size.
2430
2431 This is valid mainly for DRBD8 and fixes an issue where the
2432 children have a smaller disk size than the parent.
2433
2434 @param disk: an L{ganeti.objects.Disk} object
2435
2436 """
2437 if disk.dev_type == constants.LD_DRBD8:
2438 assert disk.children, "Empty children for DRBD8?"
2439 fchild = disk.children[0]
2440 mismatch = fchild.size < disk.size
2441 if mismatch:
2442 self.LogInfo("Child disk has size %d, parent %d, fixing",
2443 fchild.size, disk.size)
2444 fchild.size = disk.size
2445
2446
2447 return self._EnsureChildSizes(fchild) or mismatch
2448 else:
2449 return False
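# a True result tells the caller that a size was adjusted and the instance
# configuration has to be written back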
2450
2451 - def Exec(self, feedback_fn):
2452 """Verify the size of cluster disks.
2453
2454 """
2455
2456
2457 per_node_disks = {}
2458 for instance in self.wanted_instances:
2459 pnode = instance.primary_node
2460 if pnode not in per_node_disks:
2461 per_node_disks[pnode] = []
2462 for idx, disk in enumerate(instance.disks):
2463 per_node_disks[pnode].append((instance, idx, disk))
2464
2465 changed = []
2466 for node, dskl in per_node_disks.items():
2467 newl = [v[2].Copy() for v in dskl]
2468 for dsk in newl:
2469 self.cfg.SetDiskID(dsk, node)
2470 result = self.rpc.call_blockdev_getsizes(node, newl)
2471 if result.fail_msg:
2472 self.LogWarning("Failure in blockdev_getsizes call to node"
2473 " %s, ignoring", node)
2474 continue
2475 if len(result.data) != len(dskl):
2476 self.LogWarning("Invalid result from node %s, ignoring node results",
2477 node)
2478 continue
2479 for ((instance, idx, disk), size) in zip(dskl, result.data):
2480 if size is None:
2481 self.LogWarning("Disk %d of instance %s did not return size"
2482 " information, ignoring", idx, instance.name)
2483 continue
2484 if not isinstance(size, (int, long)):
2485 self.LogWarning("Disk %d of instance %s did not return valid"
2486 " size information, ignoring", idx, instance.name)
2487 continue
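# the node reports sizes in bytes while the configuration stores MiB,
# hence the shift by 20 bits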
2488 size = size >> 20
2489 if size != disk.size:
2490 self.LogInfo("Disk %d of instance %s has mismatched size,"
2491 " correcting: recorded %d, actual %d", idx,
2492 instance.name, disk.size, size)
2493 disk.size = size
2494 self.cfg.Update(instance, feedback_fn)
2495 changed.append((instance.name, idx, size))
2496 if self._EnsureChildSizes(disk):
2497 self.cfg.Update(instance, feedback_fn)
2498 changed.append((instance.name, idx, disk.size))
2499 return changed
2500
2579
2582 """Change the parameters of the cluster.
2583
2584 """
2585 HPATH = "cluster-modify"
2586 HTYPE = constants.HTYPE_CLUSTER
2587 _OP_PARAMS = [
2588 ("vg_name", None, ht.TMaybeString),
2589 ("enabled_hypervisors", None,
2590 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2591 ht.TNone)),
2592 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2593 ht.TNone)),
2594 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2595 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2596 ht.TNone)),
2597 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2598 ht.TNone)),
2599 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2600 ("uid_pool", None, ht.NoType),
2601 ("add_uids", None, ht.NoType),
2602 ("remove_uids", None, ht.NoType),
2603 ("maintain_node_health", None, ht.TMaybeBool),
2604 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2605 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2606 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2607 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2608 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2609 ("hidden_os", None, ht.TOr(ht.TListOf(\
2610 ht.TAnd(ht.TList,
2611 ht.TIsLength(2),
2612 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2613 ht.TNone)),
2614 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2615 ht.TAnd(ht.TList,
2616 ht.TIsLength(2),
2617 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2618 ht.TNone)),
2619 ]
2620 REQ_BGL = False
2621
2634
2642
2644 """Build hooks env.
2645
2646 """
2647 env = {
2648 "OP_TARGET": self.cfg.GetClusterName(),
2649 "NEW_VG_NAME": self.op.vg_name,
2650 }
2651 mn = self.cfg.GetMasterNode()
2652 return env, [mn], [mn]
2653
2655 """Check prerequisites.
2656
2657 This checks that the given parameters don't conflict with each other
2658 and that the given volume group is valid.
2659
2660 """
2661 if self.op.vg_name is not None and not self.op.vg_name:
2662 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2663 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2664 " instances exist", errors.ECODE_INVAL)
2665
2666 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2667 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2668 raise errors.OpPrereqError("Cannot disable drbd helper while"
2669 " drbd-based instances exist",
2670 errors.ECODE_INVAL)
2671
2672 node_list = self.acquired_locks[locking.LEVEL_NODE]
2673
2674
2675 if self.op.vg_name:
2676 vglist = self.rpc.call_vg_list(node_list)
2677 for node in node_list:
2678 msg = vglist[node].fail_msg
2679 if msg:
2680
2681 self.LogWarning("Error while gathering data on node %s"
2682 " (ignoring node): %s", node, msg)
2683 continue
2684 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2685 self.op.vg_name,
2686 constants.MIN_VG_SIZE)
2687 if vgstatus:
2688 raise errors.OpPrereqError("Error on node '%s': %s" %
2689 (node, vgstatus), errors.ECODE_ENVIRON)
2690
2691 if self.op.drbd_helper:
2692
2693 helpers = self.rpc.call_drbd_helper(node_list)
2694 for node in node_list:
2695 ninfo = self.cfg.GetNodeInfo(node)
2696 if ninfo.offline:
2697 self.LogInfo("Not checking drbd helper on offline node %s", node)
2698 continue
2699 msg = helpers[node].fail_msg
2700 if msg:
2701 raise errors.OpPrereqError("Error checking drbd helper on node"
2702 " '%s': %s" % (node, msg),
2703 errors.ECODE_ENVIRON)
2704 node_helper = helpers[node].payload
2705 if node_helper != self.op.drbd_helper:
2706 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2707 (node, node_helper), errors.ECODE_ENVIRON)
2708
2709 self.cluster = cluster = self.cfg.GetClusterInfo()
2710
2711 if self.op.beparams:
2712 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2713 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2714
2715 if self.op.nicparams:
2716 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2717 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2718 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2719 nic_errors = []
2720
2721
2722 for instance in self.cfg.GetAllInstancesInfo().values():
2723 for nic_idx, nic in enumerate(instance.nics):
2724 params_copy = copy.deepcopy(nic.nicparams)
2725 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2726
2727
2728 try:
2729 objects.NIC.CheckParameterSyntax(params_filled)
2730 except errors.ConfigurationError, err:
2731 nic_errors.append("Instance %s, nic/%d: %s" %
2732 (instance.name, nic_idx, err))
2733
2734
2735 target_mode = params_filled[constants.NIC_MODE]
2736 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2737 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2738 (instance.name, nic_idx))
2739 if nic_errors:
2740 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2741 "\n".join(nic_errors))
2742
2743
2744 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2745 if self.op.hvparams:
2746 for hv_name, hv_dict in self.op.hvparams.items():
2747 if hv_name not in self.new_hvparams:
2748 self.new_hvparams[hv_name] = hv_dict
2749 else:
2750 self.new_hvparams[hv_name].update(hv_dict)
2751
2752
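# os_hvp is merged the same way, one level deeper:
# OS name -> hypervisor name -> parameter dict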
2753 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2754 if self.op.os_hvp:
2755 for os_name, hvs in self.op.os_hvp.items():
2756 if os_name not in self.new_os_hvp:
2757 self.new_os_hvp[os_name] = hvs
2758 else:
2759 for hv_name, hv_dict in hvs.items():
2760 if hv_name not in self.new_os_hvp[os_name]:
2761 self.new_os_hvp[os_name][hv_name] = hv_dict
2762 else:
2763 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2764
2765
2766 self.new_osp = objects.FillDict(cluster.osparams, {})
2767 if self.op.osparams:
2768 for os_name, osp in self.op.osparams.items():
2769 if os_name not in self.new_osp:
2770 self.new_osp[os_name] = {}
2771
2772 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2773 use_none=True)
2774
2775 if not self.new_osp[os_name]:
2776
2777 del self.new_osp[os_name]
2778 else:
2779
2780 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2781 os_name, self.new_osp[os_name])
2782
2783
2784 if self.op.enabled_hypervisors is not None:
2785 self.hv_list = self.op.enabled_hypervisors
2786 for hv in self.hv_list:
2787
2788
2789
2790
2791
2792 if hv not in new_hvp:
2793 new_hvp[hv] = {}
2794 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2795 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2796 else:
2797 self.hv_list = cluster.enabled_hypervisors
2798
2799 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2800
2801 for hv_name, hv_params in self.new_hvparams.items():
2802 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2803 (self.op.enabled_hypervisors and
2804 hv_name in self.op.enabled_hypervisors)):
2805
2806 hv_class = hypervisor.GetHypervisor(hv_name)
2807 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2808 hv_class.CheckParameterSyntax(hv_params)
2809 _CheckHVParams(self, node_list, hv_name, hv_params)
2810
2811 if self.op.os_hvp:
2812
2813
2814 for os_name, os_hvp in self.new_os_hvp.items():
2815 for hv_name, hv_params in os_hvp.items():
2816 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2817
2818 cluster_defaults = self.new_hvparams.get(hv_name, {})
2819 new_osp = objects.FillDict(cluster_defaults, hv_params)
2820 hv_class = hypervisor.GetHypervisor(hv_name)
2821 hv_class.CheckParameterSyntax(new_osp)
2822 _CheckHVParams(self, node_list, hv_name, new_osp)
2823
2824 if self.op.default_iallocator:
2825 alloc_script = utils.FindFile(self.op.default_iallocator,
2826 constants.IALLOCATOR_SEARCH_PATH,
2827 os.path.isfile)
2828 if alloc_script is None:
2829 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2830 " specified" % self.op.default_iallocator,
2831 errors.ECODE_INVAL)
2832
2833 - def Exec(self, feedback_fn):
2834 """Change the parameters of the cluster.
2835
2836 """
2837 if self.op.vg_name is not None:
2838 new_volume = self.op.vg_name
2839 if not new_volume:
2840 new_volume = None
2841 if new_volume != self.cfg.GetVGName():
2842 self.cfg.SetVGName(new_volume)
2843 else:
2844 feedback_fn("Cluster LVM configuration already in desired"
2845 " state, not changing")
2846 if self.op.drbd_helper is not None:
2847 new_helper = self.op.drbd_helper
2848 if not new_helper:
2849 new_helper = None
2850 if new_helper != self.cfg.GetDRBDHelper():
2851 self.cfg.SetDRBDHelper(new_helper)
2852 else:
2853 feedback_fn("Cluster DRBD helper already in desired state,"
2854 " not changing")
2855 if self.op.hvparams:
2856 self.cluster.hvparams = self.new_hvparams
2857 if self.op.os_hvp:
2858 self.cluster.os_hvp = self.new_os_hvp
2859 if self.op.enabled_hypervisors is not None:
2860 self.cluster.hvparams = self.new_hvparams
2861 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2862 if self.op.beparams:
2863 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2864 if self.op.nicparams:
2865 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2866 if self.op.osparams:
2867 self.cluster.osparams = self.new_osp
2868
2869 if self.op.candidate_pool_size is not None:
2870 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2871
2872 _AdjustCandidatePool(self, [])
2873
2874 if self.op.maintain_node_health is not None:
2875 self.cluster.maintain_node_health = self.op.maintain_node_health
2876
2877 if self.op.prealloc_wipe_disks is not None:
2878 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2879
2880 if self.op.add_uids is not None:
2881 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2882
2883 if self.op.remove_uids is not None:
2884 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2885
2886 if self.op.uid_pool is not None:
2887 self.cluster.uid_pool = self.op.uid_pool
2888
2889 if self.op.default_iallocator is not None:
2890 self.cluster.default_iallocator = self.op.default_iallocator
2891
2892 if self.op.reserved_lvs is not None:
2893 self.cluster.reserved_lvs = self.op.reserved_lvs
2894
2895 def helper_os(aname, mods, desc):
2896 desc += " OS list"
2897 lst = getattr(self.cluster, aname)
2898 for key, val in mods:
2899 if key == constants.DDM_ADD:
2900 if val in lst:
2901 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2902 else:
2903 lst.append(val)
2904 elif key == constants.DDM_REMOVE:
2905 if val in lst:
2906 lst.remove(val)
2907 else:
2908 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2909 else:
2910 raise errors.ProgrammerError("Invalid modification '%s'" % key)
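# 'mods' is a list of (DDM_ADD|DDM_REMOVE, os_name) pairs from the opcode,
# e.g. [(constants.DDM_ADD, "example-os")] (OS name illustrative)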
2911
2912 if self.op.hidden_os:
2913 helper_os("hidden_os", self.op.hidden_os, "hidden")
2914
2915 if self.op.blacklisted_os:
2916 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2917
2918 self.cfg.Update(self.cluster, feedback_fn)
2919
2922 """Helper for uploading a file and showing warnings.
2923
2924 """
2925 if os.path.exists(fname):
2926 result = lu.rpc.call_upload_file(nodes, fname)
2927 for to_node, to_result in result.items():
2928 msg = to_result.fail_msg
2929 if msg:
2930 msg = ("Copy of file %s to node %s failed: %s" %
2931 (fname, to_node, msg))
2932 lu.proc.LogWarning(msg)
2933
2936 """Distribute additional files which are part of the cluster configuration.
2937
2938 ConfigWriter takes care of distributing the config and ssconf files, but
2939 there are more files which should be distributed to all nodes. This function
2940 makes sure those are copied.
2941
2942 @param lu: calling logical unit
2943 @param additional_nodes: list of nodes not in the config to distribute to
2944 @type additional_vm: boolean
2945 @param additional_vm: whether the additional nodes are vm-capable or not
2946
2947 """
2948
2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2950 dist_nodes = lu.cfg.GetOnlineNodeList()
2951 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
2952 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
2953 if additional_nodes is not None:
2954 dist_nodes.extend(additional_nodes)
2955 if additional_vm:
2956 vm_nodes.extend(additional_nodes)
2957 if myself.name in dist_nodes:
2958 dist_nodes.remove(myself.name)
2959 if myself.name in vm_nodes:
2960 vm_nodes.remove(myself.name)
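# the master already holds current copies, so it is excluded; plain files
# go to all online nodes, hypervisor files only to the vm-capable ones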
2961
2962
2963 dist_files = set([constants.ETC_HOSTS,
2964 constants.SSH_KNOWN_HOSTS_FILE,
2965 constants.RAPI_CERT_FILE,
2966 constants.RAPI_USERS_FILE,
2967 constants.CONFD_HMAC_KEY,
2968 constants.CLUSTER_DOMAIN_SECRET_FILE,
2969 ])
2970
2971 vm_files = set()
2972 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2973 for hv_name in enabled_hypervisors:
2974 hv_class = hypervisor.GetHypervisor(hv_name)
2975 vm_files.update(hv_class.GetAncillaryFiles())
2976
2977
2978 for fname in dist_files:
2979 _UploadHelper(lu, dist_nodes, fname)
2980 for fname in vm_files:
2981 _UploadHelper(lu, vm_nodes, fname)
2982
2985 """Force the redistribution of cluster configuration.
2986
2987 This is a very simple LU.
2988
2989 """
2990 REQ_BGL = False
2991
2997
2998 - def Exec(self, feedback_fn):
3004
3005
3006 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3007 """Sleep and poll for an instance's disk to sync.
3008
3009 """
3010 if not instance.disks or disks is not None and not disks:
3011 return True
3012
3013 disks = _ExpandCheckDisks(instance, disks)
3014
3015 if not oneshot:
3016 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3017
3018 node = instance.primary_node
3019
3020 for dev in disks:
3021 lu.cfg.SetDiskID(dev, node)
3022
3023
3024
3025 retries = 0
3026 degr_retries = 10
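# poll the primary node until all mirrors report in sync; tolerate up to
# 10 consecutive RPC failures, and wait up to 10 extra rounds when disks
# are no longer syncing but still report degraded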
3027 while True:
3028 max_time = 0
3029 done = True
3030 cumul_degraded = False
3031 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3032 msg = rstats.fail_msg
3033 if msg:
3034 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3035 retries += 1
3036 if retries >= 10:
3037 raise errors.RemoteError("Can't contact node %s for mirror data,"
3038 " aborting." % node)
3039 time.sleep(6)
3040 continue
3041 rstats = rstats.payload
3042 retries = 0
3043 for i, mstat in enumerate(rstats):
3044 if mstat is None:
3045 lu.LogWarning("Can't compute data for node %s/%s",
3046 node, disks[i].iv_name)
3047 continue
3048
3049 cumul_degraded = (cumul_degraded or
3050 (mstat.is_degraded and mstat.sync_percent is None))
3051 if mstat.sync_percent is not None:
3052 done = False
3053 if mstat.estimated_time is not None:
3054 rem_time = ("%s remaining (estimated)" %
3055 utils.FormatSeconds(mstat.estimated_time))
3056 max_time = mstat.estimated_time
3057 else:
3058 rem_time = "no time estimate"
3059 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3060 (disks[i].iv_name, mstat.sync_percent, rem_time))
3061
3062
3063
3064
3065 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3066 logging.info("Degraded disks found, %d retries left", degr_retries)
3067 degr_retries -= 1
3068 time.sleep(1)
3069 continue
3070
3071 if done or oneshot:
3072 break
3073
3074 time.sleep(min(60, max_time))
3075
3076 if done:
3077 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3078 return not cumul_degraded
3079
3082 """Check that mirrors are not degraded.
3083
3084 The ldisk parameter, if True, will change the test from the
3085 is_degraded attribute (which represents overall non-ok status for
3086 the device(s)) to the ldisk (representing the local storage status).
3087
3088 """
3089 lu.cfg.SetDiskID(dev, node)
3090
3091 result = True
3092
3093 if on_primary or dev.AssembleOnSecondary():
3094 rstats = lu.rpc.call_blockdev_find(node, dev)
3095 msg = rstats.fail_msg
3096 if msg:
3097 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3098 result = False
3099 elif not rstats.payload:
3100 lu.LogWarning("Can't find disk on node %s", node)
3101 result = False
3102 else:
3103 if ldisk:
3104 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3105 else:
3106 result = result and not rstats.payload.is_degraded
3107
3108 if dev.children:
3109 for child in dev.children:
3110 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3111
3112 return result
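# Usage sketch (variable names illustrative): check both halves of a DRBD
# pair, using ldisk on the secondary to look at local storage only:
#   ok = (_CheckDiskConsistency(lu, dev, pri_node, True) and
#         _CheckDiskConsistency(lu, dev, sec_node, False, ldisk=True))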
3113
3116 """Logical unit for OS diagnose/query.
3117
3118 """
3119 _OP_PARAMS = [
3120 _POutputFields,
3121 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3122 ]
3123 REQ_BGL = False
3124 _HID = "hidden"
3125 _BLK = "blacklisted"
3126 _VLD = "valid"
3127 _FIELDS_STATIC = utils.FieldSet()
3128 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3129 "parameters", "api_versions", _HID, _BLK)
3130
3139
3141
3142
3143
3144 self.needed_locks = {}
3145
3146
3147
3148 @staticmethod
3150 """Remaps a per-node return list into an a per-os per-node dictionary
3151
3152 @param rlist: a map with node names as keys and OS objects as values
3153
3154 @rtype: dict
3155 @return: a dictionary with osnames as keys and as value another
3156 map, with nodes as keys and tuples of (path, status, diagnose,
3157 variants, parameters, api_versions) as values, e.g.::
3158
3159 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3160 (/srv/..., False, "invalid api")],
3161 "node2": [(/srv/..., True, "", [], [])]}
3162 }
3163
3164 """
3165 all_os = {}
3166
3167
3168
3169 good_nodes = [node_name for node_name in rlist
3170 if not rlist[node_name].fail_msg]
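# every OS gets an (initially empty) entry for each node that answered the
# RPC; nodes whose call failed are skipped entirely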
3171 for node_name, nr in rlist.items():
3172 if nr.fail_msg or not nr.payload:
3173 continue
3174 for (name, path, status, diagnose, variants,
3175 params, api_versions) in nr.payload:
3176 if name not in all_os:
3177
3178
3179 all_os[name] = {}
3180 for nname in good_nodes:
3181 all_os[name][nname] = []
3182
3183 params = [tuple(v) for v in params]
3184 all_os[name][node_name].append((path, status, diagnose,
3185 variants, params, api_versions))
3186 return all_os
3187
3188 - def Exec(self, feedback_fn):
3189 """Compute the list of OSes.
3190
3191 """
3192 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3193 node_data = self.rpc.call_os_diagnose(valid_nodes)
3194 pol = self._DiagnoseByOS(node_data)
3195 output = []
3196 cluster = self.cfg.GetClusterInfo()
3197
3198 for os_name in utils.NiceSort(pol.keys()):
3199 os_data = pol[os_name]
3200 row = []
3201 valid = True
3202 (variants, params, api_versions) = null_state = (set(), set(), set())
3203 for idx, osl in enumerate(os_data.values()):
3204 valid = bool(valid and osl and osl[0][1])
3205 if not valid:
3206 (variants, params, api_versions) = null_state
3207 break
3208 node_variants, node_params, node_api = osl[0][3:6]
3209 if idx == 0:
3210 variants = set(node_variants)
3211 params = set(node_params)
3212 api_versions = set(node_api)
3213 else:
3214 variants.intersection_update(node_variants)
3215 params.intersection_update(node_params)
3216 api_versions.intersection_update(node_api)
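# a variant/parameter/API version is only reported for the OS if every
# node providing it agrees, hence the set intersections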
3217
3218 is_hid = os_name in cluster.hidden_os
3219 is_blk = os_name in cluster.blacklisted_os
3220 if ((self._HID not in self.op.output_fields and is_hid) or
3221 (self._BLK not in self.op.output_fields and is_blk) or
3222 (self._VLD not in self.op.output_fields and not valid)):
3223 continue
3224
3225 for field in self.op.output_fields:
3226 if field == "name":
3227 val = os_name
3228 elif field == self._VLD:
3229 val = valid
3230 elif field == "node_status":
3231
3232 val = {}
3233 for node_name, nos_list in os_data.items():
3234 val[node_name] = nos_list
3235 elif field == "variants":
3236 val = utils.NiceSort(list(variants))
3237 elif field == "parameters":
3238 val = list(params)
3239 elif field == "api_versions":
3240 val = list(api_versions)
3241 elif field == self._HID:
3242 val = is_hid
3243 elif field == self._BLK:
3244 val = is_blk
3245 else:
3246 raise errors.ParameterError(field)
3247 row.append(val)
3248 output.append(row)
3249
3250 return output
3251
3254 """Logical unit for removing a node.
3255
3256 """
3257 HPATH = "node-remove"
3258 HTYPE = constants.HTYPE_NODE
3259 _OP_PARAMS = [
3260 _PNodeName,
3261 ]
3262
3264 """Build hooks env.
3265
3266 This doesn't run on the target node in the pre phase as a failed
3267 node would then be impossible to remove.
3268
3269 """
3270 env = {
3271 "OP_TARGET": self.op.node_name,
3272 "NODE_NAME": self.op.node_name,
3273 }
3274 all_nodes = self.cfg.GetNodeList()
3275 try:
3276 all_nodes.remove(self.op.node_name)
3277 except ValueError:
3278 logging.warning("Node %s, which is about to be removed, was not"
3279 " found in the list of all nodes", self.op.node_name)
3280 return env, all_nodes, all_nodes
3281
3283 """Check prerequisites.
3284
3285 This checks:
3286 - the node exists in the configuration
3287 - it does not have primary or secondary instances
3288 - it's not the master
3289
3290 Any errors are signaled by raising errors.OpPrereqError.
3291
3292 """
3293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3294 node = self.cfg.GetNodeInfo(self.op.node_name)
3295 assert node is not None
3296
3297 instance_list = self.cfg.GetInstanceList()
3298
3299 masternode = self.cfg.GetMasterNode()
3300 if node.name == masternode:
3301 raise errors.OpPrereqError("Node is the master node,"
3302 " you need to failover first.",
3303 errors.ECODE_INVAL)
3304
3305 for instance_name in instance_list:
3306 instance = self.cfg.GetInstanceInfo(instance_name)
3307 if node.name in instance.all_nodes:
3308 raise errors.OpPrereqError("Instance %s is still running on the node,"
3309 " please remove first." % instance_name,
3310 errors.ECODE_INVAL)
3311 self.op.node_name = node.name
3312 self.node = node
3313
3314 - def Exec(self, feedback_fn):
3350
3353 """Logical unit for querying nodes.
3354
3355 """
3356
3357 _OP_PARAMS = [
3358 _POutputFields,
3359 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3360 ("use_locking", False, ht.TBool),
3361 ]
3362 REQ_BGL = False
3363
3364 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3365 "master_candidate", "offline", "drained",
3366 "master_capable", "vm_capable"]
3367
3368 _FIELDS_DYNAMIC = utils.FieldSet(
3369 "dtotal", "dfree",
3370 "mtotal", "mnode", "mfree",
3371 "bootid",
3372 "ctotal", "cnodes", "csockets",
3373 )
3374
3375 _FIELDS_STATIC = utils.FieldSet(*[
3376 "pinst_cnt", "sinst_cnt",
3377 "pinst_list", "sinst_list",
3378 "pip", "sip", "tags",
3379 "master",
3380 "role"] + _SIMPLE_FIELDS
3381 )
3382
3387
3389 self.needed_locks = {}
3390 self.share_locks[locking.LEVEL_NODE] = 1
3391
3392 if self.op.names:
3393 self.wanted = _GetWantedNodes(self, self.op.names)
3394 else:
3395 self.wanted = locking.ALL_SET
3396
3397 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3398 self.do_locking = self.do_node_query and self.op.use_locking
3399 if self.do_locking:
3400
3401 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3402
3403 - def Exec(self, feedback_fn):
3404 """Computes the list of nodes and their attributes.
3405
3406 """
3407 all_info = self.cfg.GetAllNodesInfo()
3408 if self.do_locking:
3409 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3410 elif self.wanted != locking.ALL_SET:
3411 nodenames = self.wanted
3412 missing = set(nodenames).difference(all_info.keys())
3413 if missing:
3414 raise errors.OpExecError(
3415 "Some nodes were removed before retrieving their data: %s" % missing)
3416 else:
3417 nodenames = all_info.keys()
3418
3419 nodenames = utils.NiceSort(nodenames)
3420 nodelist = [all_info[name] for name in nodenames]
3421
3422
3423
3424 if self.do_node_query:
3425 live_data = {}
3426 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3427 self.cfg.GetHypervisorType())
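# convert the raw payload into typed fields; utils.TryConvert returns the
# value unchanged if the conversion fails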
3428 for name in nodenames:
3429 nodeinfo = node_data[name]
3430 if not nodeinfo.fail_msg and nodeinfo.payload:
3431 nodeinfo = nodeinfo.payload
3432 fn = utils.TryConvert
3433 live_data[name] = {
3434 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3435 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3436 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3437 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3438 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3439 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3440 "bootid": nodeinfo.get('bootid', None),
3441 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3442 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3443 }
3444 else:
3445 live_data[name] = {}
3446 else:
3447 live_data = dict.fromkeys(nodenames, {})
3448
3449 node_to_primary = dict([(name, set()) for name in nodenames])
3450 node_to_secondary = dict([(name, set()) for name in nodenames])
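# these mappings are only filled in if primary/secondary instance fields
# were actually requested; every wanted node starts with an empty set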
3451
3452 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3453 "sinst_cnt", "sinst_list"))
3454 if inst_fields & frozenset(self.op.output_fields):
3455 inst_data = self.cfg.GetAllInstancesInfo()
3456
3457 for inst in inst_data.values():
3458 if inst.primary_node in node_to_primary:
3459 node_to_primary[inst.primary_node].add(inst.name)
3460 for secnode in inst.secondary_nodes:
3461 if secnode in node_to_secondary:
3462 node_to_secondary[secnode].add(inst.name)
3463
3464 master_node = self.cfg.GetMasterNode()
3465
3466
3467
3468 output = []
3469 for node in nodelist:
3470 node_output = []
3471 for field in self.op.output_fields:
3472 if field in self._SIMPLE_FIELDS:
3473 val = getattr(node, field)
3474 elif field == "pinst_list":
3475 val = list(node_to_primary[node.name])
3476 elif field == "sinst_list":
3477 val = list(node_to_secondary[node.name])
3478 elif field == "pinst_cnt":
3479 val = len(node_to_primary[node.name])
3480 elif field == "sinst_cnt":
3481 val = len(node_to_secondary[node.name])
3482 elif field == "pip":
3483 val = node.primary_ip
3484 elif field == "sip":
3485 val = node.secondary_ip
3486 elif field == "tags":
3487 val = list(node.GetTags())
3488 elif field == "master":
3489 val = node.name == master_node
3490 elif self._FIELDS_DYNAMIC.Matches(field):
3491 val = live_data[node.name].get(field, None)
3492 elif field == "role":
3493 if node.name == master_node:
3494 val = "M"
3495 elif node.master_candidate:
3496 val = "C"
3497 elif node.drained:
3498 val = "D"
3499 elif node.offline:
3500 val = "O"
3501 else:
3502 val = "R"
3503 else:
3504 raise errors.ParameterError(field)
3505 node_output.append(val)
3506 output.append(node_output)
3507
3508 return output
3509
3512 """Logical unit for getting volumes on node(s).
3513
3514 """
3515 _OP_PARAMS = [
3516 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3517 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3518 ]
3519 REQ_BGL = False
3520 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3521 _FIELDS_STATIC = utils.FieldSet("node")
3522
3527
3536
3537 - def Exec(self, feedback_fn):
3538 """Computes the list of nodes and their attributes.
3539
3540 """
3541 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3542 volumes = self.rpc.call_node_volumes(nodenames)
3543
3544 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3545 in self.cfg.GetInstanceList()]
3546
3547 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3548
3549 output = []
3550 for node in nodenames:
3551 nresult = volumes[node]
3552 if nresult.offline:
3553 continue
3554 msg = nresult.fail_msg
3555 if msg:
3556 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3557 continue
3558
3559 node_vols = nresult.payload[:]
3560 node_vols.sort(key=lambda vol: vol['dev'])
3561
3562 for vol in node_vols:
3563 node_output = []
3564 for field in self.op.output_fields:
3565 if field == "node":
3566 val = node
3567 elif field == "phys":
3568 val = vol['dev']
3569 elif field == "vg":
3570 val = vol['vg']
3571 elif field == "name":
3572 val = vol['name']
3573 elif field == "size":
3574 val = int(float(vol['size']))
3575 elif field == "instance":
3576 for inst in ilist:
3577 if node not in lv_by_node[inst]:
3578 continue
3579 if vol['name'] in lv_by_node[inst][node]:
3580 val = inst.name
3581 break
3582 else:
3583 val = '-'
3584 else:
3585 raise errors.ParameterError(field)
3586 node_output.append(str(val))
3587
3588 output.append(node_output)
3589
3590 return output
3591
3594 """Logical unit for getting information on storage units on node(s).
3595
3596 """
3597 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3598 _OP_PARAMS = [
3599 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3600 ("storage_type", ht.NoDefault, _CheckStorageType),
3601 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3602 ("name", None, ht.TMaybeString),
3603 ]
3604 REQ_BGL = False
3605
3610
3620
3621 - def Exec(self, feedback_fn):
3622 """Computes the list of nodes and their attributes.
3623
3624 """
3625 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3626
3627
3628 if constants.SF_NAME in self.op.output_fields:
3629 fields = self.op.output_fields[:]
3630 else:
3631 fields = [constants.SF_NAME] + self.op.output_fields
3632
3633
3634 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3635 while extra in fields:
3636 fields.remove(extra)
3637
3638 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3639 name_idx = field_idx[constants.SF_NAME]
3640
3641 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3642 data = self.rpc.call_storage_list(self.nodes,
3643 self.op.storage_type, st_args,
3644 self.op.name, fields)
3645
3646 result = []
3647
3648 for node in utils.NiceSort(self.nodes):
3649 nresult = data[node]
3650 if nresult.offline:
3651 continue
3652
3653 msg = nresult.fail_msg
3654 if msg:
3655 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3656 continue
3657
3658 rows = dict([(row[name_idx], row) for row in nresult.payload])
3659
3660 for name in utils.NiceSort(rows.keys()):
3661 row = rows[name]
3662
3663 out = []
3664
3665 for field in self.op.output_fields:
3666 if field == constants.SF_NODE:
3667 val = node
3668 elif field == constants.SF_TYPE:
3669 val = self.op.storage_type
3670 elif field in field_idx:
3671 val = row[field_idx[field]]
3672 else:
3673 raise errors.ParameterError(field)
3674
3675 out.append(val)
3676
3677 result.append(out)
3678
3679 return result
3680
3683 """Logical unit for modifying a storage volume on a node.
3684
3685 """
3686 _OP_PARAMS = [
3687 _PNodeName,
3688 ("storage_type", ht.NoDefault, _CheckStorageType),
3689 ("name", ht.NoDefault, ht.TNonEmptyString),
3690 ("changes", ht.NoDefault, ht.TDict),
3691 ]
3692 REQ_BGL = False
3693
3712
3717
3718 - def Exec(self, feedback_fn):
3719 """Computes the list of nodes and their attributes.
3720
3721 """
3722 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3723 result = self.rpc.call_storage_modify(self.op.node_name,
3724 self.op.storage_type, st_args,
3725 self.op.name, self.op.changes)
3726 result.Raise("Failed to modify storage unit '%s' on %s" %
3727 (self.op.name, self.op.node_name))
3728
3731 """Logical unit for adding node to the cluster.
3732
3733 """
3734 HPATH = "node-add"
3735 HTYPE = constants.HTYPE_NODE
3736 _OP_PARAMS = [
3737 _PNodeName,
3738 ("primary_ip", None, ht.NoType),
3739 ("secondary_ip", None, ht.TMaybeString),
3740 ("readd", False, ht.TBool),
3741 ("group", None, ht.TMaybeString),
3742 ("master_capable", None, ht.TMaybeBool),
3743 ("vm_capable", None, ht.TMaybeBool),
3744 ]
3745 _NFLAGS = ["master_capable", "vm_capable"]
3746
3756
3758 """Build hooks env.
3759
3760 This will run on all nodes before, and on all nodes + the new node after.
3761
3762 """
3763 env = {
3764 "OP_TARGET": self.op.node_name,
3765 "NODE_NAME": self.op.node_name,
3766 "NODE_PIP": self.op.primary_ip,
3767 "NODE_SIP": self.op.secondary_ip,
3768 "MASTER_CAPABLE": str(self.op.master_capable),
3769 "VM_CAPABLE": str(self.op.vm_capable),
3770 }
3771 nodes_0 = self.cfg.GetNodeList()
3772 nodes_1 = nodes_0 + [self.op.node_name, ]
3773 return env, nodes_0, nodes_1
3774
3776 """Check prerequisites.
3777
3778 This checks:
3779 - the new node is not already in the config
3780 - it is resolvable
3781 - its parameters (single/dual homed) match the cluster
3782
3783 Any errors are signaled by raising errors.OpPrereqError.
3784
3785 """
3786 cfg = self.cfg
3787 hostname = self.hostname
3788 node = hostname.name
3789 primary_ip = self.op.primary_ip = hostname.ip
3790 if self.op.secondary_ip is None:
3791 if self.primary_ip_family == netutils.IP6Address.family:
3792 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3793 " IPv4 address must be given as secondary",
3794 errors.ECODE_INVAL)
3795 self.op.secondary_ip = primary_ip
3796
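# at this point the secondary IP is set (defaulting to the primary IP for
# single-homed nodes) and must be a valid IPv4 address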
3797 secondary_ip = self.op.secondary_ip
3798 if not netutils.IP4Address.IsValid(secondary_ip):
3799 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3800 " address" % secondary_ip, errors.ECODE_INVAL)
3801
3802 node_list = cfg.GetNodeList()
3803 if not self.op.readd and node in node_list:
3804 raise errors.OpPrereqError("Node %s is already in the configuration" %
3805 node, errors.ECODE_EXISTS)
3806 elif self.op.readd and node not in node_list:
3807 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3808 errors.ECODE_NOENT)
3809
3810 self.changed_primary_ip = False
3811
3812 for existing_node_name in node_list:
3813 existing_node = cfg.GetNodeInfo(existing_node_name)
3814
3815 if self.op.readd and node == existing_node_name:
3816 if existing_node.secondary_ip != secondary_ip:
3817 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3818 " address configuration as before",
3819 errors.ECODE_INVAL)
3820 if existing_node.primary_ip != primary_ip:
3821 self.changed_primary_ip = True
3822
3823 continue
3824
3825 if (existing_node.primary_ip == primary_ip or
3826 existing_node.secondary_ip == primary_ip or
3827 existing_node.primary_ip == secondary_ip or
3828 existing_node.secondary_ip == secondary_ip):
3829 raise errors.OpPrereqError("New node ip address(es) conflict with"
3830 " existing node %s" % existing_node.name,
3831 errors.ECODE_NOTUNIQUE)
3832
3833
3834
3835 if self.op.readd:
3836 old_node = self.cfg.GetNodeInfo(node)
3837 assert old_node is not None, "Can't retrieve locked node %s" % node
3838 for attr in self._NFLAGS:
3839 if getattr(self.op, attr) is None:
3840 setattr(self.op, attr, getattr(old_node, attr))
3841 else:
3842 for attr in self._NFLAGS:
3843 if getattr(self.op, attr) is None:
3844 setattr(self.op, attr, True)
3845
3846 if self.op.readd and not self.op.vm_capable:
3847 pri, sec = cfg.GetNodeInstances(node)
3848 if pri or sec:
3849 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
3850 " flag set to false, but it already holds"
3851 " instances" % node,
3852 errors.ECODE_STATE)
3853
3854
3855
3856 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3857 master_singlehomed = myself.secondary_ip == myself.primary_ip
3858 newbie_singlehomed = secondary_ip == primary_ip
3859 if master_singlehomed != newbie_singlehomed:
3860 if master_singlehomed:
3861 raise errors.OpPrereqError("The master has no secondary ip but the"
3862 " new node has one",
3863 errors.ECODE_INVAL)
3864 else:
3865 raise errors.OpPrereqError("The master has a secondary ip but the"
3866 " new node doesn't have one",
3867 errors.ECODE_INVAL)
3868
3869
3870 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3871 raise errors.OpPrereqError("Node not reachable by ping",
3872 errors.ECODE_ENVIRON)
3873
3874 if not newbie_singlehomed:
3875
3876 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3877 source=myself.secondary_ip):
3878 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3879 " based ping to node daemon port",
3880 errors.ECODE_ENVIRON)
3881
3882 if self.op.readd:
3883 exceptions = [node]
3884 else:
3885 exceptions = []
3886
3887 if self.op.master_capable:
3888 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3889 else:
3890 self.master_candidate = False
3891
3892 if self.op.readd:
3893 self.new_node = old_node
3894 else:
3895 node_group = cfg.LookupNodeGroup(self.op.group)
3896 self.new_node = objects.Node(name=node,
3897 primary_ip=primary_ip,
3898 secondary_ip=secondary_ip,
3899 master_candidate=self.master_candidate,
3900 offline=False, drained=False,
3901 group=node_group)
3902
3903 - def Exec(self, feedback_fn):
3904 """Adds the new node to the cluster.
3905
3906 """
3907 new_node = self.new_node
3908 node = new_node.name
3909
3910
3911
3912
3913
3914 if self.op.readd:
3915 new_node.drained = new_node.offline = False
3916 self.LogInfo("Readding a node, the offline/drained flags were reset")
3917
3918 new_node.master_candidate = self.master_candidate
3919 if self.changed_primary_ip:
3920 new_node.primary_ip = self.op.primary_ip
3921
3922
3923 for attr in self._NFLAGS:
3924 setattr(new_node, attr, getattr(self.op, attr))
3925
3926
3927 if new_node.master_candidate:
3928 self.LogInfo("Node will be a master candidate")
3929
3930
3931 result = self.rpc.call_version([node])[node]
3932 result.Raise("Can't get version information from node %s" % node)
3933 if constants.PROTOCOL_VERSION == result.payload:
3934 logging.info("Communication to node %s fine, sw version %s matches",
3935 node, result.payload)
3936 else:
3937 raise errors.OpExecError("Version mismatch: master version %s,"
3938 " node version %s" %
3939 (constants.PROTOCOL_VERSION, result.payload))
3940
3941
3942 if self.cfg.GetClusterInfo().modify_etc_hosts:
3943 master_node = self.cfg.GetMasterNode()
3944 result = self.rpc.call_etc_hosts_modify(master_node,
3945 constants.ETC_HOSTS_ADD,
3946 self.hostname.name,
3947 self.hostname.ip)
3948 result.Raise("Can't update hosts file with new host data")
3949
3950 if new_node.secondary_ip != new_node.primary_ip:
3951 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
3952 False)
3953
3954 node_verify_list = [self.cfg.GetMasterNode()]
3955 node_verify_param = {
3956 constants.NV_NODELIST: [node],
3957
3958 }
3959
3960 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3961 self.cfg.GetClusterName())
3962 for verifier in node_verify_list:
3963 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3964 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3965 if nl_payload:
3966 for failed in nl_payload:
3967 feedback_fn("ssh/hostname verification failed"
3968 " (checking from %s): %s" %
3969 (verifier, nl_payload[failed]))
3970 raise errors.OpExecError("ssh/hostname verification failed.")
3971
3972 if self.op.readd:
3973 _RedistributeAncillaryFiles(self)
3974 self.context.ReaddNode(new_node)
3975
3976 self.cfg.Update(new_node, feedback_fn)
3977
3978 if not new_node.master_candidate:
3979 result = self.rpc.call_node_demote_from_mc(new_node.name)
3980 msg = result.fail_msg
3981 if msg:
3982 self.LogWarning("Node failed to demote itself from master"
3983 " candidate status: %s" % msg)
3984 else:
3985 _RedistributeAncillaryFiles(self, additional_nodes=[node],
3986 additional_vm=self.op.vm_capable)
3987 self.context.AddNode(new_node, self.proc.GetECId())
3988
3991 """Modifies the parameters of a node.
3992
3993 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
3994 to the node role (as _ROLE_*)
3995 @cvar _R2F: a dictionary from node role to tuples of flags
3996 @cvar _FLAGS: a list of attribute names corresponding to the flags
3997
3998 """
3999 HPATH = "node-modify"
4000 HTYPE = constants.HTYPE_NODE
4001 _OP_PARAMS = [
4002 _PNodeName,
4003 ("master_candidate", None, ht.TMaybeBool),
4004 ("offline", None, ht.TMaybeBool),
4005 ("drained", None, ht.TMaybeBool),
4006 ("auto_promote", False, ht.TBool),
4007 ("master_capable", None, ht.TMaybeBool),
4008 ("vm_capable", None, ht.TMaybeBool),
4009 ("secondary_ip", None, ht.TMaybeString),
4010 _PForce,
4011 ]
4012 REQ_BGL = False
4013 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4014 _F2R = {
4015 (True, False, False): _ROLE_CANDIDATE,
4016 (False, True, False): _ROLE_DRAINED,
4017 (False, False, True): _ROLE_OFFLINE,
4018 (False, False, False): _ROLE_REGULAR,
4019 }
4020 _R2F = dict((v, k) for k, v in _F2R.items())
4021 _FLAGS = ["master_candidate", "drained", "offline"]
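# a node is in exactly one of four roles, derived from its
# (master_candidate, drained, offline) flag tuple via _F2R above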
4022
4024 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4025 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4026 self.op.master_capable, self.op.vm_capable,
4027 self.op.secondary_ip]
4028 if all_mods.count(None) == len(all_mods):
4029 raise errors.OpPrereqError("Please pass at least one modification",
4030 errors.ECODE_INVAL)
4031 if all_mods.count(True) > 1:
4032 raise errors.OpPrereqError("Can't set the node into more than one"
4033 " state at the same time",
4034 errors.ECODE_INVAL)
4035
4036
4037 self.might_demote = (self.op.master_candidate == False or
4038 self.op.offline == True or
4039 self.op.drained == True or
4040 self.op.master_capable == False)
4041
4042 if self.op.secondary_ip:
4043 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4044 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4045 " address" % self.op.secondary_ip,
4046 errors.ECODE_INVAL)
4047
4048 self.lock_all = self.op.auto_promote and self.might_demote
4049 self.lock_instances = self.op.secondary_ip is not None
4050
4059
4079
4081 """Build hooks env.
4082
4083 This runs on the master node.
4084
4085 """
4086 env = {
4087 "OP_TARGET": self.op.node_name,
4088 "MASTER_CANDIDATE": str(self.op.master_candidate),
4089 "OFFLINE": str(self.op.offline),
4090 "DRAINED": str(self.op.drained),
4091 "MASTER_CAPABLE": str(self.op.master_capable),
4092 "VM_CAPABLE": str(self.op.vm_capable),
4093 }
4094 nl = [self.cfg.GetMasterNode(),
4095 self.op.node_name]
4096 return env, nl, nl
4097
4099 """Check prerequisites.
4100
4101 This only checks the instance list against the existing names.
4102
4103 """
4104 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4105
4106 if (self.op.master_candidate is not None or
4107 self.op.drained is not None or
4108 self.op.offline is not None):
4109
4110 if self.op.node_name == self.cfg.GetMasterNode():
4111 raise errors.OpPrereqError("The master role can be changed"
4112 " only via master-failover",
4113 errors.ECODE_INVAL)
4114
4115 if self.op.master_candidate and not node.master_capable:
4116 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4117 " it a master candidate" % node.name,
4118 errors.ECODE_STATE)
4119
4120 if self.op.vm_capable == False:
4121 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4122 if ipri or isec:
4123 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4124 " the vm_capable flag" % node.name,
4125 errors.ECODE_STATE)
4126
4127 if node.master_candidate and self.might_demote and not self.lock_all:
4128 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4129
4130
4131 (mc_remaining, mc_should, _) = \
4132 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4133 if mc_remaining < mc_should:
4134 raise errors.OpPrereqError("Not enough master candidates, please"
4135 " pass auto_promote to allow promotion",
4136 errors.ECODE_STATE)
4137
4138 self.old_flags = old_flags = (node.master_candidate,
4139 node.drained, node.offline)
4140 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4141 self.old_role = old_role = self._F2R[old_flags]
4142
4143
4144 for attr in self._FLAGS:
4145 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4146 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4147 setattr(self.op, attr, None)
4148
4149
4150
4151
4152
4153 if (self.op.drained == False or self.op.offline == False or
4154 (self.op.master_capable and not node.master_capable)):
4155 if _DecideSelfPromotion(self):
4156 self.op.master_candidate = True
4157 self.LogInfo("Auto-promoting node to master candidate")
4158
4159
4160 if self.op.master_capable == False and node.master_candidate:
4161 self.LogInfo("Demoting from master candidate")
4162 self.op.master_candidate = False
4163
4164
4165 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4166 if self.op.master_candidate:
4167 new_role = self._ROLE_CANDIDATE
4168 elif self.op.drained:
4169 new_role = self._ROLE_DRAINED
4170 elif self.op.offline:
4171 new_role = self._ROLE_OFFLINE
4172 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4173
4174
4175 new_role = self._ROLE_REGULAR
4176 else:
4177 new_role = old_role
4178
4179 self.new_role = new_role
4180
4181 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4182
4183 result = self.rpc.call_version([node.name])[node.name]
4184 if result.fail_msg:
4185 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4186 " to report its version: %s" %
4187 (node.name, result.fail_msg),
4188 errors.ECODE_STATE)
4189 else:
4190 self.LogWarning("Transitioning node from offline to online state"
4191 " without using re-add. Please make sure the node"
4192 " is healthy!")
4193
4194 if self.op.secondary_ip:
4195
4196 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4197 master_singlehomed = master.secondary_ip == master.primary_ip
4198 if master_singlehomed and self.op.secondary_ip:
4199 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4200 " homed cluster", errors.ECODE_INVAL)
4201
4202 if node.offline:
4203 if self.affected_instances:
4204 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4205 " node has instances (%s) configured"
4206 " to use it" % self.affected_instances)
4207 else:
4208
4209
4210 for instance in self.affected_instances:
4211 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4212
4213 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4214 if master.name != node.name:
4215
4216 if not netutils.TcpPing(self.op.secondary_ip,
4217 constants.DEFAULT_NODED_PORT,
4218 source=master.secondary_ip):
4219 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4220 " based ping to node daemon port",
4221 errors.ECODE_ENVIRON)
4222
4223 def Exec(self, feedback_fn):
4224 """Modifies a node.
4225
4226 """
4227 node = self.node
4228 old_role = self.old_role
4229 new_role = self.new_role
4230
4231 result = []
4232
4233 for attr in ["master_capable", "vm_capable"]:
4234 val = getattr(self.op, attr)
4235 if val is not None:
4236 setattr(node, attr, val)
4237 result.append((attr, str(val)))
4238
4239 if new_role != old_role:
4240
4241 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4242 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4243 if msg:
4244 self.LogWarning("Node failed to demote itself: %s", msg)
4245
4246 new_flags = self._R2F[new_role]
4247 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4248 if of != nf:
4249 result.append((desc, str(nf)))
4250 (node.master_candidate, node.drained, node.offline) = new_flags
4251
4252
4253 if self.lock_all:
4254 _AdjustCandidatePool(self, [node.name])
4255
4256 if self.op.secondary_ip:
4257 node.secondary_ip = self.op.secondary_ip
4258 result.append(("secondary_ip", self.op.secondary_ip))
4259
4260
4261 self.cfg.Update(node, feedback_fn)
4262
4263
4264
4265 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4266 self.context.ReaddNode(node)
4267
4268 return result
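# Illustrative sketch (not part of the original source): the returned
# list holds one (attribute, new value) pair per applied change, e.g.
# [("master_candidate", "False"), ("drained", "True")], which the
# client layer can print back to the user.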
4269
4272 """Powercycles a node.
4273
4274 """
4275 _OP_PARAMS = [
4276 _PNodeName,
4277 _PForce,
4278 ]
4279 REQ_BGL = False
4280
4287
4289 """Locking for PowercycleNode.
4290
4291 This is a last-resort option and shouldn't block on other
4292 jobs. Therefore, we grab no locks.
4293
4294 """
4295 self.needed_locks = {}
4296
4297 def Exec(self, feedback_fn):
4305
4308 """Query cluster configuration.
4309
4310 """
4311 REQ_BGL = False
4312
4313 def ExpandNames(self):
4314 self.needed_locks = {}
4315
4316 def Exec(self, feedback_fn):
4317 """Return cluster config.
4318
4319 """
4320 cluster = self.cfg.GetClusterInfo()
4321 os_hvp = {}
4322
4323
4324 for os_name, hv_dict in cluster.os_hvp.items():
4325 os_hvp[os_name] = {}
4326 for hv_name, hv_params in hv_dict.items():
4327 if hv_name in cluster.enabled_hypervisors:
4328 os_hvp[os_name][hv_name] = hv_params
4329
4330
4331 primary_ip_version = constants.IP4_VERSION
4332 if cluster.primary_ip_family == netutils.IP6Address.family:
4333 primary_ip_version = constants.IP6_VERSION
4334
4335 result = {
4336 "software_version": constants.RELEASE_VERSION,
4337 "protocol_version": constants.PROTOCOL_VERSION,
4338 "config_version": constants.CONFIG_VERSION,
4339 "os_api_version": max(constants.OS_API_VERSIONS),
4340 "export_version": constants.EXPORT_VERSION,
4341 "architecture": (platform.architecture()[0], platform.machine()),
4342 "name": cluster.cluster_name,
4343 "master": cluster.master_node,
4344 "default_hypervisor": cluster.enabled_hypervisors[0],
4345 "enabled_hypervisors": cluster.enabled_hypervisors,
4346 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4347 for hypervisor_name in cluster.enabled_hypervisors]),
4348 "os_hvp": os_hvp,
4349 "beparams": cluster.beparams,
4350 "osparams": cluster.osparams,
4351 "nicparams": cluster.nicparams,
4352 "candidate_pool_size": cluster.candidate_pool_size,
4353 "master_netdev": cluster.master_netdev,
4354 "volume_group_name": cluster.volume_group_name,
4355 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4356 "file_storage_dir": cluster.file_storage_dir,
4357 "maintain_node_health": cluster.maintain_node_health,
4358 "ctime": cluster.ctime,
4359 "mtime": cluster.mtime,
4360 "uuid": cluster.uuid,
4361 "tags": list(cluster.GetTags()),
4362 "uid_pool": cluster.uid_pool,
4363 "default_iallocator": cluster.default_iallocator,
4364 "reserved_lvs": cluster.reserved_lvs,
4365 "primary_ip_version": primary_ip_version,
4366 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4367 }
4368
4369 return result
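# Illustrative sketch (not part of the original source): the os_hvp
# filtering above drops hypervisors that are not enabled, e.g. with
#
#   cluster.os_hvp = {"debian": {"kvm": {...}, "fake": {...}}}
#   cluster.enabled_hypervisors = ["kvm"]
#
# the returned "os_hvp" entry is just {"debian": {"kvm": {...}}}.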
4370
4410
4413 """Bring up an instance's disks.
4414
4415 """
4416 _OP_PARAMS = [
4417 _PInstanceName,
4418 ("ignore_size", False, ht.TBool),
4419 ]
4420 REQ_BGL = False
4421
4426
4430
4432 """Check prerequisites.
4433
4434 This checks that the instance is in the cluster.
4435
4436 """
4437 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4438 assert self.instance is not None, \
4439 "Cannot retrieve locked instance %s" % self.op.instance_name
4440 _CheckNodeOnline(self, self.instance.primary_node)
4441
4442 def Exec(self, feedback_fn):
4443 """Activate the disks.
4444
4445 """
4446 disks_ok, disks_info = \
4447 _AssembleInstanceDisks(self, self.instance,
4448 ignore_size=self.op.ignore_size)
4449 if not disks_ok:
4450 raise errors.OpExecError("Cannot activate block devices")
4451
4452 return disks_info
4453
4457 """Prepare the block devices for an instance.
4458
4459 This sets up the block devices on all nodes.
4460
4461 @type lu: L{LogicalUnit}
4462 @param lu: the logical unit on whose behalf we execute
4463 @type instance: L{objects.Instance}
4464 @param instance: the instance for whose disks we assemble
4465 @type disks: list of L{objects.Disk} or None
4466 @param disks: which disks to assemble (or all, if None)
4467 @type ignore_secondaries: boolean
4468 @param ignore_secondaries: if true, errors on secondary nodes
4469 won't result in an error return from the function
4470 @type ignore_size: boolean
4471 @param ignore_size: if true, the current known size of the disk
4472 will not be used during the disk activation, useful for cases
4473 when the size is wrong
4474 @return: False if the operation failed, otherwise a list of
4475 (host, instance_visible_name, node_visible_name)
4476 with the mapping from node devices to instance devices
4477
4478 """
4479 device_info = []
4480 disks_ok = True
4481 iname = instance.name
4482 disks = _ExpandCheckDisks(instance, disks)
4483
4484 # NOTE: the activation is done in two passes: first every disk is
4485 # assembled on all nodes in secondary mode (is_primary=False), and
4486 # only afterwards on the primary node in primary mode
4487 # (is_primary=True); for devices such as DRBD this ordering matters,
4488 # since both sides must exist before either can be promoted to
4489 # primary
4493 # 1st pass: assemble every disk on all nodes in secondary mode
4494 for inst_disk in disks:
4495 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4496 if ignore_size:
4497 node_disk = node_disk.Copy()
4498 node_disk.UnsetSize()
4499 lu.cfg.SetDiskID(node_disk, node)
4500 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4501 msg = result.fail_msg
4502 if msg:
4503 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4504 " (is_primary=False, pass=1): %s",
4505 inst_disk.iv_name, node, msg)
4506 if not ignore_secondaries:
4507 disks_ok = False
4508
4509
4510 # 2nd pass: assemble the disks on the primary node in primary mode,
4511 # recording the resulting device path for the returned mapping
4512 for inst_disk in disks:
4513 dev_path = None
4514
4515 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4516 if node != instance.primary_node:
4517 continue
4518 if ignore_size:
4519 node_disk = node_disk.Copy()
4520 node_disk.UnsetSize()
4521 lu.cfg.SetDiskID(node_disk, node)
4522 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4523 msg = result.fail_msg
4524 if msg:
4525 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4526 " (is_primary=True, pass=2): %s",
4527 inst_disk.iv_name, node, msg)
4528 disks_ok = False
4529 else:
4530 dev_path = result.payload
4531
4532 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4533
4534
4535 # leave the disks configured for the primary node, so that later
4536 # RPC calls address them from the primary's point of view
4537 for disk in disks:
4538 lu.cfg.SetDiskID(disk, instance.primary_node)
4539
4540 return disks_ok, device_info
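# Illustrative usage sketch (not part of the original source), mirroring
# the call in LUActivateInstanceDisks.Exec above:
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in dev_info:
#     lu.LogInfo("%s: %s is visible as %s", node, iv_name, dev_path)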
4541
4544 """Start the disks of an instance.
4545
4546 """
4547 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4548 ignore_secondaries=force)
4549 if not disks_ok:
4550 _ShutdownInstanceDisks(lu, instance)
4551 if force is not None and not force:
4552 lu.proc.LogWarning("", hint="If the message above refers to a"
4553 " secondary node,"
4554 " you can retry the operation using '--force'.")
4555 raise errors.OpExecError("Disk consistency error")
4556
4559 """Shutdown an instance's disks.
4560
4561 """
4562 _OP_PARAMS = [
4563 _PInstanceName,
4564 ]
4565 REQ_BGL = False
4566
4571
4575
4577 """Check prerequisites.
4578
4579 This checks that the instance is in the cluster.
4580
4581 """
4582 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4583 assert self.instance is not None, \
4584 "Cannot retrieve locked instance %s" % self.op.instance_name
4585
4586 def Exec(self, feedback_fn):
4592
4595 """Shutdown block devices of an instance.
4596
4597 This function checks if an instance is running, before calling
4598 _ShutdownInstanceDisks.
4599
4600 """
4601 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4602 _ShutdownInstanceDisks(lu, instance, disks=disks)
4603
4606 """Return the instance disks selected by the disks list
4607
4608 @type disks: list of L{objects.Disk} or None
4609 @param disks: selected disks
4610 @rtype: list of L{objects.Disk}
4611 @return: selected instance disks to act on
4612
4613 """
4614 if disks is None:
4615 return instance.disks
4616 else:
4617 if not set(disks).issubset(instance.disks):
4618 raise errors.ProgrammerError("Can only act on disks belonging to the"
4619 " target instance")
4620 return disks
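# Illustrative sketch (not part of the original source):
#
#   _ExpandCheckDisks(instance, None)               # -> all instance disks
#   _ExpandCheckDisks(instance, instance.disks[:1]) # -> first disk only
#
# passing a disk that does not belong to the instance raises
# ProgrammerError.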
4621
4624 """Shutdown block devices of an instance.
4625
4626 This does the shutdown on all nodes of the instance.
4627
4628 If ignore_primary is false, errors on the primary node are not
4629 ignored and make the function return False.
4630
4631 """
4632 all_result = True
4633 disks = _ExpandCheckDisks(instance, disks)
4634
4635 for disk in disks:
4636 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4637 lu.cfg.SetDiskID(top_disk, node)
4638 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4639 msg = result.fail_msg
4640 if msg:
4641 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4642 disk.iv_name, node, msg)
4643 if not ignore_primary or node != instance.primary_node:
4644 all_result = False
4645 return all_result
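# Illustrative sketch (not part of the original source): a caller that
# must not lose primary-node errors simply checks the return value:
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     raise errors.OpExecError("Can't shut down the instance's disks.")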
4646
4649 """Checks if a node has enough free memory.
4650
4651 This function checks if a given node has the needed amount of free
4652 memory. In case the node has less memory or we cannot get the
4653 information from the node, this function raises an OpPrereqError
4654 exception.
4655
4656 @type lu: C{LogicalUnit}
4657 @param lu: a logical unit from which we get configuration data
4658 @type node: C{str}
4659 @param node: the node to check
4660 @type reason: C{str}
4661 @param reason: string to use in the error message
4662 @type requested: C{int}
4663 @param requested: the amount of memory in MiB to check for
4664 @type hypervisor_name: C{str}
4665 @param hypervisor_name: the hypervisor to ask for memory stats
4666 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4667 we cannot check the node
4668
4669 """
4670 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4671 nodeinfo[node].Raise("Can't get data from node %s" % node,
4672 prereq=True, ecode=errors.ECODE_ENVIRON)
4673 free_mem = nodeinfo[node].payload.get('memory_free', None)
4674 if not isinstance(free_mem, int):
4675 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4676 " was '%s'" % (node, free_mem),
4677 errors.ECODE_ENVIRON)
4678 if requested > free_mem:
4679 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4680 " needed %s MiB, available %s MiB" %
4681 (node, reason, requested, free_mem),
4682 errors.ECODE_NORES)
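# Illustrative usage sketch (not part of the original source), mirroring
# the failover/migrate callers further below:
#
#   bep = lu.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(lu, target_node,
#                        "failing over instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)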
4683
4686 """Checks if nodes have enough free disk space in the default VG.
4687
4688 This function checks if all given nodes have the needed amount of
4689 free disk. In case any node has less disk or we cannot get the
4690 information from the node, this function raises an OpPrereqError
4691 exception.
4692
4693 @type lu: C{LogicalUnit}
4694 @param lu: a logical unit from which we get configuration data
4695 @type nodenames: C{list}
4696 @param nodenames: the list of node names to check
4697 @type requested: C{int}
4698 @param requested: the amount of disk in MiB to check for
4699 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4700 we cannot check the node
4701
4702 """
4703 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4704 lu.cfg.GetHypervisorType())
4705 for node in nodenames:
4706 info = nodeinfo[node]
4707 info.Raise("Cannot get current information from node %s" % node,
4708 prereq=True, ecode=errors.ECODE_ENVIRON)
4709 vg_free = info.payload.get("vg_free", None)
4710 if not isinstance(vg_free, int):
4711 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4712 " result was '%s'" % (node, vg_free),
4713 errors.ECODE_ENVIRON)
4714 if requested > vg_free:
4715 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4716 " required %d MiB, available %d MiB" %
4717 (node, requested, vg_free),
4718 errors.ECODE_NORES)
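# Illustrative sketch (not part of the original source; the caller shape
# is an assumption): callers pass the total space needed in MiB on each
# of the named nodes, e.g. for a set of new disks:
#
#   _CheckNodesFreeDisk(lu, nodenames,
#                       sum(disk.size for disk in new_disks))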
4719
4722 """Starts an instance.
4723
4724 """
4725 HPATH = "instance-start"
4726 HTYPE = constants.HTYPE_INSTANCE
4727 _OP_PARAMS = [
4728 _PInstanceName,
4729 _PForce,
4730 _PIgnoreOfflineNodes,
4731 ("hvparams", ht.EmptyDict, ht.TDict),
4732 ("beparams", ht.EmptyDict, ht.TDict),
4733 ]
4734 REQ_BGL = False
4735
4741
4744
4757
4759 """Check prerequisites.
4760
4761 This checks that the instance is in the cluster.
4762
4763 """
4764 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4765 assert self.instance is not None, \
4766 "Cannot retrieve locked instance %s" % self.op.instance_name
4767
4768
4769 if self.op.hvparams:
4770
4771 cluster = self.cfg.GetClusterInfo()
4772 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4773 filled_hvp = cluster.FillHV(instance)
4774 filled_hvp.update(self.op.hvparams)
4775 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4776 hv_type.CheckParameterSyntax(filled_hvp)
4777 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4778
4779 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4780
4781 if self.primary_offline and self.op.ignore_offline_nodes:
4782 self.proc.LogWarning("Ignoring offline primary node")
4783
4784 if self.op.hvparams or self.op.beparams:
4785 self.proc.LogWarning("Overridden parameters are ignored")
4786 else:
4787 _CheckNodeOnline(self, instance.primary_node)
4788
4789 bep = self.cfg.GetClusterInfo().FillBE(instance)
4790
4791
4792 _CheckInstanceBridgesExist(self, instance)
4793
4794 remote_info = self.rpc.call_instance_info(instance.primary_node,
4795 instance.name,
4796 instance.hypervisor)
4797 remote_info.Raise("Error checking node %s" % instance.primary_node,
4798 prereq=True, ecode=errors.ECODE_ENVIRON)
4799 if not remote_info.payload:
4800 _CheckNodeFreeMemory(self, instance.primary_node,
4801 "starting instance %s" % instance.name,
4802 bep[constants.BE_MEMORY], instance.hypervisor)
4803
4804 def Exec(self, feedback_fn):
4827
4830 """Reboot an instance.
4831
4832 """
4833 HPATH = "instance-reboot"
4834 HTYPE = constants.HTYPE_INSTANCE
4835 _OP_PARAMS = [
4836 _PInstanceName,
4837 ("ignore_secondaries", False, ht.TBool),
4838 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4839 _PShutdownTimeout,
4840 ]
4841 REQ_BGL = False
4842
4845
4847 """Build hooks env.
4848
4849 This runs on master, primary and secondary nodes of the instance.
4850
4851 """
4852 env = {
4853 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4854 "REBOOT_TYPE": self.op.reboot_type,
4855 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4856 }
4857 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4859 return env, nl, nl
4860
4875
4876 def Exec(self, feedback_fn):
4877 """Reboot the instance.
4878
4879 """
4880 instance = self.instance
4881 ignore_secondaries = self.op.ignore_secondaries
4882 reboot_type = self.op.reboot_type
4883
4884 node_current = instance.primary_node
4885
4886 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4887 constants.INSTANCE_REBOOT_HARD]:
4888 for disk in instance.disks:
4889 self.cfg.SetDiskID(disk, node_current)
4890 result = self.rpc.call_instance_reboot(node_current, instance,
4891 reboot_type,
4892 self.op.shutdown_timeout)
4893 result.Raise("Could not reboot instance")
4894 else:
4895 result = self.rpc.call_instance_shutdown(node_current, instance,
4896 self.op.shutdown_timeout)
4897 result.Raise("Could not shutdown instance for full reboot")
4898 _ShutdownInstanceDisks(self, instance)
4899 _StartInstanceDisks(self, instance, ignore_secondaries)
4900 result = self.rpc.call_instance_start(node_current, instance, None, None)
4901 msg = result.fail_msg
4902 if msg:
4903 _ShutdownInstanceDisks(self, instance)
4904 raise errors.OpExecError("Could not start instance for"
4905 " full reboot: %s" % msg)
4906
4907 self.cfg.MarkInstanceUp(instance.name)
4908
4975
4978 """Reinstall an instance.
4979
4980 """
4981 HPATH = "instance-reinstall"
4982 HTYPE = constants.HTYPE_INSTANCE
4983 _OP_PARAMS = [
4984 _PInstanceName,
4985 ("os_type", None, ht.TMaybeString),
4986 ("force_variant", False, ht.TBool),
4987 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
4988 ]
4989 REQ_BGL = False
4990
4993
5003
5005 """Check prerequisites.
5006
5007 This checks that the instance is in the cluster and is not running.
5008
5009 """
5010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5011 assert instance is not None, \
5012 "Cannot retrieve locked instance %s" % self.op.instance_name
5013 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5014 " offline, cannot reinstall")
5015 for node in instance.secondary_nodes:
5016 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5017 " cannot reinstall")
5018
5019 if instance.disk_template == constants.DT_DISKLESS:
5020 raise errors.OpPrereqError("Instance '%s' has no disks" %
5021 self.op.instance_name,
5022 errors.ECODE_INVAL)
5023 _CheckInstanceDown(self, instance, "cannot reinstall")
5024
5025 if self.op.os_type is not None:
5026
5027 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5028 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5029 instance_os = self.op.os_type
5030 else:
5031 instance_os = instance.os
5032
5033 nodelist = list(instance.all_nodes)
5034
5035 if self.op.osparams:
5036 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5037 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5038 self.os_inst = i_osdict
5039 else:
5040 self.os_inst = None
5041
5042 self.instance = instance
5043
5044 def Exec(self, feedback_fn):
5045 """Reinstall the instance.
5046
5047 """
5048 inst = self.instance
5049
5050 if self.op.os_type is not None:
5051 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5052 inst.os = self.op.os_type
5053
5054 self.cfg.Update(inst, feedback_fn)
5055
5056 _StartInstanceDisks(self, inst, None)
5057 try:
5058 feedback_fn("Running the instance OS create scripts...")
5059
5060 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5061 self.op.debug_level,
5062 osparams=self.os_inst)
5063 result.Raise("Could not install OS for instance %s on node %s" %
5064 (inst.name, inst.primary_node))
5065 finally:
5066 _ShutdownInstanceDisks(self, inst)
5067
5131
5134 """Rename an instance.
5135
5136 """
5137 HPATH = "instance-rename"
5138 HTYPE = constants.HTYPE_INSTANCE
5139 _OP_PARAMS = [
5140 _PInstanceName,
5141 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5142 ("ip_check", False, ht.TBool),
5143 ("name_check", True, ht.TBool),
5144 ]
5145
5147 """Check arguments.
5148
5149 """
5150 if self.op.ip_check and not self.op.name_check:
5151
5152 raise errors.OpPrereqError("Cannot do ip check without a name check",
5153 errors.ECODE_INVAL)
5154
5165
5194
5195 def Exec(self, feedback_fn):
5196 """Rename the instance.
5197
5198 """
5199 inst = self.instance
5200 old_name = inst.name
5201
5202 if inst.disk_template == constants.DT_FILE:
5203 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5204
5205 self.cfg.RenameInstance(inst.name, self.op.new_name)
5206
5207 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5208 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5209
5210
5211 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5212
5213 if inst.disk_template == constants.DT_FILE:
5214 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5215 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5216 old_file_storage_dir,
5217 new_file_storage_dir)
5218 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5219 " (but the instance has been renamed in Ganeti)" %
5220 (inst.primary_node, old_file_storage_dir,
5221 new_file_storage_dir))
5222
5223 _StartInstanceDisks(self, inst, None)
5224 try:
5225 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5226 old_name, self.op.debug_level)
5227 msg = result.fail_msg
5228 if msg:
5229 msg = ("Could not run OS rename script for instance %s on node %s"
5230 " (but the instance has been renamed in Ganeti): %s" %
5231 (inst.name, inst.primary_node, msg))
5232 self.proc.LogWarning(msg)
5233 finally:
5234 _ShutdownInstanceDisks(self, inst)
5235
5236 return inst.name
5237
5240 """Remove an instance.
5241
5242 """
5243 HPATH = "instance-remove"
5244 HTYPE = constants.HTYPE_INSTANCE
5245 _OP_PARAMS = [
5246 _PInstanceName,
5247 ("ignore_failures", False, ht.TBool),
5248 _PShutdownTimeout,
5249 ]
5250 REQ_BGL = False
5251
5256
5260
5262 """Build hooks env.
5263
5264 This runs on master, primary and secondary nodes of the instance.
5265
5266 """
5267 env = _BuildInstanceHookEnvByObject(self, self.instance)
5268 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5269 nl = [self.cfg.GetMasterNode()]
5270 nl_post = list(self.instance.all_nodes) + nl
5271 return env, nl, nl_post
5272
5274 """Check prerequisites.
5275
5276 This checks that the instance is in the cluster.
5277
5278 """
5279 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5280 assert self.instance is not None, \
5281 "Cannot retrieve locked instance %s" % self.op.instance_name
5282
5283 def Exec(self, feedback_fn):
5284 """Remove the instance.
5285
5286 """
5287 instance = self.instance
5288 logging.info("Shutting down instance %s on node %s",
5289 instance.name, instance.primary_node)
5290
5291 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5292 self.op.shutdown_timeout)
5293 msg = result.fail_msg
5294 if msg:
5295 if self.op.ignore_failures:
5296 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5297 else:
5298 raise errors.OpExecError("Could not shutdown instance %s on"
5299 " node %s: %s" %
5300 (instance.name, instance.primary_node, msg))
5301
5302 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5303
5306 """Utility function to remove an instance.
5307
5308 """
5309 logging.info("Removing block devices for instance %s", instance.name)
5310
5311 if not _RemoveDisks(lu, instance):
5312 if not ignore_failures:
5313 raise errors.OpExecError("Can't remove instance's disks")
5314 feedback_fn("Warning: can't remove instance's disks")
5315
5316 logging.info("Removing instance %s out of cluster config", instance.name)
5317
5318 lu.cfg.RemoveInstance(instance.name)
5319
5320 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5321 "Instance lock removal conflict"
5322
5323
5324 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5325
5328 """Logical unit for querying instances.
5329
5330 """
5331
5332 _OP_PARAMS = [
5333 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5334 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5335 ("use_locking", False, ht.TBool),
5336 ]
5337 REQ_BGL = False
5338 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5339 "serial_no", "ctime", "mtime", "uuid"]
5340 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5341 "admin_state",
5342 "disk_template", "ip", "mac", "bridge",
5343 "nic_mode", "nic_link",
5344 "sda_size", "sdb_size", "vcpus", "tags",
5345 "network_port", "beparams",
5346 r"(disk)\.(size)/([0-9]+)",
5347 r"(disk)\.(sizes)", "disk_usage",
5348 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5349 r"(nic)\.(bridge)/([0-9]+)",
5350 r"(nic)\.(macs|ips|modes|links|bridges)",
5351 r"(disk|nic)\.(count)",
5352 "hvparams", "custom_hvparams",
5353 "custom_beparams", "custom_nicparams",
5354 ] + _SIMPLE_FIELDS +
5355 ["hv/%s" % name
5356 for name in constants.HVS_PARAMETERS
5357 if name not in constants.HVC_GLOBALS] +
5358 ["be/%s" % name
5359 for name in constants.BES_PARAMETERS])
5360 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5361 "oper_ram",
5362 "oper_vcpus",
5363 "status")
5364
5365
5370
5387
5391
5392 def Exec(self, feedback_fn):
5393 """Computes the list of nodes and their attributes.
5394
5395 """
5396
5397
5398 all_info = self.cfg.GetAllInstancesInfo()
5399 if self.wanted == locking.ALL_SET:
5400
5401 if self.do_locking:
5402 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5403 else:
5404 instance_names = all_info.keys()
5405 instance_names = utils.NiceSort(instance_names)
5406 else:
5407
5408 if self.do_locking:
5409 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5410 else:
5411 tgt_set = all_info.keys()
5412 missing = set(self.wanted).difference(tgt_set)
5413 if missing:
5414 raise errors.OpExecError("Some instances were removed before"
5415 " retrieving their data: %s" % missing)
5416 instance_names = self.wanted
5417
5418 instance_list = [all_info[iname] for iname in instance_names]
5419
5420
5421
5422 nodes = frozenset([inst.primary_node for inst in instance_list])
5423 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5424
5425 bad_nodes = []
5426 off_nodes = []
5427 if self.do_node_query:
5428 live_data = {}
5429 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5430 for name in nodes:
5431 result = node_data[name]
5432 if result.offline:
5433
5434 off_nodes.append(name)
5435 if result.fail_msg:
5436 bad_nodes.append(name)
5437 else:
5438 if result.payload:
5439 live_data.update(result.payload)
5440
5441 else:
5442 live_data = dict([(name, {}) for name in instance_names])
5443
5444
5445
5446 HVPREFIX = "hv/"
5447 BEPREFIX = "be/"
5448 output = []
5449 cluster = self.cfg.GetClusterInfo()
5450 for instance in instance_list:
5451 iout = []
5452 i_hv = cluster.FillHV(instance, skip_globals=True)
5453 i_be = cluster.FillBE(instance)
5454 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5455 for field in self.op.output_fields:
5456 st_match = self._FIELDS_STATIC.Matches(field)
5457 if field in self._SIMPLE_FIELDS:
5458 val = getattr(instance, field)
5459 elif field == "pnode":
5460 val = instance.primary_node
5461 elif field == "snodes":
5462 val = list(instance.secondary_nodes)
5463 elif field == "admin_state":
5464 val = instance.admin_up
5465 elif field == "oper_state":
5466 if instance.primary_node in bad_nodes:
5467 val = None
5468 else:
5469 val = bool(live_data.get(instance.name))
5470 elif field == "status":
5471 if instance.primary_node in off_nodes:
5472 val = "ERROR_nodeoffline"
5473 elif instance.primary_node in bad_nodes:
5474 val = "ERROR_nodedown"
5475 else:
5476 running = bool(live_data.get(instance.name))
5477 if running:
5478 if instance.admin_up:
5479 val = "running"
5480 else:
5481 val = "ERROR_up"
5482 else:
5483 if instance.admin_up:
5484 val = "ERROR_down"
5485 else:
5486 val = "ADMIN_down"
5487 elif field == "oper_ram":
5488 if instance.primary_node in bad_nodes:
5489 val = None
5490 elif instance.name in live_data:
5491 val = live_data[instance.name].get("memory", "?")
5492 else:
5493 val = "-"
5494 elif field == "oper_vcpus":
5495 if instance.primary_node in bad_nodes:
5496 val = None
5497 elif instance.name in live_data:
5498 val = live_data[instance.name].get("vcpus", "?")
5499 else:
5500 val = "-"
5501 elif field == "vcpus":
5502 val = i_be[constants.BE_VCPUS]
5503 elif field == "disk_template":
5504 val = instance.disk_template
5505 elif field == "ip":
5506 if instance.nics:
5507 val = instance.nics[0].ip
5508 else:
5509 val = None
5510 elif field == "nic_mode":
5511 if instance.nics:
5512 val = i_nicp[0][constants.NIC_MODE]
5513 else:
5514 val = None
5515 elif field == "nic_link":
5516 if instance.nics:
5517 val = i_nicp[0][constants.NIC_LINK]
5518 else:
5519 val = None
5520 elif field == "bridge":
5521 if (instance.nics and
5522 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5523 val = i_nicp[0][constants.NIC_LINK]
5524 else:
5525 val = None
5526 elif field == "mac":
5527 if instance.nics:
5528 val = instance.nics[0].mac
5529 else:
5530 val = None
5531 elif field == "custom_nicparams":
5532 val = [nic.nicparams for nic in instance.nics]
5533 elif field == "sda_size" or field == "sdb_size":
5534 idx = ord(field[2]) - ord('a')
5535 try:
5536 val = instance.FindDisk(idx).size
5537 except errors.OpPrereqError:
5538 val = None
5539 elif field == "disk_usage":
5540 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5541 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5542 elif field == "tags":
5543 val = list(instance.GetTags())
5544 elif field == "custom_hvparams":
5545 val = instance.hvparams
5546 elif field == "hvparams":
5547 val = i_hv
5548 elif (field.startswith(HVPREFIX) and
5549 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5550 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5551 val = i_hv.get(field[len(HVPREFIX):], None)
5552 elif field == "custom_beparams":
5553 val = instance.beparams
5554 elif field == "beparams":
5555 val = i_be
5556 elif (field.startswith(BEPREFIX) and
5557 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5558 val = i_be.get(field[len(BEPREFIX):], None)
5559 elif st_match and st_match.groups():
5560
5561 st_groups = st_match.groups()
5562 if st_groups and st_groups[0] == "disk":
5563 if st_groups[1] == "count":
5564 val = len(instance.disks)
5565 elif st_groups[1] == "sizes":
5566 val = [disk.size for disk in instance.disks]
5567 elif st_groups[1] == "size":
5568 try:
5569 val = instance.FindDisk(st_groups[2]).size
5570 except errors.OpPrereqError:
5571 val = None
5572 else:
5573 assert False, "Unhandled disk parameter"
5574 elif st_groups[0] == "nic":
5575 if st_groups[1] == "count":
5576 val = len(instance.nics)
5577 elif st_groups[1] == "macs":
5578 val = [nic.mac for nic in instance.nics]
5579 elif st_groups[1] == "ips":
5580 val = [nic.ip for nic in instance.nics]
5581 elif st_groups[1] == "modes":
5582 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5583 elif st_groups[1] == "links":
5584 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5585 elif st_groups[1] == "bridges":
5586 val = []
5587 for nicp in i_nicp:
5588 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5589 val.append(nicp[constants.NIC_LINK])
5590 else:
5591 val.append(None)
5592 else:
5593
5594 nic_idx = int(st_groups[2])
5595 if nic_idx >= len(instance.nics):
5596 val = None
5597 else:
5598 if st_groups[1] == "mac":
5599 val = instance.nics[nic_idx].mac
5600 elif st_groups[1] == "ip":
5601 val = instance.nics[nic_idx].ip
5602 elif st_groups[1] == "mode":
5603 val = i_nicp[nic_idx][constants.NIC_MODE]
5604 elif st_groups[1] == "link":
5605 val = i_nicp[nic_idx][constants.NIC_LINK]
5606 elif st_groups[1] == "bridge":
5607 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5608 if nic_mode == constants.NIC_MODE_BRIDGED:
5609 val = i_nicp[nic_idx][constants.NIC_LINK]
5610 else:
5611 val = None
5612 else:
5613 assert False, "Unhandled NIC parameter"
5614 else:
5615 assert False, ("Declared but unhandled variable parameter '%s'" %
5616 field)
5617 else:
5618 assert False, "Declared but unhandled parameter '%s'" % field
5619 iout.append(val)
5620 output.append(iout)
5621
5622 return output
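# Illustrative summary (not part of the original source): the "status"
# field computed above resolves to
#
#   primary node offline        -> "ERROR_nodeoffline"
#   primary node unreachable    -> "ERROR_nodedown"
#   running and admin_up        -> "running"
#   running and not admin_up    -> "ERROR_up"
#   stopped and admin_up        -> "ERROR_down"
#   stopped and not admin_up    -> "ADMIN_down"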
5623
5626 """Failover an instance.
5627
5628 """
5629 HPATH = "instance-failover"
5630 HTYPE = constants.HTYPE_INSTANCE
5631 _OP_PARAMS = [
5632 _PInstanceName,
5633 ("ignore_consistency", False, ht.TBool),
5634 _PShutdownTimeout,
5635 ]
5636 REQ_BGL = False
5637
5642
5646
5648 """Build hooks env.
5649
5650 This runs on master, primary and secondary nodes of the instance.
5651
5652 """
5653 instance = self.instance
5654 source_node = instance.primary_node
5655 target_node = instance.secondary_nodes[0]
5656 env = {
5657 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5659 "OLD_PRIMARY": source_node,
5660 "OLD_SECONDARY": target_node,
5661 "NEW_PRIMARY": target_node,
5662 "NEW_SECONDARY": source_node,
5663 }
5664 env.update(_BuildInstanceHookEnvByObject(self, instance))
5665 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5666 nl_post = list(nl)
5667 nl_post.append(source_node)
5668 return env, nl, nl_post
5669
5671 """Check prerequisites.
5672
5673 This checks that the instance is in the cluster.
5674
5675 """
5676 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5677 assert self.instance is not None, \
5678 "Cannot retrieve locked instance %s" % self.op.instance_name
5679
5680 bep = self.cfg.GetClusterInfo().FillBE(instance)
5681 if instance.disk_template not in constants.DTS_NET_MIRROR:
5682 raise errors.OpPrereqError("Instance's disk layout is not"
5683 " network mirrored, cannot failover.",
5684 errors.ECODE_STATE)
5685
5686 secondary_nodes = instance.secondary_nodes
5687 if not secondary_nodes:
5688 raise errors.ProgrammerError("no secondary node but using "
5689 "a mirrored disk template")
5690
5691 target_node = secondary_nodes[0]
5692 _CheckNodeOnline(self, target_node)
5693 _CheckNodeNotDrained(self, target_node)
5694 if instance.admin_up:
5695
5696 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5697 instance.name, bep[constants.BE_MEMORY],
5698 instance.hypervisor)
5699 else:
5700 self.LogInfo("Not checking memory on the secondary node as"
5701 " instance will not be started")
5702
5703
5704 _CheckInstanceBridgesExist(self, instance, node=target_node)
5705
5706 def Exec(self, feedback_fn):
5707 """Failover an instance.
5708
5709 The failover is done by shutting it down on its present node and
5710 starting it on the secondary.
5711
5712 """
5713 instance = self.instance
5714 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5715
5716 source_node = instance.primary_node
5717 target_node = instance.secondary_nodes[0]
5718
5719 if instance.admin_up:
5720 feedback_fn("* checking disk consistency between source and target")
5721 for dev in instance.disks:
5722
5723 if not _CheckDiskConsistency(self, dev, target_node, False):
5724 if not self.op.ignore_consistency:
5725 raise errors.OpExecError("Disk %s is degraded on target node,"
5726 " aborting failover." % dev.iv_name)
5727 else:
5728 feedback_fn("* not checking disk consistency as instance is not running")
5729
5730 feedback_fn("* shutting down instance on source node")
5731 logging.info("Shutting down instance %s on node %s",
5732 instance.name, source_node)
5733
5734 result = self.rpc.call_instance_shutdown(source_node, instance,
5735 self.op.shutdown_timeout)
5736 msg = result.fail_msg
5737 if msg:
5738 if self.op.ignore_consistency or primary_node.offline:
5739 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5740 " Proceeding anyway. Please make sure node"
5741 " %s is down. Error details: %s",
5742 instance.name, source_node, source_node, msg)
5743 else:
5744 raise errors.OpExecError("Could not shutdown instance %s on"
5745 " node %s: %s" %
5746 (instance.name, source_node, msg))
5747
5748 feedback_fn("* deactivating the instance's disks on source node")
5749 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5750 raise errors.OpExecError("Can't shut down the instance's disks.")
5751
5752 instance.primary_node = target_node
5753
5754 self.cfg.Update(instance, feedback_fn)
5755
5756
5757 if instance.admin_up:
5758 feedback_fn("* activating the instance's disks on target node")
5759 logging.info("Starting instance %s on node %s",
5760 instance.name, target_node)
5761
5762 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5763 ignore_secondaries=True)
5764 if not disks_ok:
5765 _ShutdownInstanceDisks(self, instance)
5766 raise errors.OpExecError("Can't activate the instance's disks")
5767
5768 feedback_fn("* starting the instance on the target node")
5769 result = self.rpc.call_instance_start(target_node, instance, None, None)
5770 msg = result.fail_msg
5771 if msg:
5772 _ShutdownInstanceDisks(self, instance)
5773 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5774 (instance.name, target_node, msg))
5775
5778 """Migrate an instance.
5779
5780 This is migration without shutting down, compared to the failover,
5781 which is done with shutdown.
5782
5783 """
5784 HPATH = "instance-migrate"
5785 HTYPE = constants.HTYPE_INSTANCE
5786 _OP_PARAMS = [
5787 _PInstanceName,
5788 _PMigrationMode,
5789 _PMigrationLive,
5790 ("cleanup", False, ht.TBool),
5791 ]
5792
5793 REQ_BGL = False
5794
5804
5808
5810 """Build hooks env.
5811
5812 This runs on master, primary and secondary nodes of the instance.
5813
5814 """
5815 instance = self._migrater.instance
5816 source_node = instance.primary_node
5817 target_node = instance.secondary_nodes[0]
5818 env = _BuildInstanceHookEnvByObject(self, instance)
5819 env["MIGRATE_LIVE"] = self._migrater.live
5820 env["MIGRATE_CLEANUP"] = self.op.cleanup
5821 env.update({
5822 "OLD_PRIMARY": source_node,
5823 "OLD_SECONDARY": target_node,
5824 "NEW_PRIMARY": target_node,
5825 "NEW_SECONDARY": source_node,
5826 })
5827 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5828 nl_post = list(nl)
5829 nl_post.append(source_node)
5830 return env, nl, nl_post
5831
5834 """Move an instance by data-copying.
5835
5836 """
5837 HPATH = "instance-move"
5838 HTYPE = constants.HTYPE_INSTANCE
5839 _OP_PARAMS = [
5840 _PInstanceName,
5841 ("target_node", ht.NoDefault, ht.TNonEmptyString),
5842 _PShutdownTimeout,
5843 ]
5844 REQ_BGL = False
5845
5852
5856
5858 """Build hooks env.
5859
5860 This runs on master, primary and secondary nodes of the instance.
5861
5862 """
5863 env = {
5864 "TARGET_NODE": self.op.target_node,
5865 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5866 }
5867 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5868 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5869 self.op.target_node]
5870 return env, nl, nl
5871
5873 """Check prerequisites.
5874
5875 This checks that the instance is in the cluster.
5876
5877 """
5878 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5879 assert self.instance is not None, \
5880 "Cannot retrieve locked instance %s" % self.op.instance_name
5881
5882 node = self.cfg.GetNodeInfo(self.op.target_node)
5883 assert node is not None, \
5884 "Cannot retrieve locked node %s" % self.op.target_node
5885
5886 self.target_node = target_node = node.name
5887
5888 if target_node == instance.primary_node:
5889 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5890 (instance.name, target_node),
5891 errors.ECODE_STATE)
5892
5893 bep = self.cfg.GetClusterInfo().FillBE(instance)
5894
5895 for idx, dsk in enumerate(instance.disks):
5896 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5897 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5898 " cannot copy" % idx, errors.ECODE_STATE)
5899
5900 _CheckNodeOnline(self, target_node)
5901 _CheckNodeNotDrained(self, target_node)
5902 _CheckNodeVmCapable(self, target_node)
5903
5904 if instance.admin_up:
5905
5906 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5907 instance.name, bep[constants.BE_MEMORY],
5908 instance.hypervisor)
5909 else:
5910 self.LogInfo("Not checking memory on the secondary node as"
5911 " instance will not be started")
5912
5913
5914 _CheckInstanceBridgesExist(self, instance, node=target_node)
5915
5916 def Exec(self, feedback_fn):
5917 """Move an instance.
5918
5919 The move is done by shutting it down on its present node, copying
5920 the data over (slow) and starting it on the new node.
5921
5922 """
5923 instance = self.instance
5924
5925 source_node = instance.primary_node
5926 target_node = self.target_node
5927
5928 self.LogInfo("Shutting down instance %s on source node %s",
5929 instance.name, source_node)
5930
5931 result = self.rpc.call_instance_shutdown(source_node, instance,
5932 self.op.shutdown_timeout)
5933 msg = result.fail_msg
5934 if msg:
5935 if self.op.ignore_consistency:
5936 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5937 " Proceeding anyway. Please make sure node"
5938 " %s is down. Error details: %s",
5939 instance.name, source_node, source_node, msg)
5940 else:
5941 raise errors.OpExecError("Could not shutdown instance %s on"
5942 " node %s: %s" %
5943 (instance.name, source_node, msg))
5944
5945
5946 try:
5947 _CreateDisks(self, instance, target_node=target_node)
5948 except errors.OpExecError:
5949 self.LogWarning("Device creation failed, reverting...")
5950 try:
5951 _RemoveDisks(self, instance, target_node=target_node)
5952 finally:
5953 self.cfg.ReleaseDRBDMinors(instance.name)
5954 raise
5955
5956 cluster_name = self.cfg.GetClusterInfo().cluster_name
5957
5958 errs = []
5959
5960 for idx, disk in enumerate(instance.disks):
5961 self.LogInfo("Copying data for disk %d", idx)
5962 result = self.rpc.call_blockdev_assemble(target_node, disk,
5963 instance.name, True)
5964 if result.fail_msg:
5965 self.LogWarning("Can't assemble newly created disk %d: %s",
5966 idx, result.fail_msg)
5967 errs.append(result.fail_msg)
5968 break
5969 dev_path = result.payload
5970 result = self.rpc.call_blockdev_export(source_node, disk,
5971 target_node, dev_path,
5972 cluster_name)
5973 if result.fail_msg:
5974 self.LogWarning("Can't copy data over for disk %d: %s",
5975 idx, result.fail_msg)
5976 errs.append(result.fail_msg)
5977 break
5978
5979 if errs:
5980 self.LogWarning("Some disks failed to copy, aborting")
5981 try:
5982 _RemoveDisks(self, instance, target_node=target_node)
5983 finally:
5984 self.cfg.ReleaseDRBDMinors(instance.name)
5985 raise errors.OpExecError("Errors during disk copy: %s" %
5986 (",".join(errs),))
5987
5988 instance.primary_node = target_node
5989 self.cfg.Update(instance, feedback_fn)
5990
5991 self.LogInfo("Removing the disks on the original node")
5992 _RemoveDisks(self, instance, target_node=source_node)
5993
5994
5995 if instance.admin_up:
5996 self.LogInfo("Starting instance %s on node %s",
5997 instance.name, target_node)
5998
5999 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6000 ignore_secondaries=True)
6001 if not disks_ok:
6002 _ShutdownInstanceDisks(self, instance)
6003 raise errors.OpExecError("Can't activate the instance's disks")
6004
6005 result = self.rpc.call_instance_start(target_node, instance, None, None)
6006 msg = result.fail_msg
6007 if msg:
6008 _ShutdownInstanceDisks(self, instance)
6009 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6010 (instance.name, target_node, msg))
6011
6067
6070 """Tasklet class for instance migration.
6071
6072 @type live: boolean
6073 @ivar live: whether the migration will be done live or non-live;
6074 this variable is initialized only after CheckPrereq has run
6075
6076 """
6077 def __init__(self, lu, instance_name, cleanup):
6078 """Initializes this class.
6079
6080 """
6081 Tasklet.__init__(self, lu)
6082
6083
6084 self.instance_name = instance_name
6085 self.cleanup = cleanup
6086 self.live = False
6087
6089 """Check prerequisites.
6090
6091 This checks that the instance is in the cluster.
6092
6093 """
6094 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6095 instance = self.cfg.GetInstanceInfo(instance_name)
6096 assert instance is not None
6097
6098 if instance.disk_template != constants.DT_DRBD8:
6099 raise errors.OpPrereqError("Instance's disk layout is not"
6100 " drbd8, cannot migrate.", errors.ECODE_STATE)
6101
6102 secondary_nodes = instance.secondary_nodes
6103 if not secondary_nodes:
6104 raise errors.ConfigurationError("No secondary node but using"
6105 " drbd8 disk template")
6106
6107 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6108
6109 target_node = secondary_nodes[0]
6110
6111 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6112 instance.name, i_be[constants.BE_MEMORY],
6113 instance.hypervisor)
6114
6115
6116 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6117
6118 if not self.cleanup:
6119 _CheckNodeNotDrained(self.lu, target_node)
6120 result = self.rpc.call_instance_migratable(instance.primary_node,
6121 instance)
6122 result.Raise("Can't migrate, please use failover",
6123 prereq=True, ecode=errors.ECODE_STATE)
6124
6125 self.instance = instance
6126
6127 if self.lu.op.live is not None and self.lu.op.mode is not None:
6128 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6129 " parameters are accepted",
6130 errors.ECODE_INVAL)
6131 if self.lu.op.live is not None:
6132 if self.lu.op.live:
6133 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6134 else:
6135 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6136
6137
6138 self.lu.op.live = None
6139 elif self.lu.op.mode is None:
6140
6141 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6142 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6143
6144 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
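# Illustrative summary (not part of the original source): the live/mode
# normalization above behaves as
#
#   op.live=True,  op.mode=None -> mode = HT_MIGRATION_LIVE
#   op.live=False, op.mode=None -> mode = HT_MIGRATION_NONLIVE
#   op.live=None,  op.mode=None -> mode taken from the instance's
#                                  HV_MIGRATION_MODE hypervisor parameter
#   both live and mode set      -> OpPrereqError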
6145
6147 """Poll with custom rpc for disk sync.
6148
6149 This uses our own step-based rpc call.
6150
6151 """
6152 self.feedback_fn("* wait until resync is done")
6153 all_done = False
6154 while not all_done:
6155 all_done = True
6156 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6157 self.nodes_ip,
6158 self.instance.disks)
6159 min_percent = 100
6160 for node, nres in result.items():
6161 nres.Raise("Cannot resync disks on node %s" % node)
6162 node_done, node_percent = nres.payload
6163 all_done = all_done and node_done
6164 if node_percent is not None:
6165 min_percent = min(min_percent, node_percent)
6166 if not all_done:
6167 if min_percent < 100:
6168 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6169 time.sleep(2)
6170
6172 """Demote a node to secondary.
6173
6174 """
6175 self.feedback_fn("* switching node %s to secondary mode" % node)
6176
6177 for dev in self.instance.disks:
6178 self.cfg.SetDiskID(dev, node)
6179
6180 result = self.rpc.call_blockdev_close(node, self.instance.name,
6181 self.instance.disks)
6182 result.Raise("Cannot change disk to secondary on node %s" % node)
6183
6185 """Disconnect from the network.
6186
6187 """
6188 self.feedback_fn("* changing into standalone mode")
6189 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6190 self.instance.disks)
6191 for node, nres in result.items():
6192 nres.Raise("Cannot disconnect disks on node %s" % node)
6193
6195 """Reconnect to the network.
6196
6197 """
6198 if multimaster:
6199 msg = "dual-master"
6200 else:
6201 msg = "single-master"
6202 self.feedback_fn("* changing disks into %s mode" % msg)
6203 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6204 self.instance.disks,
6205 self.instance.name, multimaster)
6206 for node, nres in result.items():
6207 nres.Raise("Cannot change disks config on node %s" % node)
6208
6210 """Try to cleanup after a failed migration.
6211
6212 The cleanup is done by:
6213 - check that the instance is running only on one node
6214 (and update the config if needed)
6215 - change disks on its secondary node to secondary
6216 - wait until disks are fully synchronized
6217 - disconnect from the network
6218 - change disks into single-master mode
6219 - wait again until disks are fully synchronized
6220
6221 """
6222 instance = self.instance
6223 target_node = self.target_node
6224 source_node = self.source_node
6225
6226
6227 self.feedback_fn("* checking where the instance actually runs"
6228 " (if this hangs, the hypervisor might be in"
6229 " a bad state)")
6230 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6231 for node, result in ins_l.items():
6232 result.Raise("Can't contact node %s" % node)
6233
6234 runningon_source = instance.name in ins_l[source_node].payload
6235 runningon_target = instance.name in ins_l[target_node].payload
6236
6237 if runningon_source and runningon_target:
6238 raise errors.OpExecError("Instance seems to be running on two nodes,"
6239 " or the hypervisor is confused. You will have"
6240 " to ensure manually that it runs only on one"
6241 " and restart this operation.")
6242
6243 if not (runningon_source or runningon_target):
6244 raise errors.OpExecError("Instance does not seem to be running at all."
6245 " In this case, it's safer to repair by"
6246 " running 'gnt-instance stop' to ensure disk"
6247 " shutdown, and then restarting it.")
6248
6249 if runningon_target:
6250
6251 self.feedback_fn("* instance running on secondary node (%s),"
6252 " updating config" % target_node)
6253 instance.primary_node = target_node
6254 self.cfg.Update(instance, self.feedback_fn)
6255 demoted_node = source_node
6256 else:
6257 self.feedback_fn("* instance confirmed to be running on its"
6258 " primary node (%s)" % source_node)
6259 demoted_node = target_node
6260
6261 self._EnsureSecondary(demoted_node)
6262 try:
6263 self._WaitUntilSync()
6264 except errors.OpExecError:
6265
6266
6267 pass
6268 self._GoStandalone()
6269 self._GoReconnect(False)
6270 self._WaitUntilSync()
6271
6272 self.feedback_fn("* done")
6273
6275 """Try to revert the disk status after a failed migration.
6276
6277 """
6278 target_node = self.target_node
6279 try:
6280 self._EnsureSecondary(target_node)
6281 self._GoStandalone()
6282 self._GoReconnect(False)
6283 self._WaitUntilSync()
6284 except errors.OpExecError, err:
6285 self.lu.LogWarning("Migration failed and I can't reconnect the"
6286 " drives: error '%s'\n"
6287 "Please look and recover the instance status" %
6288 str(err))
6289
6291 """Call the hypervisor code to abort a started migration.
6292
6293 """
6294 instance = self.instance
6295 target_node = self.target_node
6296 migration_info = self.migration_info
6297
6298 abort_result = self.rpc.call_finalize_migration(target_node,
6299 instance,
6300 migration_info,
6301 False)
6302 abort_msg = abort_result.fail_msg
6303 if abort_msg:
6304 logging.error("Aborting migration failed on target node %s: %s",
6305 target_node, abort_msg)
6306
6307
6308
6310 """Migrate an instance.
6311
6312 The migrate is done by:
6313 - change the disks into dual-master mode
6314 - wait until disks are fully synchronized again
6315 - migrate the instance
6316 - change disks on the new secondary node (the old primary) to secondary
6317 - wait until disks are fully synchronized
6318 - change disks into single-master mode
6319
6320 """
6321 instance = self.instance
6322 target_node = self.target_node
6323 source_node = self.source_node
6324
6325 self.feedback_fn("* checking disk consistency between source and target")
6326 for dev in instance.disks:
6327 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6328 raise errors.OpExecError("Disk %s is degraded or not fully"
6329 " synchronized on target node,"
6330 " aborting migrate." % dev.iv_name)
6331
6332
6333 result = self.rpc.call_migration_info(source_node, instance)
6334 msg = result.fail_msg
6335 if msg:
6336 log_err = ("Failed fetching source migration information from %s: %s" %
6337 (source_node, msg))
6338 logging.error(log_err)
6339 raise errors.OpExecError(log_err)
6340
6341 self.migration_info = migration_info = result.payload
6342
6343
6344 self._EnsureSecondary(target_node)
6345 self._GoStandalone()
6346 self._GoReconnect(True)
6347 self._WaitUntilSync()
6348
6349 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6350 result = self.rpc.call_accept_instance(target_node,
6351 instance,
6352 migration_info,
6353 self.nodes_ip[target_node])
6354
6355 msg = result.fail_msg
6356 if msg:
6357 logging.error("Instance pre-migration failed, trying to revert"
6358 " disk status: %s", msg)
6359 self.feedback_fn("Pre-migration failed, aborting")
6360 self._AbortMigration()
6361 self._RevertDiskStatus()
6362 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6363 (instance.name, msg))
6364
6365 self.feedback_fn("* migrating instance to %s" % target_node)
6366 time.sleep(10)
6367 result = self.rpc.call_instance_migrate(source_node, instance,
6368 self.nodes_ip[target_node],
6369 self.live)
6370 msg = result.fail_msg
6371 if msg:
6372 logging.error("Instance migration failed, trying to revert"
6373 " disk status: %s", msg)
6374 self.feedback_fn("Migration failed, aborting")
6375 self._AbortMigration()
6376 self._RevertDiskStatus()
6377 raise errors.OpExecError("Could not migrate instance %s: %s" %
6378 (instance.name, msg))
6380
6381 instance.primary_node = target_node
6382 # distribute the new instance config to the other nodes
6383 self.cfg.Update(instance, self.feedback_fn)
6384
6385 result = self.rpc.call_finalize_migration(target_node,
6386 instance,
6387 migration_info,
6388 True)
6389 msg = result.fail_msg
6390 if msg:
6391 logging.error("Instance migration succeeded, but finalization failed:"
6392 " %s", msg)
6393 raise errors.OpExecError("Could not finalize instance migration: %s" %
6394 msg)
6395
6396 self._EnsureSecondary(source_node)
6397 self._WaitUntilSync()
6398 self._GoStandalone()
6399 self._GoReconnect(False)
6400 self._WaitUntilSync()
6401
6402 self.feedback_fn("* done")
6403
6404 - def Exec(self, feedback_fn):
6405 """Perform the migration.
6406
6407 """
6408 feedback_fn("Migrating instance %s" % self.instance.name)
6409
6410 self.feedback_fn = feedback_fn
6411
6412 self.source_node = self.instance.primary_node
6413 self.target_node = self.instance.secondary_nodes[0]
6414 self.all_nodes = [self.source_node, self.target_node]
6415 self.nodes_ip = {
6416 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6417 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6418 }
6419
6420 if self.cleanup:
6421 return self._ExecCleanup()
6422 else:
6423 return self._ExecMigration()
6424
6425
6426 -def _CreateBlockDev(lu, node, instance, device, force_create,
6427 info, force_open):
6428 """Create a tree of block devices on a given node.
6429
6430 If this device type has to be created on secondaries, create it and
6431 all its children.
6432
6433 If not, just recurse to children keeping the same 'force' value.
6434
6435 @param lu: the lu on whose behalf we execute
6436 @param node: the node on which to create the device
6437 @type instance: L{objects.Instance}
6438 @param instance: the instance which owns the device
6439 @type device: L{objects.Disk}
6440 @param device: the device to create
6441 @type force_create: boolean
6442 @param force_create: whether to force creation of this device; this
6443 will be changed to True whenever we find a device whose
6444 CreateOnSecondary() method returns True
6445 @param info: the extra 'metadata' we should attach to the device
6446 (this will be represented as a LVM tag)
6447 @type force_open: boolean
6448 @param force_open: this parameter will be passed to the
6449 L{backend.BlockdevCreate} function where it specifies
6450 whether we run on primary or not, and it affects both
6451 the child assembly and the device's own Open() execution
6452
6453 """
6454 if device.CreateOnSecondary():
6455 force_create = True
6456
6457 if device.children:
6458 for child in device.children:
6459 _CreateBlockDev(lu, node, instance, child, force_create,
6460 info, force_open)
6461
6462 if not force_create:
6463 return
6464
6465 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6466
6469 """Create a single block device on a given node.
6470
6471 This will not recurse over children of the device, so they must be
6472 created in advance.
6473
6474 @param lu: the lu on whose behalf we execute
6475 @param node: the node on which to create the device
6476 @type instance: L{objects.Instance}
6477 @param instance: the instance which owns the device
6478 @type device: L{objects.Disk}
6479 @param device: the device to create
6480 @param info: the extra 'metadata' we should attach to the device
6481 (this will be represented as a LVM tag)
6482 @type force_open: boolean
6483 @param force_open: this parameter will be passed to the
6484 L{backend.BlockdevCreate} function where it specifies
6485 whether we run on primary or not, and it affects both
6486 the child assembly and the device's own Open() execution
6487
6488 """
6489 lu.cfg.SetDiskID(device, node)
6490 result = lu.rpc.call_blockdev_create(node, device, device.size,
6491 instance.name, force_open, info)
6492 result.Raise("Can't create block device %s on"
6493 " node %s for instance %s" % (device, node, instance.name))
6494 if device.physical_id is None:
6495 device.physical_id = result.payload
6496
6499 """Generate a suitable LV name.
6500
6501 This will generate logical volume names for the given instance,
6502 one for each requested extension.
6502
6503 """
6504 results = []
6505 for val in exts:
6506 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6507 results.append("%s%s" % (new_id, val))
6508 return results
6509
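# For illustration: assuming GenerateUniqueID() returns a UUID-style
# string, a call such as the following (values invented) yields:
#
#   >>> _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   ['<uuid>.disk0_data', '<uuid>.disk0_meta']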
6510
6511 -def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6512 p_minor, s_minor):
6513 """Generate a drbd8 device complete with its children.
6514
6515 """
6516 port = lu.cfg.AllocatePort()
6517 vgname = lu.cfg.GetVGName()
6518 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6519 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6520 logical_id=(vgname, names[0]))
6521 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6522 logical_id=(vgname, names[1]))
6523 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6524 logical_id=(primary, secondary, port,
6525 p_minor, s_minor,
6526 shared_secret),
6527 children=[dev_data, dev_meta],
6528 iv_name=iv_name)
6529 return drbd_dev
6530
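# For illustration, the device tree built above for one disk (sizes in
# MiB, children listed under their parent):
#
#   LD_DRBD8  size=<size>  logical_id=(primary, secondary, port,
#                                      p_minor, s_minor, shared_secret)
#     +- LD_LV  size=<size>  logical_id=(vgname, names[0])  (data)
#     +- LD_LV  size=128     logical_id=(vgname, names[1])  (metadata)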
6531
6532 -def _GenerateDiskTemplate(lu, template_name,
6533 instance_name, primary_node,
6534 secondary_nodes, disk_info,
6535 file_storage_dir, file_driver,
6536 base_index):
6537 """Generate the entire disk layout for a given template type.
6538
6539 """
6540
6541
6542 vgname = lu.cfg.GetVGName()
6543 disk_count = len(disk_info)
6544 disks = []
6545 if template_name == constants.DT_DISKLESS:
6546 pass
6547 elif template_name == constants.DT_PLAIN:
6548 if len(secondary_nodes) != 0:
6549 raise errors.ProgrammerError("Wrong template configuration")
6550
6551 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6552 for i in range(disk_count)])
6553 for idx, disk in enumerate(disk_info):
6554 disk_index = idx + base_index
6555 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6556 logical_id=(vgname, names[idx]),
6557 iv_name="disk/%d" % disk_index,
6558 mode=disk["mode"])
6559 disks.append(disk_dev)
6560 elif template_name == constants.DT_DRBD8:
6561 if len(secondary_nodes) != 1:
6562 raise errors.ProgrammerError("Wrong template configuration")
6563 remote_node = secondary_nodes[0]
6564 minors = lu.cfg.AllocateDRBDMinor(
6565 [primary_node, remote_node] * len(disk_info), instance_name)
6566
6567 names = []
6568 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6569 for i in range(disk_count)]):
6570 names.append(lv_prefix + "_data")
6571 names.append(lv_prefix + "_meta")
6572 for idx, disk in enumerate(disk_info):
6573 disk_index = idx + base_index
6574 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6575 disk["size"], names[idx*2:idx*2+2],
6576 "disk/%d" % disk_index,
6577 minors[idx*2], minors[idx*2+1])
6578 disk_dev.mode = disk["mode"]
6579 disks.append(disk_dev)
6580 elif template_name == constants.DT_FILE:
6581 if len(secondary_nodes) != 0:
6582 raise errors.ProgrammerError("Wrong template configuration")
6583
6584 _RequireFileStorage()
6585
6586 for idx, disk in enumerate(disk_info):
6587 disk_index = idx + base_index
6588 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6589 iv_name="disk/%d" % disk_index,
6590 logical_id=(file_driver,
6591 "%s/disk%d" % (file_storage_dir,
6592 disk_index)),
6593 mode=disk["mode"])
6594 disks.append(disk_dev)
6595 else:
6596 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6597 return disks
6598
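# For illustration, a DT_PLAIN request with two disks and base_index=0
# (a hypothetical call; generated LV names elided) produces two LD_LV
# devices:
#
#   >>> dinfo = [{"size": 1024, "mode": "rw"}, {"size": 2048, "mode": "rw"}]
#   >>> disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN, "web1",
#   ...                               "node1", [], dinfo, None, None, 0)
#   >>> [(d.iv_name, d.size) for d in disks]
#   [('disk/0', 1024), ('disk/1', 2048)]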
6599
6600 -def _GetInstanceInfoText(instance):
6601 """Compute that text that should be added to the disk's metadata.
6602
6603 """
6604 return "originstname+%s" % instance.name
6605
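# For illustration, a hypothetical instance named "web1" gets the tag:
#
#   >>> _GetInstanceInfoText(objects.Instance(name="web1"))
#   'originstname+web1'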
6606
6607 -def _CalcEta(time_taken, written, total_size):
6608 """Calculates the ETA based on size written and total size.
6609
6610 @param time_taken: the time taken so far
6611 @param written: the amount written so far
6612 @param total_size: the total size of data to be written
6613 @return: The remaining time in seconds
6614
6615 """
6616 avg_time = time_taken / float(written)
6617 return (total_size - written) * avg_time
6618
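# A quick sanity check of the arithmetic above, with invented numbers:
# writing 512 of 2048 units took 30 seconds, so the remaining 1536
# units at the same average rate need another 90 seconds:
#
#   >>> _CalcEta(30.0, 512, 2048)
#   90.0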
6621 """Wipes instance disks.
6622
6623 @type lu: L{LogicalUnit}
6624 @param lu: the logical unit on whose behalf we execute
6625 @type instance: L{objects.Instance}
6626 @param instance: the instance whose disks we should wipe
6628
6629 """
6630 node = instance.primary_node
6631 for idx, device in enumerate(instance.disks):
6632 lu.LogInfo("* Wiping disk %d", idx)
6633 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6634
6635 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
6636 # but at most MAX_WIPE_CHUNK
6637 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6638 constants.MIN_WIPE_CHUNK_PERCENT)
6639
6640 offset = 0
6641 size = device.size
6642 last_output = 0
6643 start_time = time.time()
6644
6645 while offset < size:
6646 wipe_size = min(wipe_chunk_size, size - offset)
6647 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6648 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6649 (idx, offset, wipe_size))
6650 now = time.time()
6651 offset += wipe_size
6652 if now - last_output >= 60:
6653 eta = _CalcEta(now - start_time, offset, size)
6654 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6655 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6656 last_output = now
6657
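# A worked example of the chunk size used above, for a hypothetical
# 102400 MiB disk, assuming MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 10240 (illustrative values; see constants.py for
# the real ones):
#
#   min(10240, 102400 / 100.0 * 10) == 10240
#
# i.e. large disks are wiped in MAX_WIPE_CHUNK-sized pieces, while a
# small disk is wiped in chunks of MIN_WIPE_CHUNK_PERCENT of its size.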
6658
6659 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6660 """Create all disks for an instance.
6661
6662 This abstracts away some work from AddInstance.
6663
6664 @type lu: L{LogicalUnit}
6665 @param lu: the logical unit on whose behalf we execute
6666 @type instance: L{objects.Instance}
6667 @param instance: the instance whose disks we should create
6668 @type to_skip: list
6669 @param to_skip: list of indices to skip
6670 @type target_node: string
6671 @param target_node: if passed, overrides the target node for creation
6674
6675 """
6676 info = _GetInstanceInfoText(instance)
6677 if target_node is None:
6678 pnode = instance.primary_node
6679 all_nodes = instance.all_nodes
6680 else:
6681 pnode = target_node
6682 all_nodes = [pnode]
6683
6684 if instance.disk_template == constants.DT_FILE:
6685 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6686 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6687
6688 result.Raise("Failed to create directory '%s' on"
6689 " node %s" % (file_storage_dir, pnode))
6690
6691 # Note: this needs to be kept in sync with adding of disks in
6692 # LUSetInstanceParams
6693 for idx, device in enumerate(instance.disks):
6694 if to_skip and idx in to_skip:
6695 continue
6696 logging.info("Creating volume %s for instance %s",
6697 device.iv_name, instance.name)
6698
6699 for node in all_nodes:
6700 f_create = node == pnode
6701 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6702
6705 """Remove all disks for an instance.
6706
6707 This abstracts away some work from `AddInstance()` and
6708 `RemoveInstance()`. Note that in case some of the devices couldn't
6709 be removed, the removal will continue with the other ones (compare
6710 with `_CreateDisks()`).
6711
6712 @type lu: L{LogicalUnit}
6713 @param lu: the logical unit on whose behalf we execute
6714 @type instance: L{objects.Instance}
6715 @param instance: the instance whose disks we should remove
6716 @type target_node: string
6717 @param target_node: used to override the node on which to remove the disks
6718 @rtype: boolean
6719 @return: the success of the removal
6720
6721 """
6722 logging.info("Removing block devices for instance %s", instance.name)
6723
6724 all_result = True
6725 for device in instance.disks:
6726 if target_node:
6727 edata = [(target_node, device)]
6728 else:
6729 edata = device.ComputeNodeTree(instance.primary_node)
6730 for node, disk in edata:
6731 lu.cfg.SetDiskID(disk, node)
6732 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6733 if msg:
6734 lu.LogWarning("Could not remove block device %s on node %s,"
6735 " continuing anyway: %s", device.iv_name, node, msg)
6736 all_result = False
6737
6738 if instance.disk_template == constants.DT_FILE:
6739 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6740 if target_node:
6741 tgt = target_node
6742 else:
6743 tgt = instance.primary_node
6744 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6745 if result.fail_msg:
6746 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6747 file_storage_dir, tgt, result.fail_msg)
6748 all_result = False
6749
6750 return all_result
6751
6754 """Compute disk size requirements in the volume group
6755
6756 """
6757 # required free disk space as a function of the disk template
6758 req_size_dict = {
6759 constants.DT_DISKLESS: None,
6760 constants.DT_PLAIN: sum(d["size"] for d in disks),
6761 # 128 MB are added for drbd metadata for each disk
6762 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6763 constants.DT_FILE: None,
6764 }
6765
6766 if disk_template not in req_size_dict:
6767 raise errors.ProgrammerError("Disk template '%s' size requirement"
6768 " is unknown" % disk_template)
6769
6770 return req_size_dict[disk_template]
6771
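# A worked example with two hypothetical disks of 1024 and 2048 MiB:
#
#   >>> disks = [{"size": 1024}, {"size": 2048}]
#   >>> _ComputeDiskSize(constants.DT_PLAIN, disks)
#   3072
#   >>> _ComputeDiskSize(constants.DT_DRBD8, disks)
#   3328
#
# the DRBD8 figure includes the 128 MB of metadata per disk.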
6774 """Hypervisor parameter validation.
6775
6776 This function abstracts the hypervisor parameter validation to be
6777 used in both instance create and instance modify.
6778
6779 @type lu: L{LogicalUnit}
6780 @param lu: the logical unit for which we check
6781 @type nodenames: list
6782 @param nodenames: the list of nodes on which we should check
6783 @type hvname: string
6784 @param hvname: the name of the hypervisor we should use
6785 @type hvparams: dict
6786 @param hvparams: the parameters which we need to check
6787 @raise errors.OpPrereqError: if the parameters are not valid
6788
6789 """
6790 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6791 hvname,
6792 hvparams)
6793 for node in nodenames:
6794 info = hvinfo[node]
6795 if info.offline:
6796 continue
6797 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6798
6801 """OS parameters validation.
6802
6803 @type lu: L{LogicalUnit}
6804 @param lu: the logical unit for which we check
6805 @type required: boolean
6806 @param required: whether the validation should fail if the OS is not
6807 found
6808 @type nodenames: list
6809 @param nodenames: the list of nodes on which we should check
6810 @type osname: string
6811 @param osname: the name of the OS we should use
6812 @type osparams: dict
6813 @param osparams: the parameters which we need to check
6814 @raise errors.OpPrereqError: if the parameters are not valid
6815
6816 """
6817 result = lu.rpc.call_os_validate(required, nodenames, osname,
6818 [constants.OS_VALIDATE_PARAMETERS],
6819 osparams)
6820 for node, nres in result.items():
6821 # we don't check for offline cases since this should be run only
6822 # against the master node and/or an instance's nodes
6823 nres.Raise("OS Parameters validation failed on node %s" % node)
6824 if not nres.payload:
6825 lu.LogInfo("OS %s not found on node %s, validation skipped",
6826 osname, node)
6827
6830 """Create an instance.
6831
6832 """
6833 HPATH = "instance-add"
6834 HTYPE = constants.HTYPE_INSTANCE
6835 _OP_PARAMS = [
6836 _PInstanceName,
6837 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6838 ("start", True, ht.TBool),
6839 ("wait_for_sync", True, ht.TBool),
6840 ("ip_check", True, ht.TBool),
6841 ("name_check", True, ht.TBool),
6842 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6843 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6844 ("hvparams", ht.EmptyDict, ht.TDict),
6845 ("beparams", ht.EmptyDict, ht.TDict),
6846 ("osparams", ht.EmptyDict, ht.TDict),
6847 ("no_install", None, ht.TMaybeBool),
6848 ("os_type", None, ht.TMaybeString),
6849 ("force_variant", False, ht.TBool),
6850 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6851 ("source_x509_ca", None, ht.TMaybeString),
6852 ("source_instance_name", None, ht.TMaybeString),
6853 ("src_node", None, ht.TMaybeString),
6854 ("src_path", None, ht.TMaybeString),
6855 ("pnode", None, ht.TMaybeString),
6856 ("snode", None, ht.TMaybeString),
6857 ("iallocator", None, ht.TMaybeString),
6858 ("hypervisor", None, ht.TMaybeString),
6859 ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6860 ("identify_defaults", False, ht.TBool),
6861 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6862 ("file_storage_dir", None, ht.TMaybeString),
6863 ]
6864 REQ_BGL = False
6865
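# For illustration, a minimal opcode satisfying the parameter list
# above could look like this (a sketch; names and values invented):
#
#   op = opcodes.OpCreateInstance(instance_name="web1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 os_type="debian-image",
#                                 disk_template=constants.DT_PLAIN,
#                                 disks=[{"size": 10240}], nics=[{}],
#                                 pnode="node1.example.com")
#
# everything else falls back to the defaults declared in _OP_PARAMS.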
6867 """Check arguments.
6868
6869 """
6870
6871
6872 if self.op.no_install and self.op.start:
6873 self.LogInfo("No-installation mode selected, disabling startup")
6874 self.op.start = False
6875
6876 self.op.instance_name = \
6877 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6878
6879 if self.op.ip_check and not self.op.name_check:
6880 # TODO: make the ip check more flexible, not depending on the name check
6881 raise errors.OpPrereqError("Cannot do ip check without a name check",
6882 errors.ECODE_INVAL)
6883
6884 # check nics' parameter names
6885 for nic in self.op.nics:
6886 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6887
6888 # check disk parameter names and a consistent adopt/no-adopt strategy
6889 has_adopt = has_no_adopt = False
6890 for disk in self.op.disks:
6891 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6892 if "adopt" in disk:
6893 has_adopt = True
6894 else:
6895 has_no_adopt = True
6896 if has_adopt and has_no_adopt:
6897 raise errors.OpPrereqError("Either all disks are adopted or none is",
6898 errors.ECODE_INVAL)
6899 if has_adopt:
6900 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6901 raise errors.OpPrereqError("Disk adoption is not supported for the"
6902 " '%s' disk template" %
6903 self.op.disk_template,
6904 errors.ECODE_INVAL)
6905 if self.op.iallocator is not None:
6906 raise errors.OpPrereqError("Disk adoption not allowed with an"
6907 " iallocator script", errors.ECODE_INVAL)
6908 if self.op.mode == constants.INSTANCE_IMPORT:
6909 raise errors.OpPrereqError("Disk adoption not allowed for"
6910 " instance import", errors.ECODE_INVAL)
6911
6912 self.adopt_disks = has_adopt
6913
6914 # instance name verification
6915 if self.op.name_check:
6916 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6917 self.op.instance_name = self.hostname1.name
6918 # used in CheckPrereq for the ip ping check
6919 self.check_ip = self.hostname1.ip
6920 else:
6921 self.check_ip = None
6922
6923 # file storage checks
6924 if (self.op.file_driver and
6925 not self.op.file_driver in constants.FILE_DRIVER):
6926 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6927 self.op.file_driver, errors.ECODE_INVAL)
6928
6929 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6930 raise errors.OpPrereqError("File storage directory path not absolute",
6931 errors.ECODE_INVAL)
6932
6933 ### Node/iallocator related checks
6934 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6935
6936 if self.op.pnode is not None:
6937 if self.op.disk_template in constants.DTS_NET_MIRROR:
6938 if self.op.snode is None:
6939 raise errors.OpPrereqError("The networked disk templates need"
6940 " a mirror node", errors.ECODE_INVAL)
6941 elif self.op.snode:
6942 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6943 " template")
6944 self.op.snode = None
6945
6946 self._cds = _GetClusterDomainSecret()
6947
6948 if self.op.mode == constants.INSTANCE_IMPORT:
6949 # On import force_variant must be True, because if we forced it at
6950 # initial install, our only chance when importing it back is that it
6951 # works again!
6952 self.op.force_variant = True
6953
6954 if self.op.no_install:
6955 self.LogInfo("No-installation mode has no effect during import")
6956
6957 elif self.op.mode == constants.INSTANCE_CREATE:
6958 if self.op.os_type is None:
6959 raise errors.OpPrereqError("No guest OS specified",
6960 errors.ECODE_INVAL)
6961 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6962 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6963 " installation" % self.op.os_type,
6964 errors.ECODE_STATE)
6965 if self.op.disk_template is None:
6966 raise errors.OpPrereqError("No disk template specified",
6967 errors.ECODE_INVAL)
6968
6969 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6970 # Check handshake to ensure both clusters have the same domain secret
6971 src_handshake = self.op.source_handshake
6972 if not src_handshake:
6973 raise errors.OpPrereqError("Missing source handshake",
6974 errors.ECODE_INVAL)
6975
6976 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6977 src_handshake)
6978 if errmsg:
6979 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6980 errors.ECODE_INVAL)
6981
6982 # Load and check source CA
6983 self.source_x509_ca_pem = self.op.source_x509_ca
6984 if not self.source_x509_ca_pem:
6985 raise errors.OpPrereqError("Missing source X509 CA",
6986 errors.ECODE_INVAL)
6987
6988 try:
6989 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6990 self._cds)
6991 except OpenSSL.crypto.Error, err:
6992 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6993 (err, ), errors.ECODE_INVAL)
6994
6995 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6996 if errcode is not None:
6997 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6998 errors.ECODE_INVAL)
6999
7000 self.source_x509_ca = cert
7001
7002 src_instance_name = self.op.source_instance_name
7003 if not src_instance_name:
7004 raise errors.OpPrereqError("Missing source instance name",
7005 errors.ECODE_INVAL)
7006
7007 self.source_instance_name = \
7008 netutils.GetHostname(name=src_instance_name).name
7009
7010 else:
7011 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7012 self.op.mode, errors.ECODE_INVAL)
7013
7063
7065 """Run the allocator based on input opcode.
7066
7067 """
7068 nics = [n.ToDict() for n in self.nics]
7069 ial = IAllocator(self.cfg, self.rpc,
7070 mode=constants.IALLOCATOR_MODE_ALLOC,
7071 name=self.op.instance_name,
7072 disk_template=self.op.disk_template,
7073 tags=[],
7074 os=self.op.os_type,
7075 vcpus=self.be_full[constants.BE_VCPUS],
7076 mem_size=self.be_full[constants.BE_MEMORY],
7077 disks=self.disks,
7078 nics=nics,
7079 hypervisor=self.op.hypervisor,
7080 )
7081
7082 ial.Run(self.op.iallocator)
7083
7084 if not ial.success:
7085 raise errors.OpPrereqError("Can't compute nodes using"
7086 " iallocator '%s': %s" %
7087 (self.op.iallocator, ial.info),
7088 errors.ECODE_NORES)
7089 if len(ial.result) != ial.required_nodes:
7090 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7091 " of nodes (%s), required %s" %
7092 (self.op.iallocator, len(ial.result),
7093 ial.required_nodes), errors.ECODE_FAULT)
7094 self.op.pnode = ial.result[0]
7095 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7096 self.op.instance_name, self.op.iallocator,
7097 utils.CommaJoin(ial.result))
7098 if ial.required_nodes == 2:
7099 self.op.snode = ial.result[1]
7100
7102 """Build hooks env.
7103
7104 This runs on master, primary and secondary nodes of the instance.
7105
7106 """
7107 env = {
7108 "ADD_MODE": self.op.mode,
7109 }
7110 if self.op.mode == constants.INSTANCE_IMPORT:
7111 env["SRC_NODE"] = self.op.src_node
7112 env["SRC_PATH"] = self.op.src_path
7113 env["SRC_IMAGES"] = self.src_images
7114
7115 env.update(_BuildInstanceHookEnv(
7116 name=self.op.instance_name,
7117 primary_node=self.op.pnode,
7118 secondary_nodes=self.secondaries,
7119 status=self.op.start,
7120 os_type=self.op.os_type,
7121 memory=self.be_full[constants.BE_MEMORY],
7122 vcpus=self.be_full[constants.BE_VCPUS],
7123 nics=_NICListToTuple(self, self.nics),
7124 disk_template=self.op.disk_template,
7125 disks=[(d["size"], d["mode"]) for d in self.disks],
7126 bep=self.be_full,
7127 hvp=self.hv_full,
7128 hypervisor_name=self.op.hypervisor,
7129 ))
7130
7131 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7132 self.secondaries)
7133 return env, nl, nl
7134
7181
7183 """Use export parameters as defaults.
7184
7185 If the opcode doesn't specify (i.e. override) some instance
7186 parameters, try to use them from the export information, if
7187 that declares them.
7188
7189 """
7190 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7191
7192 if self.op.disk_template is None:
7193 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7194 self.op.disk_template = einfo.get(constants.INISECT_INS,
7195 "disk_template")
7196 else:
7197 raise errors.OpPrereqError("No disk template specified and the export"
7198 " is missing the disk_template information",
7199 errors.ECODE_INVAL)
7200
7201 if not self.op.disks:
7202 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7203 disks = []
7204
7205 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7206 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7207 disks.append({"size": disk_sz})
7208 self.op.disks = disks
7209 else:
7210 raise errors.OpPrereqError("No disk info specified and the export"
7211 " is missing the disk information",
7212 errors.ECODE_INVAL)
7213
7214 if (not self.op.nics and
7215 einfo.has_option(constants.INISECT_INS, "nic_count")):
7216 nics = []
7217 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7218 ndict = {}
7219 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7220 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7221 ndict[name] = v
7222 nics.append(ndict)
7223 self.op.nics = nics
7224
7225 if (self.op.hypervisor is None and
7226 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7227 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7228 if einfo.has_section(constants.INISECT_HYP):
7229 # use the export parameters but do not override the ones
7230 # specified by the user
7231 for name, value in einfo.items(constants.INISECT_HYP):
7232 if name not in self.op.hvparams:
7233 self.op.hvparams[name] = value
7234
7235 if einfo.has_section(constants.INISECT_BEP):
7236 # use the parameters, without overriding
7237 for name, value in einfo.items(constants.INISECT_BEP):
7238 if name not in self.op.beparams:
7239 self.op.beparams[name] = value
7240 else:
7241 # try to read the parameters old style, from the main section
7242 for name in constants.BES_PARAMETERS:
7243 if (name not in self.op.beparams and
7244 einfo.has_option(constants.INISECT_INS, name)):
7245 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7246
7247 if einfo.has_section(constants.INISECT_OSP):
7248 # use the parameters, without overriding
7249 for name, value in einfo.items(constants.INISECT_OSP):
7250 if name not in self.op.osparams:
7251 self.op.osparams[name] = value
7252
7254 """Revert the instance parameters to the default values.
7255
7256 """
7257 # hvparams
7258 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7259 for name in self.op.hvparams.keys():
7260 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7261 del self.op.hvparams[name]
7262 # beparams
7263 be_defs = cluster.SimpleFillBE({})
7264 for name in self.op.beparams.keys():
7265 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7266 del self.op.beparams[name]
7267 # nic params
7268 nic_defs = cluster.SimpleFillNIC({})
7269 for nic in self.op.nics:
7270 for name in constants.NICS_PARAMETERS:
7271 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7272 del nic[name]
7273 # osparams
7274 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7275 for name in self.op.osparams.keys():
7276 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7277 del self.op.osparams[name]
7278
7280 """Check prerequisites.
7281
7282 """
7283 if self.op.mode == constants.INSTANCE_IMPORT:
7284 export_info = self._ReadExportInfo()
7285 self._ReadExportParams(export_info)
7286
7287 _CheckDiskTemplate(self.op.disk_template)
7288
7289 if (not self.cfg.GetVGName() and
7290 self.op.disk_template not in constants.DTS_NOT_LVM):
7291 raise errors.OpPrereqError("Cluster does not support lvm-based"
7292 " instances", errors.ECODE_STATE)
7293
7294 if self.op.hypervisor is None:
7295 self.op.hypervisor = self.cfg.GetHypervisorType()
7296
7297 cluster = self.cfg.GetClusterInfo()
7298 enabled_hvs = cluster.enabled_hypervisors
7299 if self.op.hypervisor not in enabled_hvs:
7300 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7301 " cluster (%s)" % (self.op.hypervisor,
7302 ",".join(enabled_hvs)),
7303 errors.ECODE_STATE)
7304
7305 # check hypervisor parameter syntax (locally)
7306 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7307 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7308 self.op.hvparams)
7309 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7310 hv_type.CheckParameterSyntax(filled_hvp)
7311 self.hv_full = filled_hvp
7312
7313 _CheckGlobalHvParams(self.op.hvparams)
7314
7315 # fill and remember the beparams dict
7316 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7317 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7318
7319 # build os parameters
7320 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7321
7322 # now that hvp/bep are in final format, let's reset to defaults,
7323 # if told to do so
7324 if self.op.identify_defaults:
7325 self._RevertToDefaults(cluster)
7326
7327 # NIC buildup
7328 self.nics = []
7329 for idx, nic in enumerate(self.op.nics):
7330 nic_mode_req = nic.get("mode", None)
7331 nic_mode = nic_mode_req
7332 if nic_mode is None:
7333 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7334
7335 # in routed mode, for the first nic, the default ip is 'auto'
7336 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7337 default_ip_mode = constants.VALUE_AUTO
7338 else:
7339 default_ip_mode = constants.VALUE_NONE
7340
7341 # ip validity checks
7342 ip = nic.get("ip", default_ip_mode)
7343 if ip is None or ip.lower() == constants.VALUE_NONE:
7344 nic_ip = None
7345 elif ip.lower() == constants.VALUE_AUTO:
7346 if not self.op.name_check:
7347 raise errors.OpPrereqError("IP address set to auto but name checks"
7348 " have been skipped",
7349 errors.ECODE_INVAL)
7350 nic_ip = self.hostname1.ip
7351 else:
7352 if not netutils.IPAddress.IsValid(ip):
7353 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7354 errors.ECODE_INVAL)
7355 nic_ip = ip
7356
7357 # TODO: check the ip address for uniqueness
7358 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7359 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7360 errors.ECODE_INVAL)
7361
7362 # MAC address verification
7363 mac = nic.get("mac", constants.VALUE_AUTO)
7364 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7365 mac = utils.NormalizeAndValidateMac(mac)
7366
7367 try:
7368 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7369 except errors.ReservationError:
7370 raise errors.OpPrereqError("MAC address %s already in use"
7371 " in cluster" % mac,
7372 errors.ECODE_NOTUNIQUE)
7373
7374 # bridge verification
7375 bridge = nic.get("bridge", None)
7376 link = nic.get("link", None)
7377 if bridge and link:
7378 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7379 " at the same time", errors.ECODE_INVAL)
7380 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7381 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7382 errors.ECODE_INVAL)
7383 elif bridge:
7384 link = bridge
7385
7386 nicparams = {}
7387 if nic_mode_req:
7388 nicparams[constants.NIC_MODE] = nic_mode_req
7389 if link:
7390 nicparams[constants.NIC_LINK] = link
7391
7392 check_params = cluster.SimpleFillNIC(nicparams)
7393 objects.NIC.CheckParameterSyntax(check_params)
7394 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7395
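# For illustration (values invented): a request NIC of
# {"mode": "bridged", "link": "xen-br0", "mac": "auto"} passes the
# checks above and becomes objects.NIC(mac="auto", ip=None,
# nicparams={"mode": "bridged", "link": "xen-br0"}); the real MAC is
# only generated further down, once all checks have passed.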
7396 # disk checks/pre-build
7397 self.disks = []
7398 for disk in self.op.disks:
7399 mode = disk.get("mode", constants.DISK_RDWR)
7400 if mode not in constants.DISK_ACCESS_SET:
7401 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7402 mode, errors.ECODE_INVAL)
7403 size = disk.get("size", None)
7404 if size is None:
7405 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7406 try:
7407 size = int(size)
7408 except (TypeError, ValueError):
7409 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7410 errors.ECODE_INVAL)
7411 new_disk = {"size": size, "mode": mode}
7412 if "adopt" in disk:
7413 new_disk["adopt"] = disk["adopt"]
7414 self.disks.append(new_disk)
7415
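# For illustration, the loop above normalizes an opcode disk spec such
# as [{"size": "10240"}] into [{"size": 10240, "mode": "rw"}]
# (assuming constants.DISK_RDWR == "rw"); an "adopt" key is carried
# through unchanged.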
7416 if self.op.mode == constants.INSTANCE_IMPORT:
7417
7418 # Check that the new instance doesn't have less disks than the export
7419 instance_disks = len(self.disks)
7420 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7421 if instance_disks < export_disks:
7422 raise errors.OpPrereqError("Not enough disks to import."
7423 " (instance: %d, export: %d)" %
7424 (instance_disks, export_disks),
7425 errors.ECODE_INVAL)
7426
7427 disk_images = []
7428 for idx in range(export_disks):
7429 option = 'disk%d_dump' % idx
7430 if export_info.has_option(constants.INISECT_INS, option):
7431
7432 export_name = export_info.get(constants.INISECT_INS, option)
7433 image = utils.PathJoin(self.op.src_path, export_name)
7434 disk_images.append(image)
7435 else:
7436 disk_images.append(False)
7437
7438 self.src_images = disk_images
7439
7440 old_name = export_info.get(constants.INISECT_INS, 'name')
7441 try:
7442 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7443 except (TypeError, ValueError), err:
7444 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7445 " an integer: %s" % str(err),
7446 errors.ECODE_STATE)
7447 if self.op.instance_name == old_name:
7448 for idx, nic in enumerate(self.nics):
7449 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7450 nic_mac_ini = 'nic%d_mac' % idx
7451 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7452
7453
7454
7455 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7456 if self.op.ip_check:
7457 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7458 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7459 (self.check_ip, self.op.instance_name),
7460 errors.ECODE_NOTUNIQUE)
7461
7462 #### mac address generation
7463 # By generating the MAC address here, both the allocator and the
7464 # hooks get the real final MAC rather than the 'auto' or
7465 # 'generate' placeholder values.
7470 for nic in self.nics:
7471 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7472 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7473
7474 #### allocator run
7475
7476 if self.op.iallocator is not None:
7477 self._RunAllocator()
7478
7479 #### node related checks
7480
7481 # check primary node
7482 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7483 assert self.pnode is not None, \
7484 "Cannot retrieve locked node %s" % self.op.pnode
7485 if pnode.offline:
7486 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7487 pnode.name, errors.ECODE_STATE)
7488 if pnode.drained:
7489 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7490 pnode.name, errors.ECODE_STATE)
7491 if not pnode.vm_capable:
7492 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7493 " '%s'" % pnode.name, errors.ECODE_STATE)
7494
7495 self.secondaries = []
7496
7497 # mirror node verification
7498 if self.op.disk_template in constants.DTS_NET_MIRROR:
7499 if self.op.snode == pnode.name:
7500 raise errors.OpPrereqError("The secondary node cannot be the"
7501 " primary node.", errors.ECODE_INVAL)
7502 _CheckNodeOnline(self, self.op.snode)
7503 _CheckNodeNotDrained(self, self.op.snode)
7504 _CheckNodeVmCapable(self, self.op.snode)
7505 self.secondaries.append(self.op.snode)
7506
7507 nodenames = [pnode.name] + self.secondaries
7508
7509 req_size = _ComputeDiskSize(self.op.disk_template,
7510 self.disks)
7511
7512 # Check lv size requirements, if we are not adopting disks
7513 if req_size is not None and not self.adopt_disks:
7514 _CheckNodesFreeDisk(self, nodenames, req_size)
7515
7516 if self.adopt_disks:
7517 all_lvs = set([i["adopt"] for i in self.disks])
7518 if len(all_lvs) != len(self.disks):
7519 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7520 errors.ECODE_INVAL)
7521 for lv_name in all_lvs:
7522 try:
7523 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7524 except errors.ReservationError:
7525 raise errors.OpPrereqError("LV named %s used by another instance" %
7526 lv_name, errors.ECODE_NOTUNIQUE)
7527
7528 node_lvs = self.rpc.call_lv_list([pnode.name],
7529 self.cfg.GetVGName())[pnode.name]
7530 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7531 node_lvs = node_lvs.payload
7532 delta = all_lvs.difference(node_lvs.keys())
7533 if delta:
7534 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7535 utils.CommaJoin(delta),
7536 errors.ECODE_INVAL)
7537 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7538 if online_lvs:
7539 raise errors.OpPrereqError("Online logical volumes found, cannot"
7540 " adopt: %s" % utils.CommaJoin(online_lvs),
7541 errors.ECODE_STATE)
7542 # update the size of each disk based on what was found
7543 for dsk in self.disks:
7544 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7545
7546 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7547
7548 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7549
7550 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7551
7552 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7553
7554 # memory check on primary node
7555 if self.op.start:
7556 _CheckNodeFreeMemory(self, self.pnode.name,
7557 "creating instance %s" % self.op.instance_name,
7558 self.be_full[constants.BE_MEMORY],
7559 self.op.hypervisor)
7560
7561 self.dry_run_result = list(nodenames)
7562
7563 - def Exec(self, feedback_fn):
7564 """Create and add the instance to the cluster.
7565
7566 """
7567 instance = self.op.instance_name
7568 pnode_name = self.pnode.name
7569
7570 ht_kind = self.op.hypervisor
7571 if ht_kind in constants.HTS_REQ_PORT:
7572 network_port = self.cfg.AllocatePort()
7573 else:
7574 network_port = None
7575
7576 if constants.ENABLE_FILE_STORAGE:
7577 # this is needed because os.path.join does not accept None arguments
7578 if self.op.file_storage_dir is None:
7579 string_file_storage_dir = ""
7580 else:
7581 string_file_storage_dir = self.op.file_storage_dir
7582
7583 # build the full file storage dir path
7584 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7585 string_file_storage_dir, instance)
7586 else:
7587 file_storage_dir = ""
7588
7589 disks = _GenerateDiskTemplate(self,
7590 self.op.disk_template,
7591 instance, pnode_name,
7592 self.secondaries,
7593 self.disks,
7594 file_storage_dir,
7595 self.op.file_driver,
7596 0)
7597
7598 iobj = objects.Instance(name=instance, os=self.op.os_type,
7599 primary_node=pnode_name,
7600 nics=self.nics, disks=disks,
7601 disk_template=self.op.disk_template,
7602 admin_up=False,
7603 network_port=network_port,
7604 beparams=self.op.beparams,
7605 hvparams=self.op.hvparams,
7606 hypervisor=self.op.hypervisor,
7607 osparams=self.op.osparams,
7608 )
7609
7610 if self.adopt_disks:
7611 # rename LVs to the newly-generated names; we need to construct
7612 # 'fake' LV disks with the old data, plus the new unique_id
7613 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7614 rename_to = []
7615 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7616 rename_to.append(t_dsk.logical_id)
7617 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7618 self.cfg.SetDiskID(t_dsk, pnode_name)
7619 result = self.rpc.call_blockdev_rename(pnode_name,
7620 zip(tmp_disks, rename_to))
7621 result.Raise("Failed to rename adoped LVs")
7622 else:
7623 feedback_fn("* creating instance disks...")
7624 try:
7625 _CreateDisks(self, iobj)
7626 except errors.OpExecError:
7627 self.LogWarning("Device creation failed, reverting...")
7628 try:
7629 _RemoveDisks(self, iobj)
7630 finally:
7631 self.cfg.ReleaseDRBDMinors(instance)
7632 raise
7633
7634 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7635 feedback_fn("* wiping instance disks...")
7636 try:
7637 _WipeDisks(self, iobj)
7638 except errors.OpExecError:
7639 self.LogWarning("Device wiping failed, reverting...")
7640 try:
7641 _RemoveDisks(self, iobj)
7642 finally:
7643 self.cfg.ReleaseDRBDMinors(instance)
7644 raise
7645
7646 feedback_fn("adding instance %s to cluster config" % instance)
7647
7648 self.cfg.AddInstance(iobj, self.proc.GetECId())
7649
7650 # Declare that we don't want to remove the instance lock anymore,
7651 # as we've added the instance to the config
7652 del self.remove_locks[locking.LEVEL_INSTANCE]
7653
7654 if self.op.mode == constants.INSTANCE_IMPORT:
7655 nodes_keep = [self.op.src_node]
7656 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7657 if node != self.op.src_node]
7658 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7659 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7660 else:
7661 self.context.glm.release(locking.LEVEL_NODE)
7662 del self.acquired_locks[locking.LEVEL_NODE]
7663
7664 if self.op.wait_for_sync:
7665 disk_abort = not _WaitForSync(self, iobj)
7666 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7667 # make sure the disks are not degraded (still sync-ing is ok)
7668 time.sleep(15)
7669 feedback_fn("* checking mirrors status")
7670 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7671 else:
7672 disk_abort = False
7673
7674 if disk_abort:
7675 _RemoveDisks(self, iobj)
7676 self.cfg.RemoveInstance(iobj.name)
7677 # Make sure the instance lock gets removed
7678 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7679 raise errors.OpExecError("There are some degraded disks for"
7680 " this instance")
7681
7682 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7683 if self.op.mode == constants.INSTANCE_CREATE:
7684 if not self.op.no_install:
7685 feedback_fn("* running the instance OS create scripts...")
7686 # FIXME: pass debug option from opcode to backend
7687 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7688 self.op.debug_level)
7689 result.Raise("Could not add os for instance %s"
7690 " on node %s" % (instance, pnode_name))
7691
7692 elif self.op.mode == constants.INSTANCE_IMPORT:
7693 feedback_fn("* running the instance OS import scripts...")
7694
7695 transfers = []
7696
7697 for idx, image in enumerate(self.src_images):
7698 if not image:
7699 continue
7700
7701
7702 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7703 constants.IEIO_FILE, (image, ),
7704 constants.IEIO_SCRIPT,
7705 (iobj.disks[idx], idx),
7706 None)
7707 transfers.append(dt)
7708
7709 import_result = \
7710 masterd.instance.TransferInstanceData(self, feedback_fn,
7711 self.op.src_node, pnode_name,
7712 self.pnode.secondary_ip,
7713 iobj, transfers)
7714 if not compat.all(import_result):
7715 self.LogWarning("Some disks for instance %s on node %s were not"
7716 " imported successfully" % (instance, pnode_name))
7717
7718 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7719 feedback_fn("* preparing remote import...")
7720 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7721 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7722
7723 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7724 self.source_x509_ca,
7725 self._cds, timeouts)
7726 if not compat.all(disk_results):
7727 # TODO: Should the instance still be started, even if some disks
7728 # failed to import (valid for local imports, too)?
7729 self.LogWarning("Some disks for instance %s on node %s were not"
7730 " imported successfully" % (instance, pnode_name))
7731
7732 # Run rename script on newly imported instance
7733 assert iobj.name == instance
7734 feedback_fn("Running rename script for %s" % instance)
7735 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7736 self.source_instance_name,
7737 self.op.debug_level)
7738 if result.fail_msg:
7739 self.LogWarning("Failed to run rename script for %s on node"
7740 " %s: %s" % (instance, pnode_name, result.fail_msg))
7741
7742 else:
7743
7744 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7745 % self.op.mode)
7746
7747 if self.op.start:
7748 iobj.admin_up = True
7749 self.cfg.Update(iobj, feedback_fn)
7750 logging.info("Starting instance %s on node %s", instance, pnode_name)
7751 feedback_fn("* starting instance...")
7752 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7753 result.Raise("Could not start instance")
7754
7755 return list(iobj.all_nodes)
7756
7759 """Connect to an instance's console.
7760
7761 This is somewhat special in that it returns the command line that
7762 you need to run on the master node in order to connect to the
7763 console.
7764
7765 """
7766 _OP_PARAMS = [
7767 _PInstanceName
7768 ]
7769 REQ_BGL = False
7770
7771 - def ExpandNames(self):
7772 self._ExpandAndLockInstance()
7773 
7775 """Check prerequisites.
7776
7777 This checks that the instance is in the cluster.
7778
7779 """
7780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7781 assert self.instance is not None, \
7782 "Cannot retrieve locked instance %s" % self.op.instance_name
7783 _CheckNodeOnline(self, self.instance.primary_node)
7784
7785 - def Exec(self, feedback_fn):
7786 """Connect to the console of an instance
7787
7788 """
7789 instance = self.instance
7790 node = instance.primary_node
7791
7792 node_insts = self.rpc.call_instance_list([node],
7793 [instance.hypervisor])[node]
7794 node_insts.Raise("Can't get node information from %s" % node)
7795
7796 if instance.name not in node_insts.payload:
7797 if instance.admin_up:
7798 state = "ERROR_down"
7799 else:
7800 state = "ADMIN_down"
7801 raise errors.OpExecError("Instance %s is not running (state %s)" %
7802 (instance.name, state))
7803
7804 logging.debug("Connecting to console of %s on %s", instance.name, node)
7805
7806 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7807 cluster = self.cfg.GetClusterInfo()
7808 # beparams and hvparams are passed separately, to avoid editing the
7809 # instance and then saving the defaults in the instance itself.
7810 hvparams = cluster.FillHV(instance)
7811 beparams = cluster.FillBE(instance)
7812 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7813
7814 # build ssh cmdline
7815 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7816
7819 """Replace the disks of an instance.
7820
7821 """
7822 HPATH = "mirrors-replace"
7823 HTYPE = constants.HTYPE_INSTANCE
7824 _OP_PARAMS = [
7825 _PInstanceName,
7826 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7827 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7828 ("remote_node", None, ht.TMaybeString),
7829 ("iallocator", None, ht.TMaybeString),
7830 ("early_release", False, ht.TBool),
7831 ]
7832 REQ_BGL = False
7833
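# For illustration, replacing the secondary of an instance via an
# iallocator could be requested as (a sketch, names invented):
#
#   op = opcodes.OpReplaceDisks(instance_name="web1.example.com",
#                               mode=constants.REPLACE_DISK_CHG,
#                               iallocator="hail", disks=[])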
7834 - def CheckArguments(self):
7835 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7836 self.op.iallocator)
7837 
7864
7871
7873 """Build hooks env.
7874
7875 This runs on the master, the primary and all the secondaries.
7876
7877 """
7878 instance = self.replacer.instance
7879 env = {
7880 "MODE": self.op.mode,
7881 "NEW_SECONDARY": self.op.remote_node,
7882 "OLD_SECONDARY": instance.secondary_nodes[0],
7883 }
7884 env.update(_BuildInstanceHookEnvByObject(self, instance))
7885 nl = [
7886 self.cfg.GetMasterNode(),
7887 instance.primary_node,
7888 ]
7889 if self.op.remote_node is not None:
7890 nl.append(self.op.remote_node)
7891 return env, nl, nl
7892
7895 """Replaces disks for an instance.
7896
7897 Note: Locking is not within the scope of this class.
7898
7899 """
7900 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7901 disks, delay_iallocator, early_release):
7902 """Initializes this class.
7903
7904 """
7905 Tasklet.__init__(self, lu)
7906
7907 # Parameters
7908 self.instance_name = instance_name
7909 self.mode = mode
7910 self.iallocator_name = iallocator_name
7911 self.remote_node = remote_node
7912 self.disks = disks
7913 self.delay_iallocator = delay_iallocator
7914 self.early_release = early_release
7915
7916 # Runtime data
7917 self.instance = None
7918 self.new_node = None
7919 self.target_node = None
7920 self.other_node = None
7921 self.remote_node_info = None
7922 self.node_secondary_ip = None
7923
7924 @staticmethod
7926 """Helper function for users of this class.
7927
7928 """
7929 # check for valid parameter combination
7930 if mode == constants.REPLACE_DISK_CHG:
7931 if remote_node is None and iallocator is None:
7932 raise errors.OpPrereqError("When changing the secondary either an"
7933 " iallocator script must be used or the"
7934 " new node given", errors.ECODE_INVAL)
7935
7936 if remote_node is not None and iallocator is not None:
7937 raise errors.OpPrereqError("Give either the iallocator or the new"
7938 " secondary, not both", errors.ECODE_INVAL)
7939
7940 elif remote_node is not None or iallocator is not None:
7941 # Not replacing the secondary
7942 raise errors.OpPrereqError("The iallocator and new node options can"
7943 " only be used when changing the"
7944 " secondary node", errors.ECODE_INVAL)
7945
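# For illustration, the combinations accepted by the checks above
# (hypothetical calls):
#
#   CheckArguments(constants.REPLACE_DISK_CHG, "node3", None)    # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")     # ok
#   CheckArguments(constants.REPLACE_DISK_PRI, "node3", None)    # raises
#   CheckArguments(constants.REPLACE_DISK_CHG, "node3", "hail")  # raises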
7946 @staticmethod
7947 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7948 """Compute a new secondary node using an IAllocator.
7949
7950 """
7951 ial = IAllocator(lu.cfg, lu.rpc,
7952 mode=constants.IALLOCATOR_MODE_RELOC,
7953 name=instance_name,
7954 relocate_from=relocate_from)
7955
7956 ial.Run(iallocator_name)
7957
7958 if not ial.success:
7959 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7960 " %s" % (iallocator_name, ial.info),
7961 errors.ECODE_NORES)
7962
7963 if len(ial.result) != ial.required_nodes:
7964 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7965 " of nodes (%s), required %s" %
7966 (iallocator_name,
7967 len(ial.result), ial.required_nodes),
7968 errors.ECODE_FAULT)
7969
7970 remote_node_name = ial.result[0]
7971
7972 lu.LogInfo("Selected new secondary for instance '%s': %s",
7973 instance_name, remote_node_name)
7974
7975 return remote_node_name
7976
7980
8003
8005 """Check prerequisites, second part.
8006
8007 This function should always be part of CheckPrereq. It was separated and is
8008 now called from Exec because during node evacuation the iallocator was
8009 only called with an unmodified cluster model, not taking planned changes into
8010 account.
8011
8012 """
8013 instance = self.instance
8014 secondary_node = instance.secondary_nodes[0]
8015
8016 if self.iallocator_name is None:
8017 remote_node = self.remote_node
8018 else:
8019 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8020 instance.name, instance.secondary_nodes)
8021
8022 if remote_node is not None:
8023 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8024 assert self.remote_node_info is not None, \
8025 "Cannot retrieve locked node %s" % remote_node
8026 else:
8027 self.remote_node_info = None
8028
8029 if remote_node == self.instance.primary_node:
8030 raise errors.OpPrereqError("The specified node is the primary node of"
8031 " the instance.", errors.ECODE_INVAL)
8032
8033 if remote_node == secondary_node:
8034 raise errors.OpPrereqError("The specified node is already the"
8035 " secondary node of the instance.",
8036 errors.ECODE_INVAL)
8037
8038 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8039 constants.REPLACE_DISK_CHG):
8040 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8041 errors.ECODE_INVAL)
8042
8043 if self.mode == constants.REPLACE_DISK_AUTO:
8044 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8045 faulty_secondary = self._FindFaultyDisks(secondary_node)
8046
8047 if faulty_primary and faulty_secondary:
8048 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8049 " one node and can not be repaired"
8050 " automatically" % self.instance_name,
8051 errors.ECODE_STATE)
8052
8053 if faulty_primary:
8054 self.disks = faulty_primary
8055 self.target_node = instance.primary_node
8056 self.other_node = secondary_node
8057 check_nodes = [self.target_node, self.other_node]
8058 elif faulty_secondary:
8059 self.disks = faulty_secondary
8060 self.target_node = secondary_node
8061 self.other_node = instance.primary_node
8062 check_nodes = [self.target_node, self.other_node]
8063 else:
8064 self.disks = []
8065 check_nodes = []
8066
8067 else:
8068 # Non-automatic modes
8069 if self.mode == constants.REPLACE_DISK_PRI:
8070 self.target_node = instance.primary_node
8071 self.other_node = secondary_node
8072 check_nodes = [self.target_node, self.other_node]
8073
8074 elif self.mode == constants.REPLACE_DISK_SEC:
8075 self.target_node = secondary_node
8076 self.other_node = instance.primary_node
8077 check_nodes = [self.target_node, self.other_node]
8078
8079 elif self.mode == constants.REPLACE_DISK_CHG:
8080 self.new_node = remote_node
8081 self.other_node = instance.primary_node
8082 self.target_node = secondary_node
8083 check_nodes = [self.new_node, self.other_node]
8084
8085 _CheckNodeNotDrained(self.lu, remote_node)
8086 _CheckNodeVmCapable(self.lu, remote_node)
8087
8088 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8089 assert old_node_info is not None
8090 if old_node_info.offline and not self.early_release:
8091 # doesn't make sense to delay the release
8092 self.early_release = True
8093 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8094 " early-release mode", secondary_node)
8095
8096 else:
8097 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8098 self.mode)
8099
8100 # If not specified, all disks should be replaced
8101 if not self.disks:
8102 self.disks = range(len(self.instance.disks))
8103
8104 for node in check_nodes:
8105 _CheckNodeOnline(self.lu, node)
8106
8107 # Check whether the disk indices are valid
8108 for disk_idx in self.disks:
8109 instance.FindDisk(disk_idx)
8110
8111 # Get secondary node IP addresses
8112 node_2nd_ip = {}
8113
8114 for node_name in [self.target_node, self.other_node, self.new_node]:
8115 if node_name is not None:
8116 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8117
8118 self.node_secondary_ip = node_2nd_ip
8119
8120 - def Exec(self, feedback_fn):
8121 """Execute disk replacement.
8122
8123 This dispatches the disk replacement to the appropriate handler.
8124
8125 """
8126 if self.delay_iallocator:
8127 self._CheckPrereq2()
8128
8129 if not self.disks:
8130 feedback_fn("No disks need replacement")
8131 return
8132
8133 feedback_fn("Replacing disk(s) %s for %s" %
8134 (utils.CommaJoin(self.disks), self.instance.name))
8135
8136 activate_disks = (not self.instance.admin_up)
8137
8138 # Activate the instance disks if we're replacing them on a down instance
8139 if activate_disks:
8140 _StartInstanceDisks(self.lu, self.instance, True)
8141
8142 try:
8143 # Should we replace the secondary node?
8144 if self.new_node is not None:
8145 fn = self._ExecDrbd8Secondary
8146 else:
8147 fn = self._ExecDrbd8DiskOnly
8148
8149 return fn(feedback_fn)
8150
8151 finally:
8152 # Deactivate the instance disks if we're replacing them on a
8153 # down instance
8154 if activate_disks:
8155 _SafeShutdownInstanceDisks(self.lu, self.instance)
8156
8158 self.lu.LogInfo("Checking volume groups")
8159
8160 vgname = self.cfg.GetVGName()
8161
8162 # Make sure volume group exists on all involved nodes
8163 results = self.rpc.call_vg_list(nodes)
8164 if not results:
8165 raise errors.OpExecError("Can't list volume groups on the nodes")
8166
8167 for node in nodes:
8168 res = results[node]
8169 res.Raise("Error checking node %s" % node)
8170 if vgname not in res.payload:
8171 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8172 (vgname, node))
8173
8174 - def _CheckDisksExistence(self, nodes):
8176 for idx, dev in enumerate(self.instance.disks):
8177 if idx not in self.disks:
8178 continue
8179
8180 for node in nodes:
8181 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8182 self.cfg.SetDiskID(dev, node)
8183
8184 result = self.rpc.call_blockdev_find(node, dev)
8185
8186 msg = result.fail_msg
8187 if msg or not result.payload:
8188 if not msg:
8189 msg = "disk not found"
8190 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8191 (idx, node, msg))
8192
8193 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8194 for idx, dev in enumerate(self.instance.disks):
8195 if idx not in self.disks:
8196 continue
8197
8198 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8199 (idx, node_name))
8200
8201 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8202 ldisk=ldisk):
8203 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8204 " replace disks for instance %s" %
8205 (node_name, self.instance.name))
8206
8207 - def _CreateNewStorage(self, node_name):
8208 vgname = self.cfg.GetVGName()
8209 iv_names = {}
8210
8211 for idx, dev in enumerate(self.instance.disks):
8212 if idx not in self.disks:
8213 continue
8214
8215 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8216
8217 self.cfg.SetDiskID(dev, node_name)
8218
8219 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8220 names = _GenerateUniqueNames(self.lu, lv_names)
8221
8222 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8223 logical_id=(vgname, names[0]))
8224 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8225 logical_id=(vgname, names[1]))
8226
8227 new_lvs = [lv_data, lv_meta]
8228 old_lvs = dev.children
8229 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8230
8231 # we pass force_create=True to force the LVM creation
8232 for new_lv in new_lvs:
8233 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8234 _GetInstanceInfoText(self.instance), False)
8235
8236 return iv_names
8237
8238 - def _CheckDevices(self, node_name, iv_names):
8239 for name, (dev, _, _) in iv_names.iteritems():
8240 self.cfg.SetDiskID(dev, node_name)
8241
8242 result = self.rpc.call_blockdev_find(node_name, dev)
8243
8244 msg = result.fail_msg
8245 if msg or not result.payload:
8246 if not msg:
8247 msg = "disk not found"
8248 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8249 (name, msg))
8250
8251 if result.payload.is_degraded:
8252 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8253
8254 - def _RemoveOldStorage(self, node_name, iv_names):
8255 for name, (_, old_lvs, _) in iv_names.iteritems():
8256 self.lu.LogInfo("Remove logical volumes for %s" % name)
8257
8258 for lv in old_lvs:
8259 self.cfg.SetDiskID(lv, node_name)
8260
8261 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8262 if msg:
8263 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8264 hint="remove unused LVs manually")
8265
8266 - def _ReleaseNodeLock(self, node_name):
8267 """Releases the lock for the given node (or list of node names)."""
8268 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8269 
8271 """Replace a disk on the primary or secondary for DRBD 8.
8272
8273 The algorithm for replace is quite complicated:
8274
8275 1. for each disk to be replaced:
8276
8277 1. create new LVs on the target node with unique names
8278 1. detach old LVs from the drbd device
8279 1. rename old LVs to name_replaced.<time_t>
8280 1. rename new LVs to old LVs
8281 1. attach the new LVs (with the old names now) to the drbd device
8282
8283 1. wait for sync across all devices
8284
8285 1. for each modified disk:
8286
8287 1. remove old LVs (which have the name name_replaced.<time_t>)
8288
8289 Failures are not very well handled.
8290
8291 """
8292 steps_total = 6
8293
8294 # Step: check device activation
8295 self.lu.LogStep(1, steps_total, "Check device existence")
8296 self._CheckDisksExistence([self.other_node, self.target_node])
8297 self._CheckVolumeGroup([self.target_node, self.other_node])
8298
8299 # Step: check other node consistency
8300 self.lu.LogStep(2, steps_total, "Check peer consistency")
8301 self._CheckDisksConsistency(self.other_node,
8302 self.other_node == self.instance.primary_node,
8303 False)
8304
8305 # Step: create new storage
8306 self.lu.LogStep(3, steps_total, "Allocate new storage")
8307 iv_names = self._CreateNewStorage(self.target_node)
8308
8309
8310 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8311 for dev, old_lvs, new_lvs in iv_names.itervalues():
8312 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8313
8314 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8315 old_lvs)
8316 result.Raise("Can't detach drbd from local storage on node"
8317 " %s for device %s" % (self.target_node, dev.iv_name))
8318
8319
8320
8321
8322
8323
8324
8325
8326
8327
8328 temp_suffix = int(time.time())
8329 ren_fn = lambda d, suff: (d.physical_id[0],
8330 d.physical_id[1] + "_replaced-%s" % suff)
8331
8332
8333 rename_old_to_new = []
8334 for to_ren in old_lvs:
8335 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8336 if not result.fail_msg and result.payload:
8337
8338 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8339
8340 self.lu.LogInfo("Renaming the old LVs on the target node")
8341 result = self.rpc.call_blockdev_rename(self.target_node,
8342 rename_old_to_new)
8343 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8344
8345
8346 self.lu.LogInfo("Renaming the new LVs on the target node")
8347 rename_new_to_old = [(new, old.physical_id)
8348 for old, new in zip(old_lvs, new_lvs)]
8349 result = self.rpc.call_blockdev_rename(self.target_node,
8350 rename_new_to_old)
8351 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8352
8353 for old, new in zip(old_lvs, new_lvs):
8354 new.logical_id = old.logical_id
8355 self.cfg.SetDiskID(new, self.target_node)
8356
8357 for disk in old_lvs:
8358 disk.logical_id = ren_fn(disk, temp_suffix)
8359 self.cfg.SetDiskID(disk, self.target_node)
8360
8361
8362 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8363 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8364 new_lvs)
8365 msg = result.fail_msg
8366 if msg:
8367 for new_lv in new_lvs:
8368 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8369 new_lv).fail_msg
8370 if msg2:
8371 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8372 hint=("cleanup manually the unused logical"
8373 "volumes"))
8374 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8375
8376 dev.children = new_lvs
8377
8378 self.cfg.Update(self.instance, feedback_fn)
8379
8380 cstep = 5
8381 if self.early_release:
8382 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8383 cstep += 1
8384 self._RemoveOldStorage(self.target_node, iv_names)
8385
8386
8387 self._ReleaseNodeLock([self.target_node, self.other_node])
8388
8389
8390
8391
8392 self.lu.LogStep(cstep, steps_total, "Sync devices")
8393 cstep += 1
8394 _WaitForSync(self.lu, self.instance)
8395
8396
8397 self._CheckDevices(self.instance.primary_node, iv_names)
8398
8399
8400 if not self.early_release:
8401 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8402 cstep += 1
8403 self._RemoveOldStorage(self.target_node, iv_names)
8404
8406 """Replace the secondary node for DRBD 8.
8407
8408 The algorithm for replace is quite complicated:
8409 - for all disks of the instance:
8410 - create new LVs on the new node with same names
8411 - shutdown the drbd device on the old secondary
8412 - disconnect the drbd network on the primary
8413 - create the drbd device on the new secondary
8414 - network attach the drbd on the primary, using an artifice:
8415 the drbd code for Attach() will connect to the network if it
8416 finds a device which is connected to the correct local disks but
8417 is not yet network enabled
8418 - wait for sync across all devices
8419 - remove all disks from the old secondary
8420
8421 Failures are not very well handled.
8422
8423 """
8424 steps_total = 6
8425
8426
8427 self.lu.LogStep(1, steps_total, "Check device existence")
8428 self._CheckDisksExistence([self.instance.primary_node])
8429 self._CheckVolumeGroup([self.instance.primary_node])
8430
8431
8432 self.lu.LogStep(2, steps_total, "Check peer consistency")
8433 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8434
8435
8436 self.lu.LogStep(3, steps_total, "Allocate new storage")
8437 for idx, dev in enumerate(self.instance.disks):
8438 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8439 (self.new_node, idx))
8440
8441 for new_lv in dev.children:
8442 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8443 _GetInstanceInfoText(self.instance), False)
8444
8445
8446
8447
8448 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8449 minors = self.cfg.AllocateDRBDMinor([self.new_node
8450 for dev in self.instance.disks],
8451 self.instance.name)
8452 logging.debug("Allocated minors %r", minors)
8453
8454 iv_names = {}
8455 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8456 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8457 (self.new_node, idx))
8458
8459
8460
8461
8462 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8463 if self.instance.primary_node == o_node1:
8464 p_minor = o_minor1
8465 else:
8466 assert self.instance.primary_node == o_node2, "Three-node instance?"
8467 p_minor = o_minor2
8468
8469 new_alone_id = (self.instance.primary_node, self.new_node, None,
8470 p_minor, new_minor, o_secret)
8471 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8472 p_minor, new_minor, o_secret)
8473
8474 iv_names[idx] = (dev, dev.children, new_net_id)
8475 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8476 new_net_id)
8477 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8478 logical_id=new_alone_id,
8479 children=dev.children,
8480 size=dev.size)
8481 try:
8482 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8483 _GetInstanceInfoText(self.instance), False)
8484 except errors.GenericError:
8485 self.cfg.ReleaseDRBDMinors(self.instance.name)
8486 raise
8487
8488
8489 for idx, dev in enumerate(self.instance.disks):
8490 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8491 self.cfg.SetDiskID(dev, self.target_node)
8492 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8493 if msg:
8494 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
8495 " node: %s" % (idx, msg),
8496 hint=("Please cleanup this device manually as"
8497 " soon as possible"))
8498
8499 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8500 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8501 self.node_secondary_ip,
8502 self.instance.disks)\
8503 [self.instance.primary_node]
8504
8505 msg = result.fail_msg
8506 if msg:
8507
8508 self.cfg.ReleaseDRBDMinors(self.instance.name)
8509 raise errors.OpExecError("Can't detach the disks from the network on"
8510 " old node: %s" % (msg,))
8511
8512
8513
8514 self.lu.LogInfo("Updating instance configuration")
8515 for dev, _, new_logical_id in iv_names.itervalues():
8516 dev.logical_id = new_logical_id
8517 self.cfg.SetDiskID(dev, self.instance.primary_node)
8518
8519 self.cfg.Update(self.instance, feedback_fn)
8520
8521
8522 self.lu.LogInfo("Attaching primary drbds to new secondary"
8523 " (standalone => connected)")
8524 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8525 self.new_node],
8526 self.node_secondary_ip,
8527 self.instance.disks,
8528 self.instance.name,
8529 False)
8530 for to_node, to_result in result.items():
8531 msg = to_result.fail_msg
8532 if msg:
8533 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8534 to_node, msg,
8535 hint=("please do a gnt-instance info to see the"
8536 " status of disks"))
8537 cstep = 5
8538 if self.early_release:
8539 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8540 cstep += 1
8541 self._RemoveOldStorage(self.target_node, iv_names)
8542
8543
8544 self._ReleaseNodeLock([self.instance.primary_node,
8545 self.target_node,
8546 self.new_node])
8547
8548
8549
8550
8551 self.lu.LogStep(cstep, steps_total, "Sync devices")
8552 cstep += 1
8553 _WaitForSync(self.lu, self.instance)
8554
8555
8556 self._CheckDevices(self.instance.primary_node, iv_names)
8557
8558
8559 if not self.early_release:
8560 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8561 self._RemoveOldStorage(self.target_node, iv_names)
8562
8565 """Repairs the volume group on a node.
8566
8567 """
8568 _OP_PARAMS = [
8569 _PNodeName,
8570 ("storage_type", ht.NoDefault, _CheckStorageType),
8571 ("name", ht.NoDefault, ht.TNonEmptyString),
8572 ("ignore_consistency", False, ht.TBool),
8573 ]
8574 REQ_BGL = False
8575
8586
8591
8605
8607 """Check prerequisites.
8608
8609 """
8610
8611 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8612 if not inst.admin_up:
8613 continue
8614 check_nodes = set(inst.all_nodes)
8615 check_nodes.discard(self.op.node_name)
8616 for inst_node_name in check_nodes:
8617 self._CheckFaultyDisks(inst, inst_node_name)
8618
8619 def Exec(self, feedback_fn):
8620 feedback_fn("Repairing storage unit '%s' on %s ..." %
8621 (self.op.name, self.op.node_name))
8622
8623 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8624 result = self.rpc.call_storage_execute(self.op.node_name,
8625 self.op.storage_type, st_args,
8626 self.op.name,
8627 constants.SO_FIX_CONSISTENCY)
8628 result.Raise("Failed to repair storage unit '%s' on %s" %
8629 (self.op.name, self.op.node_name))
8630
8633 """Computes the node evacuation strategy.
8634
8635 """
8636 _OP_PARAMS = [
8637 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8638 ("remote_node", None, ht.TMaybeString),
8639 ("iallocator", None, ht.TMaybeString),
8640 ]
8641 REQ_BGL = False
8642
8645
8654
8655 def Exec(self, feedback_fn):
8656 if self.op.remote_node is not None:
8657 instances = []
8658 for node in self.op.nodes:
8659 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8660 result = []
8661 for i in instances:
8662 if i.primary_node == self.op.remote_node:
8663 raise errors.OpPrereqError("Node %s is the primary node of"
8664 " instance %s, cannot use it as"
8665 " secondary" %
8666 (self.op.remote_node, i.name),
8667 errors.ECODE_INVAL)
8668 result.append([i.name, self.op.remote_node])
8669 else:
8670 ial = IAllocator(self.cfg, self.rpc,
8671 mode=constants.IALLOCATOR_MODE_MEVAC,
8672 evac_nodes=self.op.nodes)
8673 ial.Run(self.op.iallocator, validate=True)
8674 if not ial.success:
8675 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8676 errors.ECODE_NORES)
8677 result = ial.result
8678 return result
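# The returned value is a list of [instance_name, new_secondary] pairs,
# e.g. [["inst1.example.com", "node3"]] (illustrative names); with an
# iallocator the allocator's result is returned verbatim.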
8679
8682 """Grow a disk of an instance.
8683
8684 """
8685 HPATH = "disk-grow"
8686 HTYPE = constants.HTYPE_INSTANCE
8687 _OP_PARAMS = [
8688 _PInstanceName,
8689 ("disk", ht.NoDefault, ht.TInt),
8690 ("amount", ht.NoDefault, ht.TInt),
8691 ("wait_for_sync", True, ht.TBool),
8692 ]
8693 REQ_BGL = False
8694
8699
8703
8705 """Build hooks env.
8706
8707 This runs on the master, the primary and all the secondaries.
8708
8709 """
8710 env = {
8711 "DISK": self.op.disk,
8712 "AMOUNT": self.op.amount,
8713 }
8714 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8715 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8716 return env, nl, nl
8717
8743
8744 def Exec(self, feedback_fn):
8745 """Execute disk grow.
8746
8747 """
8748 instance = self.instance
8749 disk = self.disk
8750
8751 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8752 if not disks_ok:
8753 raise errors.OpExecError("Cannot activate block device to grow")
8754
8755 for node in instance.all_nodes:
8756 self.cfg.SetDiskID(disk, node)
8757 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8758 result.Raise("Grow request failed to node %s" % node)
8759
8760
8761
8762
8763
8764
8765 time.sleep(5)
8766
8767 disk.RecordGrow(self.op.amount)
8768 self.cfg.Update(instance, feedback_fn)
8769 if self.op.wait_for_sync:
8770 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8771 if disk_abort:
8772 self.proc.LogWarning("Warning: disk syncing has not returned a good
8773 " status.\nPlease check the instance.")
8774 if not instance.admin_up:
8775 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8776 elif not instance.admin_up:
8777 self.proc.LogWarning("Not shutting down the disk even though the"
8778 " instance is not supposed to be running, because"
8779 " wait_for_sync was not requested")
8780
8783 """Query runtime instance data.
8784
8785 """
8786 _OP_PARAMS = [
8787 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8788 ("static", False, ht.TBool),
8789 ]
8790 REQ_BGL = False
8791
8808
8812
8814 """Check prerequisites.
8815
8816 This only checks the optional instance list against the existing names.
8817
8818 """
8819 if self.wanted_names is None:
8820 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8821
8822 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8823 in self.wanted_names]
8824
8826 """Returns the status of a block device
8827
8828 """
8829 if self.op.static or not node:
8830 return None
8831
8832 self.cfg.SetDiskID(dev, node)
8833
8834 result = self.rpc.call_blockdev_find(node, dev)
8835 if result.offline:
8836 return None
8837
8838 result.Raise("Can't compute disk status for %s" % instance_name)
8839
8840 status = result.payload
8841 if status is None:
8842 return None
8843
8844 return (status.dev_path, status.major, status.minor,
8845 status.sync_percent, status.estimated_time,
8846 status.is_degraded, status.ldisk_status)
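# Illustrative return value for a syncing DRBD device, i.e. (dev_path,
# major, minor, sync_percent, estimated_time, is_degraded, ldisk_status):
#
#   ("/dev/drbd0", 147, 0, 42.5, 900, True, None)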
8847
8849 """Compute block device status.
8850
8851 """
8852 if dev.dev_type in constants.LDS_DRBD:
8853
8854 if dev.logical_id[0] == instance.primary_node:
8855 snode = dev.logical_id[1]
8856 else:
8857 snode = dev.logical_id[0]
8858
8859 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8860 instance.name, dev)
8861 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8862
8863 if dev.children:
8864 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8865 for child in dev.children]
8866 else:
8867 dev_children = []
8868
8869 data = {
8870 "iv_name": dev.iv_name,
8871 "dev_type": dev.dev_type,
8872 "logical_id": dev.logical_id,
8873 "physical_id": dev.physical_id,
8874 "pstatus": dev_pstatus,
8875 "sstatus": dev_sstatus,
8876 "children": dev_children,
8877 "mode": dev.mode,
8878 "size": dev.size,
8879 }
8880
8881 return data
8882
8883 def Exec(self, feedback_fn):
8884 """Gather and return data"""
8885 result = {}
8886
8887 cluster = self.cfg.GetClusterInfo()
8888
8889 for instance in self.wanted_instances:
8890 if not self.op.static:
8891 remote_info = self.rpc.call_instance_info(instance.primary_node,
8892 instance.name,
8893 instance.hypervisor)
8894 remote_info.Raise("Error checking node %s" % instance.primary_node)
8895 remote_info = remote_info.payload
8896 if remote_info and "state" in remote_info:
8897 remote_state = "up"
8898 else:
8899 remote_state = "down"
8900 else:
8901 remote_state = None
8902 if instance.admin_up:
8903 config_state = "up"
8904 else:
8905 config_state = "down"
8906
8907 disks = [self._ComputeDiskStatus(instance, None, device)
8908 for device in instance.disks]
8909
8910 idict = {
8911 "name": instance.name,
8912 "config_state": config_state,
8913 "run_state": remote_state,
8914 "pnode": instance.primary_node,
8915 "snodes": instance.secondary_nodes,
8916 "os": instance.os,
8917
8918 "nics": _NICListToTuple(self, instance.nics),
8919 "disk_template": instance.disk_template,
8920 "disks": disks,
8921 "hypervisor": instance.hypervisor,
8922 "network_port": instance.network_port,
8923 "hv_instance": instance.hvparams,
8924 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8925 "be_instance": instance.beparams,
8926 "be_actual": cluster.FillBE(instance),
8927 "os_instance": instance.osparams,
8928 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8929 "serial_no": instance.serial_no,
8930 "mtime": instance.mtime,
8931 "ctime": instance.ctime,
8932 "uuid": instance.uuid,
8933 }
8934
8935 result[instance.name] = idict
8936
8937 return result
8938
8941 """Modifies an instances's parameters.
8942
8943 """
8944 HPATH = "instance-modify"
8945 HTYPE = constants.HTYPE_INSTANCE
8946 _OP_PARAMS = [
8947 _PInstanceName,
8948 ("nics", ht.EmptyList, ht.TList),
8949 ("disks", ht.EmptyList, ht.TList),
8950 ("beparams", ht.EmptyDict, ht.TDict),
8951 ("hvparams", ht.EmptyDict, ht.TDict),
8952 ("disk_template", None, ht.TMaybeString),
8953 ("remote_node", None, ht.TMaybeString),
8954 ("os_name", None, ht.TMaybeString),
8955 ("force_variant", False, ht.TBool),
8956 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8957 _PForce,
8958 ]
8959 REQ_BGL = False
8960
8962 if not (self.op.nics or self.op.disks or self.op.disk_template or
8963 self.op.hvparams or self.op.beparams or self.op.os_name):
8964 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8965
8966 if self.op.hvparams:
8967 _CheckGlobalHvParams(self.op.hvparams)
8968
8969
8970 disk_addremove = 0
8971 for disk_op, disk_dict in self.op.disks:
8972 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8973 if disk_op == constants.DDM_REMOVE:
8974 disk_addremove += 1
8975 continue
8976 elif disk_op == constants.DDM_ADD:
8977 disk_addremove += 1
8978 else:
8979 if not isinstance(disk_op, int):
8980 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8981 if not isinstance(disk_dict, dict):
8982 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8983 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8984
8985 if disk_op == constants.DDM_ADD:
8986 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8987 if mode not in constants.DISK_ACCESS_SET:
8988 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8989 errors.ECODE_INVAL)
8990 size = disk_dict.get('size', None)
8991 if size is None:
8992 raise errors.OpPrereqError("Required disk parameter size missing",
8993 errors.ECODE_INVAL)
8994 try:
8995 size = int(size)
8996 except (TypeError, ValueError), err:
8997 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8998 str(err), errors.ECODE_INVAL)
8999 disk_dict['size'] = size
9000 else:
9001
9002 if 'size' in disk_dict:
9003 raise errors.OpPrereqError("Disk size change not possible, use"
9004 " grow-disk", errors.ECODE_INVAL)
9005
9006 if disk_addremove > 1:
9007 raise errors.OpPrereqError("Only one disk add or remove operation"
9008 " supported at a time", errors.ECODE_INVAL)
9009
9010 if self.op.disks and self.op.disk_template is not None:
9011 raise errors.OpPrereqError("Disk template conversion and other disk"
9012 " changes not supported at the same time",
9013 errors.ECODE_INVAL)
9014
9015 if self.op.disk_template:
9016 _CheckDiskTemplate(self.op.disk_template)
9017 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9018 self.op.remote_node is None):
9019 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9020 " one requires specifying a secondary node",
9021 errors.ECODE_INVAL)
9022
9023
9024 nic_addremove = 0
9025 for nic_op, nic_dict in self.op.nics:
9026 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9027 if nic_op == constants.DDM_REMOVE:
9028 nic_addremove += 1
9029 continue
9030 elif nic_op == constants.DDM_ADD:
9031 nic_addremove += 1
9032 else:
9033 if not isinstance(nic_op, int):
9034 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9035 if not isinstance(nic_dict, dict):
9036 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9037 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9038
9039
9040 nic_ip = nic_dict.get('ip', None)
9041 if nic_ip is not None:
9042 if nic_ip.lower() == constants.VALUE_NONE:
9043 nic_dict['ip'] = None
9044 else:
9045 if not netutils.IPAddress.IsValid(nic_ip):
9046 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9047 errors.ECODE_INVAL)
9048
9049 nic_bridge = nic_dict.get('bridge', None)
9050 nic_link = nic_dict.get('link', None)
9051 if nic_bridge and nic_link:
9052 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9053 " at the same time", errors.ECODE_INVAL)
9054 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9055 nic_dict['bridge'] = None
9056 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9057 nic_dict['link'] = None
9058
9059 if nic_op == constants.DDM_ADD:
9060 nic_mac = nic_dict.get('mac', None)
9061 if nic_mac is None:
9062 nic_dict['mac'] = constants.VALUE_AUTO
9063
9064 if 'mac' in nic_dict:
9065 nic_mac = nic_dict['mac']
9066 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9067 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9068
9069 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9070 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9071 " modifying an existing nic",
9072 errors.ECODE_INVAL)
9073
9074 if nic_addremove > 1:
9075 raise errors.OpPrereqError("Only one NIC add or remove operation"
9076 " supported at a time", errors.ECODE_INVAL)
9077
9082
9089
9091 """Build hooks env.
9092
9093 This runs on the master, primary and secondaries.
9094
9095 """
9096 args = dict()
9097 if constants.BE_MEMORY in self.be_new:
9098 args['memory'] = self.be_new[constants.BE_MEMORY]
9099 if constants.BE_VCPUS in self.be_new:
9100 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9101
9102
9103 if self.op.nics:
9104 args['nics'] = []
9105 nic_override = dict(self.op.nics)
9106 for idx, nic in enumerate(self.instance.nics):
9107 if idx in nic_override:
9108 this_nic_override = nic_override[idx]
9109 else:
9110 this_nic_override = {}
9111 if 'ip' in this_nic_override:
9112 ip = this_nic_override['ip']
9113 else:
9114 ip = nic.ip
9115 if 'mac' in this_nic_override:
9116 mac = this_nic_override['mac']
9117 else:
9118 mac = nic.mac
9119 if idx in self.nic_pnew:
9120 nicparams = self.nic_pnew[idx]
9121 else:
9122 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9123 mode = nicparams[constants.NIC_MODE]
9124 link = nicparams[constants.NIC_LINK]
9125 args['nics'].append((ip, mac, mode, link))
9126 if constants.DDM_ADD in nic_override:
9127 ip = nic_override[constants.DDM_ADD].get('ip', None)
9128 mac = nic_override[constants.DDM_ADD]['mac']
9129 nicparams = self.nic_pnew[constants.DDM_ADD]
9130 mode = nicparams[constants.NIC_MODE]
9131 link = nicparams[constants.NIC_LINK]
9132 args['nics'].append((ip, mac, mode, link))
9133 elif constants.DDM_REMOVE in nic_override:
9134 del args['nics'][-1]
9135
9136 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9137 if self.op.disk_template:
9138 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9139 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9140 return env, nl, nl
9141
9143 """Check prerequisites.
9144
9145 This only checks the instance list against the existing names.
9146
9147 """
9148
9149
9150 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9151 cluster = self.cluster = self.cfg.GetClusterInfo()
9152 assert self.instance is not None, \
9153 "Cannot retrieve locked instance %s" % self.op.instance_name
9154 pnode = instance.primary_node
9155 nodelist = list(instance.all_nodes)
9156
9157
9158 if self.op.os_name and not self.op.force:
9159 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9160 self.op.force_variant)
9161 instance_os = self.op.os_name
9162 else:
9163 instance_os = instance.os
9164
9165 if self.op.disk_template:
9166 if instance.disk_template == self.op.disk_template:
9167 raise errors.OpPrereqError("Instance already has disk template %s" %
9168 instance.disk_template, errors.ECODE_INVAL)
9169
9170 if (instance.disk_template,
9171 self.op.disk_template) not in self._DISK_CONVERSIONS:
9172 raise errors.OpPrereqError("Unsupported disk template conversion from"
9173 " %s to %s" % (instance.disk_template,
9174 self.op.disk_template),
9175 errors.ECODE_INVAL)
9176 _CheckInstanceDown(self, instance, "cannot change disk template")
9177 if self.op.disk_template in constants.DTS_NET_MIRROR:
9178 if self.op.remote_node == pnode:
9179 raise errors.OpPrereqError("Given new secondary node %s is the same"
9180 " as the primary node of the instance" %
9181 self.op.remote_node, errors.ECODE_STATE)
9182 _CheckNodeOnline(self, self.op.remote_node)
9183 _CheckNodeNotDrained(self, self.op.remote_node)
9184 disks = [{"size": d.size} for d in instance.disks]
9185 required = _ComputeDiskSize(self.op.disk_template, disks)
9186 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
9187
9188
9189 if self.op.hvparams:
9190 hv_type = instance.hypervisor
9191 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9192 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9193 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9194
9195
9196 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9197 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9198 self.hv_new = hv_new
9199 self.hv_inst = i_hvdict
9200 else:
9201 self.hv_new = self.hv_inst = {}
9202
9203
9204 if self.op.beparams:
9205 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9206 use_none=True)
9207 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9208 be_new = cluster.SimpleFillBE(i_bedict)
9209 self.be_new = be_new
9210 self.be_inst = i_bedict
9211 else:
9212 self.be_new = self.be_inst = {}
9213
9214
9215 if self.op.osparams:
9216 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9217 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9218 self.os_inst = i_osdict
9219 else:
9220 self.os_inst = {}
9221
9222 self.warn = []
9223
9224 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9225 mem_check_list = [pnode]
9226 if be_new[constants.BE_AUTO_BALANCE]:
9227
9228 mem_check_list.extend(instance.secondary_nodes)
9229 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9230 instance.hypervisor)
9231 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9232 instance.hypervisor)
9233 pninfo = nodeinfo[pnode]
9234 msg = pninfo.fail_msg
9235 if msg:
9236
9237 self.warn.append("Can't get info from primary node %s: %s" %
9238 (pnode, msg))
9239 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9240 self.warn.append("Node data from primary node %s doesn't contain"
9241 " free memory information" % pnode)
9242 elif instance_info.fail_msg:
9243 self.warn.append("Can't get instance runtime information: %s" %
9244 instance_info.fail_msg)
9245 else:
9246 if instance_info.payload:
9247 current_mem = int(instance_info.payload['memory'])
9248 else:
9249
9250
9251
9252 current_mem = 0
9253 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9254 pninfo.payload['memory_free'])
9255 if miss_mem > 0:
9256 raise errors.OpPrereqError("This change will prevent the instance"
9257 " from starting, due to %d MB of memory"
9258 " missing on its primary node" % miss_mem,
9259 errors.ECODE_NORES)
9260
9261 if be_new[constants.BE_AUTO_BALANCE]:
9262 for node, nres in nodeinfo.items():
9263 if node not in instance.secondary_nodes:
9264 continue
9265 msg = nres.fail_msg
9266 if msg:
9267 self.warn.append("Can't get info from secondary node %s: %s" %
9268 (node, msg))
9269 elif not isinstance(nres.payload.get('memory_free', None), int):
9270 self.warn.append("Secondary node %s didn't return free"
9271 " memory information" % node)
9272 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9273 self.warn.append("Not enough memory to fail over instance to"
9274 " secondary node %s" % node)
9275
9276
9277 self.nic_pnew = {}
9278 self.nic_pinst = {}
9279 for nic_op, nic_dict in self.op.nics:
9280 if nic_op == constants.DDM_REMOVE:
9281 if not instance.nics:
9282 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9283 errors.ECODE_INVAL)
9284 continue
9285 if nic_op != constants.DDM_ADD:
9286
9287 if not instance.nics:
9288 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9289 " no NICs" % nic_op,
9290 errors.ECODE_INVAL)
9291 if nic_op < 0 or nic_op >= len(instance.nics):
9292 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9293 " are 0 to %d" %
9294 (nic_op, len(instance.nics) - 1),
9295 errors.ECODE_INVAL)
9296 old_nic_params = instance.nics[nic_op].nicparams
9297 old_nic_ip = instance.nics[nic_op].ip
9298 else:
9299 old_nic_params = {}
9300 old_nic_ip = None
9301
9302 update_params_dict = dict([(key, nic_dict[key])
9303 for key in constants.NICS_PARAMETERS
9304 if key in nic_dict])
9305
9306 if 'bridge' in nic_dict:
9307 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9308
9309 new_nic_params = _GetUpdatedParams(old_nic_params,
9310 update_params_dict)
9311 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9312 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9313 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9314 self.nic_pinst[nic_op] = new_nic_params
9315 self.nic_pnew[nic_op] = new_filled_nic_params
9316 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9317
9318 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9319 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9320 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9321 if msg:
9322 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9323 if self.op.force:
9324 self.warn.append(msg)
9325 else:
9326 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9327 if new_nic_mode == constants.NIC_MODE_ROUTED:
9328 if 'ip' in nic_dict:
9329 nic_ip = nic_dict['ip']
9330 else:
9331 nic_ip = old_nic_ip
9332 if nic_ip is None:
9333 raise errors.OpPrereqError('Cannot set the nic ip to None'
9334 ' on a routed nic', errors.ECODE_INVAL)
9335 if 'mac' in nic_dict:
9336 nic_mac = nic_dict['mac']
9337 if nic_mac is None:
9338 raise errors.OpPrereqError('Cannot set the nic mac to None',
9339 errors.ECODE_INVAL)
9340 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9341
9342 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9343 else:
9344
9345 try:
9346 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9347 except errors.ReservationError:
9348 raise errors.OpPrereqError("MAC address %s already in use"
9349 " in cluster" % nic_mac,
9350 errors.ECODE_NOTUNIQUE)
9351
9352
9353 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9354 raise errors.OpPrereqError("Disk operations not supported for"
9355 " diskless instances",
9356 errors.ECODE_INVAL)
9357 for disk_op, _ in self.op.disks:
9358 if disk_op == constants.DDM_REMOVE:
9359 if len(instance.disks) == 1:
9360 raise errors.OpPrereqError("Cannot remove the last disk of"
9361 " an instance", errors.ECODE_INVAL)
9362 _CheckInstanceDown(self, instance, "cannot remove disks")
9363
9364 if (disk_op == constants.DDM_ADD and
9365 len(instance.disks) >= constants.MAX_DISKS):
9366 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9367 " add more" % constants.MAX_DISKS,
9368 errors.ECODE_STATE)
9369 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9370
9371 if disk_op < 0 or disk_op >= len(instance.disks):
9372 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9373 " are 0 to %d" %
9374 (disk_op, len(instance.disks)),
9375 errors.ECODE_INVAL)
9376
9377 return
9378
9380 """Converts an instance from plain to drbd.
9381
9382 """
9383 feedback_fn("Converting template to drbd")
9384 instance = self.instance
9385 pnode = instance.primary_node
9386 snode = self.op.remote_node
9387
9388
9389 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9390 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9391 instance.name, pnode, [snode],
9392 disk_info, None, None, 0)
9393 info = _GetInstanceInfoText(instance)
9394 feedback_fn("Creating aditional volumes...")
9395
9396 for disk in new_disks:
9397
9398 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9399 info, True)
9400 for child in disk.children:
9401 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9402
9403
9404 feedback_fn("Renaming original volumes...")
9405 rename_list = [(o, n.children[0].logical_id)
9406 for (o, n) in zip(instance.disks, new_disks)]
9407 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9408 result.Raise("Failed to rename original LVs")
9409
9410 feedback_fn("Initializing DRBD devices...")
9411
9412 for disk in new_disks:
9413 for node in [pnode, snode]:
9414 f_create = node == pnode
9415 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9416
9417
9418 instance.disk_template = constants.DT_DRBD8
9419 instance.disks = new_disks
9420 self.cfg.Update(instance, feedback_fn)
9421
9422
9423 disk_abort = not _WaitForSync(self, instance)
9424 if disk_abort:
9425 raise errors.OpExecError("There are some degraded disks for"
9426 " this instance, please cleanup manually")
9427
9429 """Converts an instance from drbd to plain.
9430
9431 """
9432 instance = self.instance
9433 assert len(instance.secondary_nodes) == 1
9434 pnode = instance.primary_node
9435 snode = instance.secondary_nodes[0]
9436 feedback_fn("Converting template to plain")
9437
9438 old_disks = instance.disks
9439 new_disks = [d.children[0] for d in old_disks]
9440
9441
9442 for parent, child in zip(old_disks, new_disks):
9443 child.size = parent.size
9444 child.mode = parent.mode
9445
9446
9447 instance.disks = new_disks
9448 instance.disk_template = constants.DT_PLAIN
9449 self.cfg.Update(instance, feedback_fn)
9450
9451 feedback_fn("Removing volumes on the secondary node...")
9452 for disk in old_disks:
9453 self.cfg.SetDiskID(disk, snode)
9454 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9455 if msg:
9456 self.LogWarning("Could not remove block device %s on node %s,"
9457 " continuing anyway: %s", disk.iv_name, snode, msg)
9458
9459 feedback_fn("Removing unneeded volumes on the primary node...")
9460 for idx, disk in enumerate(old_disks):
9461 meta = disk.children[1]
9462 self.cfg.SetDiskID(meta, pnode)
9463 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9464 if msg:
9465 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9466 " continuing anyway: %s", idx, pnode, msg)
9467
9468
9469 def Exec(self, feedback_fn):
9470 """Modifies an instance.
9471
9472 All parameters take effect only at the next restart of the instance.
9473
9474 """
9475
9476
9477 for warn in self.warn:
9478 feedback_fn("WARNING: %s" % warn)
9479
9480 result = []
9481 instance = self.instance
9482
9483 for disk_op, disk_dict in self.op.disks:
9484 if disk_op == constants.DDM_REMOVE:
9485
9486 device = instance.disks.pop()
9487 device_idx = len(instance.disks)
9488 for node, disk in device.ComputeNodeTree(instance.primary_node):
9489 self.cfg.SetDiskID(disk, node)
9490 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9491 if msg:
9492 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9493 " continuing anyway", device_idx, node, msg)
9494 result.append(("disk/%d" % device_idx, "remove"))
9495 elif disk_op == constants.DDM_ADD:
9496
9497 if instance.disk_template == constants.DT_FILE:
9498 file_driver, file_path = instance.disks[0].logical_id
9499 file_path = os.path.dirname(file_path)
9500 else:
9501 file_driver = file_path = None
9502 disk_idx_base = len(instance.disks)
9503 new_disk = _GenerateDiskTemplate(self,
9504 instance.disk_template,
9505 instance.name, instance.primary_node,
9506 instance.secondary_nodes,
9507 [disk_dict],
9508 file_path,
9509 file_driver,
9510 disk_idx_base)[0]
9511 instance.disks.append(new_disk)
9512 info = _GetInstanceInfoText(instance)
9513
9514 logging.info("Creating volume %s for instance %s",
9515 new_disk.iv_name, instance.name)
9516
9517
9518 for node in instance.all_nodes:
9519 f_create = node == instance.primary_node
9520 try:
9521 _CreateBlockDev(self, node, instance, new_disk,
9522 f_create, info, f_create)
9523 except errors.OpExecError, err:
9524 self.LogWarning("Failed to create volume %s (%s) on"
9525 " node %s: %s",
9526 new_disk.iv_name, new_disk, node, err)
9527 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9528 (new_disk.size, new_disk.mode)))
9529 else:
9530
9531 instance.disks[disk_op].mode = disk_dict['mode']
9532 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9533
9534 if self.op.disk_template:
9535 r_shut = _ShutdownInstanceDisks(self, instance)
9536 if not r_shut:
9537 raise errors.OpExecError("Cannot shut down instance disks, unable to"
9538 " proceed with disk template conversion")
9539 mode = (instance.disk_template, self.op.disk_template)
9540 try:
9541 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9542 except:
9543 self.cfg.ReleaseDRBDMinors(instance.name)
9544 raise
9545 result.append(("disk_template", self.op.disk_template))
9546
9547
9548 for nic_op, nic_dict in self.op.nics:
9549 if nic_op == constants.DDM_REMOVE:
9550
9551 del instance.nics[-1]
9552 result.append(("nic.%d" % len(instance.nics), "remove"))
9553 elif nic_op == constants.DDM_ADD:
9554
9555 mac = nic_dict['mac']
9556 ip = nic_dict.get('ip', None)
9557 nicparams = self.nic_pinst[constants.DDM_ADD]
9558 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9559 instance.nics.append(new_nic)
9560 result.append(("nic.%d" % (len(instance.nics) - 1),
9561 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9562 (new_nic.mac, new_nic.ip,
9563 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9564 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9565 )))
9566 else:
9567 for key in 'mac', 'ip':
9568 if key in nic_dict:
9569 setattr(instance.nics[nic_op], key, nic_dict[key])
9570 if nic_op in self.nic_pinst:
9571 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9572 for key, val in nic_dict.iteritems():
9573 result.append(("nic.%s/%d" % (key, nic_op), val))
9574
9575
9576 if self.op.hvparams:
9577 instance.hvparams = self.hv_inst
9578 for key, val in self.op.hvparams.iteritems():
9579 result.append(("hv/%s" % key, val))
9580
9581
9582 if self.op.beparams:
9583 instance.beparams = self.be_inst
9584 for key, val in self.op.beparams.iteritems():
9585 result.append(("be/%s" % key, val))
9586
9587
9588 if self.op.os_name:
9589 instance.os = self.op.os_name
9590
9591
9592 if self.op.osparams:
9593 instance.osparams = self.os_inst
9594 for key, val in self.op.osparams.iteritems():
9595 result.append(("os/%s" % key, val))
9596
9597 self.cfg.Update(instance, feedback_fn)
9598
9599 return result
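# The returned list pairs each changed parameter with its new value, e.g.
# [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 512)] (illustrative).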
9600
9601 _DISK_CONVERSIONS = {
9602 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9603 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9604 }
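# Illustrative opcode for a template conversion (hypothetical names; a
# mirrored target additionally requires remote_node, as checked above):
#
#   op = opcodes.OpSetInstanceParams(instance_name="inst1.example.com",
#                                    disk_template=constants.DT_DRBD8,
#                                    remote_node="node2.example.com")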
9605
9608 """Query the exports list
9609
9610 """
9611 _OP_PARAMS = [
9612 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9613 ("use_locking", False, ht.TBool),
9614 ]
9615 REQ_BGL = False
9616
9625
9626 def Exec(self, feedback_fn):
9627 """Compute the list of all the exported system images.
9628
9629 @rtype: dict
9630 @return: a dictionary with the structure node->(export-list)
9631 where export-list is a list of the instances exported on
9632 that node.
9633
9634 """
9635 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9636 rpcresult = self.rpc.call_export_list(self.nodes)
9637 result = {}
9638 for node in rpcresult:
9639 if rpcresult[node].fail_msg:
9640 result[node] = False
9641 else:
9642 result[node] = rpcresult[node].payload
9643
9644 return result
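# Illustrative return value: {"node1": ["inst1.example.com"], "node2": False},
# where False marks a node whose export list could not be queried.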
9645
9648 """Prepares an instance for an export and returns useful information.
9649
9650 """
9651 _OP_PARAMS = [
9652 _PInstanceName,
9653 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9654 ]
9655 REQ_BGL = False
9656
9659
9672
9673 def Exec(self, feedback_fn):
9674 """Prepares an instance for an export.
9675
9676 """
9677 instance = self.instance
9678
9679 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9680 salt = utils.GenerateSecret(8)
9681
9682 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9683 result = self.rpc.call_x509_cert_create(instance.primary_node,
9684 constants.RIE_CERT_VALIDITY)
9685 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9686
9687 (name, cert_pem) = result.payload
9688
9689 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9690 cert_pem)
9691
9692 return {
9693 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9694 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9695 salt),
9696 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9697 }
9698
9699 return None
9700
9703 """Export an instance to an image in the cluster.
9704
9705 """
9706 HPATH = "instance-export"
9707 HTYPE = constants.HTYPE_INSTANCE
9708 _OP_PARAMS = [
9709 _PInstanceName,
9710 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9711 ("shutdown", True, ht.TBool),
9712 _PShutdownTimeout,
9713 ("remove_instance", False, ht.TBool),
9714 ("ignore_remove_failures", False, ht.TBool),
9715 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9716 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9717 ("destination_x509_ca", None, ht.TMaybeString),
9718 ]
9719 REQ_BGL = False
9720
9736
9751
9753 """Last minute lock declaration."""
9754
9755
9757 """Build hooks env.
9758
9759 This will run on the master, primary node and target node.
9760
9761 """
9762 env = {
9763 "EXPORT_MODE": self.op.mode,
9764 "EXPORT_NODE": self.op.target_node,
9765 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9766 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9767
9768 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9769 }
9770
9771 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9772
9773 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9774
9775 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9776 nl.append(self.op.target_node)
9777
9778 return env, nl, nl
9779
9781 """Check prerequisites.
9782
9783 This checks that the instance and node names are valid.
9784
9785 """
9786 instance_name = self.op.instance_name
9787
9788 self.instance = self.cfg.GetInstanceInfo(instance_name)
9789 assert self.instance is not None, \
9790 "Cannot retrieve locked instance %s" % self.op.instance_name
9791 _CheckNodeOnline(self, self.instance.primary_node)
9792
9793 if (self.op.remove_instance and self.instance.admin_up and
9794 not self.op.shutdown):
9795 raise errors.OpPrereqError("Cannot remove instance without shutting it"
9796 " down first")
9797
9798 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9799 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9800 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9801 assert self.dst_node is not None
9802
9803 _CheckNodeOnline(self, self.dst_node.name)
9804 _CheckNodeNotDrained(self, self.dst_node.name)
9805
9806 self._cds = None
9807 self.dest_disk_info = None
9808 self.dest_x509_ca = None
9809
9810 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9811 self.dst_node = None
9812
9813 if len(self.op.target_node) != len(self.instance.disks):
9814 raise errors.OpPrereqError(("Received destination information for %s"
9815 " disks, but instance %s has %s disks") %
9816 (len(self.op.target_node), instance_name,
9817 len(self.instance.disks)),
9818 errors.ECODE_INVAL)
9819
9820 cds = _GetClusterDomainSecret()
9821
9822
9823 try:
9824 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9825 except (TypeError, ValueError), err:
9826 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9827
9828 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9829 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9830 errors.ECODE_INVAL)
9831
9832
9833 try:
9834 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9835 except OpenSSL.crypto.Error, err:
9836 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9837 (err, ), errors.ECODE_INVAL)
9838
9839 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9840 if errcode is not None:
9841 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9842 (msg, ), errors.ECODE_INVAL)
9843
9844 self.dest_x509_ca = cert
9845
9846
9847 disk_info = []
9848 for idx, disk_data in enumerate(self.op.target_node):
9849 try:
9850 (host, port, magic) = \
9851 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9852 except errors.GenericError, err:
9853 raise errors.OpPrereqError("Target info for disk %s: %s" %
9854 (idx, err), errors.ECODE_INVAL)
9855
9856 disk_info.append((host, port, magic))
9857
9858 assert len(disk_info) == len(self.op.target_node)
9859 self.dest_disk_info = disk_info
9860
9861 else:
9862 raise errors.ProgrammerError("Unhandled export mode %r" %
9863 self.op.mode)
9864
9865
9866
9867 for disk in self.instance.disks:
9868 if disk.dev_type == constants.LD_FILE:
9869 raise errors.OpPrereqError("Export not supported for instances with"
9870 " file-based disks", errors.ECODE_INVAL)
9871
9873 """Removes exports of current instance from all other nodes.
9874
9875 If an instance in a cluster with nodes A..D was exported to node C, its
9876 exports will be removed from the nodes A, B and D.
9877
9878 """
9879 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9880
9881 nodelist = self.cfg.GetNodeList()
9882 nodelist.remove(self.dst_node.name)
9883
9884
9885
9886
9887 iname = self.instance.name
9888 if nodelist:
9889 feedback_fn("Removing old exports for instance %s" % iname)
9890 exportlist = self.rpc.call_export_list(nodelist)
9891 for node in exportlist:
9892 if exportlist[node].fail_msg:
9893 continue
9894 if iname in exportlist[node].payload:
9895 msg = self.rpc.call_export_remove(node, iname).fail_msg
9896 if msg:
9897 self.LogWarning("Could not remove older export for instance %s"
9898 " on node %s: %s", iname, node, msg)
9899
9900 def Exec(self, feedback_fn):
9901 """Export an instance to an image in the cluster.
9902
9903 """
9904 assert self.op.mode in constants.EXPORT_MODES
9905
9906 instance = self.instance
9907 src_node = instance.primary_node
9908
9909 if self.op.shutdown:
9910
9911 feedback_fn("Shutting down instance %s" % instance.name)
9912 result = self.rpc.call_instance_shutdown(src_node, instance,
9913 self.op.shutdown_timeout)
9914
9915 result.Raise("Could not shutdown instance %s on"
9916 " node %s" % (instance.name, src_node))
9917
9918
9919
9920 for disk in instance.disks:
9921 self.cfg.SetDiskID(disk, src_node)
9922
9923 activate_disks = (not instance.admin_up)
9924
9925 if activate_disks:
9926
9927 feedback_fn("Activating disks for %s" % instance.name)
9928 _StartInstanceDisks(self, instance, None)
9929
9930 try:
9931 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9932 instance)
9933
9934 helper.CreateSnapshots()
9935 try:
9936 if (self.op.shutdown and instance.admin_up and
9937 not self.op.remove_instance):
9938 assert not activate_disks
9939 feedback_fn("Starting instance %s" % instance.name)
9940 result = self.rpc.call_instance_start(src_node, instance, None, None)
9941 msg = result.fail_msg
9942 if msg:
9943 feedback_fn("Failed to start instance: %s" % msg)
9944 _ShutdownInstanceDisks(self, instance)
9945 raise errors.OpExecError("Could not start instance: %s" % msg)
9946
9947 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9948 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9949 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9950 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9951 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9952
9953 (key_name, _, _) = self.x509_key_name
9954
9955 dest_ca_pem = \
9956 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9957 self.dest_x509_ca)
9958
9959 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9960 key_name, dest_ca_pem,
9961 timeouts)
9962 finally:
9963 helper.Cleanup()
9964
9965
9966 assert len(dresults) == len(instance.disks)
9967 assert compat.all(isinstance(i, bool) for i in dresults), \
9968 "Not all results are boolean: %r" % dresults
9969
9970 finally:
9971 if activate_disks:
9972 feedback_fn("Deactivating disks for %s" % instance.name)
9973 _ShutdownInstanceDisks(self, instance)
9974
9975 if not (compat.all(dresults) and fin_resu):
9976 failures = []
9977 if not fin_resu:
9978 failures.append("export finalization")
9979 if not compat.all(dresults):
9980 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9981 if not dsk)
9982 failures.append("disk export: disk(s) %s" % fdsk)
9983
9984 raise errors.OpExecError("Export failed, errors in %s" %
9985 utils.CommaJoin(failures))
9986
9987
9988
9989
9990 if self.op.remove_instance:
9991 feedback_fn("Removing instance %s" % instance.name)
9992 _RemoveInstance(self, feedback_fn, instance,
9993 self.op.ignore_remove_failures)
9994
9995 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9996 self._CleanupExports(feedback_fn)
9997
9998 return fin_resu, dresults
9999
10002 """Remove exports related to the named instance.
10003
10004 """
10005 _OP_PARAMS = [
10006 _PInstanceName,
10007 ]
10008 REQ_BGL = False
10009
10016
10017 def Exec(self, feedback_fn):
10018 """Remove any export.
10019
10020 """
10021 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10022
10023
10024 fqdn_warn = False
10025 if not instance_name:
10026 fqdn_warn = True
10027 instance_name = self.op.instance_name
10028
10029 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10030 exportlist = self.rpc.call_export_list(locked_nodes)
10031 found = False
10032 for node in exportlist:
10033 msg = exportlist[node].fail_msg
10034 if msg:
10035 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10036 continue
10037 if instance_name in exportlist[node].payload:
10038 found = True
10039 result = self.rpc.call_export_remove(node, instance_name)
10040 msg = result.fail_msg
10041 if msg:
10042 logging.error("Could not remove export for instance %s"
10043 " on node %s: %s", instance_name, node, msg)
10044
10045 if fqdn_warn and not found:
10046 feedback_fn("Export not found. If trying to remove an export belonging"
10047 " to a deleted instance please use its Fully Qualified"
10048 " Domain Name.")
10049
10052 """Generic tags LU.
10053
10054 This is an abstract class which is the parent of all the other tags LUs.
10055
10056 """
10057
10066
10067
10068
10069
10083
10107
10149
10183
10223
10226 """Sleep for a specified amount of time.
10227
10228 This LU sleeps on the master and/or nodes for a specified amount of
10229 time.
10230
10231 """
10232 _OP_PARAMS = [
10233 ("duration", ht.NoDefault, ht.TFloat),
10234 ("on_master", True, ht.TBool),
10235 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10236 ("repeat", 0, ht.TPositiveInt)
10237 ]
10238 REQ_BGL = False
10239
10241 """Expand names and set required locks.
10242
10243 This expands the node list, if any.
10244
10245 """
10246 self.needed_locks = {}
10247 if self.op.on_nodes:
10248
10249
10250
10251 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10252 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10253
10255 """Do the actual sleep.
10256
10257 """
10258 if self.op.on_master:
10259 if not utils.TestDelay(self.op.duration):
10260 raise errors.OpExecError("Error during master delay test")
10261 if self.op.on_nodes:
10262 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10263 for node, node_result in result.items():
10264 node_result.Raise("Failure during rpc call to node %s" % node)
10265
10266 def Exec(self, feedback_fn):
10267 """Execute the test delay opcode, with the wanted repetitions.
10268
10269 """
10270 if self.op.repeat == 0:
10271 self._TestDelay()
10272 else:
10273 top_value = self.op.repeat - 1
10274 for i in range(self.op.repeat):
10275 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10276 self._TestDelay()
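# Illustrative opcode (hypothetical values; repeat=2 runs the delay twice):
#
#   op = opcodes.OpTestDelay(duration=3.0, on_master=True,
#                            on_nodes=[], repeat=2)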
10277
10280 """Utility LU to test some aspects of the job queue.
10281
10282 """
10283 _OP_PARAMS = [
10284 ("notify_waitlock", False, ht.TBool),
10285 ("notify_exec", False, ht.TBool),
10286 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10287 ("fail", False, ht.TBool),
10288 ]
10289 REQ_BGL = False
10290
10291
10292
10293 _CLIENT_CONNECT_TIMEOUT = 20.0
10294 _CLIENT_CONFIRM_TIMEOUT = 60.0
10295
10296 @classmethod
10298 """Opens a Unix socket and waits for another program to connect.
10299
10300 @type cb: callable
10301 @param cb: Callback to send socket name to client
10302 @type errcls: class
10303 @param errcls: Exception class to use for errors
10304
10305 """
10306
10307
10308
10309 tmpdir = tempfile.mkdtemp()
10310 try:
10311 tmpsock = utils.PathJoin(tmpdir, "sock")
10312
10313 logging.debug("Creating temporary socket at %s", tmpsock)
10314 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10315 try:
10316 sock.bind(tmpsock)
10317 sock.listen(1)
10318
10319
10320 cb(tmpsock)
10321
10322
10323 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10324 try:
10325 (conn, _) = sock.accept()
10326 except socket.error, err:
10327 raise errcls("Client didn't connect in time (%s)" % err)
10328 finally:
10329 sock.close()
10330 finally:
10331
10332 shutil.rmtree(tmpdir)
10333
10334
10335 try:
10336 try:
10337
10338
10339 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10340 conn.recv(1)
10341 except socket.error, err:
10342 raise errcls("Client failed to confirm notification (%s)" % err)
10343 finally:
10344 conn.close()
10345
10347 """Sends a notification to the client.
10348
10349 @type test: string
10350 @param test: Test name
10351 @param arg: Test argument (depends on test)
10352 @type sockname: string
10353 @param sockname: Socket path
10354
10355 """
10356 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10357
10358 def _Notify(self, prereq, test, arg):
10359 """Notifies the client of a test.
10360
10361 @type prereq: bool
10362 @param prereq: Whether this is a prereq-phase test
10363 @type test: string
10364 @param test: Test name
10365 @param arg: Test argument (depends on test)
10366
10367 """
10368 if prereq:
10369 errcls = errors.OpPrereqError
10370 else:
10371 errcls = errors.OpExecError
10372
10373 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10374 test, arg),
10375 errcls)
10376
10378 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10379 self.expandnames_calls = 0
10380
10397
10398 def Exec(self, feedback_fn):
10419
10422 """IAllocator framework.
10423
10424 An IAllocator instance has four sets of attributes:
10425 - cfg that is needed to query the cluster
10426 - input data (all members of the _KEYS class attribute are required)
10427 - four buffer attributes (in_text, in_data, out_text, out_data) that
10428 represent the input (to the external script) in text and data
10429 structure format, and the output from it, again in both formats
10430 - the result variables from the script (success, info, nodes) for
10431 easy usage
10432
10433 """
10434
10435
10436 _ALLO_KEYS = [
10437 "name", "mem_size", "disks", "disk_template",
10438 "os", "tags", "nics", "vcpus", "hypervisor",
10439 ]
10440 _RELO_KEYS = [
10441 "name", "relocate_from",
10442 ]
10443 _EVAC_KEYS = [
10444 "evac_nodes",
10445 ]
10446
10447 def __init__(self, cfg, rpc, mode, **kwargs):
10448 self.cfg = cfg
10449 self.rpc = rpc
10450
10451 self.in_text = self.out_text = self.in_data = self.out_data = None
10452
10453 self.mode = mode
10454 self.mem_size = self.disks = self.disk_template = None
10455 self.os = self.tags = self.nics = self.vcpus = None
10456 self.hypervisor = None
10457 self.relocate_from = None
10458 self.name = None
10459 self.evac_nodes = None
10460
10461 self.required_nodes = None
10462
10463 self.success = self.info = self.result = None
10464 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10465 keyset = self._ALLO_KEYS
10466 fn = self._AddNewInstance
10467 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10468 keyset = self._RELO_KEYS
10469 fn = self._AddRelocateInstance
10470 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10471 keyset = self._EVAC_KEYS
10472 fn = self._AddEvacuateNodes
10473 else:
10474 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10475 " IAllocator" % self.mode)
10476 for key in kwargs:
10477 if key not in keyset:
10478 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10479 " IAllocator" % key)
10480 setattr(self, key, kwargs[key])
10481
10482 for key in keyset:
10483 if key not in kwargs:
10484 raise errors.ProgrammerError("Missing input parameter '%s' to"
10485 " IAllocator" % key)
10486 self._BuildInputData(fn)
10487
10489 """Compute the generic allocator input data.
10490
10491 This is the data that is independent of the actual operation.
10492
10493 """
10494 cfg = self.cfg
10495 cluster_info = cfg.GetClusterInfo()
10496
10497 data = {
10498 "version": constants.IALLOCATOR_VERSION,
10499 "cluster_name": cfg.GetClusterName(),
10500 "cluster_tags": list(cluster_info.GetTags()),
10501 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10502 # we don't have job IDs
10503 }
10504 iinfo = cfg.GetAllInstancesInfo().values()
10505 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10506
10507
10508 node_list = cfg.GetNodeList()
10509
10510 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10511 hypervisor_name = self.hypervisor
10512 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10513 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10514 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10515 hypervisor_name = cluster_info.enabled_hypervisors[0]
10516
10517 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10518 hypervisor_name)
10519 node_iinfo = \
10520 self.rpc.call_all_instances_info(node_list,
10521 cluster_info.enabled_hypervisors)
10522
10523 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10524
10525 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10526
10527 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10528
10529 self.in_data = data
10530
10531 @staticmethod
10533 """Compute node groups data.
10534
10535 """
10536 ng = {}
10537 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10538 ng[guuid] = { "name": gdata.name }
10539 return ng
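# With a single node group this returns e.g. {"<group-uuid>": {"name":
# "default"}}: one entry per group, keyed by the group's UUID (invented
# placeholder key).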
10540
10541 @staticmethod
10543 """Compute global node data.
10544
10545 """
10546 node_results = {}
10547 for nname, nresult in node_data.items():
10548 # first fill in static (config-based) values
10549 ninfo = cfg.GetNodeInfo(nname)
10550 pnr = {
10551 "tags": list(ninfo.GetTags()),
10552 "primary_ip": ninfo.primary_ip,
10553 "secondary_ip": ninfo.secondary_ip,
10554 "offline": ninfo.offline,
10555 "drained": ninfo.drained,
10556 "master_candidate": ninfo.master_candidate,
10557 "group": ninfo.group,
10558 "master_capable": ninfo.master_capable,
10559 "vm_capable": ninfo.vm_capable,
10560 }
10561
10562 if not (ninfo.offline or ninfo.drained):
10563 nresult.Raise("Can't get data for node %s" % nname)
10564 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10565 nname)
10566 remote_info = nresult.payload
10567
10568 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10569 'vg_size', 'vg_free', 'cpu_total']:
10570 if attr not in remote_info:
10571 raise errors.OpExecError("Node '%s' didn't return attribute"
10572 " '%s'" % (nname, attr))
10573 if not isinstance(remote_info[attr], int):
10574 raise errors.OpExecError("Node '%s' returned invalid value"
10575 " for '%s': %s" %
10576 (nname, attr, remote_info[attr]))
10577 # compute memory used by primary instances
10578 i_p_mem = i_p_up_mem = 0
10579 for iinfo, beinfo in i_list:
10580 if iinfo.primary_node == nname:
10581 i_p_mem += beinfo[constants.BE_MEMORY]
10582 if iinfo.name not in node_iinfo[nname].payload:
10583 i_used_mem = 0
10584 else:
10585 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10586 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10587 remote_info['memory_free'] -= max(0, i_mem_diff)
10588
10589 if iinfo.admin_up:
10590 i_p_up_mem += beinfo[constants.BE_MEMORY]
10591
10592 # compute memory used by instances
10593 pnr_dyn = {
10594 "total_memory": remote_info['memory_total'],
10595 "reserved_memory": remote_info['memory_dom0'],
10596 "free_memory": remote_info['memory_free'],
10597 "total_disk": remote_info['vg_size'],
10598 "free_disk": remote_info['vg_free'],
10599 "total_cpus": remote_info['cpu_total'],
10600 "i_pri_memory": i_p_mem,
10601 "i_pri_up_memory": i_p_up_mem,
10602 }
10603 pnr.update(pnr_dyn)
10604
10605 node_results[nname] = pnr
10606
10607 return node_results
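# Worked example of the free-memory correction above (invented numbers):
# an instance with BE_MEMORY=1024 that the hypervisor reports as currently
# using 768 MB gives i_mem_diff = 1024 - 768 = 256, so the node's reported
# free memory is lowered by 256 MB; the allocator thus sees the memory the
# instance would need at full size, not its momentary usage.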
10608
10609 @staticmethod
10611 """Compute global instance data.
10612
10613 """
10614 instance_data = {}
10615 for iinfo, beinfo in i_list:
10616 nic_data = []
10617 for nic in iinfo.nics:
10618 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10619 nic_dict = {"mac": nic.mac,
10620 "ip": nic.ip,
10621 "mode": filled_params[constants.NIC_MODE],
10622 "link": filled_params[constants.NIC_LINK],
10623 }
10624 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10625 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10626 nic_data.append(nic_dict)
10627 pir = {
10628 "tags": list(iinfo.GetTags()),
10629 "admin_up": iinfo.admin_up,
10630 "vcpus": beinfo[constants.BE_VCPUS],
10631 "memory": beinfo[constants.BE_MEMORY],
10632 "os": iinfo.os,
10633 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10634 "nics": nic_data,
10635 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10636 "disk_template": iinfo.disk_template,
10637 "hypervisor": iinfo.hypervisor,
10638 }
10639 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10640 pir["disks"])
10641 instance_data[iinfo.name] = pir
10642
10643 return instance_data
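# An invented example of one entry in the returned mapping:
#
#   "inst1.example.com": {
#     "tags": [], "admin_up": True, "vcpus": 1, "memory": 1024,
#     "os": "debootstrap", "nodes": ["node1", "node2"],
#     "nics": [{"mac": "aa:00:00:36:12:34", "ip": None, "mode": "bridged",
#               "link": "xen-br0", "bridge": "xen-br0"}],
#     "disks": [{"size": 1024, "mode": "w"}], "disk_template": "drbd",
#     "hypervisor": "xen-pvm", "disk_space_total": 1152,
#   }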
10644
10646 """Add new instance data to allocator structure.
10647
10648 This, in combination with _ComputeClusterData, will create the
10649 correct structure needed as input for the allocator.
10650
10651 The checks for the completeness of the opcode must have already been
10652 done.
10653
10654 """
10655 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10656
10657 if self.disk_template in constants.DTS_NET_MIRROR:
10658 self.required_nodes = 2
10659 else:
10660 self.required_nodes = 1
10661 request = {
10662 "name": self.name,
10663 "disk_template": self.disk_template,
10664 "tags": self.tags,
10665 "os": self.os,
10666 "vcpus": self.vcpus,
10667 "memory": self.mem_size,
10668 "disks": self.disks,
10669 "disk_space_total": disk_space,
10670 "nics": self.nics,
10671 "required_nodes": self.required_nodes,
10672 }
10673 return request
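# For a single 1024 MB disk on DRBD, and assuming _ComputeDiskSize's usual
# 128 MB of DRBD metadata per disk, the request would carry (invented
# name/values):
#
#   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
#    "os": "debootstrap", "vcpus": 1, "memory": 1024,
#    "disks": [{"size": 1024, "mode": "w"}], "disk_space_total": 1152,
#    "nics": [], "required_nodes": 2}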
10674
10676 """Add relocate instance data to allocator structure.
10677
10678 This, in combination with _ComputeClusterData, will create the
10679 correct structure needed as input for the allocator.
10680
10681 The checks for the completeness of the opcode must have already been
10682 done.
10683
10684 """
10685 instance = self.cfg.GetInstanceInfo(self.name)
10686 if instance is None:
10687 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10688 " IAllocator" % self.name)
10689
10690 if instance.disk_template not in constants.DTS_NET_MIRROR:
10691 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10692 errors.ECODE_INVAL)
10693
10694 if len(instance.secondary_nodes) != 1:
10695 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10696 errors.ECODE_STATE)
10697
10698 self.required_nodes = 1
10699 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10700 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10701
10702 request = {
10703 "name": self.name,
10704 "disk_space_total": disk_space,
10705 "required_nodes": self.required_nodes,
10706 "relocate_from": self.relocate_from,
10707 }
10708 return request
10709
10711 """Add evacuate nodes data to allocator structure.
10712
10713 """
10714 request = {
10715 "evac_nodes": self.evac_nodes
10716 }
10717 return request
10718
10719 - def _BuildInputData(self, fn):
10720 """Build input data structures.
10721
10722 """
10723 self._ComputeClusterData()
10724
10725 request = fn()
10726 request["type"] = self.mode
10727 self.in_data["request"] = request
10728
10729 self.in_text = serializer.Dump(self.in_data)
10730
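# The serialized in_text handed to the external script is therefore a
# single JSON document, roughly of this shape (abridged, invented values):
#
#   {"version": 2, "cluster_name": "cluster.example.com",
#    "cluster_tags": [], "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com", ...}}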
10731 - def Run(self, name, validate=True, call_fn=None):
10732 """Run an instance allocator and return the results.
10733
10734 """
10735 if call_fn is None:
10736 call_fn = self.rpc.call_iallocator_runner
10737
10738 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10739 result.Raise("Failure while running the iallocator script")
10740
10741 self.out_text = result.payload
10742 if validate:
10743 self._ValidateResult()
10744
10746 """Process the allocator results.
10747
10748 This will process and if successful save the result in
10749 self.out_data and the other parameters.
10750
10751 """
10752 try:
10753 rdict = serializer.Load(self.out_text)
10754 except Exception, err:
10755 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10756
10757 if not isinstance(rdict, dict):
10758 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10759
10760 # TODO: remove backwards compatibility in later versions
10761 if "nodes" in rdict and "result" not in rdict:
10762 rdict["result"] = rdict["nodes"]
10763 del rdict["nodes"]
10764
10765 for key in "success", "info", "result":
10766 if key not in rdict:
10767 raise errors.OpExecError("Can't parse iallocator results:"
10768 " missing key '%s'" % key)
10769 setattr(self, key, rdict[key])
10770
10771 if not isinstance(rdict["result"], list):
10772 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10773 " is not a list")
10774 self.out_data = rdict
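# A well-formed reply, after the "nodes" -> "result" rename above, parses
# to something like (invented values):
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}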
10775
10778 """Run allocator tests.
10779
10780 This LU runs the allocator tests
10781
10782 """
10783 _OP_PARAMS = [
10784 ("direction", ht.NoDefault,
10785 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10786 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10787 ("name", ht.NoDefault, ht.TNonEmptyString),
10788 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10789 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10790 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10791 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10792 ("hypervisor", None, ht.TMaybeString),
10793 ("allocator", None, ht.TMaybeString),
10794 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10795 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10796 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10797 ("os", None, ht.TMaybeString),
10798 ("disk_template", None, ht.TMaybeString),
10799 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10800 ]
10801
10803 """Check prerequisites.
10804
10805 This checks the opcode parameters depending on the director and mode test.
10806
10807 """
10808 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10809 for attr in ["mem_size", "disks", "disk_template",
10810 "os", "tags", "nics", "vcpus"]:
10811 if not hasattr(self.op, attr):
10812 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10813 attr, errors.ECODE_INVAL)
10814 iname = self.cfg.ExpandInstanceName(self.op.name)
10815 if iname is not None:
10816 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10817 iname, errors.ECODE_EXISTS)
10818 if not isinstance(self.op.nics, list):
10819 raise errors.OpPrereqError("Invalid parameter 'nics'",
10820 errors.ECODE_INVAL)
10821 if not isinstance(self.op.disks, list):
10822 raise errors.OpPrereqError("Invalid parameter 'disks'",
10823 errors.ECODE_INVAL)
10824 for row in self.op.disks:
10825 if (not isinstance(row, dict) or
10826 "size" not in row or
10827 not isinstance(row["size"], int) or
10828 "mode" not in row or
10829 row["mode"] not in ['r', 'w']):
10830 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10831 " parameter", errors.ECODE_INVAL)
10832 if self.op.hypervisor is None:
10833 self.op.hypervisor = self.cfg.GetHypervisorType()
10834 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10835 fname = _ExpandInstanceName(self.cfg, self.op.name)
10836 self.op.name = fname
10837 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10838 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10839 if not hasattr(self.op, "evac_nodes"):
10840 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10841 " opcode input", errors.ECODE_INVAL)
10842 else:
10843 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10844 self.op.mode, errors.ECODE_INVAL)
10845
10846 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10847 if self.op.allocator is None:
10848 raise errors.OpPrereqError("Missing allocator name",
10849 errors.ECODE_INVAL)
10850 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10851 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10852 self.op.direction, errors.ECODE_INVAL)
10853
10854 - def Exec(self, feedback_fn):
10855 """Run the allocator test.
10856
10857 """
10858 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10859 ial = IAllocator(self.cfg, self.rpc,
10860 mode=self.op.mode,
10861 name=self.op.name,
10862 mem_size=self.op.mem_size,
10863 disks=self.op.disks,
10864 disk_template=self.op.disk_template,
10865 os=self.op.os,
10866 tags=self.op.tags,
10867 nics=self.op.nics,
10868 vcpus=self.op.vcpus,
10869 hypervisor=self.op.hypervisor,
10870 )
10871 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10872 ial = IAllocator(self.cfg, self.rpc,
10873 mode=self.op.mode,
10874 name=self.op.name,
10875 relocate_from=list(self.relocate_from),
10876 )
10877 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10878 ial = IAllocator(self.cfg, self.rpc,
10879 mode=self.op.mode,
10880 evac_nodes=self.op.evac_nodes)
10881 else:
10882 raise errors.ProgrammerError("Uncatched mode %s in"
10883 " LUTestAllocator.Exec", self.op.mode)
10884
10885 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10886 result = ial.in_text
10887 else:
10888 ial.Run(self.op.allocator, validate=False)
10889 result = ial.out_text
10890 return result
10891