22 """Module implementing the master-side code."""
23
24
25
26
27
28
29
30
import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils

import ganeti.masterd.instance
64 """Returns an empty list.
65
66 """
67 return []
68
71 """Returns an empty dict.
72
73 """
74 return {}
75


#: Special marker for required parameters that have no usable default
_NoDefault = object()


#: Special marker disabling type checking for a parameter
_NoType = object()
87 """Checks if the given value is not None.
88
89 """
90 return val is not None
91
94 """Checks if the given value is None.
95
96 """
97 return val is None
98
101 """Checks if the given value is a boolean.
102
103 """
104 return isinstance(val, bool)
105
108 """Checks if the given value is an integer.
109
110 """
111 return isinstance(val, int)
112
115 """Checks if the given value is a float.
116
117 """
118 return isinstance(val, float)
119
122 """Checks if the given value is a string.
123
124 """
125 return isinstance(val, basestring)
126
129 """Checks if a given value evaluates to a boolean True value.
130
131 """
132 return bool(val)
133
136 """Builds a function that checks if a given value is a member of a list.
137
138 """
139 return lambda val: val in target_list
140
144 """Checks if the given value is a list.
145
146 """
147 return isinstance(val, list)
148
151 """Checks if the given value is a dictionary.
152
153 """
154 return isinstance(val, dict)
155
158 """Check is the given container is of the given size.
159
160 """
161 return lambda container: len(container) == size


def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


def _TMap(fn, test):
  """Checks that a modified version of the argument passes the given test.

  """
  return lambda val: test(fn(val))


#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or None)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a positive integer
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)


#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
212 """Checks if a given value is a list with all elements of the same type.
213
214 """
215 return _TAnd(_TList,
216 lambda lst: compat.all(my_type(v) for v in lst))
217
220 """Checks a dict type for the type of its key/values.
221
222 """
223 return _TAnd(_TDict,
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
227
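

# Illustrative examples only (the sample values are made up): the checkers
# above compose into arbitrary validators; every name used here is defined
# above.
#
#   _TListOf(_TNonEmptyString)(["node1", "node2"])       # -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])            # -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"x": 1})  # -> True
#   _TMap(str.lower, _TElemOf(["up", "down"]))("UP")     # -> True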


#: Output fields of a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: The shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)


#: The force parameter
_PForce = ("force", False, _TBool)


#: A required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)


#: A required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)


#: The migration type (live/non-live)
_PMigrationMode = ("mode", None, _TOr(_TNone,
                                      _TElemOf(constants.HT_MIGRATION_MODES)))


#: The obsolete 'live' migration mode (boolean)
_PMigrationLive = ("live", None, _TMaybeBool)
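

# Illustrative sketch (the parameter name below is made up): each _P* tuple
# is (attribute name, default value, type check); _NoDefault marks a
# parameter callers must supply, and the check is any callable returning a
# boolean.
#
#   _PDiskSize = ("disk_size", _NoDefault, _TStrictPositiveInt)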
259 """Logical Unit base class.
260
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
269
270 Note that all commands require root permissions.
271
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
275 they should get if not already defined, and types they must match
276
277 """
278 HPATH = None
279 HTYPE = None
280 _OP_PARAMS = []
281 REQ_BGL = True
282

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
349 """Returns the SshRunner object
350
351 """
352 if not self.__ssh:
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
354 return self.__ssh
355
356 ssh = property(fget=__GetSSH)
357
359 """Check syntactic validity for the opcode arguments.
360
361 This method is for doing a simple syntactic check and ensure
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separate is better because:
365
366 - ExpandNames is left as as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
368 waited for them)
369
370 The function is allowed to change the self.op attribute so that
371 later methods can no longer worry about missing parameters.
372
373 """
374 pass
375
377 """Expand names for this LU.
378
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, ecc. can work correctly.
383
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
386 as values. Rules:
387
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
392
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
396
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
400
401 Examples::
402
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
407 }
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
411 }
412 # Acquire no locks
413 self.needed_locks = {} # No, you can't leave it to the default value None
414
415 """
416
417
418
419 if self.REQ_BGL:
420 self.needed_locks = {}
421 else:
422 raise NotImplementedError
423
425 """Declare LU locking needs for a level
426
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
433
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
436
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
439
440 """
441
443 """Check prerequisites for this LU.
444
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
448 allowed.
449
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
452
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
455
456 """
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
461 tl.CheckPrereq()
462 else:
463 pass
464

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
481 """Build hooks environment for this LU.
482
483 This method should return a three-node tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
488
489 The keys of the dict must not have 'GANETI_' prefixed as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
493
494 No nodes should be returned as an empty list (and not None).
495
496 Note that if the HPATH for a LU class is None, this function will
497 not be called.
498
499 """
500 raise NotImplementedError
501
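
  # Illustrative sketch only (the opcode field and node choices are made up):
  # a typical implementation returns the environment dict plus the pre- and
  # post-execution node lists, e.g.:
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.op.node_name}
  #     nl = [self.cfg.GetMasterNode(), self.op.node_name]
  #     return env, nl, nl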

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused arguments
    return lu_result
527 """Helper function to expand and lock an instance.
528
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
533 before.
534
535 """
536 if self.needed_locks is None:
537 self.needed_locks = {}
538 else:
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
544
546 """Helper function to declare instances' nodes for locking.
547
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
552
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
555
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
558
559 If should be called in DeclareLocks in a way similar to::
560
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
563
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
566
567 """
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
570
571
572
573
574
575
576 wanted_nodes = []
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
580 if not primary_only:
581 wanted_nodes.extend(instance.secondary_nodes)
582
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
587
588 del self.recalculate_locks[locking.LEVEL_NODE]
589
592 """Simple LU which runs no hooks.
593
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
596
597 """
598 HPATH = None
599 HTYPE = None
600
602 """Empty BuildHooksEnv for NoHooksLu.
603
604 This just raises an error.
605
606 """
607 assert False, "BuildHooksEnv called for NoHooksLUs"
608
611 """Tasklet base class.
612
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
616
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
619 - Implement Exec
620
621 """
623 self.lu = lu
624
625
626 self.cfg = lu.cfg
627 self.rpc = lu.rpc
628
630 """Check prerequisites for this tasklets.
631
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
635
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
638
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
641
642 """
643 pass
644
645 - def Exec(self, feedback_fn):
646 """Execute the tasklet.
647
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
650 expected.
651
652 """
653 raise NotImplementedError
654
657 """Returns list of checked and expanded node names.
658
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
661 @type nodes: list
662 @param nodes: list of node names or None for all nodes
663 @rtype: list
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666
667 """
668 if not nodes:
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
671
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
674
677 """Returns list of checked and expanded instance names.
678
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
683 @rtype: list
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
687
688 """
689 if instances:
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
691 else:
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
693 return wanted
694


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
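

# Illustrative example of the semantics above (the sample values are made up):
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
#   # -> {"a": 1, "c": 3}: "b" is removed so that it reverts to its default,
#   # "c" is added, and "a" is kept unchanged.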
730 """Checks whether all selected fields are valid.
731
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
736
737 """
738 f = utils.FieldSet()
739 f.Extend(static)
740 f.Extend(dynamic)
741
742 delta = f.NonMatching(selected)
743 if delta:
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
746
749 """Validates that given hypervisor params are not global ones.
750
751 This will ensure that instances don't get customised versions of
752 global params.
753
754 """
755 used_globals = constants.HVC_GLOBALS.intersection(params)
756 if used_globals:
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
761
764 """Ensure that a given node is online.
765
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
769
770 """
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
773 errors.ECODE_INVAL)
774
777 """Ensure that a given node is not drained.
778
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
782
783 """
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
786 errors.ECODE_INVAL)
787
790 """Ensure that a node supports a given OS.
791
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node is not supporting the OS
797
798 """
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
801 (os_name, node),
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
805
844 """Reads the cluster domain secret.
845
846 """
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
848 strict=True)
849
868 """Expand an item name.
869
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
875
876 """
877 full_name = fn(name)
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
880 errors.ECODE_NOENT)
881 return full_name
882


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances.

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
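

# For a hypothetical single-NIC, single-disk instance the function above
# would produce entries such as (illustrative values only):
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC=aa:00:00:11:22:33
#   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE=1024
#
# The hooks runner later takes care of prefixing each key with "GANETI_".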
983 """Build a list of nic information tuples.
984
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
987
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
992
993 """
994 hooks_nics = []
995 cluster = lu.cfg.GetClusterInfo()
996 for nic in nics:
997 ip = nic.ip
998 mac = nic.mac
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1003 return hooks_nics
1004
1007 """Builds instance related env variables for hooks from an object.
1008
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1013 environment
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1016 our values
1017 @rtype: dict
1018 @return: the hook environment dictionary
1019
1020 """
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1024 args = {
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1035 'bep': bep,
1036 'hvp': hvp,
1037 'hypervisor_name': instance.hypervisor,
1038 }
1039 if override:
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args)
1042
1045 """Adjust the candidate pool after node operations.
1046
1047 """
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1049 if mod_list:
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1055 if mc_now > mc_max:
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1057 (mc_now, mc_max))
1058
1095 """Check whether an OS name conforms to the os variants specification.
1096
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1099 @type name: string
1100 @param name: OS name passed by the user, to check for validity
1101
1102 """
1103 if not os_obj.supported_variants:
1104 return
1105 variant = objects.OS.GetVariant(name)
1106 if not variant:
1107 raise errors.OpPrereqError("OS name must include a variant",
1108 errors.ECODE_INVAL)
1109
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1112


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1119 """Returns a list of all primary and secondary instances on a node.
1120
1121 """
1122
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1124
1127 """Returns primary instances on a node.
1128
1129 """
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1132


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
1143 """Returns the arguments for a storage type.
1144
1145 """
1146
1147 if storage_type == constants.ST_FILE:
1148
1149 return [[cfg.GetFileStorageDir()]]
1150
1151 return []
1152


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of the given instance's disks that are faulty on
  the given node.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1174
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1178
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1183
1184 """
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1187
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1190 errors.ECODE_INVAL)
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1195 else:
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1200 " iallocator.")
1201
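

# Behaviour sketch for the check above (the opcode slot names are
# illustrative):
#
#   _CheckIAllocatorOrNode(lu, "iallocator", "remote_node")
#   # op.iallocator set, op.remote_node set  -> OpPrereqError
#   # neither set, cluster default defined   -> op.iallocator filled in
#   # neither set, no cluster default        -> OpPrereqError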


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
1226 """Logical unit for destroying the cluster.
1227
1228 """
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1231
1233 """Build hooks env.
1234
1235 """
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1237 return env, [], []
1238
1240 """Check prerequisites.
1241
1242 This checks whether the cluster is empty.
1243
1244 Any errors are signaled by raising errors.OpPrereqError.
1245
1246 """
1247 master = self.cfg.GetMasterNode()
1248
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1253 errors.ECODE_INVAL)
1254 instancelist = self.cfg.GetInstanceList()
1255 if instancelist:
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1258 errors.ECODE_INVAL)
1259

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
1287 """Verifies a certificate for LUVerifyCluster.
1288
1289 @type filename: string
1290 @param filename: Path to PEM file
1291
1292 """
1293 try:
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err:
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1299
1300 (errcode, msg) = \
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1303
1304 if msg:
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1306 else:
1307 fnamemsg = None
1308
1309 if errcode is None:
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1315
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1317
1320 """Verifies the cluster status.
1321
1322 """
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1325 _OP_PARAMS = [
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1331 ]
1332 REQ_BGL = False
1333
1334 TCLUSTER = "cluster"
1335 TNODE = "node"
1336 TINSTANCE = "instance"
1337
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1344 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1362
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1366
1368 """A class representing the logical and physical status of a node.
1369
1370 @type name: string
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: diction of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1393 @type oslist: list
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1395
1396 """
1397 - def __init__(self, offline=False, name=None):
1398 self.name = name
1399 self.volumes = {}
1400 self.instances = []
1401 self.pinst = []
1402 self.sinst = []
1403 self.sbp = {}
1404 self.mfree = 0
1405 self.dfree = 0
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1410 self.ghost = False
1411 self.os_fail = False
1412 self.oslist = {}
1413

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
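
  # With error_codes enabled, a message is emitted in the parseable form
  # (illustrative values): "ERROR:ENODELVM:node:node1.example.com:LVM problem";
  # without it, in the simpler form "ERROR: node node1.example.com: LVM
  # problem".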

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
1459 """Perform some basic validation on data returned from a node.
1460
1461 - check the result data structure is well formed and has all the
1462 mandatory fields
1463 - check ganeti version
1464
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1468 @rtype: boolean
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the respose)
1471
1472 """
1473 node = ninfo.name
1474 _ErrorIf = self._ErrorIf
1475
1476
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1480 if test:
1481 return False
1482
1483
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1491 if test:
1492 return False
1493
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1498 if test:
1499 return False
1500
1501
1502
1503
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1509
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1516
1517
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1521 "; ".join(test))
1522
1523 return True
1524

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
1558 """Check the node time.
1559
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1564
1565 """
1566 if vg_name is None:
1567 return
1568
1569 node = ninfo.name
1570 _ErrorIf = self._ErrorIf
1571
1572
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1574 test = not vglist
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1576 if not test:
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1580
1581
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1585 if not test:
1586
1587
1588
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1593
1595 """Check the node time.
1596
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1600
1601 """
1602 node = ninfo.name
1603 _ErrorIf = self._ErrorIf
1604
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1608 if not test:
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1613
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1617 if not test:
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1620 for anode in nlist:
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1624
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1628 if not test:
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1632 else:
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1635

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)
1674 """Verify if there are any unknown volumes in the cluster.
1675
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1678
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1681
1682 """
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685
1686 continue
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1693
1695 """Verify the list of running instances.
1696
1697 This checks what instances are running but unknown to the cluster.
1698
1699 """
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1705
1707 """Verify N+1 Memory Resilience.
1708
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1711
1712 """
1713 for node, n_img in node_image.items():
1714
1715
1716
1717
1718
1719
1720
1721
1722 for prinode, instances in n_img.sbp.items():
1723 needed_mem = 0
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1732

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
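
  # As used above, drbd_map maps each node to a dict of {minor: instance
  # name}, e.g. (illustrative values):
  #
  #   {"node1.example.com": {0: "instance1", 1: "instance2"}, ...}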
1841 """Builds the node OS structures.
1842
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1847
1848 """
1849 node = ninfo.name
1850 _ErrorIf = self._ErrorIf
1851
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1856
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1859
1860 nimg.os_fail = test
1861
1862 if test:
1863 return
1864
1865 os_dict = {}
1866
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1869
1870 if name not in os_dict:
1871 os_dict[name] = []
1872
1873
1874
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1878
1879 nimg.oslist = os_dict
1880
1882 """Verifies the node OS list.
1883
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1888
1889 """
1890 node = ninfo.name
1891 _ErrorIf = self._ErrorIf
1892
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1894
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1912 os_name, base.name)
1913 if test:
1914 continue
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1917 if not b_status:
1918
1919 continue
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1927
1928
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1933
1935 """Verifies and updates the node volume data.
1936
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1939
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1945
1946 """
1947 node = ninfo.name
1948 _ErrorIf = self._ErrorIf
1949
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1952 if vg_name is None:
1953 pass
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1959 else:
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1962
1964 """Verifies and updates the node instance list.
1965
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance
1968 list key.
1969
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1974
1975 """
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1980 if test:
1981 nimg.hyp_fail = True
1982 else:
1983 nimg.instances = idata
1984
1986 """Verifies and computes a node information map
1987
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1993
1994 """
1995 node = ninfo.name
1996 _ErrorIf = self._ErrorIf
1997
1998
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2002 if not test:
2003 try:
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2008
2009
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2016 if not test:
2017 try:
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2022
2024 """Build hooks env.
2025
2026 Cluster-Verify hooks just ran in the post phase and their failure makes
2027 the output be logged in the verify output and the verification to fail.
2028
2029 """
2030 all_nodes = self.cfg.GetNodeList()
2031 env = {
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2033 }
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2036
2037 return env, [], all_nodes
2038

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
2139
2140
2141
2142
2143
2144
2145
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2150
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2152
2153 feedback_fn("* Verifying node status")
2154
2155 refos_img = None
2156
2157 for node_i in nodeinfo:
2158 node = node_i.name
2159 nimg = node_image[node]
2160
2161 if node_i.offline:
2162 if verbose:
2163 feedback_fn("* Skipping offline node %s" % (node,))
2164 n_offline += 1
2165 continue
2166
2167 if node == master_node:
2168 ntype = "master"
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2172 ntype = "drained"
2173 n_drained += 1
2174 else:
2175 ntype = "regular"
2176 if verbose:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2178
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2181 if msg:
2182 nimg.rpc_fail = True
2183 continue
2184
2185 nresult = all_nvinfo[node].payload
2186
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2191 master_files)
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2193 all_drbd_map)
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2195
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2202 refos_img = nimg
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2204
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2207 if verbose:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2212
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2218
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2221
2222
2223
2224
2225
2226
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2233
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2236
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2241
2242 if s_img.offline:
2243 inst_nodes_offline.append(snode)
2244
2245
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2253
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2257
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2260
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2264
2265 feedback_fn("* Other Notes")
2266 if i_non_redundant:
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2269
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2273
2274 if n_offline:
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2276
2277 if n_drained:
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2279
2280 return not self.bad
2281
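# --- Illustrative sketch, not part of the original module: why Exec
# above returns "not self.bad" instead of raising. An _ErrorIf-style
# helper reports each failed check through feedback_fn and records it in
# a single flag, so verification continues past the first problem. A
# minimal standalone version with hypothetical names:
class _MiniVerifier(object):
  def __init__(self, feedback_fn):
    self.bad = False
    self._feedback_fn = feedback_fn

  def _ErrorIf(self, cond, item, msg, *args):
    if cond:
      self._feedback_fn("  - ERROR: %s: %s" % (item, msg % args))
      self.bad = True

  def Exec(self, checks):
    for item, ok in checks:
      self._ErrorIf(not ok, item, "check failed")
    return not self.bad

_v = _MiniVerifier(lambda line: None)
assert _v.Exec([("node1", True), ("node2", False)]) is False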
2282 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2284
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2287
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2294 and hook results
2295
2296 """
2297
2298
2299 if phase == constants.HOOKS_PHASE_POST:
2300
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2304
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2307 msg = res.fail_msg
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312
2313
2314
2315 lu_result = 1
2316 continue
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2321 if test:
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
2324 lu_result = 0
2325
2326 return lu_result
2327
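# --- Illustrative sketch, not part of the original module: the indent_re
# trick used in HooksCallBack above. Substituting at '^' with re.M
# prepends spaces to every line of a multi-line script output, so it
# nests under the per-node heading in the verify report:
import re

_INDENT_RE = re.compile('^', re.M)
assert _INDENT_RE.sub('  ', "line one\nline two") == "  line one\n  line two"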
2330 """Verifies the cluster disks status.
2331
2332 """
2333 REQ_BGL = False
2334
2341
2342 - def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2344
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2348 missing volumes)
2349
2350 """
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2352
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2357
2358 nv_dict = {}
2359 for inst in instances:
2360 inst_lvs = {}
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2363 continue
2364 inst.MapLVsByNode(inst_lvs)
2365
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
2369
2370 if not nv_dict:
2371 return result
2372
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2374
2375 for node in nodes:
2376
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2379 continue
2380 msg = node_res.fail_msg
2381 if msg:
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2384 continue
2385
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2392
2393
2394
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
2399
2400 return result
2401
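# --- Illustrative sketch, not part of the original module: the reverse
# map used by the disk verification above. nv_dict maps (node, volume)
# pairs to the owning instance; every LV a node reports is popped out,
# and whatever remains at the end is a missing volume. With hypothetical
# data (instance objects replaced by names):
nv_dict = {("node1", "xenvg/lv1"): "inst1",
           ("node1", "xenvg/lv2"): "inst2"}
for key in [("node1", "xenvg/lv1")]:  # volumes actually reported
  nv_dict.pop(key, None)
assert sorted(nv_dict.values()) == ["inst2"]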
2404 """Verifies the cluster disks sizes.
2405
2406 """
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2408 REQ_BGL = False
2409
2428
2432
2434 """Check prerequisites.
2435
2436 This only checks the optional instance list against the existing names.
2437
2438 """
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2441
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2444
2446 """Ensure children of the disk have the needed disk size.
2447
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have a smaller disk size.
2450
2451 @param disk: an L{ganeti.objects.Disk} object
2452
2453 """
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2458 if mismatch:
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2462
2463
2464 return self._EnsureChildSizes(fchild) or mismatch
2465 else:
2466 return False
2467
2468 - def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2470
2471 """
2472
2473
2474 per_node_disks = {}
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2481
2482 changed = []
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2485 for dsk in newl:
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsizes(node, newl)
2488 if result.fail_msg:
2489 self.LogWarning("Failure in blockdev_getsizes call to node"
2490 " %s, ignoring", node)
2491 continue
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2494 node)
2495 continue
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2497 if size is None:
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2500 continue
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2504 continue
2505 size = size >> 20
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2510 disk.size = size
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
2516 return changed
2517
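# --- Illustrative sketch, not part of the original module: nodes report
# block device sizes in bytes while the configuration stores mebibytes,
# hence the "size = size >> 20" conversion above (2**20 bytes per MiB).
# Equivalent arithmetic, with a hypothetical value:
size_bytes = 10737418240  # a 10 GiB device as reported by the node
assert (size_bytes >> 20) == 10240
assert (size_bytes >> 20) == size_bytes // (1024 * 1024)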
2520 """Rename the cluster.
2521
2522 """
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2526
2538
2560
2561 - def Exec(self, feedback_fn):
2562 """Rename the cluster.
2563
2564 """
2565 clustername = self.op.name
2566 ip = self.ip
2567
2568
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2572
2573 try:
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2578
2579
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2582 try:
2583 node_list.remove(master)
2584 except ValueError:
2585 pass
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2590 if msg:
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2594
2595 finally:
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2598 if msg:
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2601
2602 return clustername
2603
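# --- Illustrative sketch, not part of the original module: the shape of
# the rename operation above. The master role is stopped before the
# configuration is touched and restarted in a finally block, so a failed
# update cannot leave the master daemon stopped; the real code inspects
# the RPC fail_msg and only warns on restart failure. Skeleton with
# hypothetical callables:
def _rename_with_master_restart(stop_master, do_rename, start_master):
  stop_master()
  try:
    return do_rename()
  finally:
    start_master()

_calls = []
_rename_with_master_restart(lambda: _calls.append("stop"),
                            lambda: _calls.append("rename"),
                            lambda: _calls.append("start"))
assert _calls == ["stop", "rename", "start"]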
2606 """Change the parameters of the cluster.
2607
2608 """
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2611 _OP_PARAMS = [
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDict, _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_os", None, _TOr(_TListOf(\
2629 _TAnd(_TList,
2630 _TIsLength(2),
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2632 _TNone)),
2633 ("blacklisted_os", None, _TOr(_TListOf(\
2634 _TAnd(_TList,
2635 _TIsLength(2),
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2637 _TNone)),
2638 ]
2639 REQ_BGL = False
2640
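# --- Illustrative sketch, not part of the original module: how an
# _OP_PARAMS triple (name, default, check) like the ones above can be
# applied to the attributes given with an opcode. A sentinel default
# marks required parameters and the check is a predicate in the style of
# the _T* combinators. Hypothetical, minimal version:
_REQUIRED = object()

def _apply_params(params, given):
  out = {}
  for name, default, check in params:
    if name in given:
      val = given[name]
    elif default is _REQUIRED:
      raise ValueError("missing required parameter %r" % name)
    else:
      val = default
    if not check(val):
      raise ValueError("parameter %r failed validation" % name)
    out[name] = val
  return out

_params = [("vg_name", None, lambda v: v is None or isinstance(v, str)),
           ("force", False, lambda v: isinstance(v, bool))]
assert _apply_params(_params, {"vg_name": "xenvg"}) == \
       {"vg_name": "xenvg", "force": False}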
2653
2661
2663 """Build hooks env.
2664
2665 """
2666 env = {
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2669 }
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2672
2674 """Check prerequisites.
2675
2676 This checks that the given parameters don't conflict and that the
2677 given volume group is valid.
2678
2679 """
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2684
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2689 errors.ECODE_INVAL)
2690
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2692
2693
2694 if self.op.vg_name:
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2698 if msg:
2699
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2702 continue
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2704 self.op.vg_name,
2705 constants.MIN_VG_SIZE)
2706 if vgstatus:
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2709
2710 if self.op.drbd_helper:
2711
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2715 if ninfo.offline:
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2717 continue
2718 msg = helpers[node].fail_msg
2719 if msg:
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2727
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2733
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2738 nic_errors = []
2739
2740
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2745
2746
2747 try:
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2752
2753
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2757 (instance.name, nic_idx))
2758 if nic_errors:
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2761
2762
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2768 else:
2769 self.new_hvparams[hv_name].update(hv_dict)
2770
2771
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2773 if self.op.os_hvp:
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2777 else:
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2781 else:
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2783
2784
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2790
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2792 use_none=True)
2793
2794 if not self.new_osp[os_name]:
2795
2796 del self.new_osp[os_name]
2797 else:
2798
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2801
2802
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806
2807
2808
2809
2810
2811 if hv not in new_hvp:
2812 new_hvp[hv] = {}
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2815 else:
2816 self.hv_list = cluster.enabled_hypervisors
2817
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2829
2830 if self.op.os_hvp:
2831
2832
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2842
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2846 os.path.isfile)
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2850 errors.ECODE_INVAL)
2851
2852 - def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2854
2855 """
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2858 if not new_volume:
2859 new_volume = None
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2862 else:
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2867 if not new_helper:
2868 new_helper = None
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2871 else:
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2873 " not changing")
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2876 if self.op.os_hvp:
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2887
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890
2891 _AdjustCandidatePool(self, [])
2892
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2895
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2898
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2901
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2904
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2907
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2910
2911 def helper_os(aname, mods, desc):
2912 desc += " OS list"
2913 lst = getattr(self.cluster, aname)
2914 for key, val in mods:
2915 if key == constants.DDM_ADD:
2916 if val in lst:
2917 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2918 else:
2919 lst.append(val)
2920 elif key == constants.DDM_REMOVE:
2921 if val in lst:
2922 lst.remove(val)
2923 else:
2924 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2925 else:
2926 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2927
2928 if self.op.hidden_os:
2929 helper_os("hidden_os", self.op.hidden_os, "hidden")
2930
2931 if self.op.blacklisted_os:
2932 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2933
2934 self.cfg.Update(self.cluster, feedback_fn)
2935
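# --- Illustrative sketch, not part of the original module: the
# add/remove modification lists handled by helper_os above. Each
# modification is a (directive, value) pair; duplicate adds and removes
# of missing entries only produce a notice instead of failing.
# Standalone version with hypothetical directive constants:
_DDM_ADD, _DDM_REMOVE = "add", "remove"

def _apply_os_mods(lst, mods):
  notices = []
  for key, val in mods:
    if key == _DDM_ADD:
      if val in lst:
        notices.append("%s already present" % val)
      else:
        lst.append(val)
    elif key == _DDM_REMOVE:
      if val in lst:
        lst.remove(val)
      else:
        notices.append("%s not found" % val)
    else:
      raise ValueError("invalid modification %r" % key)
  return notices

_hidden = ["lenny"]
assert _apply_os_mods(_hidden, [(_DDM_ADD, "etch"),
                                (_DDM_REMOVE, "lenny")]) == []
assert _hidden == ["etch"]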
2938 """Distribute additional files which are part of the cluster configuration.
2939
2940 ConfigWriter takes care of distributing the config and ssconf files, but
2941 there are more files which should be distributed to all nodes. This function
2942 makes sure those are copied.
2943
2944 @param lu: calling logical unit
2945 @param additional_nodes: list of nodes not in the config to distribute to
2946
2947 """
2948
2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2950 dist_nodes = lu.cfg.GetOnlineNodeList()
2951 if additional_nodes is not None:
2952 dist_nodes.extend(additional_nodes)
2953 if myself.name in dist_nodes:
2954 dist_nodes.remove(myself.name)
2955
2956
2957 dist_files = set([constants.ETC_HOSTS,
2958 constants.SSH_KNOWN_HOSTS_FILE,
2959 constants.RAPI_CERT_FILE,
2960 constants.RAPI_USERS_FILE,
2961 constants.CONFD_HMAC_KEY,
2962 constants.CLUSTER_DOMAIN_SECRET_FILE,
2963 ])
2964
2965 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2966 for hv_name in enabled_hypervisors:
2967 hv_class = hypervisor.GetHypervisor(hv_name)
2968 dist_files.update(hv_class.GetAncillaryFiles())
2969
2970
2971 for fname in dist_files:
2972 if os.path.exists(fname):
2973 result = lu.rpc.call_upload_file(dist_nodes, fname)
2974 for to_node, to_result in result.items():
2975 msg = to_result.fail_msg
2976 if msg:
2977 msg = ("Copy of file %s to node %s failed: %s" %
2978 (fname, to_node, msg))
2979 lu.proc.LogWarning(msg)
2980
2983 """Force the redistribution of cluster configuration.
2984
2985 This is a very simple LU.
2986
2987 """
2988 REQ_BGL = False
2989
2995
2996 - def Exec(self, feedback_fn):
3002
3003
3004 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3005 """Sleep and poll for an instance's disk to sync.
3006
3007 """
3008 if not instance.disks or (disks is not None and not disks):
3009 return True
3010
3011 disks = _ExpandCheckDisks(instance, disks)
3012
3013 if not oneshot:
3014 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3015
3016 node = instance.primary_node
3017
3018 for dev in disks:
3019 lu.cfg.SetDiskID(dev, node)
3020
3021
3022
3023 retries = 0
3024 degr_retries = 10
3025 while True:
3026 max_time = 0
3027 done = True
3028 cumul_degraded = False
3029 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3030 msg = rstats.fail_msg
3031 if msg:
3032 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3033 retries += 1
3034 if retries >= 10:
3035 raise errors.RemoteError("Can't contact node %s for mirror data,"
3036 " aborting." % node)
3037 time.sleep(6)
3038 continue
3039 rstats = rstats.payload
3040 retries = 0
3041 for i, mstat in enumerate(rstats):
3042 if mstat is None:
3043 lu.LogWarning("Can't compute data for node %s/%s",
3044 node, disks[i].iv_name)
3045 continue
3046
3047 cumul_degraded = (cumul_degraded or
3048 (mstat.is_degraded and mstat.sync_percent is None))
3049 if mstat.sync_percent is not None:
3050 done = False
3051 if mstat.estimated_time is not None:
3052 rem_time = ("%s remaining (estimated)" %
3053 utils.FormatSeconds(mstat.estimated_time))
3054 max_time = mstat.estimated_time
3055 else:
3056 rem_time = "no time estimate"
3057 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3058 (disks[i].iv_name, mstat.sync_percent, rem_time))
3059
3060
3061
3062
3063 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3064 logging.info("Degraded disks found, %d retries left", degr_retries)
3065 degr_retries -= 1
3066 time.sleep(1)
3067 continue
3068
3069 if done or oneshot:
3070 break
3071
3072 time.sleep(min(60, max_time))
3073
3074 if done:
3075 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3076 return not cumul_degraded
3077
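# --- Illustrative sketch, not part of the original module: the control
# flow of the _WaitForSync polling loop above, simplified. Per-poll RPC
# failures are retried a bounded number of times, a degraded-at-the-end
# state gets a short grace period, and otherwise the loop sleeps for the
# estimated remaining time, capped at one minute. The poll callable is
# hypothetical and returns (done, degraded, eta_seconds):
def _wait_for_sync(poll, sleep, max_rpc_retries=10, degr_retries=10):
  retries = 0
  while True:
    try:
      done, degraded, eta = poll()
    except IOError:
      retries += 1
      if retries >= max_rpc_retries:
        raise
      sleep(6)
      continue
    retries = 0
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep(1)
      continue
    if done:
      return not degraded
    sleep(min(60, eta))

_states = iter([(False, False, 5), (True, False, 0)])
assert _wait_for_sync(lambda: next(_states), lambda _s: None) is True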
3080 """Check that mirrors are not degraded.
3081
3082 The ldisk parameter, if True, will change the test from the
3083 is_degraded attribute (which represents overall non-ok status for
3084 the device(s)) to the ldisk (representing the local storage status).
3085
3086 """
3087 lu.cfg.SetDiskID(dev, node)
3088
3089 result = True
3090
3091 if on_primary or dev.AssembleOnSecondary():
3092 rstats = lu.rpc.call_blockdev_find(node, dev)
3093 msg = rstats.fail_msg
3094 if msg:
3095 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3096 result = False
3097 elif not rstats.payload:
3098 lu.LogWarning("Can't find disk on node %s", node)
3099 result = False
3100 else:
3101 if ldisk:
3102 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3103 else:
3104 result = result and not rstats.payload.is_degraded
3105
3106 if dev.children:
3107 for child in dev.children:
3108 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3109
3110 return result
3111
3114 """Logical unit for OS diagnose/query.
3115
3116 """
3117 _OP_PARAMS = [
3118 _POutputFields,
3119 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3120 ]
3121 REQ_BGL = False
3122 _HID = "hidden"
3123 _BLK = "blacklisted"
3124 _VLD = "valid"
3125 _FIELDS_STATIC = utils.FieldSet()
3126 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3127 "parameters", "api_versions", _HID, _BLK)
3128
3137
3139
3140
3141
3142 self.needed_locks = {}
3143
3144
3145
3146 @staticmethod
3148 """Remaps a per-node return list into an a per-os per-node dictionary
3149
3150 @param rlist: a map with node names as keys and OS objects as values
3151
3152 @rtype: dict
3153 @return: a dictionary with osnames as keys and as value another
3154 map, with nodes as keys and tuples of (path, status, diagnose,
3155 variants, parameters, api_versions) as values, e.g.::
3156
3157 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
3158 (/srv/..., False, "invalid api")],
3159 "node2": [(/srv/..., True, "", [], [], [])]}
3160 }
3161
3162 """
3163 all_os = {}
3164
3165
3166
3167 good_nodes = [node_name for node_name in rlist
3168 if not rlist[node_name].fail_msg]
3169 for node_name, nr in rlist.items():
3170 if nr.fail_msg or not nr.payload:
3171 continue
3172 for (name, path, status, diagnose, variants,
3173 params, api_versions) in nr.payload:
3174 if name not in all_os:
3175
3176
3177 all_os[name] = {}
3178 for nname in good_nodes:
3179 all_os[name][nname] = []
3180
3181 params = [tuple(v) for v in params]
3182 all_os[name][node_name].append((path, status, diagnose,
3183 variants, params, api_versions))
3184 return all_os
3185
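# --- Illustrative sketch, not part of the original module: the remap
# performed by _DiagnoseByOS above. Every OS gets an entry for every
# healthy node, including nodes that did not report it, so a later pass
# can spot partially installed OSes (an empty list means "not present on
# this node"). Hypothetical data:
_rlist = {"node1": [("debian-etch", True)],
          "node2": []}
_all_os = {}
for _node_name, _entries in _rlist.items():
  for _name, _status in _entries:
    if _name not in _all_os:
      _all_os[_name] = dict((n, []) for n in _rlist)
    _all_os[_name][_node_name].append(_status)

assert _all_os == {"debian-etch": {"node1": [True], "node2": []}}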
3186 - def Exec(self, feedback_fn):
3187 """Compute the list of OSes.
3188
3189 """
3190 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3191 node_data = self.rpc.call_os_diagnose(valid_nodes)
3192 pol = self._DiagnoseByOS(node_data)
3193 output = []
3194 cluster = self.cfg.GetClusterInfo()
3195
3196 for os_name in utils.NiceSort(pol.keys()):
3197 os_data = pol[os_name]
3198 row = []
3199 valid = True
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3203 if not valid:
3204 (variants, params, api_versions) = null_state
3205 break
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0:
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else:
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3215
3216 is_hid = os_name in cluster.hidden_os
3217 is_blk = os_name in cluster.blacklisted_os
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3221 continue
3222
3223 for field in self.op.output_fields:
3224 if field == "name":
3225 val = os_name
3226 elif field == self._VLD:
3227 val = valid
3228 elif field == "node_status":
3229
3230 val = {}
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = utils.NiceSort(list(variants))
3235 elif field == "parameters":
3236 val = list(params)
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3240 val = is_hid
3241 elif field == self._BLK:
3242 val = is_blk
3243 else:
3244 raise errors.ParameterError(field)
3245 row.append(val)
3246 output.append(row)
3247
3248 return output
3249
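# --- Illustrative sketch, not part of the original module: the
# intersection pass in Exec above. The variants, parameters and API
# versions shown for an OS are those supported on every node: the first
# node seeds the sets and each further node narrows them. Hypothetical
# data:
_per_node_variants = [["default", "minimal"], ["default"]]
_variants = set()
for _idx, _node_variants in enumerate(_per_node_variants):
  if _idx == 0:
    _variants = set(_node_variants)
  else:
    _variants.intersection_update(_node_variants)
assert _variants == set(["default"])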
3252 """Logical unit for removing a node.
3253
3254 """
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3257 _OP_PARAMS = [
3258 _PNodeName,
3259 ]
3260
3262 """Build hooks env.
3263
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3266
3267 """
3268 env = {
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3271 }
3272 all_nodes = self.cfg.GetNodeList()
3273 try:
3274 all_nodes.remove(self.op.node_name)
3275 except ValueError:
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the list of all nodes", self.op.node_name)
3278 return env, all_nodes, all_nodes
3279
3281 """Check prerequisites.
3282
3283 This checks:
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3287
3288 Any errors are signaled by raising errors.OpPrereqError.
3289
3290 """
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3294
3295 instance_list = self.cfg.GetInstanceList()
3296
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3301 errors.ECODE_INVAL)
3302
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3308 errors.ECODE_INVAL)
3309 self.op.node_name = node.name
3310 self.node = node
3311
3312 - def Exec(self, feedback_fn):
3345
3348 """Logical unit for querying nodes.
3349
3350 """
3351
3352 _OP_PARAMS = [
3353 _POutputFields,
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3356 ]
3357 REQ_BGL = False
3358
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3361
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3363 "dtotal", "dfree",
3364 "mtotal", "mnode", "mfree",
3365 "bootid",
3366 "ctotal", "cnodes", "csockets",
3367 )
3368
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3373 "master",
3374 "role"] + _SIMPLE_FIELDS
3375 )
3376
3381
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3385
3386 if self.op.names:
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3388 else:
3389 self.wanted = locking.ALL_SET
3390
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3393 if self.do_locking:
3394
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3396
3397 - def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3399
3400 """
3401 all_info = self.cfg.GetAllNodesInfo()
3402 if self.do_locking:
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3407 if missing:
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3410 else:
3411 nodenames = all_info.keys()
3412
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3415
3416
3417
3418 if self.do_node_query:
3419 live_data = {}
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3427 live_data[name] = {
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3437 }
3438 else:
3439 live_data[name] = {}
3440 else:
3441 live_data = dict.fromkeys(nodenames, {})
3442
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3445
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3450
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3457
3458 master_node = self.cfg.GetMasterNode()
3459
3460
3461
3462 output = []
3463 for node in nodelist:
3464 node_output = []
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3488 val = "M"
3489 elif node.master_candidate:
3490 val = "C"
3491 elif node.drained:
3492 val = "D"
3493 elif node.offline:
3494 val = "O"
3495 else:
3496 val = "R"
3497 else:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3501
3502 return output
3503
3506 """Logical unit for getting volumes on node(s).
3507
3508 """
3509 _OP_PARAMS = [
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3512 ]
3513 REQ_BGL = False
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3516
3521
3530
3531 - def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3533
3534 """
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3537
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3540
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3542
3543 output = []
3544 for node in nodenames:
3545 nresult = volumes[node]
3546 if nresult.offline:
3547 continue
3548 msg = nresult.fail_msg
3549 if msg:
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3551 continue
3552
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3555
3556 for vol in node_vols:
3557 node_output = []
3558 for field in self.op.output_fields:
3559 if field == "node":
3560 val = node
3561 elif field == "phys":
3562 val = vol['dev']
3563 elif field == "vg":
3564 val = vol['vg']
3565 elif field == "name":
3566 val = vol['name']
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3570 for inst in ilist:
3571 if node not in lv_by_node[inst]:
3572 continue
3573 if vol['name'] in lv_by_node[inst][node]:
3574 val = inst.name
3575 break
3576 else:
3577 val = '-'
3578 else:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3581
3582 output.append(node_output)
3583
3584 return output
3585
3588 """Logical unit for getting information on storage units on node(s).
3589
3590 """
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3592 _OP_PARAMS = [
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3597 ]
3598 REQ_BGL = False
3599
3604
3614
3615 - def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3617
3618 """
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3620
3621
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3624 else:
3625 fields = [constants.SF_NAME] + self.op.output_fields
3626
3627
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3631
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3634
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3639
3640 result = []
3641
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3644 if nresult.offline:
3645 continue
3646
3647 msg = nresult.fail_msg
3648 if msg:
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3650 continue
3651
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3653
3654 for name in utils.NiceSort(rows.keys()):
3655 row = rows[name]
3656
3657 out = []
3658
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3661 val = node
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3666 else:
3667 raise errors.ParameterError(field)
3668
3669 out.append(val)
3670
3671 result.append(out)
3672
3673 return result
3674
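# --- Illustrative sketch, not part of the original module: the field
# bookkeeping in the storage query above. The name field is always
# fetched (result rows are keyed by it) and purely local fields such as
# the node name are stripped before the RPC, then synthesized per row on
# output. Hypothetical field names:
_SF_NAME, _SF_NODE = "name", "node"
_requested = [_SF_NODE, "size"]

_fields = _requested[:] if _SF_NAME in _requested else [_SF_NAME] + _requested
_fields = [f for f in _fields if f != _SF_NODE]  # local-only field
_field_idx = dict((name, idx) for idx, name in enumerate(_fields))

_row = ["lv0", 1024]  # one payload row, in _fields order
_out = [("node1" if f == _SF_NODE else _row[_field_idx[f]])
        for f in _requested]
assert _out == ["node1", 1024]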
3677 """Logical unit for modifying a storage volume on a node.
3678
3679 """
3680 _OP_PARAMS = [
3681 _PNodeName,
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3685 ]
3686 REQ_BGL = False
3687
3706
3711
3712 - def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3714
3715 """
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3722
3725 """Logical unit for adding node to the cluster.
3726
3727 """
3728 HPATH = "node-add"
3729 HTYPE = constants.HTYPE_NODE
3730 _OP_PARAMS = [
3731 _PNodeName,
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3735 ]
3736
3740
3742 """Build hooks env.
3743
3744 This will run on all nodes before, and on all nodes + the new node after.
3745
3746 """
3747 env = {
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3752 }
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3756
3758 """Check prerequisites.
3759
3760 This checks:
3761 - the new node is not already in the config
3762 - it is resolvable
3763 - its parameters (single/dual homed) match the cluster
3764
3765 Any errors are signaled by raising errors.OpPrereqError.
3766
3767 """
3768 node_name = self.op.node_name
3769 cfg = self.cfg
3770
3771 dns_data = netutils.GetHostInfo(node_name)
3772
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3779 errors.ECODE_INVAL)
3780 secondary_ip = self.op.secondary_ip
3781
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3788 errors.ECODE_NOENT)
3789
3790 self.changed_primary_ip = False
3791
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3794
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3799 errors.ECODE_INVAL)
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3802
3803 continue
3804
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3812
3813
3814
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3822 errors.ECODE_INVAL)
3823 else:
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3826 errors.ECODE_INVAL)
3827
3828
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3832
3833 if not newbie_singlehomed:
3834
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3840
3841 if self.op.readd:
3842 exceptions = [node]
3843 else:
3844 exceptions = []
3845
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3847
3848 if self.op.readd:
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3851 else:
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3857
3858 - def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3860
3861 """
3862 new_node = self.new_node
3863 node = new_node.name
3864
3865
3866
3867
3868
3869 if self.op.readd:
3870 new_node.drained = new_node.offline = False
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3876
3877
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3880
3881
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3887 else:
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3891
3892
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3896 keyarray = []
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3899 priv_key, pub_key]
3900
3901 for i in keyfiles:
3902 keyarray.append(utils.ReadFile(i))
3903
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3906 keyarray[5])
3907 result.Raise("Cannot transfer ssh keys to the new node")
3908
3909
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911
3912 utils.AddHostToEtcHosts(new_node.name)
3913
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3923
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927
3928 }
3929
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3935 if nl_payload:
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3941
3942 if self.op.readd:
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945
3946 self.cfg.Update(new_node, feedback_fn)
3947
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3951 if msg:
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3954 else:
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
3957
3960 """Modifies the parameters of a node.
3961
3962 """
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3965 _OP_PARAMS = [
3966 _PNodeName,
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3971 _PForce,
3972 ]
3973 REQ_BGL = False
3974
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3980 errors.ECODE_INVAL)
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3984 errors.ECODE_INVAL)
3985
3986
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
3993
3994 self.lock_all = self.op.auto_promote and self.might_demote
3995
3996
4002
4004 """Build hooks env.
4005
4006 This runs on the master node.
4007
4008 """
4009 env = {
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4014 }
4015 nl = [self.cfg.GetMasterNode(),
4016 self.op.node_name]
4017 return env, nl, nl
4018
4020 """Check prerequisites.
4021
4022 This only checks the instance list against the existing names.
4023
4024 """
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4026
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4034 errors.ECODE_INVAL)
4035
4036
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039
4040
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4046 errors.ECODE_INVAL)
4047
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4053 errors.ECODE_INVAL)
4054
4055
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4061
4062 return
4063
4064 - def Exec(self, feedback_fn):
4065 """Modifies a node.
4066
4067 """
4068 node = self.node
4069
4070 result = []
4071 changed_mc = False
4072
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4079 changed_mc = True
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4081 if node.drained:
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4084
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4087 changed_mc = True
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4091 msg = rrc.fail_msg
4092 if msg:
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4094
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4101 changed_mc = True
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4104 msg = rrc.fail_msg
4105 if msg:
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4107 if node.offline:
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4110
4111
4112 if self.lock_all:
4113 _AdjustCandidatePool(self, [node.name])
4114
4115
4116 self.cfg.Update(node, feedback_fn)
4117
4118
4119 if changed_mc:
4120 self.context.ReaddNode(node)
4121
4122 return result
4123
4126 """Powercycles a node.
4127
4128 """
4129 _OP_PARAMS = [
4130 _PNodeName,
4131 _PForce,
4132 ]
4133 REQ_BGL = False
4134
4141
4143 """Locking for PowercycleNode.
4144
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4147
4148 """
4149 self.needed_locks = {}
4150
4151 - def Exec(self, feedback_fn):
4159
4162 """Query cluster configuration.
4163
4164 """
4165 REQ_BGL = False
4166
4168 self.needed_locks = {}
4169
4170 - def Exec(self, feedback_fn):
4171 """Return cluster config.
4172
4173 """
4174 cluster = self.cfg.GetClusterInfo()
4175 os_hvp = {}
4176
4177
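# keep only the parameters of hypervisors that are enabled on the cluster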
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
4183
4184 result = {
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4197 "os_hvp": os_hvp,
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4214 }
4215
4216 return result
4217
4257
4260 """Bring up an instance's disks.
4261
4262 """
4263 _OP_PARAMS = [
4264 _PInstanceName,
4265 ("ignore_size", False, _TBool),
4266 ]
4267 REQ_BGL = False
4268
4273
4277
4279 """Check prerequisites.
4280
4281 This checks that the instance is in the cluster.
4282
4283 """
4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285 assert self.instance is not None, \
4286 "Cannot retrieve locked instance %s" % self.op.instance_name
4287 _CheckNodeOnline(self, self.instance.primary_node)
4288
4289 - def Exec(self, feedback_fn):
4290 """Activate the disks.
4291
4292 """
4293 disks_ok, disks_info = \
4294 _AssembleInstanceDisks(self, self.instance,
4295 ignore_size=self.op.ignore_size)
4296 if not disks_ok:
4297 raise errors.OpExecError("Cannot activate block devices")
4298
4299 return disks_info
4300
4304 """Prepare the block devices for an instance.
4305
4306 This sets up the block devices on all nodes.
4307
4308 @type lu: L{LogicalUnit}
4309 @param lu: the logical unit on whose behalf we execute
4310 @type instance: L{objects.Instance}
4311 @param instance: the instance for whose disks we assemble
4312 @type disks: list of L{objects.Disk} or None
4313 @param disks: which disks to assemble (or all, if None)
4314 @type ignore_secondaries: boolean
4315 @param ignore_secondaries: if true, errors on secondary nodes
4316 won't result in an error return from the function
4317 @type ignore_size: boolean
4318 @param ignore_size: if true, the current known size of the disk
4319 will not be used during the disk activation, useful for cases
4320 when the size is wrong
4321 @return: a tuple of (disks_ok, device_info); device_info is a list
4322 of (node_name, instance_visible_name, node_device_path) triples
4323 with the mapping from instance devices to primary-node devices
4324
4325 """
4326 device_info = []
4327 disks_ok = True
4328 iname = instance.name
4329 disks = _ExpandCheckDisks(instance, disks)
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
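# Note reconstructed from the code below: the assembly runs in two
# passes to narrow the window of exposure on errors. The first pass
# assembles every disk on all nodes with is_primary=False, bringing the
# secondaries up first; the second pass re-assembles each disk on the
# primary node with is_primary=True and records the resulting device
# path. Errors on secondaries only clear disks_ok when
# ignore_secondaries is not set.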
4341 for inst_disk in disks:
4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4343 if ignore_size:
4344 node_disk = node_disk.Copy()
4345 node_disk.UnsetSize()
4346 lu.cfg.SetDiskID(node_disk, node)
4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348 msg = result.fail_msg
4349 if msg:
4350 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351 " (is_primary=False, pass=1): %s",
4352 inst_disk.iv_name, node, msg)
4353 if not ignore_secondaries:
4354 disks_ok = False
4355
4356
4357
4358
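# second pass: assemble on the primary node only, with is_primary=True,
# collecting the device paths for the result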
4359 for inst_disk in disks:
4360 dev_path = None
4361
4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363 if node != instance.primary_node:
4364 continue
4365 if ignore_size:
4366 node_disk = node_disk.Copy()
4367 node_disk.UnsetSize()
4368 lu.cfg.SetDiskID(node_disk, node)
4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370 msg = result.fail_msg
4371 if msg:
4372 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373 " (is_primary=True, pass=2): %s",
4374 inst_disk.iv_name, node, msg)
4375 disks_ok = False
4376 else:
4377 dev_path = result.payload
4378
4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4380
4381
4382
4383
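# leave the disks configured for the primary node: re-set the disk IDs
# so that callers see the devices from the primary node's perspective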
4384 for disk in disks:
4385 lu.cfg.SetDiskID(disk, instance.primary_node)
4386
4387 return disks_ok, device_info
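# A minimal usage sketch (illustrative only; `lu` and `instance` stand
# for an already-locked LogicalUnit and objects.Instance, as in
# LUActivateInstanceDisks.Exec above):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("disk %s of %s is %s on node %s",
#                iv_name, instance.name, dev_path, node)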
4388
4391 """Start the disks of an instance.
4392
4393 """
4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395 ignore_secondaries=force)
4396 if not disks_ok:
4397 _ShutdownInstanceDisks(lu, instance)
4398 if force is not None and not force:
4399 lu.proc.LogWarning("", hint="If the message above refers to a"
4400 " secondary node,"
4401 " you can retry the operation using '--force'.")
4402 raise errors.OpExecError("Disk consistency error")
4403
4406 """Shutdown an instance's disks.
4407
4408 """
4409 _OP_PARAMS = [
4410 _PInstanceName,
4411 ]
4412 REQ_BGL = False
4413
4418
4422
4424 """Check prerequisites.
4425
4426 This checks that the instance is in the cluster.
4427
4428 """
4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430 assert self.instance is not None, \
4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4432
4433 - def Exec(self, feedback_fn):
4439
4442 """Shutdown block devices of an instance.
4443
4444 This function checks that the instance is down before calling
4445 _ShutdownInstanceDisks.
4446
4447 """
4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4450
4453 """Return the instance disks selected by the disks list.
4454
4455 @type disks: list of L{objects.Disk} or None
4456 @param disks: selected disks
4457 @rtype: list of L{objects.Disk}
4458 @return: selected instance disks to act on
4459
4460 """
4461 if disks is None:
4462 return instance.disks
4463 else:
4464 if not set(disks).issubset(instance.disks):
4465 raise errors.ProgrammerError("Can only act on disks belonging to the"
4466 " target instance")
4467 return disks
4468
4471 """Shutdown block devices of an instance.
4472
4473 This does the shutdown on all nodes of the instance.
4474
4475 Errors on the primary node make the function report failure unless
4476 ignore_primary is true, in which case they are ignored.
4477
4478 """
4479 all_result = True
4480 disks = _ExpandCheckDisks(instance, disks)
4481
4482 for disk in disks:
4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484 lu.cfg.SetDiskID(top_disk, node)
4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486 msg = result.fail_msg
4487 if msg:
4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489 disk.iv_name, node, msg)
4490 if not ignore_primary or node != instance.primary_node:
4491 all_result = False
4492 return all_result
4493
4496 """Checks if a node has enough free memory.
4497
4498 This function checks if a given node has the needed amount of free
4499 memory. In case the node has less memory or we cannot get the
4500 information from the node, this function raises an OpPrereqError
4501 exception.
4502
4503 @type lu: C{LogicalUnit}
4504 @param lu: a logical unit from which we get configuration data
4505 @type node: C{str}
4506 @param node: the node to check
4507 @type reason: C{str}
4508 @param reason: string to use in the error message
4509 @type requested: C{int}
4510 @param requested: the amount of memory in MiB to check for
4511 @type hypervisor_name: C{str}
4512 @param hypervisor_name: the hypervisor to ask for memory stats
4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514 we cannot check the node
4515
4516 """
4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518 nodeinfo[node].Raise("Can't get data from node %s" % node,
4519 prereq=True, ecode=errors.ECODE_ENVIRON)
4520 free_mem = nodeinfo[node].payload.get('memory_free', None)
4521 if not isinstance(free_mem, int):
4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523 " was '%s'" % (node, free_mem),
4524 errors.ECODE_ENVIRON)
4525 if requested > free_mem:
4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527 " needed %s MiB, available %s MiB" %
4528 (node, reason, requested, free_mem),
4529 errors.ECODE_NORES)
4530
4533 """Checks if nodes have enough free disk space in the default VG.
4534
4535 This function checks if all given nodes have the needed amount of
4536 free disk. In case any node has less disk or we cannot get the
4537 information from the node, this function raises an OpPrereqError
4538 exception.
4539
4540 @type lu: C{LogicalUnit}
4541 @param lu: a logical unit from which we get configuration data
4542 @type nodenames: C{list}
4543 @param nodenames: the list of node names to check
4544 @type requested: C{int}
4545 @param requested: the amount of disk in MiB to check for
4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547 we cannot check the node
4548
4549 """
4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551 lu.cfg.GetHypervisorType())
4552 for node in nodenames:
4553 info = nodeinfo[node]
4554 info.Raise("Cannot get current information from node %s" % node,
4555 prereq=True, ecode=errors.ECODE_ENVIRON)
4556 vg_free = info.payload.get("vg_free", None)
4557 if not isinstance(vg_free, int):
4558 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559 " result was '%s'" % (node, vg_free),
4560 errors.ECODE_ENVIRON)
4561 if requested > vg_free:
4562 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563 " required %d MiB, available %d MiB" %
4564 (node, requested, vg_free),
4565 errors.ECODE_NORES)
4566
4569 """Starts an instance.
4570
4571 """
4572 HPATH = "instance-start"
4573 HTYPE = constants.HTYPE_INSTANCE
4574 _OP_PARAMS = [
4575 _PInstanceName,
4576 _PForce,
4577 ("hvparams", _EmptyDict, _TDict),
4578 ("beparams", _EmptyDict, _TDict),
4579 ]
4580 REQ_BGL = False
4581
4587
4590
4603
4605 """Check prerequisites.
4606
4607 This checks that the instance is in the cluster.
4608
4609 """
4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611 assert self.instance is not None, \
4612 "Cannot retrieve locked instance %s" % self.op.instance_name
4613
4614
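# Note reconstructed from the code below: when one-off hypervisor
# parameters are passed to the start request, they are validated in
# their final, merged form. FillHV merges the cluster- and OS-level
# defaults with the instance's own hvparams, the op's overrides are
# applied on top, and only then is the syntax check run; the merged
# dict is used for this start only and is not saved to the
# configuration.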
4615 if self.op.hvparams:
4616
4617 cluster = self.cfg.GetClusterInfo()
4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619 filled_hvp = cluster.FillHV(instance)
4620 filled_hvp.update(self.op.hvparams)
4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622 hv_type.CheckParameterSyntax(filled_hvp)
4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4624
4625 _CheckNodeOnline(self, instance.primary_node)
4626
4627 bep = self.cfg.GetClusterInfo().FillBE(instance)
4628
4629 _CheckInstanceBridgesExist(self, instance)
4630
4631 remote_info = self.rpc.call_instance_info(instance.primary_node,
4632 instance.name,
4633 instance.hypervisor)
4634 remote_info.Raise("Error checking node %s" % instance.primary_node,
4635 prereq=True, ecode=errors.ECODE_ENVIRON)
4636 if not remote_info.payload:
4637 _CheckNodeFreeMemory(self, instance.primary_node,
4638 "starting instance %s" % instance.name,
4639 bep[constants.BE_MEMORY], instance.hypervisor)
4640
4641 - def Exec(self, feedback_fn):
4660
4663 """Reboot an instance.
4664
4665 """
4666 HPATH = "instance-reboot"
4667 HTYPE = constants.HTYPE_INSTANCE
4668 _OP_PARAMS = [
4669 _PInstanceName,
4670 ("ignore_secondaries", False, _TBool),
4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4672 _PShutdownTimeout,
4673 ]
4674 REQ_BGL = False
4675
4678
4680 """Build hooks env.
4681
4682 This runs on master, primary and secondary nodes of the instance.
4683
4684 """
4685 env = {
4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687 "REBOOT_TYPE": self.op.reboot_type,
4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4689 }
4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4692 return env, nl, nl
4693
4708
4709 - def Exec(self, feedback_fn):
4710 """Reboot the instance.
4711
4712 """
4713 instance = self.instance
4714 ignore_secondaries = self.op.ignore_secondaries
4715 reboot_type = self.op.reboot_type
4716
4717 node_current = instance.primary_node
4718
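# Soft and hard reboots are delegated to the hypervisor on the primary
# node; any other reboot type is implemented below as a full cycle:
# shutdown, disk deactivation/reactivation, start.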
4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720 constants.INSTANCE_REBOOT_HARD]:
4721 for disk in instance.disks:
4722 self.cfg.SetDiskID(disk, node_current)
4723 result = self.rpc.call_instance_reboot(node_current, instance,
4724 reboot_type,
4725 self.op.shutdown_timeout)
4726 result.Raise("Could not reboot instance")
4727 else:
4728 result = self.rpc.call_instance_shutdown(node_current, instance,
4729 self.op.shutdown_timeout)
4730 result.Raise("Could not shutdown instance for full reboot")
4731 _ShutdownInstanceDisks(self, instance)
4732 _StartInstanceDisks(self, instance, ignore_secondaries)
4733 result = self.rpc.call_instance_start(node_current, instance, None, None)
4734 msg = result.fail_msg
4735 if msg:
4736 _ShutdownInstanceDisks(self, instance)
4737 raise errors.OpExecError("Could not start instance for"
4738 " full reboot: %s" % msg)
4739
4740 self.cfg.MarkInstanceUp(instance.name)
4741
4794
4797 """Reinstall an instance.
4798
4799 """
4800 HPATH = "instance-reinstall"
4801 HTYPE = constants.HTYPE_INSTANCE
4802 _OP_PARAMS = [
4803 _PInstanceName,
4804 ("os_type", None, _TMaybeString),
4805 ("force_variant", False, _TBool),
4806 ]
4807 REQ_BGL = False
4808
4811
4821
4845
4846 - def Exec(self, feedback_fn):
4847 """Reinstall the instance.
4848
4849 """
4850 inst = self.instance
4851
4852 if self.op.os_type is not None:
4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854 inst.os = self.op.os_type
4855 self.cfg.Update(inst, feedback_fn)
4856
4857 _StartInstanceDisks(self, inst, None)
4858 try:
4859 feedback_fn("Running the instance OS create scripts...")
4860
4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862 self.op.debug_level)
4863 result.Raise("Could not install OS for instance %s on node %s" %
4864 (inst.name, inst.primary_node))
4865 finally:
4866 _ShutdownInstanceDisks(self, inst)
4867
4931
4934 """Rename an instance.
4935
4936 """
4937 HPATH = "instance-rename"
4938 HTYPE = constants.HTYPE_INSTANCE
4939 _OP_PARAMS = [
4940 _PInstanceName,
4941 ("new_name", _NoDefault, _TNonEmptyString),
4942 ("ip_check", False, _TBool),
4943 ("name_check", True, _TBool),
4944 ]
4945
4947 """Check arguments.
4948
4949 """
4950 if self.op.ip_check and not self.op.name_check:
4951
4952 raise errors.OpPrereqError("Cannot do ip check without a name check",
4953 errors.ECODE_INVAL)
4954
4965
4994
4995
4996 - def Exec(self, feedback_fn):
4997 """Rename the instance.
4998
4999 """
5000 inst = self.instance
5001 old_name = inst.name
5002
5003 if inst.disk_template == constants.DT_FILE:
5004 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5005
5006 self.cfg.RenameInstance(inst.name, self.op.new_name)
5007
5008 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5009 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5010
5011
5012 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5013
5014 if inst.disk_template == constants.DT_FILE:
5015 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5016 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5017 old_file_storage_dir,
5018 new_file_storage_dir)
5019 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5020 " (but the instance has been renamed in Ganeti)" %
5021 (inst.primary_node, old_file_storage_dir,
5022 new_file_storage_dir))
5023
5024 _StartInstanceDisks(self, inst, None)
5025 try:
5026 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5027 old_name, self.op.debug_level)
5028 msg = result.fail_msg
5029 if msg:
5030 msg = ("Could not run OS rename script for instance %s on node %s"
5031 " (but the instance has been renamed in Ganeti): %s" %
5032 (inst.name, inst.primary_node, msg))
5033 self.proc.LogWarning(msg)
5034 finally:
5035 _ShutdownInstanceDisks(self, inst)
5036
5037 return inst.name
5038
5041 """Remove an instance.
5042
5043 """
5044 HPATH = "instance-remove"
5045 HTYPE = constants.HTYPE_INSTANCE
5046 _OP_PARAMS = [
5047 _PInstanceName,
5048 ("ignore_failures", False, _TBool),
5049 _PShutdownTimeout,
5050 ]
5051 REQ_BGL = False
5052
5057
5061
5063 """Build hooks env.
5064
5065 This runs on master, primary and secondary nodes of the instance.
5066
5067 """
5068 env = _BuildInstanceHookEnvByObject(self, self.instance)
5069 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5070 nl = [self.cfg.GetMasterNode()]
5071 nl_post = list(self.instance.all_nodes) + nl
5072 return env, nl, nl_post
5073
5075 """Check prerequisites.
5076
5077 This checks that the instance is in the cluster.
5078
5079 """
5080 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5081 assert self.instance is not None, \
5082 "Cannot retrieve locked instance %s" % self.op.instance_name
5083
5084 - def Exec(self, feedback_fn):
5085 """Remove the instance.
5086
5087 """
5088 instance = self.instance
5089 logging.info("Shutting down instance %s on node %s",
5090 instance.name, instance.primary_node)
5091
5092 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5093 self.op.shutdown_timeout)
5094 msg = result.fail_msg
5095 if msg:
5096 if self.op.ignore_failures:
5097 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5098 else:
5099 raise errors.OpExecError("Could not shutdown instance %s on"
5100 " node %s: %s" %
5101 (instance.name, instance.primary_node, msg))
5102
5103 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5104
5107 """Utility function to remove an instance.
5108
5109 """
5110 logging.info("Removing block devices for instance %s", instance.name)
5111
5112 if not _RemoveDisks(lu, instance):
5113 if not ignore_failures:
5114 raise errors.OpExecError("Can't remove instance's disks")
5115 feedback_fn("Warning: can't remove instance's disks")
5116
5117 logging.info("Removing instance %s out of cluster config", instance.name)
5118
5119 lu.cfg.RemoveInstance(instance.name)
5120
5121 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5122 "Instance lock removal conflict"
5123
5124
5125 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5126
5129 """Logical unit for querying instances.
5130
5131 """
5132
5133 _OP_PARAMS = [
5134 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5135 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5136 ("use_locking", False, _TBool),
5137 ]
5138 REQ_BGL = False
5139 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5140 "serial_no", "ctime", "mtime", "uuid"]
5141 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5142 "admin_state",
5143 "disk_template", "ip", "mac", "bridge",
5144 "nic_mode", "nic_link",
5145 "sda_size", "sdb_size", "vcpus", "tags",
5146 "network_port", "beparams",
5147 r"(disk)\.(size)/([0-9]+)",
5148 r"(disk)\.(sizes)", "disk_usage",
5149 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5150 r"(nic)\.(bridge)/([0-9]+)",
5151 r"(nic)\.(macs|ips|modes|links|bridges)",
5152 r"(disk|nic)\.(count)",
5153 "hvparams",
5154 ] + _SIMPLE_FIELDS +
5155 ["hv/%s" % name
5156 for name in constants.HVS_PARAMETERS
5157 if name not in constants.HVC_GLOBALS] +
5158 ["be/%s" % name
5159 for name in constants.BES_PARAMETERS])
5160 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5161 "oper_ram",
5162 "oper_vcpus",
5163 "status")
5164
5165
5170
5187
5191
5192 - def Exec(self, feedback_fn):
5193 """Computes the list of instances and their attributes.
5194
5195 """
5196
5197
5198 all_info = self.cfg.GetAllInstancesInfo()
5199 if self.wanted == locking.ALL_SET:
5200
5201 if self.do_locking:
5202 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5203 else:
5204 instance_names = all_info.keys()
5205 instance_names = utils.NiceSort(instance_names)
5206 else:
5207
5208 if self.do_locking:
5209 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5210 else:
5211 tgt_set = all_info.keys()
5212 missing = set(self.wanted).difference(tgt_set)
5213 if missing:
5214 raise errors.OpExecError("Some instances were removed before"
5215 " retrieving their data: %s" % missing)
5216 instance_names = self.wanted
5217
5218 instance_list = [all_info[iname] for iname in instance_names]
5219
5220
5221
5222 nodes = frozenset([inst.primary_node for inst in instance_list])
5223 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5224
5225 bad_nodes = []
5226 off_nodes = []
5227 if self.do_node_query:
5228 live_data = {}
5229 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5230 for name in nodes:
5231 result = node_data[name]
5232 if result.offline:
5233
5234 off_nodes.append(name)
5235 if result.fail_msg:
5236 bad_nodes.append(name)
5237 else:
5238 if result.payload:
5239 live_data.update(result.payload)
5240
5241 else:
5242 live_data = dict([(name, {}) for name in instance_names])
5243
5244
5245
5246 HVPREFIX = "hv/"
5247 BEPREFIX = "be/"
5248 output = []
5249 cluster = self.cfg.GetClusterInfo()
5250 for instance in instance_list:
5251 iout = []
5252 i_hv = cluster.FillHV(instance, skip_globals=True)
5253 i_be = cluster.FillBE(instance)
5254 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5255 for field in self.op.output_fields:
5256 st_match = self._FIELDS_STATIC.Matches(field)
5257 if field in self._SIMPLE_FIELDS:
5258 val = getattr(instance, field)
5259 elif field == "pnode":
5260 val = instance.primary_node
5261 elif field == "snodes":
5262 val = list(instance.secondary_nodes)
5263 elif field == "admin_state":
5264 val = instance.admin_up
5265 elif field == "oper_state":
5266 if instance.primary_node in bad_nodes:
5267 val = None
5268 else:
5269 val = bool(live_data.get(instance.name))
5270 elif field == "status":
5271 if instance.primary_node in off_nodes:
5272 val = "ERROR_nodeoffline"
5273 elif instance.primary_node in bad_nodes:
5274 val = "ERROR_nodedown"
5275 else:
5276 running = bool(live_data.get(instance.name))
5277 if running:
5278 if instance.admin_up:
5279 val = "running"
5280 else:
5281 val = "ERROR_up"
5282 else:
5283 if instance.admin_up:
5284 val = "ERROR_down"
5285 else:
5286 val = "ADMIN_down"
5287 elif field == "oper_ram":
5288 if instance.primary_node in bad_nodes:
5289 val = None
5290 elif instance.name in live_data:
5291 val = live_data[instance.name].get("memory", "?")
5292 else:
5293 val = "-"
5294 elif field == "oper_vcpus":
5295 if instance.primary_node in bad_nodes:
5296 val = None
5297 elif instance.name in live_data:
5298 val = live_data[instance.name].get("vcpus", "?")
5299 else:
5300 val = "-"
5301 elif field == "vcpus":
5302 val = i_be[constants.BE_VCPUS]
5303 elif field == "disk_template":
5304 val = instance.disk_template
5305 elif field == "ip":
5306 if instance.nics:
5307 val = instance.nics[0].ip
5308 else:
5309 val = None
5310 elif field == "nic_mode":
5311 if instance.nics:
5312 val = i_nicp[0][constants.NIC_MODE]
5313 else:
5314 val = None
5315 elif field == "nic_link":
5316 if instance.nics:
5317 val = i_nicp[0][constants.NIC_LINK]
5318 else:
5319 val = None
5320 elif field == "bridge":
5321 if (instance.nics and
5322 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5323 val = i_nicp[0][constants.NIC_LINK]
5324 else:
5325 val = None
5326 elif field == "mac":
5327 if instance.nics:
5328 val = instance.nics[0].mac
5329 else:
5330 val = None
5331 elif field == "sda_size" or field == "sdb_size":
5332 idx = ord(field[2]) - ord('a')
5333 try:
5334 val = instance.FindDisk(idx).size
5335 except errors.OpPrereqError:
5336 val = None
5337 elif field == "disk_usage":
5338 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5339 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5340 elif field == "tags":
5341 val = list(instance.GetTags())
5342 elif field == "hvparams":
5343 val = i_hv
5344 elif (field.startswith(HVPREFIX) and
5345 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5346 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5347 val = i_hv.get(field[len(HVPREFIX):], None)
5348 elif field == "beparams":
5349 val = i_be
5350 elif (field.startswith(BEPREFIX) and
5351 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5352 val = i_be.get(field[len(BEPREFIX):], None)
5353 elif st_match and st_match.groups():
5354
5355 st_groups = st_match.groups()
5356 if st_groups and st_groups[0] == "disk":
5357 if st_groups[1] == "count":
5358 val = len(instance.disks)
5359 elif st_groups[1] == "sizes":
5360 val = [disk.size for disk in instance.disks]
5361 elif st_groups[1] == "size":
5362 try:
5363 val = instance.FindDisk(st_groups[2]).size
5364 except errors.OpPrereqError:
5365 val = None
5366 else:
5367 assert False, "Unhandled disk parameter"
5368 elif st_groups[0] == "nic":
5369 if st_groups[1] == "count":
5370 val = len(instance.nics)
5371 elif st_groups[1] == "macs":
5372 val = [nic.mac for nic in instance.nics]
5373 elif st_groups[1] == "ips":
5374 val = [nic.ip for nic in instance.nics]
5375 elif st_groups[1] == "modes":
5376 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5377 elif st_groups[1] == "links":
5378 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5379 elif st_groups[1] == "bridges":
5380 val = []
5381 for nicp in i_nicp:
5382 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5383 val.append(nicp[constants.NIC_LINK])
5384 else:
5385 val.append(None)
5386 else:
5387
5388 nic_idx = int(st_groups[2])
5389 if nic_idx >= len(instance.nics):
5390 val = None
5391 else:
5392 if st_groups[1] == "mac":
5393 val = instance.nics[nic_idx].mac
5394 elif st_groups[1] == "ip":
5395 val = instance.nics[nic_idx].ip
5396 elif st_groups[1] == "mode":
5397 val = i_nicp[nic_idx][constants.NIC_MODE]
5398 elif st_groups[1] == "link":
5399 val = i_nicp[nic_idx][constants.NIC_LINK]
5400 elif st_groups[1] == "bridge":
5401 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5402 if nic_mode == constants.NIC_MODE_BRIDGED:
5403 val = i_nicp[nic_idx][constants.NIC_LINK]
5404 else:
5405 val = None
5406 else:
5407 assert False, "Unhandled NIC parameter"
5408 else:
5409 assert False, ("Declared but unhandled variable parameter '%s'" %
5410 field)
5411 else:
5412 assert False, "Declared but unhandled parameter '%s'" % field
5413 iout.append(val)
5414 output.append(iout)
5415
5416 return output
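# Sketch of the result shape (hypothetical values): with
# output_fields=["name", "status", "oper_ram"], the rows returned above
# could look like
#
#   [["inst1.example.com", "running", 1024],
#    ["inst2.example.com", "ADMIN_down", "-"]]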
5417
5420 """Failover an instance.
5421
5422 """
5423 HPATH = "instance-failover"
5424 HTYPE = constants.HTYPE_INSTANCE
5425 _OP_PARAMS = [
5426 _PInstanceName,
5427 ("ignore_consistency", False, _TBool),
5428 _PShutdownTimeout,
5429 ]
5430 REQ_BGL = False
5431
5436
5440
5442 """Build hooks env.
5443
5444 This runs on master, primary and secondary nodes of the instance.
5445
5446 """
5447 instance = self.instance
5448 source_node = instance.primary_node
5449 target_node = instance.secondary_nodes[0]
5450 env = {
5451 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5453 "OLD_PRIMARY": source_node,
5454 "OLD_SECONDARY": target_node,
5455 "NEW_PRIMARY": target_node,
5456 "NEW_SECONDARY": source_node,
5457 }
5458 env.update(_BuildInstanceHookEnvByObject(self, instance))
5459 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5460 nl_post = list(nl)
5461 nl_post.append(source_node)
5462 return env, nl, nl_post
5463
5465 """Check prerequisites.
5466
5467 This checks that the instance is in the cluster.
5468
5469 """
5470 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471 assert self.instance is not None, \
5472 "Cannot retrieve locked instance %s" % self.op.instance_name
5473
5474 bep = self.cfg.GetClusterInfo().FillBE(instance)
5475 if instance.disk_template not in constants.DTS_NET_MIRROR:
5476 raise errors.OpPrereqError("Instance's disk layout is not"
5477 " network mirrored, cannot failover.",
5478 errors.ECODE_STATE)
5479
5480 secondary_nodes = instance.secondary_nodes
5481 if not secondary_nodes:
5482 raise errors.ProgrammerError("no secondary node but using "
5483 "a mirrored disk template")
5484
5485 target_node = secondary_nodes[0]
5486 _CheckNodeOnline(self, target_node)
5487 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5493 else:
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5496
5497
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5499
5500 - def Exec(self, feedback_fn):
5501 """Failover an instance.
5502
5503 The failover is done by shutting it down on its present node and
5504 starting it on the secondary.
5505
5506 """
5507 instance = self.instance
5508
5509 source_node = instance.primary_node
5510 target_node = instance.secondary_nodes[0]
5511
5512 if instance.admin_up:
5513 feedback_fn("* checking disk consistency between source and target")
5514 for dev in instance.disks:
5515
5516 if not _CheckDiskConsistency(self, dev, target_node, False):
5517 if not self.op.ignore_consistency:
5518 raise errors.OpExecError("Disk %s is degraded on target node,"
5519 " aborting failover." % dev.iv_name)
5520 else:
5521 feedback_fn("* not checking disk consistency as instance is not running")
5522
5523 feedback_fn("* shutting down instance on source node")
5524 logging.info("Shutting down instance %s on node %s",
5525 instance.name, source_node)
5526
5527 result = self.rpc.call_instance_shutdown(source_node, instance,
5528 self.op.shutdown_timeout)
5529 msg = result.fail_msg
5530 if msg:
5531 if self.op.ignore_consistency:
5532 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533 " Proceeding anyway. Please make sure node"
5534 " %s is down. Error details: %s",
5535 instance.name, source_node, source_node, msg)
5536 else:
5537 raise errors.OpExecError("Could not shutdown instance %s on"
5538 " node %s: %s" %
5539 (instance.name, source_node, msg))
5540
5541 feedback_fn("* deactivating the instance's disks on source node")
5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543 raise errors.OpExecError("Can't shut down the instance's disks.")
5544
5545 instance.primary_node = target_node
5546
5547 self.cfg.Update(instance, feedback_fn)
5548
5549
5550 if instance.admin_up:
5551 feedback_fn("* activating the instance's disks on target node")
5552 logging.info("Starting instance %s on node %s",
5553 instance.name, target_node)
5554
5555 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556 ignore_secondaries=True)
5557 if not disks_ok:
5558 _ShutdownInstanceDisks(self, instance)
5559 raise errors.OpExecError("Can't activate the instance's disks")
5560
5561 feedback_fn("* starting the instance on the target node")
5562 result = self.rpc.call_instance_start(target_node, instance, None, None)
5563 msg = result.fail_msg
5564 if msg:
5565 _ShutdownInstanceDisks(self, instance)
5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567 (instance.name, target_node, msg))
5568
5571 """Migrate an instance.
5572
5573 This is migration without shutting the instance down; compare with
5574 failover, which requires a shutdown.
5575
5576 """
5577 HPATH = "instance-migrate"
5578 HTYPE = constants.HTYPE_INSTANCE
5579 _OP_PARAMS = [
5580 _PInstanceName,
5581 _PMigrationMode,
5582 _PMigrationLive,
5583 ("cleanup", False, _TBool),
5584 ]
5585
5586 REQ_BGL = False
5587
5597
5601
5603 """Build hooks env.
5604
5605 This runs on master, primary and secondary nodes of the instance.
5606
5607 """
5608 instance = self._migrater.instance
5609 source_node = instance.primary_node
5610 target_node = instance.secondary_nodes[0]
5611 env = _BuildInstanceHookEnvByObject(self, instance)
5612 env["MIGRATE_LIVE"] = self._migrater.live
5613 env["MIGRATE_CLEANUP"] = self.op.cleanup
5614 env.update({
5615 "OLD_PRIMARY": source_node,
5616 "OLD_SECONDARY": target_node,
5617 "NEW_PRIMARY": target_node,
5618 "NEW_SECONDARY": source_node,
5619 })
5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5621 nl_post = list(nl)
5622 nl_post.append(source_node)
5623 return env, nl, nl_post
5624
5627 """Move an instance by data-copying.
5628
5629 """
5630 HPATH = "instance-move"
5631 HTYPE = constants.HTYPE_INSTANCE
5632 _OP_PARAMS = [
5633 _PInstanceName,
5634 ("target_node", _NoDefault, _TNonEmptyString),
("ignore_consistency", False, _TBool),
5635 _PShutdownTimeout,
5636 ]
5637 REQ_BGL = False
5638
5645
5649
5651 """Build hooks env.
5652
5653 This runs on master, primary and secondary nodes of the instance.
5654
5655 """
5656 env = {
5657 "TARGET_NODE": self.op.target_node,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5659 }
5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662 self.op.target_node]
5663 return env, nl, nl
5664
5666 """Check prerequisites.
5667
5668 This checks that the instance is in the cluster.
5669
5670 """
5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert self.instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5674
5675 node = self.cfg.GetNodeInfo(self.op.target_node)
5676 assert node is not None, \
5677 "Cannot retrieve locked node %s" % self.op.target_node
5678
5679 self.target_node = target_node = node.name
5680
5681 if target_node == instance.primary_node:
5682 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683 (instance.name, target_node),
5684 errors.ECODE_STATE)
5685
5686 bep = self.cfg.GetClusterInfo().FillBE(instance)
5687
5688 for idx, dsk in enumerate(instance.disks):
5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691 " cannot copy" % idx, errors.ECODE_STATE)
5692
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5695
5696 if instance.admin_up:
5697
5698 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699 instance.name, bep[constants.BE_MEMORY],
5700 instance.hypervisor)
5701 else:
5702 self.LogInfo("Not checking memory on the secondary node as"
5703 " instance will not be started")
5704
5705
5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5707
5708 - def Exec(self, feedback_fn):
5709 """Move an instance.
5710
5711 The move is done by shutting it down on its present node, copying
5712 the data over (slow) and starting it on the new node.
5713
5714 """
5715 instance = self.instance
5716
5717 source_node = instance.primary_node
5718 target_node = self.target_node
5719
5720 self.LogInfo("Shutting down instance %s on source node %s",
5721 instance.name, source_node)
5722
5723 result = self.rpc.call_instance_shutdown(source_node, instance,
5724 self.op.shutdown_timeout)
5725 msg = result.fail_msg
5726 if msg:
5727 if self.op.ignore_consistency:
5728 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729 " Proceeding anyway. Please make sure node"
5730 " %s is down. Error details: %s",
5731 instance.name, source_node, source_node, msg)
5732 else:
5733 raise errors.OpExecError("Could not shutdown instance %s on"
5734 " node %s: %s" %
5735 (instance.name, source_node, msg))
5736
5737
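# create the disks on the target node before copying the data over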
5738 try:
5739 _CreateDisks(self, instance, target_node=target_node)
5740 except errors.OpExecError:
5741 self.LogWarning("Device creation failed, reverting...")
5742 try:
5743 _RemoveDisks(self, instance, target_node=target_node)
5744 finally:
5745 self.cfg.ReleaseDRBDMinors(instance.name)
5746 raise
5747
5748 cluster_name = self.cfg.GetClusterInfo().cluster_name
5749
5750 errs = []
5751
5752 for idx, disk in enumerate(instance.disks):
5753 self.LogInfo("Copying data for disk %d", idx)
5754 result = self.rpc.call_blockdev_assemble(target_node, disk,
5755 instance.name, True)
5756 if result.fail_msg:
5757 self.LogWarning("Can't assemble newly created disk %d: %s",
5758 idx, result.fail_msg)
5759 errs.append(result.fail_msg)
5760 break
5761 dev_path = result.payload
5762 result = self.rpc.call_blockdev_export(source_node, disk,
5763 target_node, dev_path,
5764 cluster_name)
5765 if result.fail_msg:
5766 self.LogWarning("Can't copy data over for disk %d: %s",
5767 idx, result.fail_msg)
5768 errs.append(result.fail_msg)
5769 break
5770
5771 if errs:
5772 self.LogWarning("Some disks failed to copy, aborting")
5773 try:
5774 _RemoveDisks(self, instance, target_node=target_node)
5775 finally:
5776 self.cfg.ReleaseDRBDMinors(instance.name)
5777 raise errors.OpExecError("Errors during disk copy: %s" %
5778 (",".join(errs),))
5779
5780 instance.primary_node = target_node
5781 self.cfg.Update(instance, feedback_fn)
5782
5783 self.LogInfo("Removing the disks on the original node")
5784 _RemoveDisks(self, instance, target_node=source_node)
5785
5786
5787 if instance.admin_up:
5788 self.LogInfo("Starting instance %s on node %s",
5789 instance.name, target_node)
5790
5791 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792 ignore_secondaries=True)
5793 if not disks_ok:
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Can't activate the instance's disks")
5796
5797 result = self.rpc.call_instance_start(target_node, instance, None, None)
5798 msg = result.fail_msg
5799 if msg:
5800 _ShutdownInstanceDisks(self, instance)
5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802 (instance.name, target_node, msg))
5803
5859
5862 """Tasklet class for instance migration.
5863
5864 @type live: boolean
5865 @ivar live: whether the migration will be done live or non-live;
5866 this variable is initialized only after CheckPrereq has run
5867
5868 """
5869 - def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class.
5871
5872 """
5873 Tasklet.__init__(self, lu)
5874
5875
5876 self.instance_name = instance_name
5877 self.cleanup = cleanup
5878 self.live = False
5879
5881 """Check prerequisites.
5882
5883 This checks that the instance is in the cluster.
5884
5885 """
5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887 instance = self.cfg.GetInstanceInfo(instance_name)
5888 assert instance is not None
5889
5890 if instance.disk_template != constants.DT_DRBD8:
5891 raise errors.OpPrereqError("Instance's disk layout is not"
5892 " drbd8, cannot migrate.", errors.ECODE_STATE)
5893
5894 secondary_nodes = instance.secondary_nodes
5895 if not secondary_nodes:
5896 raise errors.ConfigurationError("No secondary node but using"
5897 " drbd8 disk template")
5898
5899 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5900
5901 target_node = secondary_nodes[0]
5902
5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904 instance.name, i_be[constants.BE_MEMORY],
5905 instance.hypervisor)
5906
5907
5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5909
5910 if not self.cleanup:
5911 _CheckNodeNotDrained(self.lu, target_node)
5912 result = self.rpc.call_instance_migratable(instance.primary_node,
5913 instance)
5914 result.Raise("Can't migrate, please use failover",
5915 prereq=True, ecode=errors.ECODE_STATE)
5916
5917 self.instance = instance
5918
5919 if self.lu.op.live is not None and self.lu.op.mode is not None:
5920 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921 " parameters are accepted",
5922 errors.ECODE_INVAL)
5923 if self.lu.op.live is not None:
5924 if self.lu.op.live:
5925 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5926 else:
5927 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928
5929
5930 self.lu.op.live = None
5931 elif self.lu.op.mode is None:
5932
5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5935
5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5937
5939 """Poll with custom rpc for disk sync.
5940
5941 This uses our own step-based rpc call.
5942
5943 """
5944 self.feedback_fn("* wait until resync is done")
5945 all_done = False
5946 while not all_done:
5947 all_done = True
5948 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5949 self.nodes_ip,
5950 self.instance.disks)
5951 min_percent = 100
5952 for node, nres in result.items():
5953 nres.Raise("Cannot resync disks on node %s" % node)
5954 node_done, node_percent = nres.payload
5955 all_done = all_done and node_done
5956 if node_percent is not None:
5957 min_percent = min(min_percent, node_percent)
5958 if not all_done:
5959 if min_percent < 100:
5960 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5961 time.sleep(2)
5962
5964 """Demote a node to secondary.
5965
5966 """
5967 self.feedback_fn("* switching node %s to secondary mode" % node)
5968
5969 for dev in self.instance.disks:
5970 self.cfg.SetDiskID(dev, node)
5971
5972 result = self.rpc.call_blockdev_close(node, self.instance.name,
5973 self.instance.disks)
5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5975
5977 """Disconnect from the network.
5978
5979 """
5980 self.feedback_fn("* changing into standalone mode")
5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982 self.instance.disks)
5983 for node, nres in result.items():
5984 nres.Raise("Cannot disconnect disks node %s" % node)
5985
5987 """Reconnect to the network.
5988
5989 """
5990 if multimaster:
5991 msg = "dual-master"
5992 else:
5993 msg = "single-master"
5994 self.feedback_fn("* changing disks into %s mode" % msg)
5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996 self.instance.disks,
5997 self.instance.name, multimaster)
5998 for node, nres in result.items():
5999 nres.Raise("Cannot change disks config on node %s" % node)
6000
6002 """Try to cleanup after a failed migration.
6003
6004 The cleanup is done by:
6005 - check that the instance is running only on one node
6006 (and update the config if needed)
6007 - change disks on its secondary node to secondary
6008 - wait until disks are fully synchronized
6009 - disconnect from the network
6010 - change disks into single-master mode
6011 - wait again until disks are fully synchronized
6012
6013 """
6014 instance = self.instance
6015 target_node = self.target_node
6016 source_node = self.source_node
6017
6018
6019 self.feedback_fn("* checking where the instance actually runs"
6020 " (if this hangs, the hypervisor might be in"
6021 " a bad state)")
6022 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023 for node, result in ins_l.items():
6024 result.Raise("Can't contact node %s" % node)
6025
6026 runningon_source = instance.name in ins_l[source_node].payload
6027 runningon_target = instance.name in ins_l[target_node].payload
6028
6029 if runningon_source and runningon_target:
6030 raise errors.OpExecError("Instance seems to be running on two nodes,"
6031 " or the hypervisor is confused. You will have"
6032 " to ensure manually that it runs only on one"
6033 " and restart this operation.")
6034
6035 if not (runningon_source or runningon_target):
6036 raise errors.OpExecError("Instance does not seem to be running at all."
6037 " In this case, it's safer to repair by"
6038 " running 'gnt-instance stop' to ensure disk"
6039 " shutdown, and then restarting it.")
6040
6041 if runningon_target:
6042
6043 self.feedback_fn("* instance running on secondary node (%s),"
6044 " updating config" % target_node)
6045 instance.primary_node = target_node
6046 self.cfg.Update(instance, self.feedback_fn)
6047 demoted_node = source_node
6048 else:
6049 self.feedback_fn("* instance confirmed to be running on its"
6050 " primary node (%s)" % source_node)
6051 demoted_node = target_node
6052
6053 self._EnsureSecondary(demoted_node)
6054 try:
6055 self._WaitUntilSync()
6056 except errors.OpExecError:
6057
6058
6059 pass
6060 self._GoStandalone()
6061 self._GoReconnect(False)
6062 self._WaitUntilSync()
6063
6064 self.feedback_fn("* done")
6065
6067 """Try to revert the disk status after a failed migration.
6068
6069 """
6070 target_node = self.target_node
6071 try:
6072 self._EnsureSecondary(target_node)
6073 self._GoStandalone()
6074 self._GoReconnect(False)
6075 self._WaitUntilSync()
6076 except errors.OpExecError, err:
6077 self.lu.LogWarning("Migration failed and I can't reconnect the"
6078 " drives: error '%s'\n"
6079 "Please look and recover the instance status" %
6080 str(err))
6081
6083 """Call the hypervisor code to abort a started migration.
6084
6085 """
6086 instance = self.instance
6087 target_node = self.target_node
6088 migration_info = self.migration_info
6089
6090 abort_result = self.rpc.call_finalize_migration(target_node,
6091 instance,
6092 migration_info,
6093 False)
6094 abort_msg = abort_result.fail_msg
6095 if abort_msg:
6096 logging.error("Aborting migration failed on target node %s: %s",
6097 target_node, abort_msg)
6098
6099
6100
6102 """Migrate an instance.
6103
6104 The migrate is done by:
6105 - change the disks into dual-master mode
6106 - wait until disks are fully synchronized again
6107 - migrate the instance
6108 - change disks on the new secondary node (the old primary) to secondary
6109 - wait until disks are fully synchronized
6110 - change disks into single-master mode
6111
6112 """
6113 instance = self.instance
6114 target_node = self.target_node
6115 source_node = self.source_node
6116
6117 self.feedback_fn("* checking disk consistency between source and target")
6118 for dev in instance.disks:
6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120 raise errors.OpExecError("Disk %s is degraded or not fully"
6121 " synchronized on target node,"
6122 " aborting migrate." % dev.iv_name)
6123
6124
6125 result = self.rpc.call_migration_info(source_node, instance)
6126 msg = result.fail_msg
6127 if msg:
6128 log_err = ("Failed fetching source migration information from %s: %s" %
6129 (source_node, msg))
6130 logging.error(log_err)
6131 raise errors.OpExecError(log_err)
6132
6133 self.migration_info = migration_info = result.payload
6134
6135
6136 self._EnsureSecondary(target_node)
6137 self._GoStandalone()
6138 self._GoReconnect(True)
6139 self._WaitUntilSync()
6140
6141 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142 result = self.rpc.call_accept_instance(target_node,
6143 instance,
6144 migration_info,
6145 self.nodes_ip[target_node])
6146
6147 msg = result.fail_msg
6148 if msg:
6149 logging.error("Instance pre-migration failed, trying to revert"
6150 " disk status: %s", msg)
6151 self.feedback_fn("Pre-migration failed, aborting")
6152 self._AbortMigration()
6153 self._RevertDiskStatus()
6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155 (instance.name, msg))
6156
6157 self.feedback_fn("* migrating instance to %s" % target_node)
6159 result = self.rpc.call_instance_migrate(source_node, instance,
6160 self.nodes_ip[target_node],
6161 self.live)
6162 msg = result.fail_msg
6163 if msg:
6164 logging.error("Instance migration failed, trying to revert"
6165 " disk status: %s", msg)
6166 self.feedback_fn("Migration failed, aborting")
6167 self._AbortMigration()
6168 self._RevertDiskStatus()
6169 raise errors.OpExecError("Could not migrate instance %s: %s" %
6170 (instance.name, msg))
6172
6173 instance.primary_node = target_node
6174
6175 self.cfg.Update(instance, self.feedback_fn)
6176
6177 result = self.rpc.call_finalize_migration(target_node,
6178 instance,
6179 migration_info,
6180 True)
6181 msg = result.fail_msg
6182 if msg:
6183 logging.error("Instance migration succeeded, but finalization failed:"
6184 " %s", msg)
6185 raise errors.OpExecError("Could not finalize instance migration: %s" %
6186 msg)
6187
6188 self._EnsureSecondary(source_node)
6189 self._WaitUntilSync()
6190 self._GoStandalone()
6191 self._GoReconnect(False)
6192 self._WaitUntilSync()
6193
6194 self.feedback_fn("* done")
6195
6196 - def Exec(self, feedback_fn):
6197 """Perform the migration.
6198
6199 """
6200 feedback_fn("Migrating instance %s" % self.instance.name)
6201
6202 self.feedback_fn = feedback_fn
6203
6204 self.source_node = self.instance.primary_node
6205 self.target_node = self.instance.secondary_nodes[0]
6206 self.all_nodes = [self.source_node, self.target_node]
6207 self.nodes_ip = {
6208 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6210 }
6211
6212 if self.cleanup:
6213 return self._ExecCleanup()
6214 else:
6215 return self._ExecMigration()
6216
6217
6218 -def _CreateBlockDev(lu, node, instance, device, force_create,
6219 info, force_open):
6220 """Create a tree of block devices on a given node.
6221
6222 If this device type has to be created on secondaries, create it and
6223 all its children.
6224
6225 If not, just recurse to children keeping the same 'force' value.
6226
6227 @param lu: the lu on whose behalf we execute
6228 @param node: the node on which to create the device
6229 @type instance: L{objects.Instance}
6230 @param instance: the instance which owns the device
6231 @type device: L{objects.Disk}
6232 @param device: the device to create
6233 @type force_create: boolean
6234 @param force_create: whether to force creation of this device; this
6235 will be changed to True whenever we find a device for which
6236 CreateOnSecondary() returns true
6237 @param info: the extra 'metadata' we should attach to the device
6238 (this will be represented as a LVM tag)
6239 @type force_open: boolean
6240 @param force_open: this parameter will be passed to the
6241 L{backend.BlockdevCreate} function where it specifies
6242 whether we run on primary or not, and it affects both
6243 the child assembly and the device's own Open() execution
6244
6245 """
6246 if device.CreateOnSecondary():
6247 force_create = True
6248
6249 if device.children:
6250 for child in device.children:
6251 _CreateBlockDev(lu, node, instance, child, force_create,
6252 info, force_open)
6253
6254 if not force_create:
6255 return
6256
6257 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6258
6261 """Create a single block device on a given node.
6262
6263 This will not recurse over children of the device, so they must be
6264 created in advance.
6265
6266 @param lu: the lu on whose behalf we execute
6267 @param node: the node on which to create the device
6268 @type instance: L{objects.Instance}
6269 @param instance: the instance which owns the device
6270 @type device: L{objects.Disk}
6271 @param device: the device to create
6272 @param info: the extra 'metadata' we should attach to the device
6273 (this will be represented as a LVM tag)
6274 @type force_open: boolean
6275 @param force_open: this parameter will be passed to the
6276 L{backend.BlockdevCreate} function where it specifies
6277 whether we run on primary or not, and it affects both
6278 the child assembly and the device's own Open() execution
6279
6280 """
6281 lu.cfg.SetDiskID(device, node)
6282 result = lu.rpc.call_blockdev_create(node, device, device.size,
6283 instance.name, force_open, info)
6284 result.Raise("Can't create block device %s on"
6285 " node %s for instance %s" % (device, node, instance.name))
6286 if device.physical_id is None:
6287 device.physical_id = result.payload
6288
6291 """Generate suitable LV names.
6292
6293 This will generate one logical volume name per given suffix.
6294
6295 """
6296 results = []
6297 for val in exts:
6298 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299 results.append("%s%s" % (new_id, val))
6300 return results
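# Example (hypothetical values): called with exts=[".disk0", ".disk1"],
# this returns two names built from freshly generated unique IDs,
# roughly of the form
#
#   ["<unique-id>.disk0", "<unique-id>.disk1"]
#
# where the exact IDs come from the configuration's GenerateUniqueID.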
6301
6302
6303 -def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6304 p_minor, s_minor):
6305 """Generate a drbd8 device complete with its children.
6306
6307 """
6308 port = lu.cfg.AllocatePort()
6309 vgname = lu.cfg.GetVGName()
6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312 logical_id=(vgname, names[0]))
6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314 logical_id=(vgname, names[1]))
6315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316 logical_id=(primary, secondary, port,
6317 p_minor, s_minor,
6318 shared_secret),
6319 children=[dev_data, dev_meta],
6320 iv_name=iv_name)
6321 return drbd_dev
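# Shape of the returned device (reconstructed from the code above):
#
#   objects.Disk(LD_DRBD8, size=size, iv_name=iv_name,
#                logical_id=(primary, secondary, port, p_minor,
#                            s_minor, shared_secret))
#     children[0]: objects.Disk(LD_LV, size=size, logical_id=(vg, names[0]))
#     children[1]: objects.Disk(LD_LV, size=128, logical_id=(vg, names[1]))
#
# i.e. one data LV and one 128 MiB metadata LV back each DRBD device.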
6322
6323
6324 -def _GenerateDiskTemplate(lu, template_name,
6325 instance_name, primary_node,
6326 secondary_nodes, disk_info,
6327 file_storage_dir, file_driver,
6328 base_index):
6329 """Generate the entire disk layout for a given template type.
6330
6331 """
6332
6333
6334 vgname = lu.cfg.GetVGName()
6335 disk_count = len(disk_info)
6336 disks = []
6337 if template_name == constants.DT_DISKLESS:
6338 pass
6339 elif template_name == constants.DT_PLAIN:
6340 if len(secondary_nodes) != 0:
6341 raise errors.ProgrammerError("Wrong template configuration")
6342
6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344 for i in range(disk_count)])
6345 for idx, disk in enumerate(disk_info):
6346 disk_index = idx + base_index
6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348 logical_id=(vgname, names[idx]),
6349 iv_name="disk/%d" % disk_index,
6350 mode=disk["mode"])
6351 disks.append(disk_dev)
6352 elif template_name == constants.DT_DRBD8:
6353 if len(secondary_nodes) != 1:
6354 raise errors.ProgrammerError("Wrong template configuration")
6355 remote_node = secondary_nodes[0]
6356 minors = lu.cfg.AllocateDRBDMinor(
6357 [primary_node, remote_node] * len(disk_info), instance_name)
6358
6359 names = []
6360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361 for i in range(disk_count)]):
6362 names.append(lv_prefix + "_data")
6363 names.append(lv_prefix + "_meta")
6364 for idx, disk in enumerate(disk_info):
6365 disk_index = idx + base_index
6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367 disk["size"], names[idx*2:idx*2+2],
6368 "disk/%d" % disk_index,
6369 minors[idx*2], minors[idx*2+1])
6370 disk_dev.mode = disk["mode"]
6371 disks.append(disk_dev)
6372 elif template_name == constants.DT_FILE:
6373 if len(secondary_nodes) != 0:
6374 raise errors.ProgrammerError("Wrong template configuration")
6375
6376 _RequireFileStorage()
6377
6378 for idx, disk in enumerate(disk_info):
6379 disk_index = idx + base_index
6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381 iv_name="disk/%d" % disk_index,
6382 logical_id=(file_driver,
6383 "%s/disk%d" % (file_storage_dir,
6384 disk_index)),
6385 mode=disk["mode"])
6386 disks.append(disk_dev)
6387 else:
6388 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6389 return disks
6390
6391
6392 -def _GetInstanceInfoText(instance):
6393 """Compute the text that should be added to the disk's metadata.
6394
6395 """
6396 return "originstname+%s" % instance.name
6397
6398
6399 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance.
6401
6402 This abstracts away some work from AddInstance.
6403
6404 @type lu: L{LogicalUnit}
6405 @param lu: the logical unit on whose behalf we execute
6406 @type instance: L{objects.Instance}
6407 @param instance: the instance whose disks we should create
6408 @type to_skip: list
6409 @param to_skip: list of indices to skip
6410 @type target_node: string
6411 @param target_node: if passed, overrides the target node for creation
6412 @raise errors.OpExecError: if the creation of any device or of the
6413 file storage directory fails
6414
6415 """
6416 info = _GetInstanceInfoText(instance)
6417 if target_node is None:
6418 pnode = instance.primary_node
6419 all_nodes = instance.all_nodes
6420 else:
6421 pnode = target_node
6422 all_nodes = [pnode]
6423
6424 if instance.disk_template == constants.DT_FILE:
6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6427
6428 result.Raise("Failed to create directory '%s' on"
6429 " node %s" % (file_storage_dir, pnode))
6430
6431
6432
6433 for idx, device in enumerate(instance.disks):
6434 if to_skip and idx in to_skip:
6435 continue
6436 logging.info("Creating volume %s for instance %s",
6437 device.iv_name, instance.name)
6438
6439 for node in all_nodes:
6440 f_create = node == pnode
6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6442
6443
6444 -def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance.
6446
6447 This abstracts away some work from `AddInstance()` and
6448 `RemoveInstance()`. Note that in case some of the devices couldn't
6449 be removed, the removal will continue with the other ones (compare
6450 with `_CreateDisks()`).
6451
6452 @type lu: L{LogicalUnit}
6453 @param lu: the logical unit on whose behalf we execute
6454 @type instance: L{objects.Instance}
6455 @param instance: the instance whose disks we should remove
6456 @type target_node: string
6457 @param target_node: used to override the node on which to remove the disks
6458 @rtype: boolean
6459 @return: the success of the removal
6460
6461 """
6462 logging.info("Removing block devices for instance %s", instance.name)
6463
6464 all_result = True
6465 for device in instance.disks:
6466 if target_node:
6467 edata = [(target_node, device)]
6468 else:
6469 edata = device.ComputeNodeTree(instance.primary_node)
6470 for node, disk in edata:
6471 lu.cfg.SetDiskID(disk, node)
6472 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6473 if msg:
6474 lu.LogWarning("Could not remove block device %s on node %s,"
6475 " continuing anyway: %s", device.iv_name, node, msg)
6476 all_result = False
6477
6478 if instance.disk_template == constants.DT_FILE:
6479 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6480 if target_node:
6481 tgt = target_node
6482 else:
6483 tgt = instance.primary_node
6484 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6485 if result.fail_msg:
6486 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487 file_storage_dir, instance.primary_node, result.fail_msg)
6488 all_result = False
6489
6490 return all_result
6491
6492
6493 -def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group
6495
6496 """
6497
6498 req_size_dict = {
6499 constants.DT_DISKLESS: None,
6500 constants.DT_PLAIN: sum(d["size"] for d in disks),
6501
6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503 constants.DT_FILE: None,
6504 }
6505
6506 if disk_template not in req_size_dict:
6507 raise errors.ProgrammerError("Disk template '%s' size requirement"
6508 " is unknown" % disk_template)
6509
6510 return req_size_dict[disk_template]
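# --- Editor's example (not part of the original source) ----------------------
# Worked numbers for the size computation above, with two disks of 1024 and
# 2048 (MiB, in Ganeti's convention):
#
#   disks = [{"size": 1024}, {"size": 2048}]
#   _ComputeDiskSize(constants.DT_PLAIN, disks)  # => 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks)  # => 3072 + 2 * 128 = 3328
#
# The extra 128 per disk for DRBD8 covers the DRBD metadata volume; diskless
# and file-based templates need no space in the volume group, hence None.
# ------------------------------------------------------------------------------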
6511
6512
6513 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation.
6515
6516 This function abstracts the hypervisor parameter validation so that it
6517 can be used for both instance creation and instance modification.
6518
6519 @type lu: L{LogicalUnit}
6520 @param lu: the logical unit for which we check
6521 @type nodenames: list
6522 @param nodenames: the list of nodes on which we should check
6523 @type hvname: string
6524 @param hvname: the name of the hypervisor we should use
6525 @type hvparams: dict
6526 @param hvparams: the parameters which we need to check
6527 @raise errors.OpPrereqError: if the parameters are not valid
6528
6529 """
6530 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6531 hvname,
6532 hvparams)
6533 for node in nodenames:
6534 info = hvinfo[node]
6535 if info.offline:
6536 continue
6537 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6538
6539
6540 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation.
6542
6543 @type lu: L{LogicalUnit}
6544 @param lu: the logical unit for which we check
6545 @type required: boolean
6546 @param required: whether the validation should fail if the OS is not
6547 found
6548 @type nodenames: list
6549 @param nodenames: the list of nodes on which we should check
6550 @type osname: string
6551 @param osname: the name of the OS we should use
6552 @type osparams: dict
6553 @param osparams: the parameters which we need to check
6554 @raise errors.OpPrereqError: if the parameters are not valid
6555
6556 """
6557 result = lu.rpc.call_os_validate(required, nodenames, osname,
6558 [constants.OS_VALIDATE_PARAMETERS],
6559 osparams)
6560 for node, nres in result.items():
6561
6562
6563 nres.Raise("OS Parameters validation failed on node %s" % node)
6564 if not nres.payload:
6565 lu.LogInfo("OS %s not found on node %s, validation skipped",
6566 osname, node)
6567
6568
6569 -class LUCreateInstance(LogicalUnit):
6570 """Create an instance.
6571
6572 """
6573 HPATH = "instance-add"
6574 HTYPE = constants.HTYPE_INSTANCE
6575 _OP_PARAMS = [
6576 _PInstanceName,
6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578 ("start", True, _TBool),
6579 ("wait_for_sync", True, _TBool),
6580 ("ip_check", True, _TBool),
6581 ("name_check", True, _TBool),
6582 ("disks", _NoDefault, _TListOf(_TDict)),
6583 ("nics", _NoDefault, _TListOf(_TDict)),
6584 ("hvparams", _EmptyDict, _TDict),
6585 ("beparams", _EmptyDict, _TDict),
6586 ("osparams", _EmptyDict, _TDict),
6587 ("no_install", None, _TMaybeBool),
6588 ("os_type", None, _TMaybeString),
6589 ("force_variant", False, _TBool),
6590 ("source_handshake", None, _TOr(_TList, _TNone)),
6591 ("source_x509_ca", None, _TMaybeString),
6592 ("source_instance_name", None, _TMaybeString),
6593 ("src_node", None, _TMaybeString),
6594 ("src_path", None, _TMaybeString),
6595 ("pnode", None, _TMaybeString),
6596 ("snode", None, _TMaybeString),
6597 ("iallocator", None, _TMaybeString),
6598 ("hypervisor", None, _TMaybeString),
6599 ("disk_template", _NoDefault, _CheckDiskTemplate),
6600 ("identify_defaults", False, _TBool),
6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602 ("file_storage_dir", None, _TMaybeString),
6603 ]
6604 REQ_BGL = False
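# --- Editor's example (not part of the original source) ----------------------
# Each _OP_PARAMS entry above is a (name, default, type-check) triple; entries
# whose default is _NoDefault are mandatory. A minimal creation opcode could
# therefore carry (all values hypothetical):
#
#   {"instance_name": "inst1.example.com", "mode": constants.INSTANCE_CREATE,
#    "disk_template": constants.DT_PLAIN, "os_type": "debian-image",
#    "disks": [{"size": 1024}], "nics": [{}], "pnode": "node1.example.com"}
#
# with every omitted parameter falling back to its declared default.
# ------------------------------------------------------------------------------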
6605
6606 - def CheckArguments(self):
6607 """Check arguments.
6608
6609 """
6610
6611
6612 if self.op.no_install and self.op.start:
6613 self.LogInfo("No-installation mode selected, disabling startup")
6614 self.op.start = False
6615
6616 self.op.instance_name = \
6617 netutils.HostInfo.NormalizeName(self.op.instance_name)
6618
6619 if self.op.ip_check and not self.op.name_check:
6620
6621 raise errors.OpPrereqError("Cannot do ip check without a name check",
6622 errors.ECODE_INVAL)
6623
6624
6625 for nic in self.op.nics:
6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6627
6628
6629 has_adopt = has_no_adopt = False
6630 for disk in self.op.disks:
6631 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6632 if "adopt" in disk:
6633 has_adopt = True
6634 else:
6635 has_no_adopt = True
6636 if has_adopt and has_no_adopt:
6637 raise errors.OpPrereqError("Either all disks are adopted or none is",
6638 errors.ECODE_INVAL)
6639 if has_adopt:
6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641 raise errors.OpPrereqError("Disk adoption is not supported for the"
6642 " '%s' disk template" %
6643 self.op.disk_template,
6644 errors.ECODE_INVAL)
6645 if self.op.iallocator is not None:
6646 raise errors.OpPrereqError("Disk adoption not allowed with an"
6647 " iallocator script", errors.ECODE_INVAL)
6648 if self.op.mode == constants.INSTANCE_IMPORT:
6649 raise errors.OpPrereqError("Disk adoption not allowed for"
6650 " instance import", errors.ECODE_INVAL)
6651
6652 self.adopt_disks = has_adopt
6653
6654
6655 if self.op.name_check:
6656 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6657 self.op.instance_name = self.hostname1.name
6658
6659 self.check_ip = self.hostname1.ip
6660 else:
6661 self.check_ip = None
6662
6663
6664 if (self.op.file_driver and
6665 self.op.file_driver not in constants.FILE_DRIVER):
6666 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6667 self.op.file_driver, errors.ECODE_INVAL)
6668
6669 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6670 raise errors.OpPrereqError("File storage directory path not absolute",
6671 errors.ECODE_INVAL)
6672
6673
6674 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6675
6676 if self.op.pnode is not None:
6677 if self.op.disk_template in constants.DTS_NET_MIRROR:
6678 if self.op.snode is None:
6679 raise errors.OpPrereqError("The networked disk templates need"
6680 " a mirror node", errors.ECODE_INVAL)
6681 elif self.op.snode:
6682 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6683 " template")
6684 self.op.snode = None
6685
6686 self._cds = _GetClusterDomainSecret()
6687
6688 if self.op.mode == constants.INSTANCE_IMPORT:
6689
6690
6691
6692 self.op.force_variant = True
6693
6694 if self.op.no_install:
6695 self.LogInfo("No-installation mode has no effect during import")
6696
6697 elif self.op.mode == constants.INSTANCE_CREATE:
6698 if self.op.os_type is None:
6699 raise errors.OpPrereqError("No guest OS specified",
6700 errors.ECODE_INVAL)
6701 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6702 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6703 " installation" % self.op.os_type,
6704 errors.ECODE_STATE)
6705 if self.op.disk_template is None:
6706 raise errors.OpPrereqError("No disk template specified",
6707 errors.ECODE_INVAL)
6708
6709 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6710
6711 src_handshake = self.op.source_handshake
6712 if not src_handshake:
6713 raise errors.OpPrereqError("Missing source handshake",
6714 errors.ECODE_INVAL)
6715
6716 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6717 src_handshake)
6718 if errmsg:
6719 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6720 errors.ECODE_INVAL)
6721
6722
6723 self.source_x509_ca_pem = self.op.source_x509_ca
6724 if not self.source_x509_ca_pem:
6725 raise errors.OpPrereqError("Missing source X509 CA",
6726 errors.ECODE_INVAL)
6727
6728 try:
6729 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6730 self._cds)
6731 except OpenSSL.crypto.Error, err:
6732 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6733 (err, ), errors.ECODE_INVAL)
6734
6735 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6736 if errcode is not None:
6737 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6738 errors.ECODE_INVAL)
6739
6740 self.source_x509_ca = cert
6741
6742 src_instance_name = self.op.source_instance_name
6743 if not src_instance_name:
6744 raise errors.OpPrereqError("Missing source instance name",
6745 errors.ECODE_INVAL)
6746
6747 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6748 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6749
6750 else:
6751 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6752 self.op.mode, errors.ECODE_INVAL)
6753
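# --- Editor's example (not part of the original source) ----------------------
# The adoption check above requires the disk list to be homogeneous with
# respect to the "adopt" key (LV names below are hypothetical):
#
#   [{"size": 1024, "adopt": "lv_one"}, {"size": 2048, "adopt": "lv_two"}]
#   # accepted: every disk is adopted
#   [{"size": 1024, "adopt": "lv_one"}, {"size": 2048}]
#   # rejected: "Either all disks are adopted or none is"
# ------------------------------------------------------------------------------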
6803
6804 - def _RunAllocator(self):
6805 """Run the allocator based on input opcode.
6806
6807 """
6808 nics = [n.ToDict() for n in self.nics]
6809 ial = IAllocator(self.cfg, self.rpc,
6810 mode=constants.IALLOCATOR_MODE_ALLOC,
6811 name=self.op.instance_name,
6812 disk_template=self.op.disk_template,
6813 tags=[],
6814 os=self.op.os_type,
6815 vcpus=self.be_full[constants.BE_VCPUS],
6816 mem_size=self.be_full[constants.BE_MEMORY],
6817 disks=self.disks,
6818 nics=nics,
6819 hypervisor=self.op.hypervisor,
6820 )
6821
6822 ial.Run(self.op.iallocator)
6823
6824 if not ial.success:
6825 raise errors.OpPrereqError("Can't compute nodes using"
6826 " iallocator '%s': %s" %
6827 (self.op.iallocator, ial.info),
6828 errors.ECODE_NORES)
6829 if len(ial.result) != ial.required_nodes:
6830 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6831 " of nodes (%s), required %s" %
6832 (self.op.iallocator, len(ial.result),
6833 ial.required_nodes), errors.ECODE_FAULT)
6834 self.op.pnode = ial.result[0]
6835 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6836 self.op.instance_name, self.op.iallocator,
6837 utils.CommaJoin(ial.result))
6838 if ial.required_nodes == 2:
6839 self.op.snode = ial.result[1]
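# Editor's note (illustrative, node names hypothetical): for a DRBD8 instance
# the allocator must return two names, e.g. ial.result ==
# ["node2.example.com", "node3.example.com"]; the first becomes the primary
# node and, when required_nodes == 2, the second becomes the secondary.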
6840
6841 - def BuildHooksEnv(self):
6842 """Build hooks env.
6843
6844 This runs on master, primary and secondary nodes of the instance.
6845
6846 """
6847 env = {
6848 "ADD_MODE": self.op.mode,
6849 }
6850 if self.op.mode == constants.INSTANCE_IMPORT:
6851 env["SRC_NODE"] = self.op.src_node
6852 env["SRC_PATH"] = self.op.src_path
6853 env["SRC_IMAGES"] = self.src_images
6854
6855 env.update(_BuildInstanceHookEnv(
6856 name=self.op.instance_name,
6857 primary_node=self.op.pnode,
6858 secondary_nodes=self.secondaries,
6859 status=self.op.start,
6860 os_type=self.op.os_type,
6861 memory=self.be_full[constants.BE_MEMORY],
6862 vcpus=self.be_full[constants.BE_VCPUS],
6863 nics=_NICListToTuple(self, self.nics),
6864 disk_template=self.op.disk_template,
6865 disks=[(d["size"], d["mode"]) for d in self.disks],
6866 bep=self.be_full,
6867 hvp=self.hv_full,
6868 hypervisor_name=self.op.hypervisor,
6869 ))
6870
6871 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6872 self.secondaries)
6873 return env, nl, nl
6874
6921
6922 - def _ReadExportParams(self, einfo):
6923 """Use export parameters as defaults.
6924
6925 In case the opcode doesn't specify (i.e. override) some instance
6926 parameters, try to take them from the export information, if it
6927 declares them.
6928
6929 """
6930 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6931
6932 if self.op.disk_template is None:
6933 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6934 self.op.disk_template = einfo.get(constants.INISECT_INS,
6935 "disk_template")
6936 else:
6937 raise errors.OpPrereqError("No disk template specified and the export"
6938 " is missing the disk_template information",
6939 errors.ECODE_INVAL)
6940
6941 if not self.op.disks:
6942 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6943 disks = []
6944
6945 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6946 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6947 disks.append({"size": disk_sz})
6948 self.op.disks = disks
6949 else:
6950 raise errors.OpPrereqError("No disk info specified and the export"
6951 " is missing the disk information",
6952 errors.ECODE_INVAL)
6953
6954 if (not self.op.nics and
6955 einfo.has_option(constants.INISECT_INS, "nic_count")):
6956 nics = []
6957 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6958 ndict = {}
6959 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6960 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6961 ndict[name] = v
6962 nics.append(ndict)
6963 self.op.nics = nics
6964
6965 if (self.op.hypervisor is None and
6966 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6967 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6968 if einfo.has_section(constants.INISECT_HYP):
6969
6970
6971 for name, value in einfo.items(constants.INISECT_HYP):
6972 if name not in self.op.hvparams:
6973 self.op.hvparams[name] = value
6974
6975 if einfo.has_section(constants.INISECT_BEP):
6976
6977 for name, value in einfo.items(constants.INISECT_BEP):
6978 if name not in self.op.beparams:
6979 self.op.beparams[name] = value
6980 else:
6981
6982 for name in constants.BES_PARAMETERS:
6983 if (name not in self.op.beparams and
6984 einfo.has_option(constants.INISECT_INS, name)):
6985 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6986
6987 if einfo.has_section(constants.INISECT_OSP):
6988
6989 for name, value in einfo.items(constants.INISECT_OSP):
6990 if name not in self.op.osparams:
6991 self.op.osparams[name] = value
6992
6993 - def _RevertToDefaults(self, cluster):
6994 """Revert the instance parameters to the default values.
6995
6996 """
6997
6998 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6999 for name in self.op.hvparams.keys():
7000 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7001 del self.op.hvparams[name]
7002
7003 be_defs = cluster.SimpleFillBE({})
7004 for name in self.op.beparams.keys():
7005 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7006 del self.op.beparams[name]
7007
7008 nic_defs = cluster.SimpleFillNIC({})
7009 for nic in self.op.nics:
7010 for name in constants.NICS_PARAMETERS:
7011 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7012 del nic[name]
7013
7014 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7015 for name in self.op.osparams.keys():
7016 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7017 del self.op.osparams[name]
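# --- Editor's example (not part of the original source) ----------------------
# Effect of the identify_defaults pass above, with hypothetical values: if the
# cluster default for the "root_path" hvparam is "/dev/sda1", then
#
#   self.op.hvparams == {"root_path": "/dev/sda1", "serial_console": False}
#
# is reduced to {"serial_console": False}, assuming False differs from the
# cluster default; only genuine overrides survive into the stored config.
# ------------------------------------------------------------------------------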
7018
7019 - def CheckPrereq(self):
7020 """Check prerequisites.
7021
7022 """
7023 if self.op.mode == constants.INSTANCE_IMPORT:
7024 export_info = self._ReadExportInfo()
7025 self._ReadExportParams(export_info)
7026
7027 _CheckDiskTemplate(self.op.disk_template)
7028
7029 if (not self.cfg.GetVGName() and
7030 self.op.disk_template not in constants.DTS_NOT_LVM):
7031 raise errors.OpPrereqError("Cluster does not support lvm-based"
7032 " instances", errors.ECODE_STATE)
7033
7034 if self.op.hypervisor is None:
7035 self.op.hypervisor = self.cfg.GetHypervisorType()
7036
7037 cluster = self.cfg.GetClusterInfo()
7038 enabled_hvs = cluster.enabled_hypervisors
7039 if self.op.hypervisor not in enabled_hvs:
7040 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7041 " cluster (%s)" % (self.op.hypervisor,
7042 ",".join(enabled_hvs)),
7043 errors.ECODE_STATE)
7044
7045
7046 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7047 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7048 self.op.hvparams)
7049 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7050 hv_type.CheckParameterSyntax(filled_hvp)
7051 self.hv_full = filled_hvp
7052
7053 _CheckGlobalHvParams(self.op.hvparams)
7054
7055
7056 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7057 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7058
7059
7060 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7061
7062
7063
7064 if self.op.identify_defaults:
7065 self._RevertToDefaults(cluster)
7066
7067
7068 self.nics = []
7069 for idx, nic in enumerate(self.op.nics):
7070 nic_mode_req = nic.get("mode", None)
7071 nic_mode = nic_mode_req
7072 if nic_mode is None:
7073 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7074
7075
7076 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7077 default_ip_mode = constants.VALUE_AUTO
7078 else:
7079 default_ip_mode = constants.VALUE_NONE
7080
7081
7082 ip = nic.get("ip", default_ip_mode)
7083 if ip is None or ip.lower() == constants.VALUE_NONE:
7084 nic_ip = None
7085 elif ip.lower() == constants.VALUE_AUTO:
7086 if not self.op.name_check:
7087 raise errors.OpPrereqError("IP address set to auto but name checks"
7088 " have been skipped. Aborting.",
7089 errors.ECODE_INVAL)
7090 nic_ip = self.hostname1.ip
7091 else:
7092 if not netutils.IsValidIP4(ip):
7093 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7094 " like a valid IP" % ip,
7095 errors.ECODE_INVAL)
7096 nic_ip = ip
7097
7098
7099 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7100 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7101 errors.ECODE_INVAL)
7102
7103
7104 mac = nic.get("mac", constants.VALUE_AUTO)
7105 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106 mac = utils.NormalizeAndValidateMac(mac)
7107
7108 try:
7109 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7110 except errors.ReservationError:
7111 raise errors.OpPrereqError("MAC address %s already in use"
7112 " in cluster" % mac,
7113 errors.ECODE_NOTUNIQUE)
7114
7115
7116 bridge = nic.get("bridge", None)
7117 link = nic.get("link", None)
7118 if bridge and link:
7119 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7120 " at the same time", errors.ECODE_INVAL)
7121 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7122 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7123 errors.ECODE_INVAL)
7124 elif bridge:
7125 link = bridge
7126
7127 nicparams = {}
7128 if nic_mode_req:
7129 nicparams[constants.NIC_MODE] = nic_mode_req
7130 if link:
7131 nicparams[constants.NIC_LINK] = link
7132
7133 check_params = cluster.SimpleFillNIC(nicparams)
7134 objects.NIC.CheckParameterSyntax(check_params)
7135 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7136
7137
7138 self.disks = []
7139 for disk in self.op.disks:
7140 mode = disk.get("mode", constants.DISK_RDWR)
7141 if mode not in constants.DISK_ACCESS_SET:
7142 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7143 mode, errors.ECODE_INVAL)
7144 size = disk.get("size", None)
7145 if size is None:
7146 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7147 try:
7148 size = int(size)
7149 except (TypeError, ValueError):
7150 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7151 errors.ECODE_INVAL)
7152 new_disk = {"size": size, "mode": mode}
7153 if "adopt" in disk:
7154 new_disk["adopt"] = disk["adopt"]
7155 self.disks.append(new_disk)
7156
7157 if self.op.mode == constants.INSTANCE_IMPORT:
7158
7159
7160 instance_disks = len(self.disks)
7161 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7162 if instance_disks < export_disks:
7163 raise errors.OpPrereqError("Not enough disks to import."
7164 " (instance: %d, export: %d)" %
7165 (instance_disks, export_disks),
7166 errors.ECODE_INVAL)
7167
7168 disk_images = []
7169 for idx in range(export_disks):
7170 option = 'disk%d_dump' % idx
7171 if export_info.has_option(constants.INISECT_INS, option):
7172
7173 export_name = export_info.get(constants.INISECT_INS, option)
7174 image = utils.PathJoin(self.op.src_path, export_name)
7175 disk_images.append(image)
7176 else:
7177 disk_images.append(False)
7178
7179 self.src_images = disk_images
7180
7181 old_name = export_info.get(constants.INISECT_INS, 'name')
7182 try:
7183 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7184 except (TypeError, ValueError), err:
7185 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7186 " an integer: %s" % str(err),
7187 errors.ECODE_STATE)
7188 if self.op.instance_name == old_name:
7189 for idx, nic in enumerate(self.nics):
7190 if nic.mac == constants.VALUE_AUTO and idx < exp_nic_count:
7191 nic_mac_ini = 'nic%d_mac' % idx
7192 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7193
7194
7195
7196
7197 if self.op.ip_check:
7198 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7199 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7200 (self.check_ip, self.op.instance_name),
7201 errors.ECODE_NOTUNIQUE)
7202
7203
7204
7205
7206
7207
7208
7209
7210
7211 for nic in self.nics:
7212 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7213 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7214
7215
7216
7217 if self.op.iallocator is not None:
7218 self._RunAllocator()
7219
7220
7221
7222
7223 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7224 assert self.pnode is not None, \
7225 "Cannot retrieve locked node %s" % self.op.pnode
7226 if pnode.offline:
7227 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7228 pnode.name, errors.ECODE_STATE)
7229 if pnode.drained:
7230 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7231 pnode.name, errors.ECODE_STATE)
7232
7233 self.secondaries = []
7234
7235
7236 if self.op.disk_template in constants.DTS_NET_MIRROR:
7237 if self.op.snode == pnode.name:
7238 raise errors.OpPrereqError("The secondary node cannot be the"
7239 " primary node.", errors.ECODE_INVAL)
7240 _CheckNodeOnline(self, self.op.snode)
7241 _CheckNodeNotDrained(self, self.op.snode)
7242 self.secondaries.append(self.op.snode)
7243
7244 nodenames = [pnode.name] + self.secondaries
7245
7246 req_size = _ComputeDiskSize(self.op.disk_template,
7247 self.disks)
7248
7249
7250 if req_size is not None and not self.adopt_disks:
7251 _CheckNodesFreeDisk(self, nodenames, req_size)
7252
7253 if self.adopt_disks:
7254 all_lvs = set([i["adopt"] for i in self.disks])
7255 if len(all_lvs) != len(self.disks):
7256 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7257 errors.ECODE_INVAL)
7258 for lv_name in all_lvs:
7259 try:
7260 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7261 except errors.ReservationError:
7262 raise errors.OpPrereqError("LV named %s used by another instance" %
7263 lv_name, errors.ECODE_NOTUNIQUE)
7264
7265 node_lvs = self.rpc.call_lv_list([pnode.name],
7266 self.cfg.GetVGName())[pnode.name]
7267 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7268 node_lvs = node_lvs.payload
7269 delta = all_lvs.difference(node_lvs.keys())
7270 if delta:
7271 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7272 utils.CommaJoin(delta),
7273 errors.ECODE_INVAL)
7274 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7275 if online_lvs:
7276 raise errors.OpPrereqError("Online logical volumes found, cannot"
7277 " adopt: %s" % utils.CommaJoin(online_lvs),
7278 errors.ECODE_STATE)
7279
7280 for dsk in self.disks:
7281 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7282
7283 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7284
7285 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7286
7287 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7288
7289 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7290
7291
7292 if self.op.start:
7293 _CheckNodeFreeMemory(self, self.pnode.name,
7294 "creating instance %s" % self.op.instance_name,
7295 self.be_full[constants.BE_MEMORY],
7296 self.op.hypervisor)
7297
7298 self.dry_run_result = list(nodenames)
7299
7300 - def Exec(self, feedback_fn):
7301 """Create and add the instance to the cluster.
7302
7303 """
7304 instance = self.op.instance_name
7305 pnode_name = self.pnode.name
7306
7307 ht_kind = self.op.hypervisor
7308 if ht_kind in constants.HTS_REQ_PORT:
7309 network_port = self.cfg.AllocatePort()
7310 else:
7311 network_port = None
7312
7313 if constants.ENABLE_FILE_STORAGE:
7314
7315 if self.op.file_storage_dir is None:
7316 string_file_storage_dir = ""
7317 else:
7318 string_file_storage_dir = self.op.file_storage_dir
7319
7320
7321 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7322 string_file_storage_dir, instance)
7323 else:
7324 file_storage_dir = ""
7325
7326 disks = _GenerateDiskTemplate(self,
7327 self.op.disk_template,
7328 instance, pnode_name,
7329 self.secondaries,
7330 self.disks,
7331 file_storage_dir,
7332 self.op.file_driver,
7333 0)
7334
7335 iobj = objects.Instance(name=instance, os=self.op.os_type,
7336 primary_node=pnode_name,
7337 nics=self.nics, disks=disks,
7338 disk_template=self.op.disk_template,
7339 admin_up=False,
7340 network_port=network_port,
7341 beparams=self.op.beparams,
7342 hvparams=self.op.hvparams,
7343 hypervisor=self.op.hypervisor,
7344 osparams=self.op.osparams,
7345 )
7346
7347 if self.adopt_disks:
7348
7349
7350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7351 rename_to = []
7352 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7353 rename_to.append(t_dsk.logical_id)
7354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7355 self.cfg.SetDiskID(t_dsk, pnode_name)
7356 result = self.rpc.call_blockdev_rename(pnode_name,
7357 zip(tmp_disks, rename_to))
7358 result.Raise("Failed to rename adoped LVs")
7359 else:
7360 feedback_fn("* creating instance disks...")
7361 try:
7362 _CreateDisks(self, iobj)
7363 except errors.OpExecError:
7364 self.LogWarning("Device creation failed, reverting...")
7365 try:
7366 _RemoveDisks(self, iobj)
7367 finally:
7368 self.cfg.ReleaseDRBDMinors(instance)
7369 raise
7370
7371 feedback_fn("adding instance %s to cluster config" % instance)
7372
7373 self.cfg.AddInstance(iobj, self.proc.GetECId())
7374
7375
7376
7377 del self.remove_locks[locking.LEVEL_INSTANCE]
7378
7379 if self.op.mode == constants.INSTANCE_IMPORT:
7380 nodes_keep = [self.op.src_node]
7381 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7382 if node != self.op.src_node]
7383 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7384 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7385 else:
7386 self.context.glm.release(locking.LEVEL_NODE)
7387 del self.acquired_locks[locking.LEVEL_NODE]
7388
7389 if self.op.wait_for_sync:
7390 disk_abort = not _WaitForSync(self, iobj)
7391 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7392
7393 time.sleep(15)
7394 feedback_fn("* checking mirrors status")
7395 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7396 else:
7397 disk_abort = False
7398
7399 if disk_abort:
7400 _RemoveDisks(self, iobj)
7401 self.cfg.RemoveInstance(iobj.name)
7402
7403 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7404 raise errors.OpExecError("There are some degraded disks for"
7405 " this instance")
7406
7407 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7408 if self.op.mode == constants.INSTANCE_CREATE:
7409 if not self.op.no_install:
7410 feedback_fn("* running the instance OS create scripts...")
7411
7412 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7413 self.op.debug_level)
7414 result.Raise("Could not add os for instance %s"
7415 " on node %s" % (instance, pnode_name))
7416
7417 elif self.op.mode == constants.INSTANCE_IMPORT:
7418 feedback_fn("* running the instance OS import scripts...")
7419
7420 transfers = []
7421
7422 for idx, image in enumerate(self.src_images):
7423 if not image:
7424 continue
7425
7426
7427 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7428 constants.IEIO_FILE, (image, ),
7429 constants.IEIO_SCRIPT,
7430 (iobj.disks[idx], idx),
7431 None)
7432 transfers.append(dt)
7433
7434 import_result = \
7435 masterd.instance.TransferInstanceData(self, feedback_fn,
7436 self.op.src_node, pnode_name,
7437 self.pnode.secondary_ip,
7438 iobj, transfers)
7439 if not compat.all(import_result):
7440 self.LogWarning("Some disks for instance %s on node %s were not"
7441 " imported successfully" % (instance, pnode_name))
7442
7443 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7444 feedback_fn("* preparing remote import...")
7445 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7446 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7447
7448 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7449 self.source_x509_ca,
7450 self._cds, timeouts)
7451 if not compat.all(disk_results):
7452
7453
7454 self.LogWarning("Some disks for instance %s on node %s were not"
7455 " imported successfully" % (instance, pnode_name))
7456
7457
7458 assert iobj.name == instance
7459 feedback_fn("Running rename script for %s" % instance)
7460 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7461 self.source_instance_name,
7462 self.op.debug_level)
7463 if result.fail_msg:
7464 self.LogWarning("Failed to run rename script for %s on node"
7465 " %s: %s" % (instance, pnode_name, result.fail_msg))
7466
7467 else:
7468
7469 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7470 % self.op.mode)
7471
7472 if self.op.start:
7473 iobj.admin_up = True
7474 self.cfg.Update(iobj, feedback_fn)
7475 logging.info("Starting instance %s on node %s", instance, pnode_name)
7476 feedback_fn("* starting instance...")
7477 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7478 result.Raise("Could not start instance")
7479
7480 return list(iobj.all_nodes)
7481
7482
7483 -class LUConnectConsole(NoHooksLU):
7484 """Connect to an instance's console.
7485
7486 This is somewhat special in that it returns the command line that
7487 you need to run on the master node in order to connect to the
7488 console.
7489
7490 """
7491 _OP_PARAMS = [
7492 _PInstanceName
7493 ]
7494 REQ_BGL = False
7495
7498
7499 - def CheckPrereq(self):
7500 """Check prerequisites.
7501
7502 This checks that the instance is in the cluster.
7503
7504 """
7505 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7506 assert self.instance is not None, \
7507 "Cannot retrieve locked instance %s" % self.op.instance_name
7508 _CheckNodeOnline(self, self.instance.primary_node)
7509
7510 - def Exec(self, feedback_fn):
7536
7537
7538 -class LUReplaceDisks(LogicalUnit):
7539 """Replace the disks of an instance.
7540
7541 """
7542 HPATH = "mirrors-replace"
7543 HTYPE = constants.HTYPE_INSTANCE
7544 _OP_PARAMS = [
7545 _PInstanceName,
7546 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7547 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7548 ("remote_node", None, _TMaybeString),
7549 ("iallocator", None, _TMaybeString),
7550 ("early_release", False, _TBool),
7551 ]
7552 REQ_BGL = False
7553
7557
7584
7591
7592 - def BuildHooksEnv(self):
7593 """Build hooks env.
7594
7595 This runs on the master, the primary and all the secondaries.
7596
7597 """
7598 instance = self.replacer.instance
7599 env = {
7600 "MODE": self.op.mode,
7601 "NEW_SECONDARY": self.op.remote_node,
7602 "OLD_SECONDARY": instance.secondary_nodes[0],
7603 }
7604 env.update(_BuildInstanceHookEnvByObject(self, instance))
7605 nl = [
7606 self.cfg.GetMasterNode(),
7607 instance.primary_node,
7608 ]
7609 if self.op.remote_node is not None:
7610 nl.append(self.op.remote_node)
7611 return env, nl, nl
7612
7613
7614 -class TLReplaceDisks(Tasklet):
7615 """Replaces disks for an instance.
7616
7617 Note: Locking is not within the scope of this class.
7618
7619 """
7620 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7621 disks, delay_iallocator, early_release):
7622 """Initializes this class.
7623
7624 """
7625 Tasklet.__init__(self, lu)
7626
7627
7628 self.instance_name = instance_name
7629 self.mode = mode
7630 self.iallocator_name = iallocator_name
7631 self.remote_node = remote_node
7632 self.disks = disks
7633 self.delay_iallocator = delay_iallocator
7634 self.early_release = early_release
7635
7636
7637 self.instance = None
7638 self.new_node = None
7639 self.target_node = None
7640 self.other_node = None
7641 self.remote_node_info = None
7642 self.node_secondary_ip = None
7643
7644 @staticmethod
7645 - def CheckArguments(mode, remote_node, iallocator):
7646 """Helper function for users of this class.
7647
7648 """
7649
7650 if mode == constants.REPLACE_DISK_CHG:
7651 if remote_node is None and iallocator is None:
7652 raise errors.OpPrereqError("When changing the secondary either an"
7653 " iallocator script must be used or the"
7654 " new node given", errors.ECODE_INVAL)
7655
7656 if remote_node is not None and iallocator is not None:
7657 raise errors.OpPrereqError("Give either the iallocator or the new"
7658 " secondary, not both", errors.ECODE_INVAL)
7659
7660 elif remote_node is not None or iallocator is not None:
7661
7662 raise errors.OpPrereqError("The iallocator and new node options can"
7663 " only be used when changing the"
7664 " secondary node", errors.ECODE_INVAL)
7665
7666 @staticmethod
7667 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7668 """Compute a new secondary node using an IAllocator.
7669
7670 """
7671 ial = IAllocator(lu.cfg, lu.rpc,
7672 mode=constants.IALLOCATOR_MODE_RELOC,
7673 name=instance_name,
7674 relocate_from=relocate_from)
7675
7676 ial.Run(iallocator_name)
7677
7678 if not ial.success:
7679 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7680 " %s" % (iallocator_name, ial.info),
7681 errors.ECODE_NORES)
7682
7683 if len(ial.result) != ial.required_nodes:
7684 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7685 " of nodes (%s), required %s" %
7686 (iallocator_name,
7687 len(ial.result), ial.required_nodes),
7688 errors.ECODE_FAULT)
7689
7690 remote_node_name = ial.result[0]
7691
7692 lu.LogInfo("Selected new secondary for instance '%s': %s",
7693 instance_name, remote_node_name)
7694
7695 return remote_node_name
7696
7700
7723
7724 - def _CheckPrereq2(self):
7725 """Check prerequisites, second part.
7726
7727 This function should always be part of CheckPrereq. It was separated and is
7728 now called from Exec because during node evacuation iallocator was only
7729 called with an unmodified cluster model, not taking planned changes into
7730 account.
7731
7732 """
7733 instance = self.instance
7734 secondary_node = instance.secondary_nodes[0]
7735
7736 if self.iallocator_name is None:
7737 remote_node = self.remote_node
7738 else:
7739 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7740 instance.name, instance.secondary_nodes)
7741
7742 if remote_node is not None:
7743 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7744 assert self.remote_node_info is not None, \
7745 "Cannot retrieve locked node %s" % remote_node
7746 else:
7747 self.remote_node_info = None
7748
7749 if remote_node == self.instance.primary_node:
7750 raise errors.OpPrereqError("The specified node is the primary node of"
7751 " the instance.", errors.ECODE_INVAL)
7752
7753 if remote_node == secondary_node:
7754 raise errors.OpPrereqError("The specified node is already the"
7755 " secondary node of the instance.",
7756 errors.ECODE_INVAL)
7757
7758 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7759 constants.REPLACE_DISK_CHG):
7760 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7761 errors.ECODE_INVAL)
7762
7763 if self.mode == constants.REPLACE_DISK_AUTO:
7764 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7765 faulty_secondary = self._FindFaultyDisks(secondary_node)
7766
7767 if faulty_primary and faulty_secondary:
7768 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7769 " one node and can not be repaired"
7770 " automatically" % self.instance_name,
7771 errors.ECODE_STATE)
7772
7773 if faulty_primary:
7774 self.disks = faulty_primary
7775 self.target_node = instance.primary_node
7776 self.other_node = secondary_node
7777 check_nodes = [self.target_node, self.other_node]
7778 elif faulty_secondary:
7779 self.disks = faulty_secondary
7780 self.target_node = secondary_node
7781 self.other_node = instance.primary_node
7782 check_nodes = [self.target_node, self.other_node]
7783 else:
7784 self.disks = []
7785 check_nodes = []
7786
7787 else:
7788
7789 if self.mode == constants.REPLACE_DISK_PRI:
7790 self.target_node = instance.primary_node
7791 self.other_node = secondary_node
7792 check_nodes = [self.target_node, self.other_node]
7793
7794 elif self.mode == constants.REPLACE_DISK_SEC:
7795 self.target_node = secondary_node
7796 self.other_node = instance.primary_node
7797 check_nodes = [self.target_node, self.other_node]
7798
7799 elif self.mode == constants.REPLACE_DISK_CHG:
7800 self.new_node = remote_node
7801 self.other_node = instance.primary_node
7802 self.target_node = secondary_node
7803 check_nodes = [self.new_node, self.other_node]
7804
7805 _CheckNodeNotDrained(self.lu, remote_node)
7806
7807 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7808 assert old_node_info is not None
7809 if old_node_info.offline and not self.early_release:
7810
7811 self.early_release = True
7812 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7813 " early-release mode", secondary_node)
7814
7815 else:
7816 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7817 self.mode)
7818
7819
7820 if not self.disks:
7821 self.disks = range(len(self.instance.disks))
7822
7823 for node in check_nodes:
7824 _CheckNodeOnline(self.lu, node)
7825
7826
7827 for disk_idx in self.disks:
7828 instance.FindDisk(disk_idx)
7829
7830
7831 node_2nd_ip = {}
7832
7833 for node_name in [self.target_node, self.other_node, self.new_node]:
7834 if node_name is not None:
7835 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7836
7837 self.node_secondary_ip = node_2nd_ip
7838
7839 - def Exec(self, feedback_fn):
7840 """Execute disk replacement.
7841
7842 This dispatches the disk replacement to the appropriate handler.
7843
7844 """
7845 if self.delay_iallocator:
7846 self._CheckPrereq2()
7847
7848 if not self.disks:
7849 feedback_fn("No disks need replacement")
7850 return
7851
7852 feedback_fn("Replacing disk(s) %s for %s" %
7853 (utils.CommaJoin(self.disks), self.instance.name))
7854
7855 activate_disks = (not self.instance.admin_up)
7856
7857
7858 if activate_disks:
7859 _StartInstanceDisks(self.lu, self.instance, True)
7860
7861 try:
7862
7863 if self.new_node is not None:
7864 fn = self._ExecDrbd8Secondary
7865 else:
7866 fn = self._ExecDrbd8DiskOnly
7867
7868 return fn(feedback_fn)
7869
7870 finally:
7871
7872
7873 if activate_disks:
7874 _SafeShutdownInstanceDisks(self.lu, self.instance)
7875
7876 - def _CheckVolumeGroup(self, nodes):
7877 self.lu.LogInfo("Checking volume groups")
7878
7879 vgname = self.cfg.GetVGName()
7880
7881
7882 results = self.rpc.call_vg_list(nodes)
7883 if not results:
7884 raise errors.OpExecError("Can't list volume groups on the nodes")
7885
7886 for node in nodes:
7887 res = results[node]
7888 res.Raise("Error checking node %s" % node)
7889 if vgname not in res.payload:
7890 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7891 (vgname, node))
7892
7893 - def _CheckDisksExistence(self, nodes):
7894
7895 for idx, dev in enumerate(self.instance.disks):
7896 if idx not in self.disks:
7897 continue
7898
7899 for node in nodes:
7900 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7901 self.cfg.SetDiskID(dev, node)
7902
7903 result = self.rpc.call_blockdev_find(node, dev)
7904
7905 msg = result.fail_msg
7906 if msg or not result.payload:
7907 if not msg:
7908 msg = "disk not found"
7909 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7910 (idx, node, msg))
7911
7912 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7913 for idx, dev in enumerate(self.instance.disks):
7914 if idx not in self.disks:
7915 continue
7916
7917 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7918 (idx, node_name))
7919
7920 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7921 ldisk=ldisk):
7922 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7923 " replace disks for instance %s" %
7924 (node_name, self.instance.name))
7925
7926 - def _CreateNewStorage(self, node_name):
7927 vgname = self.cfg.GetVGName()
7928 iv_names = {}
7929
7930 for idx, dev in enumerate(self.instance.disks):
7931 if idx not in self.disks:
7932 continue
7933
7934 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7935
7936 self.cfg.SetDiskID(dev, node_name)
7937
7938 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7939 names = _GenerateUniqueNames(self.lu, lv_names)
7940
7941 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7942 logical_id=(vgname, names[0]))
7943 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7944 logical_id=(vgname, names[1]))
7945
7946 new_lvs = [lv_data, lv_meta]
7947 old_lvs = dev.children
7948 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7949
7950
7951 for new_lv in new_lvs:
7952 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7953 _GetInstanceInfoText(self.instance), False)
7954
7955 return iv_names
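# Editor's note on the returned mapping (shape as built above, names
# hypothetical): iv_names maps each DRBD device's iv_name to a
# (dev, old_lvs, new_lvs) triple, e.g.
#   {"disk/0": (drbd_disk, [old_data_lv, old_meta_lv],
#               [new_data_lv, new_meta_lv])}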
7956
7957 - def _CheckDevices(self, node_name, iv_names):
7958 for name, (dev, _, _) in iv_names.iteritems():
7959 self.cfg.SetDiskID(dev, node_name)
7960
7961 result = self.rpc.call_blockdev_find(node_name, dev)
7962
7963 msg = result.fail_msg
7964 if msg or not result.payload:
7965 if not msg:
7966 msg = "disk not found"
7967 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7968 (name, msg))
7969
7970 if result.payload.is_degraded:
7971 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7972
7973 - def _RemoveOldStorage(self, node_name, iv_names):
7974 for name, (_, old_lvs, _) in iv_names.iteritems():
7975 self.lu.LogInfo("Remove logical volumes for %s" % name)
7976
7977 for lv in old_lvs:
7978 self.cfg.SetDiskID(lv, node_name)
7979
7980 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7981 if msg:
7982 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7983 hint="remove unused LVs manually")
7984
7988
7989 - def _ExecDrbd8DiskOnly(self, feedback_fn):
7990 """Replace a disk on the primary or secondary for DRBD 8.
7991
7992 The algorithm for replace is quite complicated:
7993
7994 1. for each disk to be replaced:
7995
7996 1. create new LVs on the target node with unique names
7997 1. detach old LVs from the drbd device
7998 1. rename old LVs to name_replaced.<time_t>
7999 1. rename new LVs to old LVs
8000 1. attach the new LVs (with the old names now) to the drbd device
8001
8002 1. wait for sync across all devices
8003
8004 1. for each modified disk:
8005
8006 1. remove old LVs (which have the name name_replaced.<time_t>)
8007
8008 Failures are not very well handled.
8009
8010 """
8011 steps_total = 6
8012
8013
8014 self.lu.LogStep(1, steps_total, "Check device existence")
8015 self._CheckDisksExistence([self.other_node, self.target_node])
8016 self._CheckVolumeGroup([self.target_node, self.other_node])
8017
8018
8019 self.lu.LogStep(2, steps_total, "Check peer consistency")
8020 self._CheckDisksConsistency(self.other_node,
8021 self.other_node == self.instance.primary_node,
8022 False)
8023
8024
8025 self.lu.LogStep(3, steps_total, "Allocate new storage")
8026 iv_names = self._CreateNewStorage(self.target_node)
8027
8028
8029 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8030 for dev, old_lvs, new_lvs in iv_names.itervalues():
8031 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8032
8033 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8034 old_lvs)
8035 result.Raise("Can't detach drbd from local storage on node"
8036 " %s for device %s" % (self.target_node, dev.iv_name))
8037
8038
8039
8040
8041
8042
8043
8044
8045
8046
8047 temp_suffix = int(time.time())
8048 ren_fn = lambda d, suff: (d.physical_id[0],
8049 d.physical_id[1] + "_replaced-%s" % suff)
8050
8051
8052 rename_old_to_new = []
8053 for to_ren in old_lvs:
8054 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8055 if not result.fail_msg and result.payload:
8056
8057 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8058
8059 self.lu.LogInfo("Renaming the old LVs on the target node")
8060 result = self.rpc.call_blockdev_rename(self.target_node,
8061 rename_old_to_new)
8062 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8063
8064
8065 self.lu.LogInfo("Renaming the new LVs on the target node")
8066 rename_new_to_old = [(new, old.physical_id)
8067 for old, new in zip(old_lvs, new_lvs)]
8068 result = self.rpc.call_blockdev_rename(self.target_node,
8069 rename_new_to_old)
8070 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8071
8072 for old, new in zip(old_lvs, new_lvs):
8073 new.logical_id = old.logical_id
8074 self.cfg.SetDiskID(new, self.target_node)
8075
8076 for disk in old_lvs:
8077 disk.logical_id = ren_fn(disk, temp_suffix)
8078 self.cfg.SetDiskID(disk, self.target_node)
8079
8080
8081 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8082 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8083 new_lvs)
8084 msg = result.fail_msg
8085 if msg:
8086 for new_lv in new_lvs:
8087 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8088 new_lv).fail_msg
8089 if msg2:
8090 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8091 hint=("cleanup manually the unused logical"
8092 "volumes"))
8093 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8094
8095 dev.children = new_lvs
8096
8097 self.cfg.Update(self.instance, feedback_fn)
8098
8099 cstep = 5
8100 if self.early_release:
8101 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8102 cstep += 1
8103 self._RemoveOldStorage(self.target_node, iv_names)
8104
8105
8106 self._ReleaseNodeLock([self.target_node, self.other_node])
8107
8108
8109
8110
8111 self.lu.LogStep(cstep, steps_total, "Sync devices")
8112 cstep += 1
8113 _WaitForSync(self.lu, self.instance)
8114
8115
8116 self._CheckDevices(self.instance.primary_node, iv_names)
8117
8118
8119 if not self.early_release:
8120 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8121 cstep += 1
8122 self._RemoveOldStorage(self.target_node, iv_names)
8123
8124 - def _ExecDrbd8Secondary(self, feedback_fn):
8125 """Replace the secondary node for DRBD 8.
8126
8127 The algorithm for replace is quite complicated:
8128 - for all disks of the instance:
8129 - create new LVs on the new node with same names
8130 - shutdown the drbd device on the old secondary
8131 - disconnect the drbd network on the primary
8132 - create the drbd device on the new secondary
8133 - network attach the drbd on the primary, using an artifice:
8134 the drbd code for Attach() will connect to the network if it
8135 finds a device which is connected to the good local disks but
8136 not network enabled
8137 - wait for sync across all devices
8138 - remove all disks from the old secondary
8139
8140 Failures are not very well handled.
8141
8142 """
8143 steps_total = 6
8144
8145
8146 self.lu.LogStep(1, steps_total, "Check device existence")
8147 self._CheckDisksExistence([self.instance.primary_node])
8148 self._CheckVolumeGroup([self.instance.primary_node])
8149
8150
8151 self.lu.LogStep(2, steps_total, "Check peer consistency")
8152 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8153
8154
8155 self.lu.LogStep(3, steps_total, "Allocate new storage")
8156 for idx, dev in enumerate(self.instance.disks):
8157 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8158 (self.new_node, idx))
8159
8160 for new_lv in dev.children:
8161 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8162 _GetInstanceInfoText(self.instance), False)
8163
8164
8165
8166
8167 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8168 minors = self.cfg.AllocateDRBDMinor([self.new_node
8169 for dev in self.instance.disks],
8170 self.instance.name)
8171 logging.debug("Allocated minors %r", minors)
8172
8173 iv_names = {}
8174 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8175 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8176 (self.new_node, idx))
8177
8178
8179
8180
8181 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8182 if self.instance.primary_node == o_node1:
8183 p_minor = o_minor1
8184 else:
8185 assert self.instance.primary_node == o_node2, "Three-node instance?"
8186 p_minor = o_minor2
8187
8188 new_alone_id = (self.instance.primary_node, self.new_node, None,
8189 p_minor, new_minor, o_secret)
8190 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8191 p_minor, new_minor, o_secret)
8192
8193 iv_names[idx] = (dev, dev.children, new_net_id)
8194 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8195 new_net_id)
8196 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8197 logical_id=new_alone_id,
8198 children=dev.children,
8199 size=dev.size)
8200 try:
8201 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8202 _GetInstanceInfoText(self.instance), False)
8203 except errors.GenericError:
8204 self.cfg.ReleaseDRBDMinors(self.instance.name)
8205 raise
8206
8207
8208 for idx, dev in enumerate(self.instance.disks):
8209 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8210 self.cfg.SetDiskID(dev, self.target_node)
8211 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8212 if msg:
8213 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8214 "node: %s" % (idx, msg),
8215 hint=("Please cleanup this device manually as"
8216 " soon as possible"))
8217
8218 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8219 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8220 self.node_secondary_ip,
8221 self.instance.disks)\
8222 [self.instance.primary_node]
8223
8224 msg = result.fail_msg
8225 if msg:
8226
8227 self.cfg.ReleaseDRBDMinors(self.instance.name)
8228 raise errors.OpExecError("Can't detach the disks from the network on"
8229 " old node: %s" % (msg,))
8230
8231
8232
8233 self.lu.LogInfo("Updating instance configuration")
8234 for dev, _, new_logical_id in iv_names.itervalues():
8235 dev.logical_id = new_logical_id
8236 self.cfg.SetDiskID(dev, self.instance.primary_node)
8237
8238 self.cfg.Update(self.instance, feedback_fn)
8239
8240
8241 self.lu.LogInfo("Attaching primary drbds to new secondary"
8242 " (standalone => connected)")
8243 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8244 self.new_node],
8245 self.node_secondary_ip,
8246 self.instance.disks,
8247 self.instance.name,
8248 False)
8249 for to_node, to_result in result.items():
8250 msg = to_result.fail_msg
8251 if msg:
8252 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8253 to_node, msg,
8254 hint=("please do a gnt-instance info to see the"
8255 " status of disks"))
8256 cstep = 5
8257 if self.early_release:
8258 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8259 cstep += 1
8260 self._RemoveOldStorage(self.target_node, iv_names)
8261
8262
8263 self._ReleaseNodeLock([self.instance.primary_node,
8264 self.target_node,
8265 self.new_node])
8266
8267
8268
8269
8270 self.lu.LogStep(cstep, steps_total, "Sync devices")
8271 cstep += 1
8272 _WaitForSync(self.lu, self.instance)
8273
8274
8275 self._CheckDevices(self.instance.primary_node, iv_names)
8276
8277
8278 if not self.early_release:
8279 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8280 self._RemoveOldStorage(self.target_node, iv_names)
8281
8282
8283 -class LURepairNodeStorage(NoHooksLU):
8284 """Repairs the volume group on a node.
8285
8286 """
8287 _OP_PARAMS = [
8288 _PNodeName,
8289 ("storage_type", _NoDefault, _CheckStorageType),
8290 ("name", _NoDefault, _TNonEmptyString),
8291 ("ignore_consistency", False, _TBool),
8292 ]
8293 REQ_BGL = False
8294
8305
8310
8324
8325 - def CheckPrereq(self):
8326 """Check prerequisites.
8327
8328 """
8329
8330 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8331 if not inst.admin_up:
8332 continue
8333 check_nodes = set(inst.all_nodes)
8334 check_nodes.discard(self.op.node_name)
8335 for inst_node_name in check_nodes:
8336 self._CheckFaultyDisks(inst, inst_node_name)
8337
8338 - def Exec(self, feedback_fn):
8339 feedback_fn("Repairing storage unit '%s' on %s ..." %
8340 (self.op.name, self.op.node_name))
8341
8342 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8343 result = self.rpc.call_storage_execute(self.op.node_name,
8344 self.op.storage_type, st_args,
8345 self.op.name,
8346 constants.SO_FIX_CONSISTENCY)
8347 result.Raise("Failed to repair storage unit '%s' on %s" %
8348 (self.op.name, self.op.node_name))
8349
8350
8351 -class LUNodeEvacuationStrategy(NoHooksLU):
8352 """Computes the node evacuation strategy.
8353
8354 """
8355 _OP_PARAMS = [
8356 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8357 ("remote_node", None, _TMaybeString),
8358 ("iallocator", None, _TMaybeString),
8359 ]
8360 REQ_BGL = False
8361
8364
8373
8374 - def Exec(self, feedback_fn):
8375 if self.op.remote_node is not None:
8376 instances = []
8377 for node in self.op.nodes:
8378 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8379 result = []
8380 for i in instances:
8381 if i.primary_node == self.op.remote_node:
8382 raise errors.OpPrereqError("Node %s is the primary node of"
8383 " instance %s, cannot use it as"
8384 " secondary" %
8385 (self.op.remote_node, i.name),
8386 errors.ECODE_INVAL)
8387 result.append([i.name, self.op.remote_node])
8388 else:
8389 ial = IAllocator(self.cfg, self.rpc,
8390 mode=constants.IALLOCATOR_MODE_MEVAC,
8391 evac_nodes=self.op.nodes)
8392 ial.Run(self.op.iallocator, validate=True)
8393 if not ial.success:
8394 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8395 errors.ECODE_NORES)
8396 result = ial.result
8397 return result
8398
8399
8400 -class LUGrowDisk(LogicalUnit):
8401 """Grow a disk of an instance.
8402
8403 """
8404 HPATH = "disk-grow"
8405 HTYPE = constants.HTYPE_INSTANCE
8406 _OP_PARAMS = [
8407 _PInstanceName,
8408 ("disk", _NoDefault, _TInt),
8409 ("amount", _NoDefault, _TInt),
8410 ("wait_for_sync", True, _TBool),
8411 ]
8412 REQ_BGL = False
8413
8418
8422
8423 - def BuildHooksEnv(self):
8424 """Build hooks env.
8425
8426 This runs on the master, the primary and all the secondaries.
8427
8428 """
8429 env = {
8430 "DISK": self.op.disk,
8431 "AMOUNT": self.op.amount,
8432 }
8433 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8435 return env, nl, nl
8436
8462
8463 - def Exec(self, feedback_fn):
8464 """Execute disk grow.
8465
8466 """
8467 instance = self.instance
8468 disk = self.disk
8469
8470 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8471 if not disks_ok:
8472 raise errors.OpExecError("Cannot activate block device to grow")
8473
8474 for node in instance.all_nodes:
8475 self.cfg.SetDiskID(disk, node)
8476 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8477 result.Raise("Grow request failed to node %s" % node)
8478
8479
8480
8481
8482
8483
8484 time.sleep(5)
8485
8486 disk.RecordGrow(self.op.amount)
8487 self.cfg.Update(instance, feedback_fn)
8488 if self.op.wait_for_sync:
8489 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8490 if disk_abort:
8491 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8492 " status.\nPlease check the instance.")
8493 if not instance.admin_up:
8494 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8495     elif not instance.admin_up:
8496       self.proc.LogWarning("Not shutting down the disk even though the"
8497                            " instance is not supposed to be running,"
8498                            " because wait_for_sync was not requested")
8499
8502 """Query runtime instance data.
8503
8504 """
8505 _OP_PARAMS = [
8506 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8507 ("static", False, _TBool),
8508 ]
8509 REQ_BGL = False
8510
8527
8531
8533 """Check prerequisites.
8534
8535 This only checks the optional instance list against the existing names.
8536
8537 """
8538 if self.wanted_names is None:
8539 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8540
8541 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8542 in self.wanted_names]
8543
8545 """Returns the status of a block device
8546
8547 """
8548 if self.op.static or not node:
8549 return None
8550
8551 self.cfg.SetDiskID(dev, node)
8552
8553 result = self.rpc.call_blockdev_find(node, dev)
8554 if result.offline:
8555 return None
8556
8557 result.Raise("Can't compute disk status for %s" % instance_name)
8558
8559 status = result.payload
8560 if status is None:
8561 return None
8562
8563 return (status.dev_path, status.major, status.minor,
8564 status.sync_percent, status.estimated_time,
8565 status.is_degraded, status.ldisk_status)
8566
8568 """Compute block device status.
8569
8570 """
8571 if dev.dev_type in constants.LDS_DRBD:
8572
8573 if dev.logical_id[0] == instance.primary_node:
8574 snode = dev.logical_id[1]
8575 else:
8576 snode = dev.logical_id[0]
8577
8578 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8579 instance.name, dev)
8580 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8581
8582 if dev.children:
8583 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8584 for child in dev.children]
8585 else:
8586 dev_children = []
8587
8588 data = {
8589 "iv_name": dev.iv_name,
8590 "dev_type": dev.dev_type,
8591 "logical_id": dev.logical_id,
8592 "physical_id": dev.physical_id,
8593 "pstatus": dev_pstatus,
8594 "sstatus": dev_sstatus,
8595 "children": dev_children,
8596 "mode": dev.mode,
8597 "size": dev.size,
8598 }
8599
8600 return data
8601
8602   def Exec(self, feedback_fn):
8603 """Gather and return data"""
8604 result = {}
8605
8606 cluster = self.cfg.GetClusterInfo()
8607
8608 for instance in self.wanted_instances:
8609 if not self.op.static:
8610 remote_info = self.rpc.call_instance_info(instance.primary_node,
8611 instance.name,
8612 instance.hypervisor)
8613 remote_info.Raise("Error checking node %s" % instance.primary_node)
8614 remote_info = remote_info.payload
8615 if remote_info and "state" in remote_info:
8616 remote_state = "up"
8617 else:
8618 remote_state = "down"
8619 else:
8620 remote_state = None
8621 if instance.admin_up:
8622 config_state = "up"
8623 else:
8624 config_state = "down"
8625
8626 disks = [self._ComputeDiskStatus(instance, None, device)
8627 for device in instance.disks]
8628
8629 idict = {
8630 "name": instance.name,
8631 "config_state": config_state,
8632 "run_state": remote_state,
8633 "pnode": instance.primary_node,
8634 "snodes": instance.secondary_nodes,
8635 "os": instance.os,
8636
8637 "nics": _NICListToTuple(self, instance.nics),
8638 "disk_template": instance.disk_template,
8639 "disks": disks,
8640 "hypervisor": instance.hypervisor,
8641 "network_port": instance.network_port,
8642 "hv_instance": instance.hvparams,
8643 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8644 "be_instance": instance.beparams,
8645 "be_actual": cluster.FillBE(instance),
8646 "os_instance": instance.osparams,
8647 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8648 "serial_no": instance.serial_no,
8649 "mtime": instance.mtime,
8650 "ctime": instance.ctime,
8651 "uuid": instance.uuid,
8652 }
8653
8654 result[instance.name] = idict
8655
8656 return result
8657
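# Example (not part of the original module): a minimal consumer of the
# dictionary returned by Exec above; "query_result" is the per-instance
# mapping, and this helper is hypothetical glue code for illustration only.
def _LogInstanceStates(query_result):
  """Logs configured vs. observed state for each queried instance."""
  for name, idict in sorted(query_result.items()):
    logging.info("%s: config=%s run=%s pnode=%s", name,
                 idict["config_state"], idict["run_state"], idict["pnode"])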
8660 """Modifies an instances's parameters.
8661
8662 """
8663 HPATH = "instance-modify"
8664 HTYPE = constants.HTYPE_INSTANCE
8665 _OP_PARAMS = [
8666 _PInstanceName,
8667 ("nics", _EmptyList, _TList),
8668 ("disks", _EmptyList, _TList),
8669 ("beparams", _EmptyDict, _TDict),
8670 ("hvparams", _EmptyDict, _TDict),
8671 ("disk_template", None, _TMaybeString),
8672 ("remote_node", None, _TMaybeString),
8673 ("os_name", None, _TMaybeString),
8674 ("force_variant", False, _TBool),
8675 ("osparams", None, _TOr(_TDict, _TNone)),
8676 _PForce,
8677 ]
8678 REQ_BGL = False
8679
8681 if not (self.op.nics or self.op.disks or self.op.disk_template or
8682 self.op.hvparams or self.op.beparams or self.op.os_name):
8683 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8684
8685 if self.op.hvparams:
8686 _CheckGlobalHvParams(self.op.hvparams)
8687
8688
8689 disk_addremove = 0
8690 for disk_op, disk_dict in self.op.disks:
8691 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8692 if disk_op == constants.DDM_REMOVE:
8693 disk_addremove += 1
8694 continue
8695 elif disk_op == constants.DDM_ADD:
8696 disk_addremove += 1
8697 else:
8698 if not isinstance(disk_op, int):
8699 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8700 if not isinstance(disk_dict, dict):
8701 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8703
8704 if disk_op == constants.DDM_ADD:
8705 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8706 if mode not in constants.DISK_ACCESS_SET:
8707 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8708 errors.ECODE_INVAL)
8709 size = disk_dict.get('size', None)
8710 if size is None:
8711 raise errors.OpPrereqError("Required disk parameter size missing",
8712 errors.ECODE_INVAL)
8713 try:
8714 size = int(size)
8715 except (TypeError, ValueError), err:
8716 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8717 str(err), errors.ECODE_INVAL)
8718 disk_dict['size'] = size
8719 else:
8720
8721 if 'size' in disk_dict:
8722 raise errors.OpPrereqError("Disk size change not possible, use"
8723 " grow-disk", errors.ECODE_INVAL)
8724
8725 if disk_addremove > 1:
8726 raise errors.OpPrereqError("Only one disk add or remove operation"
8727 " supported at a time", errors.ECODE_INVAL)
8728
8729 if self.op.disks and self.op.disk_template is not None:
8730 raise errors.OpPrereqError("Disk template conversion and other disk"
8731 " changes not supported at the same time",
8732 errors.ECODE_INVAL)
8733
8734 if self.op.disk_template:
8735 _CheckDiskTemplate(self.op.disk_template)
8736 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8737 self.op.remote_node is None):
8738 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8739 " one requires specifying a secondary node",
8740 errors.ECODE_INVAL)
8741
8742
8743 nic_addremove = 0
8744 for nic_op, nic_dict in self.op.nics:
8745 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8746 if nic_op == constants.DDM_REMOVE:
8747 nic_addremove += 1
8748 continue
8749 elif nic_op == constants.DDM_ADD:
8750 nic_addremove += 1
8751 else:
8752 if not isinstance(nic_op, int):
8753 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8754 if not isinstance(nic_dict, dict):
8755 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8756 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8757
8758
8759 nic_ip = nic_dict.get('ip', None)
8760 if nic_ip is not None:
8761 if nic_ip.lower() == constants.VALUE_NONE:
8762 nic_dict['ip'] = None
8763 else:
8764 if not netutils.IsValidIP4(nic_ip):
8765 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8766 errors.ECODE_INVAL)
8767
8768 nic_bridge = nic_dict.get('bridge', None)
8769 nic_link = nic_dict.get('link', None)
8770 if nic_bridge and nic_link:
8771 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8772 " at the same time", errors.ECODE_INVAL)
8773 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8774 nic_dict['bridge'] = None
8775 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8776 nic_dict['link'] = None
8777
8778 if nic_op == constants.DDM_ADD:
8779 nic_mac = nic_dict.get('mac', None)
8780 if nic_mac is None:
8781 nic_dict['mac'] = constants.VALUE_AUTO
8782
8783 if 'mac' in nic_dict:
8784 nic_mac = nic_dict['mac']
8785 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8786 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8787
8788 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8789 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8790 " modifying an existing nic",
8791 errors.ECODE_INVAL)
8792
8793 if nic_addremove > 1:
8794 raise errors.OpPrereqError("Only one NIC add or remove operation"
8795 " supported at a time", errors.ECODE_INVAL)
8796
8801
8808
8810 """Build hooks env.
8811
8812 This runs on the master, primary and secondaries.
8813
8814 """
8815 args = dict()
8816 if constants.BE_MEMORY in self.be_new:
8817 args['memory'] = self.be_new[constants.BE_MEMORY]
8818 if constants.BE_VCPUS in self.be_new:
8819 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8820
8821
8822 if self.op.nics:
8823 args['nics'] = []
8824 nic_override = dict(self.op.nics)
8825 for idx, nic in enumerate(self.instance.nics):
8826 if idx in nic_override:
8827 this_nic_override = nic_override[idx]
8828 else:
8829 this_nic_override = {}
8830 if 'ip' in this_nic_override:
8831 ip = this_nic_override['ip']
8832 else:
8833 ip = nic.ip
8834 if 'mac' in this_nic_override:
8835 mac = this_nic_override['mac']
8836 else:
8837 mac = nic.mac
8838 if idx in self.nic_pnew:
8839 nicparams = self.nic_pnew[idx]
8840 else:
8841 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8842 mode = nicparams[constants.NIC_MODE]
8843 link = nicparams[constants.NIC_LINK]
8844 args['nics'].append((ip, mac, mode, link))
8845 if constants.DDM_ADD in nic_override:
8846 ip = nic_override[constants.DDM_ADD].get('ip', None)
8847 mac = nic_override[constants.DDM_ADD]['mac']
8848 nicparams = self.nic_pnew[constants.DDM_ADD]
8849 mode = nicparams[constants.NIC_MODE]
8850 link = nicparams[constants.NIC_LINK]
8851 args['nics'].append((ip, mac, mode, link))
8852 elif constants.DDM_REMOVE in nic_override:
8853 del args['nics'][-1]
8854
8855 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8856 if self.op.disk_template:
8857 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8859 return env, nl, nl
8860
8862 """Check prerequisites.
8863
8864     This checks the requested parameters against the instance configuration.
8865
8866 """
8867
8868
8869 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8870 cluster = self.cluster = self.cfg.GetClusterInfo()
8871 assert self.instance is not None, \
8872 "Cannot retrieve locked instance %s" % self.op.instance_name
8873 pnode = instance.primary_node
8874 nodelist = list(instance.all_nodes)
8875
8876
8877 if self.op.os_name and not self.op.force:
8878 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8879 self.op.force_variant)
8880 instance_os = self.op.os_name
8881 else:
8882 instance_os = instance.os
8883
8884 if self.op.disk_template:
8885 if instance.disk_template == self.op.disk_template:
8886 raise errors.OpPrereqError("Instance already has disk template %s" %
8887 instance.disk_template, errors.ECODE_INVAL)
8888
8889 if (instance.disk_template,
8890 self.op.disk_template) not in self._DISK_CONVERSIONS:
8891 raise errors.OpPrereqError("Unsupported disk template conversion from"
8892 " %s to %s" % (instance.disk_template,
8893 self.op.disk_template),
8894 errors.ECODE_INVAL)
8895 _CheckInstanceDown(self, instance, "cannot change disk template")
8896 if self.op.disk_template in constants.DTS_NET_MIRROR:
8897 if self.op.remote_node == pnode:
8898 raise errors.OpPrereqError("Given new secondary node %s is the same"
8899 " as the primary node of the instance" %
8900 self.op.remote_node, errors.ECODE_STATE)
8901 _CheckNodeOnline(self, self.op.remote_node)
8902 _CheckNodeNotDrained(self, self.op.remote_node)
8903 disks = [{"size": d.size} for d in instance.disks]
8904 required = _ComputeDiskSize(self.op.disk_template, disks)
8905 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8906
8907
8908 if self.op.hvparams:
8909 hv_type = instance.hypervisor
8910 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8911 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8912 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8913
8914
8915 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8916 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8917 self.hv_new = hv_new
8918 self.hv_inst = i_hvdict
8919 else:
8920 self.hv_new = self.hv_inst = {}
8921
8922
8923 if self.op.beparams:
8924 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8925 use_none=True)
8926 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8927 be_new = cluster.SimpleFillBE(i_bedict)
8928 self.be_new = be_new
8929 self.be_inst = i_bedict
8930 else:
8931 self.be_new = self.be_inst = {}
8932
8933
8934 if self.op.osparams:
8935 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8936 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8937 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8938 self.os_inst = i_osdict
8939 else:
8940 self.os_new = self.os_inst = {}
8941
8942 self.warn = []
8943
8944 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8945 mem_check_list = [pnode]
8946 if be_new[constants.BE_AUTO_BALANCE]:
8947
8948 mem_check_list.extend(instance.secondary_nodes)
8949 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8950 instance.hypervisor)
8951 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8952 instance.hypervisor)
8953 pninfo = nodeinfo[pnode]
8954 msg = pninfo.fail_msg
8955 if msg:
8956
8957 self.warn.append("Can't get info from primary node %s: %s" %
8958 (pnode, msg))
8959 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8960 self.warn.append("Node data from primary node %s doesn't contain"
8961 " free memory information" % pnode)
8962 elif instance_info.fail_msg:
8963 self.warn.append("Can't get instance runtime information: %s" %
8964 instance_info.fail_msg)
8965 else:
8966 if instance_info.payload:
8967 current_mem = int(instance_info.payload['memory'])
8968 else:
8969
8970
8971
8972 current_mem = 0
8973 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8974 pninfo.payload['memory_free'])
8975 if miss_mem > 0:
8976 raise errors.OpPrereqError("This change will prevent the instance"
8977 " from starting, due to %d MB of memory"
8978 " missing on its primary node" % miss_mem,
8979 errors.ECODE_NORES)
8980
8981 if be_new[constants.BE_AUTO_BALANCE]:
8982 for node, nres in nodeinfo.items():
8983 if node not in instance.secondary_nodes:
8984 continue
8985 msg = nres.fail_msg
8986 if msg:
8987 self.warn.append("Can't get info from secondary node %s: %s" %
8988 (node, msg))
8989 elif not isinstance(nres.payload.get('memory_free', None), int):
8990 self.warn.append("Secondary node %s didn't return free"
8991 " memory information" % node)
8992 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8993 self.warn.append("Not enough memory to failover instance to"
8994 " secondary node %s" % node)
8995
8996
8997 self.nic_pnew = {}
8998 self.nic_pinst = {}
8999 for nic_op, nic_dict in self.op.nics:
9000 if nic_op == constants.DDM_REMOVE:
9001 if not instance.nics:
9002 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9003 errors.ECODE_INVAL)
9004 continue
9005 if nic_op != constants.DDM_ADD:
9006
9007 if not instance.nics:
9008 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9009 " no NICs" % nic_op,
9010 errors.ECODE_INVAL)
9011 if nic_op < 0 or nic_op >= len(instance.nics):
9012 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9013 " are 0 to %d" %
9014 (nic_op, len(instance.nics) - 1),
9015 errors.ECODE_INVAL)
9016 old_nic_params = instance.nics[nic_op].nicparams
9017 old_nic_ip = instance.nics[nic_op].ip
9018 else:
9019 old_nic_params = {}
9020 old_nic_ip = None
9021
9022 update_params_dict = dict([(key, nic_dict[key])
9023 for key in constants.NICS_PARAMETERS
9024 if key in nic_dict])
9025
9026 if 'bridge' in nic_dict:
9027 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9028
9029 new_nic_params = _GetUpdatedParams(old_nic_params,
9030 update_params_dict)
9031 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9032 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9033 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9034 self.nic_pinst[nic_op] = new_nic_params
9035 self.nic_pnew[nic_op] = new_filled_nic_params
9036 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9037
9038 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9039 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9040 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9041 if msg:
9042 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9043 if self.op.force:
9044 self.warn.append(msg)
9045 else:
9046 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9047 if new_nic_mode == constants.NIC_MODE_ROUTED:
9048 if 'ip' in nic_dict:
9049 nic_ip = nic_dict['ip']
9050 else:
9051 nic_ip = old_nic_ip
9052 if nic_ip is None:
9053 raise errors.OpPrereqError('Cannot set the nic ip to None'
9054 ' on a routed nic', errors.ECODE_INVAL)
9055 if 'mac' in nic_dict:
9056 nic_mac = nic_dict['mac']
9057 if nic_mac is None:
9058 raise errors.OpPrereqError('Cannot set the nic mac to None',
9059 errors.ECODE_INVAL)
9060 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9061
9062 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9063 else:
9064
9065 try:
9066 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9067 except errors.ReservationError:
9068 raise errors.OpPrereqError("MAC address %s already in use"
9069 " in cluster" % nic_mac,
9070 errors.ECODE_NOTUNIQUE)
9071
9072
9073 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9074 raise errors.OpPrereqError("Disk operations not supported for"
9075 " diskless instances",
9076 errors.ECODE_INVAL)
9077 for disk_op, _ in self.op.disks:
9078 if disk_op == constants.DDM_REMOVE:
9079 if len(instance.disks) == 1:
9080 raise errors.OpPrereqError("Cannot remove the last disk of"
9081 " an instance", errors.ECODE_INVAL)
9082 _CheckInstanceDown(self, instance, "cannot remove disks")
9083
9084 if (disk_op == constants.DDM_ADD and
9085           len(instance.disks) >= constants.MAX_DISKS):
9086 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9087 " add more" % constants.MAX_DISKS,
9088 errors.ECODE_STATE)
9089 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9090
9091 if disk_op < 0 or disk_op >= len(instance.disks):
9092 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9093 " are 0 to %d" %
9094                                      (disk_op, len(instance.disks) - 1),
9095 errors.ECODE_INVAL)
9096
9097 return
9098
9100 """Converts an instance from plain to drbd.
9101
9102 """
9103 feedback_fn("Converting template to drbd")
9104 instance = self.instance
9105 pnode = instance.primary_node
9106 snode = self.op.remote_node
9107
9108
9109 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9110 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9111 instance.name, pnode, [snode],
9112 disk_info, None, None, 0)
9113 info = _GetInstanceInfoText(instance)
9114 feedback_fn("Creating aditional volumes...")
9115
9116 for disk in new_disks:
9117
9118 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9119 info, True)
9120 for child in disk.children:
9121 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9122
9123
9124 feedback_fn("Renaming original volumes...")
9125 rename_list = [(o, n.children[0].logical_id)
9126 for (o, n) in zip(instance.disks, new_disks)]
9127 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9128 result.Raise("Failed to rename original LVs")
9129
9130 feedback_fn("Initializing DRBD devices...")
9131
9132 for disk in new_disks:
9133 for node in [pnode, snode]:
9134 f_create = node == pnode
9135 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9136
9137
9138 instance.disk_template = constants.DT_DRBD8
9139 instance.disks = new_disks
9140 self.cfg.Update(instance, feedback_fn)
9141
9142
9143 disk_abort = not _WaitForSync(self, instance)
9144 if disk_abort:
9145 raise errors.OpExecError("There are some degraded disks for"
9146 " this instance, please cleanup manually")
9147
9149 """Converts an instance from drbd to plain.
9150
9151 """
9152 instance = self.instance
9153 assert len(instance.secondary_nodes) == 1
9154 pnode = instance.primary_node
9155 snode = instance.secondary_nodes[0]
9156 feedback_fn("Converting template to plain")
9157
9158 old_disks = instance.disks
9159 new_disks = [d.children[0] for d in old_disks]
9160
9161
9162 for parent, child in zip(old_disks, new_disks):
9163 child.size = parent.size
9164 child.mode = parent.mode
9165
9166
9167 instance.disks = new_disks
9168 instance.disk_template = constants.DT_PLAIN
9169 self.cfg.Update(instance, feedback_fn)
9170
9171 feedback_fn("Removing volumes on the secondary node...")
9172 for disk in old_disks:
9173 self.cfg.SetDiskID(disk, snode)
9174 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9175 if msg:
9176 self.LogWarning("Could not remove block device %s on node %s,"
9177 " continuing anyway: %s", disk.iv_name, snode, msg)
9178
9179 feedback_fn("Removing unneeded volumes on the primary node...")
9180 for idx, disk in enumerate(old_disks):
9181 meta = disk.children[1]
9182 self.cfg.SetDiskID(meta, pnode)
9183 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9184 if msg:
9185 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9186 " continuing anyway: %s", idx, pnode, msg)
9187
9188
9189   def Exec(self, feedback_fn):
9190 """Modifies an instance.
9191
9192 All parameters take effect only at the next restart of the instance.
9193
9194 """
9195
9196
9197 for warn in self.warn:
9198 feedback_fn("WARNING: %s" % warn)
9199
9200 result = []
9201 instance = self.instance
9202
9203 for disk_op, disk_dict in self.op.disks:
9204 if disk_op == constants.DDM_REMOVE:
9205
9206 device = instance.disks.pop()
9207 device_idx = len(instance.disks)
9208 for node, disk in device.ComputeNodeTree(instance.primary_node):
9209 self.cfg.SetDiskID(disk, node)
9210 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9211 if msg:
9212 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9213 " continuing anyway", device_idx, node, msg)
9214 result.append(("disk/%d" % device_idx, "remove"))
9215 elif disk_op == constants.DDM_ADD:
9216
9217 if instance.disk_template == constants.DT_FILE:
9218 file_driver, file_path = instance.disks[0].logical_id
9219 file_path = os.path.dirname(file_path)
9220 else:
9221 file_driver = file_path = None
9222 disk_idx_base = len(instance.disks)
9223 new_disk = _GenerateDiskTemplate(self,
9224 instance.disk_template,
9225 instance.name, instance.primary_node,
9226 instance.secondary_nodes,
9227 [disk_dict],
9228 file_path,
9229 file_driver,
9230 disk_idx_base)[0]
9231 instance.disks.append(new_disk)
9232 info = _GetInstanceInfoText(instance)
9233
9234 logging.info("Creating volume %s for instance %s",
9235 new_disk.iv_name, instance.name)
9236
9237
9238 for node in instance.all_nodes:
9239 f_create = node == instance.primary_node
9240 try:
9241 _CreateBlockDev(self, node, instance, new_disk,
9242 f_create, info, f_create)
9243 except errors.OpExecError, err:
9244 self.LogWarning("Failed to create volume %s (%s) on"
9245 " node %s: %s",
9246 new_disk.iv_name, new_disk, node, err)
9247 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9248 (new_disk.size, new_disk.mode)))
9249 else:
9250
9251 instance.disks[disk_op].mode = disk_dict['mode']
9252 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9253
9254 if self.op.disk_template:
9255 r_shut = _ShutdownInstanceDisks(self, instance)
9256 if not r_shut:
9257 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9258 " proceed with disk template conversion")
9259 mode = (instance.disk_template, self.op.disk_template)
9260 try:
9261 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9262 except:
9263 self.cfg.ReleaseDRBDMinors(instance.name)
9264 raise
9265 result.append(("disk_template", self.op.disk_template))
9266
9267
9268 for nic_op, nic_dict in self.op.nics:
9269 if nic_op == constants.DDM_REMOVE:
9270
9271 del instance.nics[-1]
9272 result.append(("nic.%d" % len(instance.nics), "remove"))
9273 elif nic_op == constants.DDM_ADD:
9274
9275 mac = nic_dict['mac']
9276 ip = nic_dict.get('ip', None)
9277 nicparams = self.nic_pinst[constants.DDM_ADD]
9278 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9279 instance.nics.append(new_nic)
9280 result.append(("nic.%d" % (len(instance.nics) - 1),
9281 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9282 (new_nic.mac, new_nic.ip,
9283 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9285 )))
9286 else:
9287 for key in 'mac', 'ip':
9288 if key in nic_dict:
9289 setattr(instance.nics[nic_op], key, nic_dict[key])
9290 if nic_op in self.nic_pinst:
9291 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9292 for key, val in nic_dict.iteritems():
9293 result.append(("nic.%s/%d" % (key, nic_op), val))
9294
9295
9296 if self.op.hvparams:
9297 instance.hvparams = self.hv_inst
9298 for key, val in self.op.hvparams.iteritems():
9299 result.append(("hv/%s" % key, val))
9300
9301
9302 if self.op.beparams:
9303 instance.beparams = self.be_inst
9304 for key, val in self.op.beparams.iteritems():
9305 result.append(("be/%s" % key, val))
9306
9307
9308 if self.op.os_name:
9309 instance.os = self.op.os_name
9310
9311
9312 if self.op.osparams:
9313 instance.osparams = self.os_inst
9314 for key, val in self.op.osparams.iteritems():
9315 result.append(("os/%s" % key, val))
9316
9317 self.cfg.Update(instance, feedback_fn)
9318
9319 return result
9320
9321 _DISK_CONVERSIONS = {
9322 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9323 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9324 }
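  # Note (added): the values in _DISK_CONVERSIONS are plain functions bound
  # at class-definition time, which is why Exec invokes them as
  # self._DISK_CONVERSIONS[mode](self, feedback_fn), passing the LU instance
  # explicitly instead of relying on bound-method lookup.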
9325
9328 """Query the exports list
9329
9330 """
9331 _OP_PARAMS = [
9332 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9333 ("use_locking", False, _TBool),
9334 ]
9335 REQ_BGL = False
9336
9345
9346   def Exec(self, feedback_fn):
9347 """Compute the list of all the exported system images.
9348
9349 @rtype: dict
9350 @return: a dictionary with the structure node->(export-list)
9351 where export-list is a list of the instances exported on
9352 that node.
9353
9354 """
9355 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9356 rpcresult = self.rpc.call_export_list(self.nodes)
9357 result = {}
9358 for node in rpcresult:
9359 if rpcresult[node].fail_msg:
9360 result[node] = False
9361 else:
9362 result[node] = rpcresult[node].payload
9363
9364 return result
9365
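# Example (not part of the original module): nodes whose RPC failed are
# mapped to False above, so consumers must distinguish that marker from an
# empty export list:
#
#   for node, exports in result.items():
#     if exports is False:
#       logging.warning("Could not query exports on %s", node)
#     else:
#       logging.info("%s exports: %s", node, ", ".join(exports) or "(none)")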
9368 """Prepares an instance for an export and returns useful information.
9369
9370 """
9371 _OP_PARAMS = [
9372 _PInstanceName,
9373 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9374 ]
9375 REQ_BGL = False
9376
9379
9392
9393   def Exec(self, feedback_fn):
9394 """Prepares an instance for an export.
9395
9396 """
9397 instance = self.instance
9398
9399 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9400 salt = utils.GenerateSecret(8)
9401
9402 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9403 result = self.rpc.call_x509_cert_create(instance.primary_node,
9404 constants.RIE_CERT_VALIDITY)
9405 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9406
9407 (name, cert_pem) = result.payload
9408
9409 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9410 cert_pem)
9411
9412 return {
9413 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9414 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9415 salt),
9416 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9417 }
9418
9419 return None
9420
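# Example (not part of the original module): the "x509_key_name" triple
# returned above is checked again by LUExportInstance using the cluster
# domain secret; a minimal sketch of the HMAC roundtrip with made-up values:
#
#   salt = utils.GenerateSecret(8)
#   digest = utils.Sha1Hmac(cds, key_name, salt=salt)
#   assert utils.VerifySha1Hmac(cds, key_name, digest, salt=salt)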
9423 """Export an instance to an image in the cluster.
9424
9425 """
9426 HPATH = "instance-export"
9427 HTYPE = constants.HTYPE_INSTANCE
9428 _OP_PARAMS = [
9429 _PInstanceName,
9430 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9431 ("shutdown", True, _TBool),
9432 _PShutdownTimeout,
9433 ("remove_instance", False, _TBool),
9434 ("ignore_remove_failures", False, _TBool),
9435 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9436 ("x509_key_name", None, _TOr(_TList, _TNone)),
9437 ("destination_x509_ca", None, _TMaybeString),
9438 ]
9439 REQ_BGL = False
9440
9442 """Check the arguments.
9443
9444 """
9445 self.x509_key_name = self.op.x509_key_name
9446 self.dest_x509_ca_pem = self.op.destination_x509_ca
9447
9448 if self.op.remove_instance and not self.op.shutdown:
9449 raise errors.OpPrereqError("Can not remove instance without shutting it"
9450 " down before")
9451
9452 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9453 if not self.x509_key_name:
9454 raise errors.OpPrereqError("Missing X509 key name for encryption",
9455 errors.ECODE_INVAL)
9456
9457 if not self.dest_x509_ca_pem:
9458 raise errors.OpPrereqError("Missing destination X509 CA",
9459 errors.ECODE_INVAL)
9460
9475
9477 """Last minute lock declaration."""
9478
9479
9481 """Build hooks env.
9482
9483 This will run on the master, primary node and target node.
9484
9485 """
9486 env = {
9487 "EXPORT_MODE": self.op.mode,
9488 "EXPORT_NODE": self.op.target_node,
9489 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9490 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9491
9492 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9493 }
9494
9495 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9496
9497 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9498
9499 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9500 nl.append(self.op.target_node)
9501
9502 return env, nl, nl
9503
9505 """Check prerequisites.
9506
9507 This checks that the instance and node names are valid.
9508
9509 """
9510 instance_name = self.op.instance_name
9511
9512 self.instance = self.cfg.GetInstanceInfo(instance_name)
9513 assert self.instance is not None, \
9514 "Cannot retrieve locked instance %s" % self.op.instance_name
9515 _CheckNodeOnline(self, self.instance.primary_node)
9516
9517 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9519 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9520 assert self.dst_node is not None
9521
9522 _CheckNodeOnline(self, self.dst_node.name)
9523 _CheckNodeNotDrained(self, self.dst_node.name)
9524
9525 self._cds = None
9526 self.dest_disk_info = None
9527 self.dest_x509_ca = None
9528
9529 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9530 self.dst_node = None
9531
9532 if len(self.op.target_node) != len(self.instance.disks):
9533 raise errors.OpPrereqError(("Received destination information for %s"
9534 " disks, but instance %s has %s disks") %
9535 (len(self.op.target_node), instance_name,
9536 len(self.instance.disks)),
9537 errors.ECODE_INVAL)
9538
9539 cds = _GetClusterDomainSecret()
9540
9541
9542 try:
9543 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9544 except (TypeError, ValueError), err:
9545 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9546
9547 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9548 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9549 errors.ECODE_INVAL)
9550
9551
9552 try:
9553 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9554 except OpenSSL.crypto.Error, err:
9555 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9556 (err, ), errors.ECODE_INVAL)
9557
9558 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9559 if errcode is not None:
9560 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9561 (msg, ), errors.ECODE_INVAL)
9562
9563 self.dest_x509_ca = cert
9564
9565
9566 disk_info = []
9567 for idx, disk_data in enumerate(self.op.target_node):
9568 try:
9569 (host, port, magic) = \
9570 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9571 except errors.GenericError, err:
9572 raise errors.OpPrereqError("Target info for disk %s: %s" %
9573 (idx, err), errors.ECODE_INVAL)
9574
9575 disk_info.append((host, port, magic))
9576
9577 assert len(disk_info) == len(self.op.target_node)
9578 self.dest_disk_info = disk_info
9579
9580 else:
9581 raise errors.ProgrammerError("Unhandled export mode %r" %
9582 self.op.mode)
9583
9584
9585
9586 for disk in self.instance.disks:
9587 if disk.dev_type == constants.LD_FILE:
9588 raise errors.OpPrereqError("Export not supported for instances with"
9589 " file-based disks", errors.ECODE_INVAL)
9590
9592 """Removes exports of current instance from all other nodes.
9593
9594 If an instance in a cluster with nodes A..D was exported to node C, its
9595 exports will be removed from the nodes A, B and D.
9596
9597 """
9598 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9599
9600 nodelist = self.cfg.GetNodeList()
9601 nodelist.remove(self.dst_node.name)
9602
9603
9604
9605
9606 iname = self.instance.name
9607 if nodelist:
9608 feedback_fn("Removing old exports for instance %s" % iname)
9609 exportlist = self.rpc.call_export_list(nodelist)
9610 for node in exportlist:
9611 if exportlist[node].fail_msg:
9612 continue
9613 if iname in exportlist[node].payload:
9614 msg = self.rpc.call_export_remove(node, iname).fail_msg
9615 if msg:
9616 self.LogWarning("Could not remove older export for instance %s"
9617 " on node %s: %s", iname, node, msg)
9618
9619   def Exec(self, feedback_fn):
9620 """Export an instance to an image in the cluster.
9621
9622 """
9623 assert self.op.mode in constants.EXPORT_MODES
9624
9625 instance = self.instance
9626 src_node = instance.primary_node
9627
9628 if self.op.shutdown:
9629
9630 feedback_fn("Shutting down instance %s" % instance.name)
9631 result = self.rpc.call_instance_shutdown(src_node, instance,
9632 self.op.shutdown_timeout)
9633
9634 result.Raise("Could not shutdown instance %s on"
9635 " node %s" % (instance.name, src_node))
9636
9637
9638
9639 for disk in instance.disks:
9640 self.cfg.SetDiskID(disk, src_node)
9641
9642 activate_disks = (not instance.admin_up)
9643
9644 if activate_disks:
9645
9646 feedback_fn("Activating disks for %s" % instance.name)
9647 _StartInstanceDisks(self, instance, None)
9648
9649 try:
9650 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9651 instance)
9652
9653 helper.CreateSnapshots()
9654 try:
9655 if (self.op.shutdown and instance.admin_up and
9656 not self.op.remove_instance):
9657 assert not activate_disks
9658 feedback_fn("Starting instance %s" % instance.name)
9659 result = self.rpc.call_instance_start(src_node, instance, None, None)
9660 msg = result.fail_msg
9661 if msg:
9662 feedback_fn("Failed to start instance: %s" % msg)
9663 _ShutdownInstanceDisks(self, instance)
9664 raise errors.OpExecError("Could not start instance: %s" % msg)
9665
9666 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9667 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9668 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9669 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9670 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9671
9672 (key_name, _, _) = self.x509_key_name
9673
9674 dest_ca_pem = \
9675 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9676 self.dest_x509_ca)
9677
9678 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9679 key_name, dest_ca_pem,
9680 timeouts)
9681 finally:
9682 helper.Cleanup()
9683
9684
9685 assert len(dresults) == len(instance.disks)
9686 assert compat.all(isinstance(i, bool) for i in dresults), \
9687 "Not all results are boolean: %r" % dresults
9688
9689 finally:
9690 if activate_disks:
9691 feedback_fn("Deactivating disks for %s" % instance.name)
9692 _ShutdownInstanceDisks(self, instance)
9693
9694 if not (compat.all(dresults) and fin_resu):
9695 failures = []
9696 if not fin_resu:
9697 failures.append("export finalization")
9698 if not compat.all(dresults):
9699 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9700 if not dsk)
9701 failures.append("disk export: disk(s) %s" % fdsk)
9702
9703 raise errors.OpExecError("Export failed, errors in %s" %
9704 utils.CommaJoin(failures))
9705
9706
9707
9708
9709 if self.op.remove_instance:
9710 feedback_fn("Removing instance %s" % instance.name)
9711 _RemoveInstance(self, feedback_fn, instance,
9712 self.op.ignore_remove_failures)
9713
9714 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9715 self._CleanupExports(feedback_fn)
9716
9717 return fin_resu, dresults
9718
9721 """Remove exports related to the named instance.
9722
9723 """
9724 _OP_PARAMS = [
9725 _PInstanceName,
9726 ]
9727 REQ_BGL = False
9728
9735
9736   def Exec(self, feedback_fn):
9737 """Remove any export.
9738
9739 """
9740 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9741
9742
9743 fqdn_warn = False
9744 if not instance_name:
9745 fqdn_warn = True
9746 instance_name = self.op.instance_name
9747
9748 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9749 exportlist = self.rpc.call_export_list(locked_nodes)
9750 found = False
9751 for node in exportlist:
9752 msg = exportlist[node].fail_msg
9753 if msg:
9754 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9755 continue
9756 if instance_name in exportlist[node].payload:
9757 found = True
9758 result = self.rpc.call_export_remove(node, instance_name)
9759 msg = result.fail_msg
9760 if msg:
9761 logging.error("Could not remove export for instance %s"
9762 " on node %s: %s", instance_name, node, msg)
9763
9764 if fqdn_warn and not found:
9765 feedback_fn("Export not found. If trying to remove an export belonging"
9766 " to a deleted instance please use its Fully Qualified"
9767 " Domain Name.")
9768
9771 """Generic tags LU.
9772
9773 This is an abstract class which is the parent of all the other tags LUs.
9774
9775 """
9776
9785
9786
9787
9788
9802
9826
9868
9902
9942
9945 """Sleep for a specified amount of time.
9946
9947 This LU sleeps on the master and/or nodes for a specified amount of
9948 time.
9949
9950 """
9951 _OP_PARAMS = [
9952 ("duration", _NoDefault, _TFloat),
9953 ("on_master", True, _TBool),
9954 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9955 ("repeat", 0, _TPositiveInt)
9956 ]
9957 REQ_BGL = False
9958
9960 """Expand names and set required locks.
9961
9962 This expands the node list, if any.
9963
9964 """
9965 self.needed_locks = {}
9966 if self.op.on_nodes:
9967
9968
9969
9970 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9971 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9972
9974 """Do the actual sleep.
9975
9976 """
9977 if self.op.on_master:
9978 if not utils.TestDelay(self.op.duration):
9979 raise errors.OpExecError("Error during master delay test")
9980 if self.op.on_nodes:
9981 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9982 for node, node_result in result.items():
9983 node_result.Raise("Failure during rpc call to node %s" % node)
9984
9985   def Exec(self, feedback_fn):
9986 """Execute the test delay opcode, with the wanted repetitions.
9987
9988 """
9989 if self.op.repeat == 0:
9990 self._TestDelay()
9991 else:
9992 top_value = self.op.repeat - 1
9993 for i in range(self.op.repeat):
9994 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9995 self._TestDelay()
9996
9999 """Utility LU to test some aspects of the job queue.
10000
10001 """
10002 _OP_PARAMS = [
10003 ("notify_waitlock", False, _TBool),
10004 ("notify_exec", False, _TBool),
10005 ("log_messages", _EmptyList, _TListOf(_TString)),
10006 ("fail", False, _TBool),
10007 ]
10008 REQ_BGL = False
10009
10010
10011
10012 _CLIENT_CONNECT_TIMEOUT = 20.0
10013 _CLIENT_CONFIRM_TIMEOUT = 60.0
10014
10015 @classmethod
10017 """Opens a Unix socket and waits for another program to connect.
10018
10019 @type cb: callable
10020 @param cb: Callback to send socket name to client
10021 @type errcls: class
10022 @param errcls: Exception class to use for errors
10023
10024 """
10025
10026
10027
10028 tmpdir = tempfile.mkdtemp()
10029 try:
10030 tmpsock = utils.PathJoin(tmpdir, "sock")
10031
10032 logging.debug("Creating temporary socket at %s", tmpsock)
10033 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10034 try:
10035 sock.bind(tmpsock)
10036 sock.listen(1)
10037
10038
10039 cb(tmpsock)
10040
10041
10042 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10043 try:
10044 (conn, _) = sock.accept()
10045 except socket.error, err:
10046 raise errcls("Client didn't connect in time (%s)" % err)
10047 finally:
10048 sock.close()
10049 finally:
10050
10051 shutil.rmtree(tmpdir)
10052
10053
10054 try:
10055 try:
10056
10057
10058 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10059 conn.recv(1)
10060 except socket.error, err:
10061 raise errcls("Client failed to confirm notification (%s)" % err)
10062 finally:
10063 conn.close()
10064
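  # Example (not part of the original class): what the peer of
  # _NotifyUsingSocket is expected to do - connect to the advertised path and
  # send a single byte, which satisfies the conn.recv(1) call above. A
  # minimal client sketch:
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   try:
  #     sock.connect(sockname)
  #     sock.send("x")
  #   finally:
  #     sock.close()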
10066 """Sends a notification to the client.
10067
10068 @type test: string
10069 @param test: Test name
10070 @param arg: Test argument (depends on test)
10071 @type sockname: string
10072 @param sockname: Socket path
10073
10074 """
10075 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10076
10077   def _Notify(self, prereq, test, arg):
10078 """Notifies the client of a test.
10079
10080 @type prereq: bool
10081 @param prereq: Whether this is a prereq-phase test
10082 @type test: string
10083 @param test: Test name
10084 @param arg: Test argument (depends on test)
10085
10086 """
10087 if prereq:
10088 errcls = errors.OpPrereqError
10089 else:
10090 errcls = errors.OpExecError
10091
10092 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10093 test, arg),
10094 errcls)
10095
10097 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10098 self.expandnames_calls = 0
10099
10116
10117   def Exec(self, feedback_fn):
10138
10141 """IAllocator framework.
10142
10143   An IAllocator instance has four sets of attributes:
10144 - cfg that is needed to query the cluster
10145 - input data (all members of the _KEYS class attribute are required)
10146     - four buffer attributes (in_text, out_text, in_data, out_data) that
10147       represent the input (to the external script) in text and data
10148       structure format, and the output from it, again in two formats
10149     - the result variables from the script (success, info, result) for
10150 easy usage
10151
10152 """
10153
10154
10155 _ALLO_KEYS = [
10156 "name", "mem_size", "disks", "disk_template",
10157 "os", "tags", "nics", "vcpus", "hypervisor",
10158 ]
10159 _RELO_KEYS = [
10160 "name", "relocate_from",
10161 ]
10162 _EVAC_KEYS = [
10163 "evac_nodes",
10164 ]
10165
10166   def __init__(self, cfg, rpc, mode, **kwargs):
10167 self.cfg = cfg
10168 self.rpc = rpc
10169
10170 self.in_text = self.out_text = self.in_data = self.out_data = None
10171
10172 self.mode = mode
10173 self.mem_size = self.disks = self.disk_template = None
10174 self.os = self.tags = self.nics = self.vcpus = None
10175 self.hypervisor = None
10176 self.relocate_from = None
10177 self.name = None
10178 self.evac_nodes = None
10179
10180 self.required_nodes = None
10181
10182 self.success = self.info = self.result = None
10183 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10184 keyset = self._ALLO_KEYS
10185 fn = self._AddNewInstance
10186 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10187 keyset = self._RELO_KEYS
10188 fn = self._AddRelocateInstance
10189 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10190 keyset = self._EVAC_KEYS
10191 fn = self._AddEvacuateNodes
10192 else:
10193 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10194 " IAllocator" % self.mode)
10195 for key in kwargs:
10196 if key not in keyset:
10197 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10198 " IAllocator" % key)
10199 setattr(self, key, kwargs[key])
10200
10201 for key in keyset:
10202 if key not in kwargs:
10203 raise errors.ProgrammerError("Missing input parameter '%s' to"
10204 " IAllocator" % key)
10205 self._BuildInputData(fn)
10206
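  # Example (not part of the original class): constructing an allocator in
  # allocation mode; every key in _ALLO_KEYS must be passed, mirroring the
  # keyword checks above (the values are illustrative only):
  #
  #   ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="instance1.example.com", mem_size=1024,
  #                    disks=[{"size": 10240, "mode": "w"}],
  #                    disk_template=constants.DT_PLAIN, os="debootstrap",
  #                    tags=[], nics=[], vcpus=1, hypervisor=None)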
10208 """Compute the generic allocator input data.
10209
10210 This is the data that is independent of the actual operation.
10211
10212 """
10213 cfg = self.cfg
10214 cluster_info = cfg.GetClusterInfo()
10215
10216 data = {
10217 "version": constants.IALLOCATOR_VERSION,
10218 "cluster_name": cfg.GetClusterName(),
10219 "cluster_tags": list(cluster_info.GetTags()),
10220 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10221
10222 }
10223 iinfo = cfg.GetAllInstancesInfo().values()
10224 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10225
10226
10227 node_results = {}
10228 node_list = cfg.GetNodeList()
10229
10230 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10231 hypervisor_name = self.hypervisor
10232 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10233 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10234 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10235 hypervisor_name = cluster_info.enabled_hypervisors[0]
10236
10237 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10238 hypervisor_name)
10239 node_iinfo = \
10240 self.rpc.call_all_instances_info(node_list,
10241 cluster_info.enabled_hypervisors)
10242 for nname, nresult in node_data.items():
10243
10244 ninfo = cfg.GetNodeInfo(nname)
10245 pnr = {
10246 "tags": list(ninfo.GetTags()),
10247 "primary_ip": ninfo.primary_ip,
10248 "secondary_ip": ninfo.secondary_ip,
10249 "offline": ninfo.offline,
10250 "drained": ninfo.drained,
10251 "master_candidate": ninfo.master_candidate,
10252 }
10253
10254 if not (ninfo.offline or ninfo.drained):
10255 nresult.Raise("Can't get data for node %s" % nname)
10256 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10257 nname)
10258 remote_info = nresult.payload
10259
10260 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10261 'vg_size', 'vg_free', 'cpu_total']:
10262 if attr not in remote_info:
10263 raise errors.OpExecError("Node '%s' didn't return attribute"
10264 " '%s'" % (nname, attr))
10265 if not isinstance(remote_info[attr], int):
10266 raise errors.OpExecError("Node '%s' returned invalid value"
10267 " for '%s': %s" %
10268 (nname, attr, remote_info[attr]))
10269
10270 i_p_mem = i_p_up_mem = 0
10271 for iinfo, beinfo in i_list:
10272 if iinfo.primary_node == nname:
10273 i_p_mem += beinfo[constants.BE_MEMORY]
10274 if iinfo.name not in node_iinfo[nname].payload:
10275 i_used_mem = 0
10276 else:
10277 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10278 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10279 remote_info['memory_free'] -= max(0, i_mem_diff)
10280
10281 if iinfo.admin_up:
10282 i_p_up_mem += beinfo[constants.BE_MEMORY]
10283
10284
10285 pnr_dyn = {
10286 "total_memory": remote_info['memory_total'],
10287 "reserved_memory": remote_info['memory_dom0'],
10288 "free_memory": remote_info['memory_free'],
10289 "total_disk": remote_info['vg_size'],
10290 "free_disk": remote_info['vg_free'],
10291 "total_cpus": remote_info['cpu_total'],
10292 "i_pri_memory": i_p_mem,
10293 "i_pri_up_memory": i_p_up_mem,
10294 }
10295 pnr.update(pnr_dyn)
10296
10297 node_results[nname] = pnr
10298 data["nodes"] = node_results
10299
10300
10301 instance_data = {}
10302 for iinfo, beinfo in i_list:
10303 nic_data = []
10304 for nic in iinfo.nics:
10305 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10306 nic_dict = {"mac": nic.mac,
10307 "ip": nic.ip,
10308 "mode": filled_params[constants.NIC_MODE],
10309 "link": filled_params[constants.NIC_LINK],
10310 }
10311 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10312 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10313 nic_data.append(nic_dict)
10314 pir = {
10315 "tags": list(iinfo.GetTags()),
10316 "admin_up": iinfo.admin_up,
10317 "vcpus": beinfo[constants.BE_VCPUS],
10318 "memory": beinfo[constants.BE_MEMORY],
10319 "os": iinfo.os,
10320 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10321 "nics": nic_data,
10322 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10323 "disk_template": iinfo.disk_template,
10324 "hypervisor": iinfo.hypervisor,
10325 }
10326 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10327 pir["disks"])
10328 instance_data[iinfo.name] = pir
10329
10330 data["instances"] = instance_data
10331
10332 self.in_data = data
10333
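    # Example (added): self.in_data is a plain dict later serialized for the
    # external script; a trimmed sketch of its shape:
    #
    #   {"version": constants.IALLOCATOR_VERSION,
    #    "cluster_name": "...", "cluster_tags": [...],
    #    "enabled_hypervisors": [...],
    #    "nodes": {"node1": {"total_memory": ..., "free_disk": ..., ...}},
    #    "instances": {"inst1": {"memory": ..., "disks": [...], ...}}}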
10335 """Add new instance data to allocator structure.
10336
10337     This in combination with _ComputeClusterData will create the
10338 correct structure needed as input for the allocator.
10339
10340 The checks for the completeness of the opcode must have already been
10341 done.
10342
10343 """
10344 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10345
10346 if self.disk_template in constants.DTS_NET_MIRROR:
10347 self.required_nodes = 2
10348 else:
10349 self.required_nodes = 1
10350 request = {
10351 "name": self.name,
10352 "disk_template": self.disk_template,
10353 "tags": self.tags,
10354 "os": self.os,
10355 "vcpus": self.vcpus,
10356 "memory": self.mem_size,
10357 "disks": self.disks,
10358 "disk_space_total": disk_space,
10359 "nics": self.nics,
10360 "required_nodes": self.required_nodes,
10361 }
10362 return request
10363
10365 """Add relocate instance data to allocator structure.
10366
10367     This in combination with _ComputeClusterData will create the
10368 correct structure needed as input for the allocator.
10369
10370 The checks for the completeness of the opcode must have already been
10371 done.
10372
10373 """
10374 instance = self.cfg.GetInstanceInfo(self.name)
10375 if instance is None:
10376 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10377 " IAllocator" % self.name)
10378
10379 if instance.disk_template not in constants.DTS_NET_MIRROR:
10380 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10381 errors.ECODE_INVAL)
10382
10383 if len(instance.secondary_nodes) != 1:
10384 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10385 errors.ECODE_STATE)
10386
10387 self.required_nodes = 1
10388 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10389 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10390
10391 request = {
10392 "name": self.name,
10393 "disk_space_total": disk_space,
10394 "required_nodes": self.required_nodes,
10395 "relocate_from": self.relocate_from,
10396 }
10397 return request
10398
10400 """Add evacuate nodes data to allocator structure.
10401
10402 """
10403 request = {
10404 "evac_nodes": self.evac_nodes
10405 }
10406 return request
10407
10419
10420   def Run(self, name, validate=True, call_fn=None):
10421 """Run an instance allocator and return the results.
10422
10423 """
10424 if call_fn is None:
10425 call_fn = self.rpc.call_iallocator_runner
10426
10427 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10428 result.Raise("Failure while running the iallocator script")
10429
10430 self.out_text = result.payload
10431 if validate:
10432 self._ValidateResult()
10433
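  # Example (not part of the original class): call_fn is a test seam; any
  # callable with the RPC runner's contract can replace the real iallocator
  # call. A sketch, assuming a hypothetical FakeRpcResult that mimics the
  # result objects used throughout this module (Raise() and .payload):
  #
  #   def fake_runner(master_node, name, in_text):
  #     return FakeRpcResult('{"success": true, "info": "", "result": []}')
  #   ial.Run("dummy", call_fn=fake_runner)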
10435 """Process the allocator results.
10436
10437 This will process and if successful save the result in
10438 self.out_data and the other parameters.
10439
10440 """
10441 try:
10442 rdict = serializer.Load(self.out_text)
10443 except Exception, err:
10444 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10445
10446 if not isinstance(rdict, dict):
10447 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10448
10449
10450 if "nodes" in rdict and "result" not in rdict:
10451 rdict["result"] = rdict["nodes"]
10452 del rdict["nodes"]
10453
10454 for key in "success", "info", "result":
10455 if key not in rdict:
10456 raise errors.OpExecError("Can't parse iallocator results:"
10457 " missing key '%s'" % key)
10458 setattr(self, key, rdict[key])
10459
10460 if not isinstance(rdict["result"], list):
10461 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10462 " is not a list")
10463 self.out_data = rdict
10464
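    # Example (added): the smallest document accepted by the checks above,
    # with all three keys present and "result" a list:
    #
    #   {"success": true, "info": "", "result": [["inst1", "node2"]]}
    #
    # Legacy responses that use "nodes" instead of "result" are rewritten a
    # few lines earlier, before the key check runs.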
10467 """Run allocator tests.
10468
10469 This LU runs the allocator tests
10470
10471 """
10472 _OP_PARAMS = [
10473 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10474 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10475 ("name", _NoDefault, _TNonEmptyString),
10476 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10477 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10478 _TOr(_TNone, _TNonEmptyString))))),
10479 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10480 ("hypervisor", None, _TMaybeString),
10481 ("allocator", None, _TMaybeString),
10482 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10483 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10484 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10485 ("os", None, _TMaybeString),
10486 ("disk_template", None, _TMaybeString),
10487 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10488 ]
10489
10491 """Check prerequisites.
10492
10493     This checks the opcode parameters depending on the test direction and mode.
10494
10495 """
10496 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10497 for attr in ["mem_size", "disks", "disk_template",
10498 "os", "tags", "nics", "vcpus"]:
10499 if not hasattr(self.op, attr):
10500 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10501 attr, errors.ECODE_INVAL)
10502 iname = self.cfg.ExpandInstanceName(self.op.name)
10503 if iname is not None:
10504 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10505 iname, errors.ECODE_EXISTS)
10506 if not isinstance(self.op.nics, list):
10507 raise errors.OpPrereqError("Invalid parameter 'nics'",
10508 errors.ECODE_INVAL)
10509 if not isinstance(self.op.disks, list):
10510 raise errors.OpPrereqError("Invalid parameter 'disks'",
10511 errors.ECODE_INVAL)
10512 for row in self.op.disks:
10513 if (not isinstance(row, dict) or
10514 "size" not in row or
10515 not isinstance(row["size"], int) or
10516 "mode" not in row or
10517 row["mode"] not in ['r', 'w']):
10518 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10519 " parameter", errors.ECODE_INVAL)
10520 if self.op.hypervisor is None:
10521 self.op.hypervisor = self.cfg.GetHypervisorType()
10522 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10523 fname = _ExpandInstanceName(self.cfg, self.op.name)
10524 self.op.name = fname
10525 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10526 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10527 if not hasattr(self.op, "evac_nodes"):
10528 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10529 " opcode input", errors.ECODE_INVAL)
10530 else:
10531 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10532 self.op.mode, errors.ECODE_INVAL)
10533
10534 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10535 if self.op.allocator is None:
10536 raise errors.OpPrereqError("Missing allocator name",
10537 errors.ECODE_INVAL)
10538 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10539 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10540 self.op.direction, errors.ECODE_INVAL)
10541
10542   def Exec(self, feedback_fn):
10543 """Run the allocator test.
10544
10545 """
10546 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10547 ial = IAllocator(self.cfg, self.rpc,
10548 mode=self.op.mode,
10549 name=self.op.name,
10550 mem_size=self.op.mem_size,
10551 disks=self.op.disks,
10552 disk_template=self.op.disk_template,
10553 os=self.op.os,
10554 tags=self.op.tags,
10555 nics=self.op.nics,
10556 vcpus=self.op.vcpus,
10557 hypervisor=self.op.hypervisor,
10558 )
10559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10560 ial = IAllocator(self.cfg, self.rpc,
10561 mode=self.op.mode,
10562 name=self.op.name,
10563 relocate_from=list(self.relocate_from),
10564 )
10565 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10566 ial = IAllocator(self.cfg, self.rpc,
10567 mode=self.op.mode,
10568 evac_nodes=self.op.evac_nodes)
10569 else:
10570 raise errors.ProgrammerError("Uncatched mode %s in"
10571 " LUTestAllocator.Exec", self.op.mode)
10572
10573 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10574 result = ial.in_text
10575 else:
10576 ial.Run(self.op.allocator, validate=False)
10577 result = ial.out_text
10578 return result
10579