1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module implementing the master-side code."""
23
24
25
26
27
28
29
30
31 import os
32 import os.path
33 import time
34 import re
35 import logging
36 import copy
37 import OpenSSL
38 import socket
39 import tempfile
40 import shutil
41 import itertools
42 import operator
43
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
60 from ganeti import ht
61 from ganeti import rpc
62 from ganeti import runtime
63
64 import ganeti.masterd.instance
65
66
67
68 DRBD_META_SIZE = 128
69
70
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
74
75
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
78 ]))
82 """Data container for LU results with jobs.
83
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
87 result.
88
89 """
91 """Initializes this class.
92
93 Additional return values can be specified as keyword arguments.
94
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
97
98 """
99 self.jobs = jobs
100 self.other = kwargs
101
104 """Logical Unit base class.
105
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
115
116 Note that all commands require root permissions.
117
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
120
121 """
122 HPATH = None
123 HTYPE = None
124 REQ_BGL = True
125
126 - def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
128
129 This needs to be overridden in derived classes in order to check op
130 validity.
131
132 """
133 self.proc = processor
134 self.op = op
135 self.cfg = context.cfg
136 self.glm = context.glm
137
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
144 self.add_locks = {}
145 self.remove_locks = {}
146
147 self.recalculate_locks = {}
148
149 self.Log = processor.Log
150 self.LogWarning = processor.LogWarning
151 self.LogInfo = processor.LogInfo
152 self.LogStep = processor.LogStep
153
154 self.dry_run_result = None
155
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
159
160
161 self.tasklets = None
162
163
164 self.op.Validate(True)
165
166 self.CheckArguments()
167
169 """Check syntactic validity for the opcode arguments.
170
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
175
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
178 waited for them)
179
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
182
183 """
184 pass
185
187 """Expand names for this LU.
188
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
193
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
196 as values. Rules:
197
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
208
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
212
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
216
217 Examples::
218
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 }
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
227 }
228 # Acquire no locks
229 self.needed_locks = {} # No, you can't leave it to the default value None
230
231 """
232
233
234
235 if self.REQ_BGL:
236 self.needed_locks = {}
237 else:
238 raise NotImplementedError
239
241 """Declare LU locking needs for a level
242
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
249
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
252
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
255
256 """
257
259 """Check prerequisites for this LU.
260
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
264 allowed.
265
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
268
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
271
272 """
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
277 tl.CheckPrereq()
278 else:
279 pass
280
281 - def Exec(self, feedback_fn):
282 """Execute the LU.
283
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
286 code, or expected.
287
288 """
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
292 tl.Exec(feedback_fn)
293 else:
294 raise NotImplementedError
295
297 """Build hooks environment for this LU.
298
299 @rtype: dict
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
306 will not be called.
307
308 """
309 raise NotImplementedError
310
312 """Build list of nodes to run LU's hooks.
313
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
320 will not be called.
321
322 """
323 raise NotImplementedError
324
325 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
327
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
333
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
339 in the PRE phase
340 @return: the new Exec result, based on the previous result
341 and hook results
342
343 """
344
345
346
347 return lu_result
348
350 """Helper function to expand and lock an instance.
351
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
356 before.
357
358 """
359 if self.needed_locks is None:
360 self.needed_locks = {}
361 else:
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367
370 """Helper function to declare instances' nodes for locking.
371
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
376
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
379
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
382
383 If should be called in DeclareLocks in a way similar to::
384
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
387
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
391
392 """
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
395
396
397
398
399
400
401 wanted_nodes = []
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
405 if not primary_only:
406 wanted_nodes.extend(instance.secondary_nodes)
407
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
412 else:
413 raise errors.ProgrammerError("Unknown recalculation mode")
414
415 del self.recalculate_locks[level]
416
419 """Simple LU which runs no hooks.
420
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
423
424 """
425 HPATH = None
426 HTYPE = None
427
429 """Empty BuildHooksEnv for NoHooksLu.
430
431 This just raises an error.
432
433 """
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
435
437 """Empty BuildHooksNodes for NoHooksLU.
438
439 """
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441
444 """Tasklet base class.
445
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
449
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
452 - Implement Exec
453
454 """
456 self.lu = lu
457
458
459 self.cfg = lu.cfg
460 self.rpc = lu.rpc
461
463 """Check prerequisites for this tasklets.
464
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
468
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
471
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
474
475 """
476 pass
477
478 - def Exec(self, feedback_fn):
479 """Execute the tasklet.
480
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
483 expected.
484
485 """
486 raise NotImplementedError
487
490 """Base for query utility classes.
491
492 """
493
494 FIELDS = None
495
496
497 SORT_FIELD = "name"
498
499 - def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
501
502 """
503 self.use_locking = use_locking
504
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
509
510
511 self.sort_by_name = not self.names
512
513 self.do_locking = None
514 self.wanted = None
515
516 - def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
518
519 """
520 if self.do_locking:
521 names = lu.owned_locks(lock_level)
522 else:
523 names = all_names
524
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527
528 return utils.NiceSort(names)
529
530
531 assert self.names
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
533
534 missing = set(self.wanted).difference(names)
535 if missing:
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
538
539
540 return self.wanted
541
543 """Expand names for this query.
544
545 See L{LogicalUnit.ExpandNames}.
546
547 """
548 raise NotImplementedError()
549
551 """Declare locks for this query.
552
553 See L{LogicalUnit.DeclareLocks}.
554
555 """
556 raise NotImplementedError()
557
559 """Collects all data for this query.
560
561 @return: Query data object
562
563 """
564 raise NotImplementedError()
565
572
579
582 """Returns a dict declaring all lock levels shared.
583
584 """
585 return dict.fromkeys(locking.LEVELS, 1)
586
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
590
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
593
594 """
595 (bootid, (vg_info, ), (hv_info, )) = data
596
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
598 "bootid": bootid,
599 })
600
603 """Little helper wrapper to the rpc annotation method.
604
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @returns The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
611
612 """
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
615
619 """Checks if node groups for locked instances are still correct.
620
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
631
632 """
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
636
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
638
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
641
644 """Checks if the owned node groups are still correct for an instance.
645
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
652
653 """
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
655
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
660 " operation" %
661 (instance_name,
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
664 errors.ECODE_STATE)
665
666 return inst_groups
667
670 """Checks if the instances in a node group are still correct.
671
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
678
679 """
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
685 (group_uuid,
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
688 errors.ECODE_STATE)
689
690 return wanted_instances
691
694 """Tells if node supports OOB.
695
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
701
702 """
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
704
707 """Makes a copy of a list of lock names.
708
709 Handles L{locking.ALL_SET} correctly.
710
711 """
712 if names == locking.ALL_SET:
713 return locking.ALL_SET
714 else:
715 return names[:]
716
719 """Returns list of checked and expanded node names.
720
721 @type lu: L{LogicalUnit}
722 @param lu: the logical unit on whose behalf we execute
723 @type nodes: list
724 @param nodes: list of node names or None for all nodes
725 @rtype: list
726 @return: the list of nodes, sorted
727 @raise errors.ProgrammerError: if the nodes parameter is wrong type
728
729 """
730 if nodes:
731 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
732
733 return utils.NiceSort(lu.cfg.GetNodeList())
734
737 """Returns list of checked and expanded instance names.
738
739 @type lu: L{LogicalUnit}
740 @param lu: the logical unit on whose behalf we execute
741 @type instances: list
742 @param instances: list of instance names or None for all instances
743 @rtype: list
744 @return: the list of instances, sorted
745 @raise errors.OpPrereqError: if the instances parameter is wrong type
746 @raise errors.OpPrereqError: if any of the passed instances is not found
747
748 """
749 if instances:
750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
751 else:
752 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
753 return wanted
754
755
756 -def _GetUpdatedParams(old_params, update_dict,
757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary.
759
760 @type old_params: dict
761 @param old_params: old parameters
762 @type update_dict: dict
763 @param update_dict: dict containing new parameter values, or
764 constants.VALUE_DEFAULT to reset the parameter to its default
765 value
766 @param use_default: boolean
767 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
768 values as 'to be deleted' values
769 @param use_none: boolean
770 @type use_none: whether to recognise C{None} values as 'to be
771 deleted' values
772 @rtype: dict
773 @return: the new parameter dictionary
774
775 """
776 params_copy = copy.deepcopy(old_params)
777 for key, val in update_dict.iteritems():
778 if ((use_default and val == constants.VALUE_DEFAULT) or
779 (use_none and val is None)):
780 try:
781 del params_copy[key]
782 except KeyError:
783 pass
784 else:
785 params_copy[key] = val
786 return params_copy
787
790 """Return the new version of a instance policy.
791
792 @param group_policy: whether this policy applies to a group and thus
793 we should support removal of policy entries
794
795 """
796 use_none = use_default = group_policy
797 ipolicy = copy.deepcopy(old_ipolicy)
798 for key, value in new_ipolicy.items():
799 if key not in constants.IPOLICY_ALL_KEYS:
800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
801 errors.ECODE_INVAL)
802 if key in constants.IPOLICY_ISPECS:
803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
805 use_none=use_none,
806 use_default=use_default)
807 else:
808 if (not value or value == [constants.VALUE_DEFAULT] or
809 value == constants.VALUE_DEFAULT):
810 if group_policy:
811 del ipolicy[key]
812 else:
813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
814 " on the cluster'" % key,
815 errors.ECODE_INVAL)
816 else:
817 if key in constants.IPOLICY_PARAMETERS:
818
819 try:
820 ipolicy[key] = float(value)
821 except (TypeError, ValueError), err:
822 raise errors.OpPrereqError("Invalid value for attribute"
823 " '%s': '%s', error: %s" %
824 (key, value, err), errors.ECODE_INVAL)
825 else:
826
827
828 ipolicy[key] = list(value)
829 try:
830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
831 except errors.ConfigurationError, err:
832 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
833 errors.ECODE_INVAL)
834 return ipolicy
835
838 """Updates and verifies a dict with sub dicts of the same type.
839
840 @param base: The dict with the old data
841 @param updates: The dict with the new data
842 @param type_check: Dict suitable to ForceDictType to verify correct types
843 @returns: A new dict with updated and verified values
844
845 """
846 def fn(old, value):
847 new = _GetUpdatedParams(old, value)
848 utils.ForceDictType(new, type_check)
849 return new
850
851 ret = copy.deepcopy(base)
852 ret.update(dict((key, fn(base.get(key, {}), value))
853 for key, value in updates.items()))
854 return ret
855
858 """Combines the hv state from an opcode with the one of the object
859
860 @param op_input: The input dict from the opcode
861 @param obj_input: The input dict from the objects
862 @return: The verified and updated dict
863
864 """
865 if op_input:
866 invalid_hvs = set(op_input) - constants.HYPER_TYPES
867 if invalid_hvs:
868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
869 " %s" % utils.CommaJoin(invalid_hvs),
870 errors.ECODE_INVAL)
871 if obj_input is None:
872 obj_input = {}
873 type_check = constants.HVSTS_PARAMETER_TYPES
874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
875
876 return None
877
880 """Combines the disk state from an opcode with the one of the object
881
882 @param op_input: The input dict from the opcode
883 @param obj_input: The input dict from the objects
884 @return: The verified and updated dict
885 """
886 if op_input:
887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
888 if invalid_dst:
889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
890 utils.CommaJoin(invalid_dst),
891 errors.ECODE_INVAL)
892 type_check = constants.DSS_PARAMETER_TYPES
893 if obj_input is None:
894 obj_input = {}
895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
896 type_check))
897 for key, value in op_input.items())
898
899 return None
900
903 """Releases locks owned by an LU.
904
905 @type lu: L{LogicalUnit}
906 @param level: Lock level
907 @type names: list or None
908 @param names: Names of locks to release
909 @type keep: list or None
910 @param keep: Names of locks to retain
911
912 """
913 assert not (keep is not None and names is not None), \
914 "Only one of the 'names' and the 'keep' parameters can be given"
915
916 if names is not None:
917 should_release = names.__contains__
918 elif keep:
919 should_release = lambda name: name not in keep
920 else:
921 should_release = None
922
923 owned = lu.owned_locks(level)
924 if not owned:
925
926 pass
927
928 elif should_release:
929 retain = []
930 release = []
931
932
933 for name in owned:
934 if should_release(name):
935 release.append(name)
936 else:
937 retain.append(name)
938
939 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
940
941
942 lu.glm.release(level, names=release)
943
944 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
945 else:
946
947 lu.glm.release(level)
948
949 assert not lu.glm.is_owned(level), "No locks should be owned"
950
953 """Creates a map from (node, volume) to instance name.
954
955 @type instances: list of L{objects.Instance}
956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
957
958 """
959 return dict(((node, vol), inst.name)
960 for inst in instances
961 for (node, vols) in inst.MapLVsByNode().items()
962 for vol in vols)
963
964
965 -def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node.
967
968 """
969 hm = lu.proc.BuildHooksManager(lu)
970 try:
971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
972 except Exception, err:
973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
974
977 """Checks whether all selected fields are valid.
978
979 @type static: L{utils.FieldSet}
980 @param static: static fields set
981 @type dynamic: L{utils.FieldSet}
982 @param dynamic: dynamic fields set
983
984 """
985 f = utils.FieldSet()
986 f.Extend(static)
987 f.Extend(dynamic)
988
989 delta = f.NonMatching(selected)
990 if delta:
991 raise errors.OpPrereqError("Unknown output fields selected: %s"
992 % ",".join(delta), errors.ECODE_INVAL)
993
996 """Validates that given hypervisor params are not global ones.
997
998 This will ensure that instances don't get customised versions of
999 global params.
1000
1001 """
1002 used_globals = constants.HVC_GLOBALS.intersection(params)
1003 if used_globals:
1004 msg = ("The following hypervisor parameters are global and cannot"
1005 " be customized at instance level, please modify them at"
1006 " cluster level: %s" % utils.CommaJoin(used_globals))
1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1008
1011 """Ensure that a given node is online.
1012
1013 @param lu: the LU on behalf of which we make the check
1014 @param node: the node to check
1015 @param msg: if passed, should be a message to replace the default one
1016 @raise errors.OpPrereqError: if the node is offline
1017
1018 """
1019 if msg is None:
1020 msg = "Can't use offline node"
1021 if lu.cfg.GetNodeInfo(node).offline:
1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1023
1026 """Ensure that a given node is not drained.
1027
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @raise errors.OpPrereqError: if the node is drained
1031
1032 """
1033 if lu.cfg.GetNodeInfo(node).drained:
1034 raise errors.OpPrereqError("Can't use drained node %s" % node,
1035 errors.ECODE_STATE)
1036
1039 """Ensure that a given node is vm capable.
1040
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @raise errors.OpPrereqError: if the node is not vm capable
1044
1045 """
1046 if not lu.cfg.GetNodeInfo(node).vm_capable:
1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1048 errors.ECODE_STATE)
1049
1052 """Ensure that a node supports a given OS.
1053
1054 @param lu: the LU on behalf of which we make the check
1055 @param node: the node to check
1056 @param os_name: the OS to query about
1057 @param force_variant: whether to ignore variant errors
1058 @raise errors.OpPrereqError: if the node is not supporting the OS
1059
1060 """
1061 result = lu.rpc.call_os_get(node, os_name)
1062 result.Raise("OS '%s' not in supported OS list for node %s" %
1063 (os_name, node),
1064 prereq=True, ecode=errors.ECODE_INVAL)
1065 if not force_variant:
1066 _CheckOSVariant(result.payload, os_name)
1067
1070 """Ensure that a node has the given secondary ip.
1071
1072 @type lu: L{LogicalUnit}
1073 @param lu: the LU on behalf of which we make the check
1074 @type node: string
1075 @param node: the node to check
1076 @type secondary_ip: string
1077 @param secondary_ip: the ip to check
1078 @type prereq: boolean
1079 @param prereq: whether to throw a prerequisite or an execute error
1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1082
1083 """
1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1085 result.Raise("Failure checking secondary ip on node %s" % node,
1086 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1087 if not result.payload:
1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1089 " please fix and re-run this command" % secondary_ip)
1090 if prereq:
1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1092 else:
1093 raise errors.OpExecError(msg)
1094
1097 """Reads the cluster domain secret.
1098
1099 """
1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1101 strict=True)
1102
1105 """Ensure that an instance is in one of the required states.
1106
1107 @param lu: the LU on behalf of which we make the check
1108 @param instance: the instance to check
1109 @param msg: if passed, should be a message to replace the default one
1110 @raise errors.OpPrereqError: if the instance is not in the required state
1111
1112 """
1113 if msg is None:
1114 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1115 if instance.admin_state not in req_states:
1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1117 (instance.name, instance.admin_state, msg),
1118 errors.ECODE_STATE)
1119
1120 if constants.ADMINST_UP not in req_states:
1121 pnode = instance.primary_node
1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1123 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1124 prereq=True, ecode=errors.ECODE_ENVIRON)
1125
1126 if instance.name in ins_l.payload:
1127 raise errors.OpPrereqError("Instance %s is running, %s" %
1128 (instance.name, msg), errors.ECODE_STATE)
1129
1132 """Computes if value is in the desired range.
1133
1134 @param name: name of the parameter for which we perform the check
1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1136 not just 'disk')
1137 @param ipolicy: dictionary containing min, max and std values
1138 @param value: actual value that we want to use
1139 @return: None or element not meeting the criteria
1140
1141
1142 """
1143 if value in [None, constants.VALUE_AUTO]:
1144 return None
1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1147 if value > max_v or min_v > value:
1148 if qualifier:
1149 fqn = "%s/%s" % (name, qualifier)
1150 else:
1151 fqn = name
1152 return ("%s value %s is not in range [%s, %s]" %
1153 (fqn, value, min_v, max_v))
1154 return None
1155
1160 """Verifies ipolicy against provided specs.
1161
1162 @type ipolicy: dict
1163 @param ipolicy: The ipolicy
1164 @type mem_size: int
1165 @param mem_size: The memory size
1166 @type cpu_count: int
1167 @param cpu_count: Used cpu cores
1168 @type disk_count: int
1169 @param disk_count: Number of disks used
1170 @type nic_count: int
1171 @param nic_count: Number of nics used
1172 @type disk_sizes: list of ints
1173 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1174 @type spindle_use: int
1175 @param spindle_use: The number of spindles this instance uses
1176 @param _compute_fn: The compute function (unittest only)
1177 @return: A list of violations, or an empty list of no violations are found
1178
1179 """
1180 assert disk_count == len(disk_sizes)
1181
1182 test_settings = [
1183 (constants.ISPEC_MEM_SIZE, "", mem_size),
1184 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1185 (constants.ISPEC_DISK_COUNT, "", disk_count),
1186 (constants.ISPEC_NIC_COUNT, "", nic_count),
1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1189 for idx, d in enumerate(disk_sizes)]
1190
1191 return filter(None,
1192 (_compute_fn(name, qualifier, ipolicy, value)
1193 for (name, qualifier, value) in test_settings))
1194
1198 """Compute if instance meets the specs of ipolicy.
1199
1200 @type ipolicy: dict
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance: L{objects.Instance}
1203 @param instance: The instance to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1206
1207 """
1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1211 disk_count = len(instance.disks)
1212 disk_sizes = [disk.size for disk in instance.disks]
1213 nic_count = len(instance.nics)
1214
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
1217
1221 """Compute if instance specs meets the specs of ipolicy.
1222
1223 @type ipolicy: dict
1224 @param ipolicy: The ipolicy to verify against
1225 @param instance_spec: dict
1226 @param instance_spec: The instance spec to verify
1227 @param _compute_fn: The function to verify ipolicy (unittest only)
1228 @see: L{_ComputeIPolicySpecViolation}
1229
1230 """
1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1237
1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1239 disk_sizes, spindle_use)
1240
1245 """Compute if instance meets the specs of the new target group.
1246
1247 @param ipolicy: The ipolicy to verify
1248 @param instance: The instance object to verify
1249 @param current_group: The current group of the instance
1250 @param target_group: The new group of the instance
1251 @param _compute_fn: The function to verify ipolicy (unittest only)
1252 @see: L{_ComputeIPolicySpecViolation}
1253
1254 """
1255 if current_group == target_group:
1256 return []
1257 else:
1258 return _compute_fn(ipolicy, instance)
1259
1263 """Checks that the target node is correct in terms of instance policy.
1264
1265 @param ipolicy: The ipolicy to verify
1266 @param instance: The instance object to verify
1267 @param node: The new node to relocate
1268 @param ignore: Ignore violations of the ipolicy
1269 @param _compute_fn: The function to verify ipolicy (unittest only)
1270 @see: L{_ComputeIPolicySpecViolation}
1271
1272 """
1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1275
1276 if res:
1277 msg = ("Instance does not meet target node group's (%s) instance"
1278 " policy: %s") % (node.group, utils.CommaJoin(res))
1279 if ignore:
1280 lu.LogWarning(msg)
1281 else:
1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1283
1286 """Computes a set of any instances that would violate the new ipolicy.
1287
1288 @param old_ipolicy: The current (still in-place) ipolicy
1289 @param new_ipolicy: The new (to become) ipolicy
1290 @param instances: List of instances to verify
1291 @return: A list of instances which violates the new ipolicy but
1292 did not before
1293
1294 """
1295 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1296 _ComputeViolatingInstances(old_ipolicy, instances))
1297
1300 """Expand an item name.
1301
1302 @param fn: the function to use for expansion
1303 @param name: requested item name
1304 @param kind: text description ('Node' or 'Instance')
1305 @return: the resolved (full) name
1306 @raise errors.OpPrereqError: if the item is not found
1307
1308 """
1309 full_name = fn(name)
1310 if full_name is None:
1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1312 errors.ECODE_NOENT)
1313 return full_name
1314
1319
1324
1325
1326 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1327 minmem, maxmem, vcpus, nics, disk_template, disks,
1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks
1330
1331 This builds the hook environment from individual variables.
1332
1333 @type name: string
1334 @param name: the name of the instance
1335 @type primary_node: string
1336 @param primary_node: the name of the instance's primary node
1337 @type secondary_nodes: list
1338 @param secondary_nodes: list of secondary nodes as strings
1339 @type os_type: string
1340 @param os_type: the name of the instance's OS
1341 @type status: string
1342 @param status: the desired status of the instance
1343 @type minmem: string
1344 @param minmem: the minimum memory size of the instance
1345 @type maxmem: string
1346 @param maxmem: the maximum memory size of the instance
1347 @type vcpus: string
1348 @param vcpus: the count of VCPUs the instance has
1349 @type nics: list
1350 @param nics: list of tuples (ip, mac, mode, link) representing
1351 the NICs the instance has
1352 @type disk_template: string
1353 @param disk_template: the disk template of the instance
1354 @type disks: list
1355 @param disks: the list of (size, mode) pairs
1356 @type bep: dict
1357 @param bep: the backend parameters for the instance
1358 @type hvp: dict
1359 @param hvp: the hypervisor parameters for the instance
1360 @type hypervisor_name: string
1361 @param hypervisor_name: the hypervisor for the instance
1362 @type tags: list
1363 @param tags: list of instance tags as strings
1364 @rtype: dict
1365 @return: the hook environment for this instance
1366
1367 """
1368 env = {
1369 "OP_TARGET": name,
1370 "INSTANCE_NAME": name,
1371 "INSTANCE_PRIMARY": primary_node,
1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1373 "INSTANCE_OS_TYPE": os_type,
1374 "INSTANCE_STATUS": status,
1375 "INSTANCE_MINMEM": minmem,
1376 "INSTANCE_MAXMEM": maxmem,
1377
1378 "INSTANCE_MEMORY": maxmem,
1379 "INSTANCE_VCPUS": vcpus,
1380 "INSTANCE_DISK_TEMPLATE": disk_template,
1381 "INSTANCE_HYPERVISOR": hypervisor_name,
1382 }
1383 if nics:
1384 nic_count = len(nics)
1385 for idx, (ip, mac, mode, link) in enumerate(nics):
1386 if ip is None:
1387 ip = ""
1388 env["INSTANCE_NIC%d_IP" % idx] = ip
1389 env["INSTANCE_NIC%d_MAC" % idx] = mac
1390 env["INSTANCE_NIC%d_MODE" % idx] = mode
1391 env["INSTANCE_NIC%d_LINK" % idx] = link
1392 if mode == constants.NIC_MODE_BRIDGED:
1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1394 else:
1395 nic_count = 0
1396
1397 env["INSTANCE_NIC_COUNT"] = nic_count
1398
1399 if disks:
1400 disk_count = len(disks)
1401 for idx, (size, mode) in enumerate(disks):
1402 env["INSTANCE_DISK%d_SIZE" % idx] = size
1403 env["INSTANCE_DISK%d_MODE" % idx] = mode
1404 else:
1405 disk_count = 0
1406
1407 env["INSTANCE_DISK_COUNT"] = disk_count
1408
1409 if not tags:
1410 tags = []
1411
1412 env["INSTANCE_TAGS"] = " ".join(tags)
1413
1414 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1415 for key, value in source.items():
1416 env["INSTANCE_%s_%s" % (kind, key)] = value
1417
1418 return env
1419
1422 """Build a list of nic information tuples.
1423
1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1425 value in LUInstanceQueryData.
1426
1427 @type lu: L{LogicalUnit}
1428 @param lu: the logical unit on whose behalf we execute
1429 @type nics: list of L{objects.NIC}
1430 @param nics: list of nics to convert to hooks tuples
1431
1432 """
1433 hooks_nics = []
1434 cluster = lu.cfg.GetClusterInfo()
1435 for nic in nics:
1436 ip = nic.ip
1437 mac = nic.mac
1438 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1439 mode = filled_params[constants.NIC_MODE]
1440 link = filled_params[constants.NIC_LINK]
1441 hooks_nics.append((ip, mac, mode, link))
1442 return hooks_nics
1443
1446 """Builds instance related env variables for hooks from an object.
1447
1448 @type lu: L{LogicalUnit}
1449 @param lu: the logical unit on whose behalf we execute
1450 @type instance: L{objects.Instance}
1451 @param instance: the instance for which we should build the
1452 environment
1453 @type override: dict
1454 @param override: dictionary with key/values that will override
1455 our values
1456 @rtype: dict
1457 @return: the hook environment dictionary
1458
1459 """
1460 cluster = lu.cfg.GetClusterInfo()
1461 bep = cluster.FillBE(instance)
1462 hvp = cluster.FillHV(instance)
1463 args = {
1464 "name": instance.name,
1465 "primary_node": instance.primary_node,
1466 "secondary_nodes": instance.secondary_nodes,
1467 "os_type": instance.os,
1468 "status": instance.admin_state,
1469 "maxmem": bep[constants.BE_MAXMEM],
1470 "minmem": bep[constants.BE_MINMEM],
1471 "vcpus": bep[constants.BE_VCPUS],
1472 "nics": _NICListToTuple(lu, instance.nics),
1473 "disk_template": instance.disk_template,
1474 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1475 "bep": bep,
1476 "hvp": hvp,
1477 "hypervisor_name": instance.hypervisor,
1478 "tags": instance.tags,
1479 }
1480 if override:
1481 args.update(override)
1482 return _BuildInstanceHookEnv(**args)
1483
1486 """Adjust the candidate pool after node operations.
1487
1488 """
1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1490 if mod_list:
1491 lu.LogInfo("Promoted nodes to master candidate role: %s",
1492 utils.CommaJoin(node.name for node in mod_list))
1493 for name in mod_list:
1494 lu.context.ReaddNode(name)
1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 if mc_now > mc_max:
1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1498 (mc_now, mc_max))
1499
1510
1513 """Calculate instance policy for group.
1514
1515 """
1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1517
1520 """Computes a set of instances who violates given ipolicy.
1521
1522 @param ipolicy: The ipolicy to verify
1523 @type instances: object.Instance
1524 @param instances: List of instances to verify
1525 @return: A frozenset of instance names violating the ipolicy
1526
1527 """
1528 return frozenset([inst.name for inst in instances
1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1530
1544
1553
1556 """Check whether an OS name conforms to the os variants specification.
1557
1558 @type os_obj: L{objects.OS}
1559 @param os_obj: OS object to check
1560 @type name: string
1561 @param name: OS name passed by the user, to check for validity
1562
1563 """
1564 variant = objects.OS.GetVariant(name)
1565 if not os_obj.supported_variants:
1566 if variant:
1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1568 " passed)" % (os_obj.name, variant),
1569 errors.ECODE_INVAL)
1570 return
1571 if not variant:
1572 raise errors.OpPrereqError("OS name must include a variant",
1573 errors.ECODE_INVAL)
1574
1575 if variant not in os_obj.supported_variants:
1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1577
1581
1584 """Returns a list of all primary and secondary instances on a node.
1585
1586 """
1587
1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1589
1592 """Returns primary instances on a node.
1593
1594 """
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name == inst.primary_node)
1597
1605
1617
1620 faulty = []
1621
1622 for dev in instance.disks:
1623 cfg.SetDiskID(dev, node_name)
1624
1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1626 instance))
1627 result.Raise("Failed to get disk status from node %s" % node_name,
1628 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1629
1630 for idx, bdev_status in enumerate(result.payload):
1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1632 faulty.append(idx)
1633
1634 return faulty
1635
1638 """Check the sanity of iallocator and node arguments and use the
1639 cluster-wide iallocator if appropriate.
1640
1641 Check that at most one of (iallocator, node) is specified. If none is
1642 specified, then the LU's opcode's iallocator slot is filled with the
1643 cluster-wide default iallocator.
1644
1645 @type iallocator_slot: string
1646 @param iallocator_slot: the name of the opcode iallocator slot
1647 @type node_slot: string
1648 @param node_slot: the name of the opcode target node slot
1649
1650 """
1651 node = getattr(lu.op, node_slot, None)
1652 iallocator = getattr(lu.op, iallocator_slot, None)
1653
1654 if node is not None and iallocator is not None:
1655 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1656 errors.ECODE_INVAL)
1657 elif node is None and iallocator is None:
1658 default_iallocator = lu.cfg.GetDefaultIAllocator()
1659 if default_iallocator:
1660 setattr(lu.op, iallocator_slot, default_iallocator)
1661 else:
1662 raise errors.OpPrereqError("No iallocator or node given and no"
1663 " cluster-wide default iallocator found;"
1664 " please specify either an iallocator or a"
1665 " node, or set a cluster-wide default"
1666 " iallocator")
1667
1670 """Decides on which iallocator to use.
1671
1672 @type cfg: L{config.ConfigWriter}
1673 @param cfg: Cluster configuration object
1674 @type iallocator: string or None
1675 @param iallocator: Iallocator specified in opcode
1676 @rtype: string
1677 @return: Iallocator name
1678
1679 """
1680 if not iallocator:
1681
1682 iallocator = cfg.GetDefaultIAllocator()
1683
1684 if not iallocator:
1685 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1686 " opcode nor as a cluster-wide default",
1687 errors.ECODE_INVAL)
1688
1689 return iallocator
1690
1693 """Ensures that a given hostname resolves to a 'sane' name.
1694
1695 The given name is required to be a prefix of the resolved hostname,
1696 to prevent accidental mismatches.
1697
1698 @param lu: the logical unit on behalf of which we're checking
1699 @param name: the name we should resolve and check
1700 @return: the resolved hostname object
1701
1702 """
1703 hostname = netutils.GetHostname(name=name)
1704 if hostname.name != name:
1705 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1706 if not utils.MatchNameComponent(name, [hostname.name]):
1707 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1708 " same as given hostname '%s'") %
1709 (hostname.name, name), errors.ECODE_INVAL)
1710 return hostname
1711
1712
1713 -class LUClusterPostInit(LogicalUnit):
1714 """Logical unit for running hooks after cluster initialization.
1715
1716 """
1717 HPATH = "cluster-init"
1718 HTYPE = constants.HTYPE_CLUSTER
1719
1720 - def BuildHooksEnv(self):
1721 """Build hooks env.
1722
1723 """
1724 return {
1725 "OP_TARGET": self.cfg.GetClusterName(),
1726 }
1727
1728 - def BuildHooksNodes(self):
1729 """Build hooks nodes.
1730
1731 """
1732 return ([], [self.cfg.GetMasterNode()])
1733
1734 - def Exec(self, feedback_fn):
1735 """Nothing to do.
1736
1737 """
1738 return True
1739
1742 """Logical unit for destroying the cluster.
1743
1744 """
1745 HPATH = "cluster-destroy"
1746 HTYPE = constants.HTYPE_CLUSTER
1747
1749 """Build hooks env.
1750
1751 """
1752 return {
1753 "OP_TARGET": self.cfg.GetClusterName(),
1754 }
1755
1757 """Build hooks nodes.
1758
1759 """
1760 return ([], [])
1761
1763 """Check prerequisites.
1764
1765 This checks whether the cluster is empty.
1766
1767 Any errors are signaled by raising errors.OpPrereqError.
1768
1769 """
1770 master = self.cfg.GetMasterNode()
1771
1772 nodelist = self.cfg.GetNodeList()
1773 if len(nodelist) != 1 or nodelist[0] != master:
1774 raise errors.OpPrereqError("There are still %d node(s) in"
1775 " this cluster." % (len(nodelist) - 1),
1776 errors.ECODE_INVAL)
1777 instancelist = self.cfg.GetInstanceList()
1778 if instancelist:
1779 raise errors.OpPrereqError("There are still %d instance(s) in"
1780 " this cluster." % len(instancelist),
1781 errors.ECODE_INVAL)
1782
1783 - def Exec(self, feedback_fn):
1800
1803 """Verifies a certificate for L{LUClusterVerifyConfig}.
1804
1805 @type filename: string
1806 @param filename: Path to PEM file
1807
1808 """
1809 try:
1810 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1811 utils.ReadFile(filename))
1812 except Exception, err:
1813 return (LUClusterVerifyConfig.ETYPE_ERROR,
1814 "Failed to load X509 certificate %s: %s" % (filename, err))
1815
1816 (errcode, msg) = \
1817 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1818 constants.SSL_CERT_EXPIRATION_ERROR)
1819
1820 if msg:
1821 fnamemsg = "While verifying %s: %s" % (filename, msg)
1822 else:
1823 fnamemsg = None
1824
1825 if errcode is None:
1826 return (None, fnamemsg)
1827 elif errcode == utils.CERT_WARNING:
1828 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1829 elif errcode == utils.CERT_ERROR:
1830 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1831
1832 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1833
1836 """Compute the set of all hypervisor parameters.
1837
1838 @type cluster: L{objects.Cluster}
1839 @param cluster: the cluster object
1840 @param instances: list of L{objects.Instance}
1841 @param instances: additional instances from which to obtain parameters
1842 @rtype: list of (origin, hypervisor, parameters)
1843 @return: a list with all parameters found, indicating the hypervisor they
1844 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1845
1846 """
1847 hvp_data = []
1848
1849 for hv_name in cluster.enabled_hypervisors:
1850 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1851
1852 for os_name, os_hvp in cluster.os_hvp.items():
1853 for hv_name, hv_params in os_hvp.items():
1854 if hv_params:
1855 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1856 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1857
1858
1859 for instance in instances:
1860 if instance.hvparams:
1861 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1862 cluster.FillHV(instance)))
1863
1864 return hvp_data
1865
1868 """Mix-in for cluster/group verify LUs.
1869
1870 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1871 self.op and self._feedback_fn to be available.)
1872
1873 """
1874
1875 ETYPE_FIELD = "code"
1876 ETYPE_ERROR = "ERROR"
1877 ETYPE_WARNING = "WARNING"
1878
1879 - def _Error(self, ecode, item, msg, *args, **kwargs):
1880 """Format an error message.
1881
1882 Based on the opcode's error_codes parameter, either format a
1883 parseable error code, or a simpler error string.
1884
1885 This must be called only from Exec and functions called from Exec.
1886
1887 """
1888 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1889 itype, etxt, _ = ecode
1890
1891 if args:
1892 msg = msg % args
1893
1894 if self.op.error_codes:
1895 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1896 else:
1897 if item:
1898 item = " " + item
1899 else:
1900 item = ""
1901 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1902
1903 self._feedback_fn(" - %s" % msg)
1904
1905 - def _ErrorIf(self, cond, ecode, *args, **kwargs):
1906 """Log an error message if the passed condition is True.
1907
1908 """
1909 cond = (bool(cond)
1910 or self.op.debug_simulate_errors)
1911
1912
1913
1914 (_, etxt, _) = ecode
1915 if etxt in self.op.ignore_errors:
1916 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1917
1918 if cond:
1919 self._Error(ecode, *args, **kwargs)
1920
1921
1922 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1923 self.bad = self.bad or cond
1924
1927 """Submits all jobs necessary to verify the cluster.
1928
1929 """
1930 REQ_BGL = False
1931
1933 self.needed_locks = {}
1934
1935 - def Exec(self, feedback_fn):
1936 jobs = []
1937
1938 if self.op.group_name:
1939 groups = [self.op.group_name]
1940 depends_fn = lambda: None
1941 else:
1942 groups = self.cfg.GetNodeGroupList()
1943
1944
1945 jobs.append([
1946 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1947 ])
1948
1949
1950 depends_fn = lambda: [(-len(jobs), [])]
1951
1952 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1953 ignore_errors=self.op.ignore_errors,
1954 depends=depends_fn())]
1955 for group in groups)
1956
1957
1958 for op in itertools.chain(*jobs):
1959 op.debug_simulate_errors = self.op.debug_simulate_errors
1960 op.verbose = self.op.verbose
1961 op.error_codes = self.op.error_codes
1962 try:
1963 op.skip_checks = self.op.skip_checks
1964 except AttributeError:
1965 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1966
1967 return ResultWithJobs(jobs)
1968
1971 """Verifies the cluster config.
1972
1973 """
1974 REQ_BGL = False
1975
1989
1993
2002
2003 - def Exec(self, feedback_fn):
2004 """Verify integrity of cluster, performing various test on nodes.
2005
2006 """
2007 self.bad = False
2008 self._feedback_fn = feedback_fn
2009
2010 feedback_fn("* Verifying cluster config")
2011
2012 for msg in self.cfg.VerifyConfig():
2013 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2014
2015 feedback_fn("* Verifying cluster certificate files")
2016
2017 for cert_filename in constants.ALL_CERT_FILES:
2018 (errcode, msg) = _VerifyCertificate(cert_filename)
2019 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2020
2021 feedback_fn("* Verifying hypervisor parameters")
2022
2023 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2024 self.all_inst_info.values()))
2025
2026 feedback_fn("* Verifying all nodes belong to an existing group")
2027
2028
2029
2030
2031
2032 dangling_nodes = set(node.name for node in self.all_node_info.values()
2033 if node.group not in self.all_group_info)
2034
2035 dangling_instances = {}
2036 no_node_instances = []
2037
2038 for inst in self.all_inst_info.values():
2039 if inst.primary_node in dangling_nodes:
2040 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2041 elif inst.primary_node not in self.all_node_info:
2042 no_node_instances.append(inst.name)
2043
2044 pretty_dangling = [
2045 "%s (%s)" %
2046 (node.name,
2047 utils.CommaJoin(dangling_instances.get(node.name,
2048 ["no instances"])))
2049 for node in dangling_nodes]
2050
2051 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2052 None,
2053 "the following nodes (and their instances) belong to a non"
2054 " existing group: %s", utils.CommaJoin(pretty_dangling))
2055
2056 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2057 None,
2058 "the following instances have a non-existing primary-node:"
2059 " %s", utils.CommaJoin(no_node_instances))
2060
2061 return not self.bad
2062
2065 """Verifies the status of a node group.
2066
2067 """
2068 HPATH = "cluster-verify"
2069 HTYPE = constants.HTYPE_CLUSTER
2070 REQ_BGL = False
2071
2072 _HOOKS_INDENT_RE = re.compile("^", re.M)
2073
2075 """A class representing the logical and physical status of a node.
2076
2077 @type name: string
2078 @ivar name: the node name to which this object refers
2079 @ivar volumes: a structure as returned from
2080 L{ganeti.backend.GetVolumeList} (runtime)
2081 @ivar instances: a list of running instances (runtime)
2082 @ivar pinst: list of configured primary instances (config)
2083 @ivar sinst: list of configured secondary instances (config)
2084 @ivar sbp: dictionary of {primary-node: list of instances} for all
2085 instances for which this node is secondary (config)
2086 @ivar mfree: free memory, as reported by hypervisor (runtime)
2087 @ivar dfree: free disk, as reported by the node (runtime)
2088 @ivar offline: the offline status (config)
2089 @type rpc_fail: boolean
2090 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2091 not whether the individual keys were correct) (runtime)
2092 @type lvm_fail: boolean
2093 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2094 @type hyp_fail: boolean
2095 @ivar hyp_fail: whether the RPC call didn't return the instance list
2096 @type ghost: boolean
2097 @ivar ghost: whether this is a known node or not (config)
2098 @type os_fail: boolean
2099 @ivar os_fail: whether the RPC call didn't return valid OS data
2100 @type oslist: list
2101 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2102 @type vm_capable: boolean
2103 @ivar vm_capable: whether the node can host instances
2104
2105 """
2106 - def __init__(self, offline=False, name=None, vm_capable=True):
2107 self.name = name
2108 self.volumes = {}
2109 self.instances = []
2110 self.pinst = []
2111 self.sinst = []
2112 self.sbp = {}
2113 self.mfree = 0
2114 self.dfree = 0
2115 self.offline = offline
2116 self.vm_capable = vm_capable
2117 self.rpc_fail = False
2118 self.lvm_fail = False
2119 self.hyp_fail = False
2120 self.ghost = False
2121 self.os_fail = False
2122 self.oslist = {}
2123
2139
2158
2213
2215 """Perform some basic validation on data returned from a node.
2216
2217 - check the result data structure is well formed and has all the
2218 mandatory fields
2219 - check ganeti version
2220
2221 @type ninfo: L{objects.Node}
2222 @param ninfo: the node to check
2223 @param nresult: the results from the node
2224 @rtype: boolean
2225 @return: whether overall this call was successful (and we can expect
2226 reasonable values in the respose)
2227
2228 """
2229 node = ninfo.name
2230 _ErrorIf = self._ErrorIf
2231
2232
2233 test = not nresult or not isinstance(nresult, dict)
2234 _ErrorIf(test, constants.CV_ENODERPC, node,
2235 "unable to verify node: no data returned")
2236 if test:
2237 return False
2238
2239
2240 local_version = constants.PROTOCOL_VERSION
2241 remote_version = nresult.get("version", None)
2242 test = not (remote_version and
2243 isinstance(remote_version, (list, tuple)) and
2244 len(remote_version) == 2)
2245 _ErrorIf(test, constants.CV_ENODERPC, node,
2246 "connection to node returned invalid data")
2247 if test:
2248 return False
2249
2250 test = local_version != remote_version[0]
2251 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2252 "incompatible protocol versions: master %s,"
2253 " node %s", local_version, remote_version[0])
2254 if test:
2255 return False
2256
2257
2258
2259
2260 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2261 constants.CV_ENODEVERSION, node,
2262 "software version mismatch: master %s, node %s",
2263 constants.RELEASE_VERSION, remote_version[1],
2264 code=self.ETYPE_WARNING)
2265
2266 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2267 if ninfo.vm_capable and isinstance(hyp_result, dict):
2268 for hv_name, hv_result in hyp_result.iteritems():
2269 test = hv_result is not None
2270 _ErrorIf(test, constants.CV_ENODEHV, node,
2271 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2272
2273 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2274 if ninfo.vm_capable and isinstance(hvp_result, list):
2275 for item, hv_name, hv_result in hvp_result:
2276 _ErrorIf(True, constants.CV_ENODEHV, node,
2277 "hypervisor %s parameter verify failure (source %s): %s",
2278 hv_name, item, hv_result)
2279
2280 test = nresult.get(constants.NV_NODESETUP,
2281 ["Missing NODESETUP results"])
2282 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2283 "; ".join(test))
2284
2285 return True
2286
2287 - def _VerifyNodeTime(self, ninfo, nresult,
2288 nvinfo_starttime, nvinfo_endtime):
2289 """Check the node time.
2290
2291 @type ninfo: L{objects.Node}
2292 @param ninfo: the node to check
2293 @param nresult: the remote results for the node
2294 @param nvinfo_starttime: the start time of the RPC call
2295 @param nvinfo_endtime: the end time of the RPC call
2296
2297 """
2298 node = ninfo.name
2299 _ErrorIf = self._ErrorIf
2300
2301 ntime = nresult.get(constants.NV_TIME, None)
2302 try:
2303 ntime_merged = utils.MergeTime(ntime)
2304 except (ValueError, TypeError):
2305 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2306 return
2307
2308 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2309 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2310 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2311 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2312 else:
2313 ntime_diff = None
2314
2315 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2316 "Node time diverges by at least %s from master node time",
2317 ntime_diff)
2318
2320 """Check the node LVM results.
2321
2322 @type ninfo: L{objects.Node}
2323 @param ninfo: the node to check
2324 @param nresult: the remote results for the node
2325 @param vg_name: the configured VG name
2326
2327 """
2328 if vg_name is None:
2329 return
2330
2331 node = ninfo.name
2332 _ErrorIf = self._ErrorIf
2333
2334
2335 vglist = nresult.get(constants.NV_VGLIST, None)
2336 test = not vglist
2337 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2338 if not test:
2339 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2340 constants.MIN_VG_SIZE)
2341 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2342
2343
2344 pvlist = nresult.get(constants.NV_PVLIST, None)
2345 test = pvlist is None
2346 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2347 if not test:
2348
2349
2350
2351 for _, pvname, owner_vg in pvlist:
2352 test = ":" in pvname
2353 _ErrorIf(test, constants.CV_ENODELVM, node,
2354 "Invalid character ':' in PV '%s' of VG '%s'",
2355 pvname, owner_vg)
2356
2358 """Check the node bridges.
2359
2360 @type ninfo: L{objects.Node}
2361 @param ninfo: the node to check
2362 @param nresult: the remote results for the node
2363 @param bridges: the expected list of bridges
2364
2365 """
2366 if not bridges:
2367 return
2368
2369 node = ninfo.name
2370 _ErrorIf = self._ErrorIf
2371
2372 missing = nresult.get(constants.NV_BRIDGES, None)
2373 test = not isinstance(missing, list)
2374 _ErrorIf(test, constants.CV_ENODENET, node,
2375 "did not return valid bridge information")
2376 if not test:
2377 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2378 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2379
2399
2401 """Check the node network connectivity results.
2402
2403 @type ninfo: L{objects.Node}
2404 @param ninfo: the node to check
2405 @param nresult: the remote results for the node
2406
2407 """
2408 node = ninfo.name
2409 _ErrorIf = self._ErrorIf
2410
2411 test = constants.NV_NODELIST not in nresult
2412 _ErrorIf(test, constants.CV_ENODESSH, node,
2413 "node hasn't returned node ssh connectivity data")
2414 if not test:
2415 if nresult[constants.NV_NODELIST]:
2416 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2417 _ErrorIf(True, constants.CV_ENODESSH, node,
2418 "ssh communication with node '%s': %s", a_node, a_msg)
2419
2420 test = constants.NV_NODENETTEST not in nresult
2421 _ErrorIf(test, constants.CV_ENODENET, node,
2422 "node hasn't returned node tcp connectivity data")
2423 if not test:
2424 if nresult[constants.NV_NODENETTEST]:
2425 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2426 for anode in nlist:
2427 _ErrorIf(True, constants.CV_ENODENET, node,
2428 "tcp communication with node '%s': %s",
2429 anode, nresult[constants.NV_NODENETTEST][anode])
2430
2431 test = constants.NV_MASTERIP not in nresult
2432 _ErrorIf(test, constants.CV_ENODENET, node,
2433 "node hasn't returned node master IP reachability data")
2434 if not test:
2435 if not nresult[constants.NV_MASTERIP]:
2436 if node == self.master_node:
2437 msg = "the master node cannot reach the master IP (not configured?)"
2438 else:
2439 msg = "cannot reach the master IP"
2440 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2441
2442 - def _VerifyInstance(self, instance, instanceconfig, node_image,
2443 diskstatus):
2444 """Verify an instance.
2445
2446 This function checks to see if the required block devices are
2447 available on the instance's node.
2448
2449 """
2450 _ErrorIf = self._ErrorIf
2451 node_current = instanceconfig.primary_node
2452
2453 node_vol_should = {}
2454 instanceconfig.MapLVsByNode(node_vol_should)
2455
2456 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2457 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2458 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2459
2460 for node in node_vol_should:
2461 n_img = node_image[node]
2462 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2463
2464 continue
2465 for volume in node_vol_should[node]:
2466 test = volume not in n_img.volumes
2467 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2468 "volume %s missing on node %s", volume, node)
2469
2470 if instanceconfig.admin_state == constants.ADMINST_UP:
2471 pri_img = node_image[node_current]
2472 test = instance not in pri_img.instances and not pri_img.offline
2473 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2474 "instance not running on its primary node %s",
2475 node_current)
2476
2477 diskdata = [(nname, success, status, idx)
2478 for (nname, disks) in diskstatus.items()
2479 for idx, (success, status) in enumerate(disks)]
2480
2481 for nname, success, bdev_status, idx in diskdata:
2482
2483
2484 snode = node_image[nname]
2485 bad_snode = snode.ghost or snode.offline
2486 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2487 not success and not bad_snode,
2488 constants.CV_EINSTANCEFAULTYDISK, instance,
2489 "couldn't retrieve status for disk/%s on %s: %s",
2490 idx, nname, bdev_status)
2491 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2492 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2493 constants.CV_EINSTANCEFAULTYDISK, instance,
2494 "disk/%s on %s is faulty", idx, nname)
2495
2497 """Verify if there are any unknown volumes in the cluster.
2498
2499 The .os, .swap and backup volumes are ignored. All other volumes are
2500 reported as unknown.
2501
2502 @type reserved: L{ganeti.utils.FieldSet}
2503 @param reserved: a FieldSet of reserved volume names
2504
2505 """
2506 for node, n_img in node_image.items():
2507 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2508 self.all_node_info[node].group != self.group_uuid):
2509
2510 continue
2511 for volume in n_img.volumes:
2512 test = ((node not in node_vol_should or
2513 volume not in node_vol_should[node]) and
2514 not reserved.Matches(volume))
2515 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2516 "volume %s is unknown", volume)
2517
2519 """Verify N+1 Memory Resilience.
2520
2521 Check that if one single node dies we can still start all the
2522 instances it was primary for.
2523
2524 """
2525 cluster_info = self.cfg.GetClusterInfo()
2526 for node, n_img in node_image.items():
2527
2528
2529
2530
2531
2532
2533
2534
2535 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2536
2537
2538
2539
2540 continue
2541
2542 for prinode, instances in n_img.sbp.items():
2543 needed_mem = 0
2544 for instance in instances:
2545 bep = cluster_info.FillBE(instance_cfg[instance])
2546 if bep[constants.BE_AUTO_BALANCE]:
2547 needed_mem += bep[constants.BE_MINMEM]
2548 test = n_img.mfree < needed_mem
2549 self._ErrorIf(test, constants.CV_ENODEN1, node,
2550 "not enough memory to accomodate instance failovers"
2551 " should node %s fail (%dMiB needed, %dMiB available)",
2552 prinode, needed_mem, n_img.mfree)
2553
2554 @classmethod
2555 - def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2556 (files_all, files_opt, files_mc, files_vm)):
2557 """Verifies file checksums collected from all nodes.
2558
2559 @param errorif: Callback for reporting errors
2560 @param nodeinfo: List of L{objects.Node} objects
2561 @param master_node: Name of master node
2562 @param all_nvinfo: RPC results
2563
2564 """
2565
2566 files2nodefn = [
2567 (files_all, None),
2568 (files_mc, lambda node: (node.master_candidate or
2569 node.name == master_node)),
2570 (files_vm, lambda node: node.vm_capable),
2571 ]
2572
2573
2574 nodefiles = {}
2575 for (files, fn) in files2nodefn:
2576 if fn is None:
2577 filenodes = nodeinfo
2578 else:
2579 filenodes = filter(fn, nodeinfo)
2580 nodefiles.update((filename,
2581 frozenset(map(operator.attrgetter("name"), filenodes)))
2582 for filename in files)
2583
2584 assert set(nodefiles) == (files_all | files_mc | files_vm)
2585
2586 fileinfo = dict((filename, {}) for filename in nodefiles)
2587 ignore_nodes = set()
2588
2589 for node in nodeinfo:
2590 if node.offline:
2591 ignore_nodes.add(node.name)
2592 continue
2593
2594 nresult = all_nvinfo[node.name]
2595
2596 if nresult.fail_msg or not nresult.payload:
2597 node_files = None
2598 else:
2599 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2600
2601 test = not (node_files and isinstance(node_files, dict))
2602 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2603 "Node did not return file checksum data")
2604 if test:
2605 ignore_nodes.add(node.name)
2606 continue
2607
2608
2609 for (filename, checksum) in node_files.items():
2610 assert filename in nodefiles
2611 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2612
2613 for (filename, checksums) in fileinfo.items():
2614 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2615
2616
2617 with_file = frozenset(node_name
2618 for nodes in fileinfo[filename].values()
2619 for node_name in nodes) - ignore_nodes
2620
2621 expected_nodes = nodefiles[filename] - ignore_nodes
2622
2623
2624 missing_file = expected_nodes - with_file
2625
2626 if filename in files_opt:
2627
2628 errorif(missing_file and missing_file != expected_nodes,
2629 constants.CV_ECLUSTERFILECHECK, None,
2630 "File %s is optional, but it must exist on all or no"
2631 " nodes (not found on %s)",
2632 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2633 else:
2634 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2635 "File %s is missing from node(s) %s", filename,
2636 utils.CommaJoin(utils.NiceSort(missing_file)))
2637
2638
2639 unexpected = with_file - expected_nodes
2640 errorif(unexpected,
2641 constants.CV_ECLUSTERFILECHECK, None,
2642 "File %s should not exist on node(s) %s",
2643 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2644
2645
2646 test = len(checksums) > 1
2647 if test:
2648 variants = ["variant %s on %s" %
2649 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2650 for (idx, (checksum, nodes)) in
2651 enumerate(sorted(checksums.items()))]
2652 else:
2653 variants = []
2654
2655 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2656 "File %s found with %s different checksums (%s)",
2657 filename, len(checksums), "; ".join(variants))
2658
2659 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2660 drbd_map):
2661 """Verifies and the node DRBD status.
2662
2663 @type ninfo: L{objects.Node}
2664 @param ninfo: the node to check
2665 @param nresult: the remote results for the node
2666 @param instanceinfo: the dict of instances
2667 @param drbd_helper: the configured DRBD usermode helper
2668 @param drbd_map: the DRBD map as returned by
2669 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2670
2671 """
2672 node = ninfo.name
2673 _ErrorIf = self._ErrorIf
2674
2675 if drbd_helper:
2676 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2677 test = (helper_result == None)
2678 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2679 "no drbd usermode helper returned")
2680 if helper_result:
2681 status, payload = helper_result
2682 test = not status
2683 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2684 "drbd usermode helper check unsuccessful: %s", payload)
2685 test = status and (payload != drbd_helper)
2686 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2687 "wrong drbd usermode helper: %s", payload)
2688
2689
2690 node_drbd = {}
2691 for minor, instance in drbd_map[node].items():
2692 test = instance not in instanceinfo
2693 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2694 "ghost instance '%s' in temporary DRBD map", instance)
2695
2696
2697
2698 if test:
2699 node_drbd[minor] = (instance, False)
2700 else:
2701 instance = instanceinfo[instance]
2702 node_drbd[minor] = (instance.name,
2703 instance.admin_state == constants.ADMINST_UP)
2704
2705
2706 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2707 test = not isinstance(used_minors, (tuple, list))
2708 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2709 "cannot parse drbd status file: %s", str(used_minors))
2710 if test:
2711
2712 return
2713
2714 for minor, (iname, must_exist) in node_drbd.items():
2715 test = minor not in used_minors and must_exist
2716 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2717 "drbd minor %d of instance %s is not active", minor, iname)
2718 for minor in used_minors:
2719 test = minor not in node_drbd
2720 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2721 "unallocated drbd minor %d is in use", minor)
2722
2724 """Builds the node OS structures.
2725
2726 @type ninfo: L{objects.Node}
2727 @param ninfo: the node to check
2728 @param nresult: the remote results for the node
2729 @param nimg: the node image object
2730
2731 """
2732 node = ninfo.name
2733 _ErrorIf = self._ErrorIf
2734
2735 remote_os = nresult.get(constants.NV_OSLIST, None)
2736 test = (not isinstance(remote_os, list) or
2737 not compat.all(isinstance(v, list) and len(v) == 7
2738 for v in remote_os))
2739
2740 _ErrorIf(test, constants.CV_ENODEOS, node,
2741 "node hasn't returned valid OS data")
2742
2743 nimg.os_fail = test
2744
2745 if test:
2746 return
2747
2748 os_dict = {}
2749
2750 for (name, os_path, status, diagnose,
2751 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2752
2753 if name not in os_dict:
2754 os_dict[name] = []
2755
2756
2757
2758 parameters = [tuple(v) for v in parameters]
2759 os_dict[name].append((os_path, status, diagnose,
2760 set(variants), set(parameters), set(api_ver)))
2761
2762 nimg.oslist = os_dict
2763
2765 """Verifies the node OS list.
2766
2767 @type ninfo: L{objects.Node}
2768 @param ninfo: the node to check
2769 @param nimg: the node image object
2770 @param base: the 'template' node we match against (e.g. from the master)
2771
2772 """
2773 node = ninfo.name
2774 _ErrorIf = self._ErrorIf
2775
2776 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2777
2778 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2779 for os_name, os_data in nimg.oslist.items():
2780 assert os_data, "Empty OS status for OS %s?!" % os_name
2781 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2782 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2783 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2784 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2785 "OS '%s' has multiple entries (first one shadows the rest): %s",
2786 os_name, utils.CommaJoin([v[0] for v in os_data]))
2787
2788 test = os_name not in base.oslist
2789 _ErrorIf(test, constants.CV_ENODEOS, node,
2790 "Extra OS %s not present on reference node (%s)",
2791 os_name, base.name)
2792 if test:
2793 continue
2794 assert base.oslist[os_name], "Base node has empty OS status?"
2795 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2796 if not b_status:
2797
2798 continue
2799 for kind, a, b in [("API version", f_api, b_api),
2800 ("variants list", f_var, b_var),
2801 ("parameters", beautify_params(f_param),
2802 beautify_params(b_param))]:
2803 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2804 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2805 kind, os_name, base.name,
2806 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2807
2808
2809 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2810 _ErrorIf(missing, constants.CV_ENODEOS, node,
2811 "OSes present on reference node %s but missing on this node: %s",
2812 base.name, utils.CommaJoin(missing))
2813
2815 """Verifies out of band functionality of a node.
2816
2817 @type ninfo: L{objects.Node}
2818 @param ninfo: the node to check
2819 @param nresult: the remote results for the node
2820
2821 """
2822 node = ninfo.name
2823
2824
2825 if ((ninfo.master_candidate or ninfo.master_capable) and
2826 constants.NV_OOB_PATHS in nresult):
2827 for path_result in nresult[constants.NV_OOB_PATHS]:
2828 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2829
2831 """Verifies and updates the node volume data.
2832
2833 This function will update a L{NodeImage}'s internal structures
2834 with data from the remote call.
2835
2836 @type ninfo: L{objects.Node}
2837 @param ninfo: the node to check
2838 @param nresult: the remote results for the node
2839 @param nimg: the node image object
2840 @param vg_name: the configured VG name
2841
2842 """
2843 node = ninfo.name
2844 _ErrorIf = self._ErrorIf
2845
2846 nimg.lvm_fail = True
2847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2848 if vg_name is None:
2849 pass
2850 elif isinstance(lvdata, basestring):
2851 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2852 utils.SafeEncode(lvdata))
2853 elif not isinstance(lvdata, dict):
2854 _ErrorIf(True, constants.CV_ENODELVM, node,
2855 "rpc call to node failed (lvlist)")
2856 else:
2857 nimg.volumes = lvdata
2858 nimg.lvm_fail = False
2859
2861 """Verifies and updates the node instance list.
2862
2863 If the listing was successful, then updates this node's instance
2864 list. Otherwise, it marks the RPC call as failed for the instance
2865 list key.
2866
2867 @type ninfo: L{objects.Node}
2868 @param ninfo: the node to check
2869 @param nresult: the remote results for the node
2870 @param nimg: the node image object
2871
2872 """
2873 idata = nresult.get(constants.NV_INSTANCELIST, None)
2874 test = not isinstance(idata, list)
2875 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2876 "rpc call to node failed (instancelist): %s",
2877 utils.SafeEncode(str(idata)))
2878 if test:
2879 nimg.hyp_fail = True
2880 else:
2881 nimg.instances = idata
2882
2884 """Verifies and computes a node information map
2885
2886 @type ninfo: L{objects.Node}
2887 @param ninfo: the node to check
2888 @param nresult: the remote results for the node
2889 @param nimg: the node image object
2890 @param vg_name: the configured VG name
2891
2892 """
2893 node = ninfo.name
2894 _ErrorIf = self._ErrorIf
2895
2896
2897 hv_info = nresult.get(constants.NV_HVINFO, None)
2898 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2899 _ErrorIf(test, constants.CV_ENODEHV, node,
2900 "rpc call to node failed (hvinfo)")
2901 if not test:
2902 try:
2903 nimg.mfree = int(hv_info["memory_free"])
2904 except (ValueError, TypeError):
2905 _ErrorIf(True, constants.CV_ENODERPC, node,
2906 "node returned invalid nodeinfo, check hypervisor")
2907
2908
2909 if vg_name is not None:
2910 test = (constants.NV_VGLIST not in nresult or
2911 vg_name not in nresult[constants.NV_VGLIST])
2912 _ErrorIf(test, constants.CV_ENODELVM, node,
2913 "node didn't return data for the volume group '%s'"
2914 " - it is either missing or broken", vg_name)
2915 if not test:
2916 try:
2917 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2918 except (ValueError, TypeError):
2919 _ErrorIf(True, constants.CV_ENODERPC, node,
2920 "node returned invalid LVM info, check LVM status")
2921
2923 """Gets per-disk status information for all instances.
2924
2925 @type nodelist: list of strings
2926 @param nodelist: Node names
2927 @type node_image: dict of (name, L{objects.Node})
2928 @param node_image: Node objects
2929 @type instanceinfo: dict of (name, L{objects.Instance})
2930 @param instanceinfo: Instance objects
2931 @rtype: {instance: {node: [(succes, payload)]}}
2932 @return: a dictionary of per-instance dictionaries with nodes as
2933 keys and disk information as values; the disk information is a
2934 list of tuples (success, payload)
2935
2936 """
2937 _ErrorIf = self._ErrorIf
2938
2939 node_disks = {}
2940 node_disks_devonly = {}
2941 diskless_instances = set()
2942 diskless = constants.DT_DISKLESS
2943
2944 for nname in nodelist:
2945 node_instances = list(itertools.chain(node_image[nname].pinst,
2946 node_image[nname].sinst))
2947 diskless_instances.update(inst for inst in node_instances
2948 if instanceinfo[inst].disk_template == diskless)
2949 disks = [(inst, disk)
2950 for inst in node_instances
2951 for disk in instanceinfo[inst].disks]
2952
2953 if not disks:
2954
2955 continue
2956
2957 node_disks[nname] = disks
2958
2959
2960 devonly = []
2961 for (inst, dev) in disks:
2962 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2963 self.cfg.SetDiskID(anno_disk, nname)
2964 devonly.append(anno_disk)
2965
2966 node_disks_devonly[nname] = devonly
2967
2968 assert len(node_disks) == len(node_disks_devonly)
2969
2970
2971 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2972 node_disks_devonly)
2973
2974 assert len(result) == len(node_disks)
2975
2976 instdisk = {}
2977
2978 for (nname, nres) in result.items():
2979 disks = node_disks[nname]
2980
2981 if nres.offline:
2982
2983 data = len(disks) * [(False, "node offline")]
2984 else:
2985 msg = nres.fail_msg
2986 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2987 "while getting disk information: %s", msg)
2988 if msg:
2989
2990 data = len(disks) * [(False, msg)]
2991 else:
2992 data = []
2993 for idx, i in enumerate(nres.payload):
2994 if isinstance(i, (tuple, list)) and len(i) == 2:
2995 data.append(i)
2996 else:
2997 logging.warning("Invalid result from node %s, entry %d: %s",
2998 nname, idx, i)
2999 data.append((False, "Invalid result from the remote node"))
3000
3001 for ((inst, _), status) in zip(disks, data):
3002 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3003
3004
3005 for inst in diskless_instances:
3006 assert inst not in instdisk
3007 instdisk[inst] = {}
3008
3009 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3010 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3011 compat.all(isinstance(s, (tuple, list)) and
3012 len(s) == 2 for s in statuses)
3013 for inst, nnames in instdisk.items()
3014 for nname, statuses in nnames.items())
3015 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3016
3017 return instdisk
3018
3019 @staticmethod
3021 """Create endless iterators for all potential SSH check hosts.
3022
3023 """
3024 nodes = [node for node in all_nodes
3025 if (node.group != group_uuid and
3026 not node.offline)]
3027 keyfunc = operator.attrgetter("group")
3028
3029 return map(itertools.cycle,
3030 [sorted(map(operator.attrgetter("name"), names))
3031 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3032 keyfunc)])
3033
3034 @classmethod
3036 """Choose which nodes should talk to which other nodes.
3037
3038 We will make nodes contact all nodes in their group, and one node from
3039 every other group.
3040
3041 @warning: This algorithm has a known issue if one node group is much
3042 smaller than others (e.g. just one node). In such a case all other
3043 nodes will talk to the single node.
3044
3045 """
3046 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3047 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3048
3049 return (online_nodes,
3050 dict((name, sorted([i.next() for i in sel]))
3051 for name in online_nodes))
3052
3054 """Build hooks env.
3055
3056 Cluster-Verify hooks just ran in the post phase and their failure makes
3057 the output be logged in the verify output and the verification to fail.
3058
3059 """
3060 env = {
3061 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3062 }
3063
3064 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3065 for node in self.my_node_info.values())
3066
3067 return env
3068
3070 """Build hooks nodes.
3071
3072 """
3073 return ([], self.my_node_names)
3074
3075 - def Exec(self, feedback_fn):
3076 """Verify integrity of the node group, performing various test on nodes.
3077
3078 """
3079
3080 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3081
3082 if not self.my_node_names:
3083
3084 feedback_fn("* Empty node group, skipping verification")
3085 return True
3086
3087 self.bad = False
3088 _ErrorIf = self._ErrorIf
3089 verbose = self.op.verbose
3090 self._feedback_fn = feedback_fn
3091
3092 vg_name = self.cfg.GetVGName()
3093 drbd_helper = self.cfg.GetDRBDHelper()
3094 cluster = self.cfg.GetClusterInfo()
3095 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3096 hypervisors = cluster.enabled_hypervisors
3097 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3098
3099 i_non_redundant = []
3100 i_non_a_balanced = []
3101 i_offline = 0
3102 n_offline = 0
3103 n_drained = 0
3104 node_vol_should = {}
3105
3106
3107
3108
3109 filemap = _ComputeAncillaryFiles(cluster, False)
3110
3111
3112 master_node = self.master_node = self.cfg.GetMasterNode()
3113 master_ip = self.cfg.GetMasterIP()
3114
3115 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3116
3117 user_scripts = []
3118 if self.cfg.GetUseExternalMipScript():
3119 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3120
3121 node_verify_param = {
3122 constants.NV_FILELIST:
3123 utils.UniqueSequence(filename
3124 for files in filemap
3125 for filename in files),
3126 constants.NV_NODELIST:
3127 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3128 self.all_node_info.values()),
3129 constants.NV_HYPERVISOR: hypervisors,
3130 constants.NV_HVPARAMS:
3131 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3132 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3133 for node in node_data_list
3134 if not node.offline],
3135 constants.NV_INSTANCELIST: hypervisors,
3136 constants.NV_VERSION: None,
3137 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3138 constants.NV_NODESETUP: None,
3139 constants.NV_TIME: None,
3140 constants.NV_MASTERIP: (master_node, master_ip),
3141 constants.NV_OSLIST: None,
3142 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3143 constants.NV_USERSCRIPTS: user_scripts,
3144 }
3145
3146 if vg_name is not None:
3147 node_verify_param[constants.NV_VGLIST] = None
3148 node_verify_param[constants.NV_LVLIST] = vg_name
3149 node_verify_param[constants.NV_PVLIST] = [vg_name]
3150
3151 if drbd_helper:
3152 node_verify_param[constants.NV_DRBDLIST] = None
3153 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3154
3155
3156
3157 bridges = set()
3158 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3159 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3160 bridges.add(default_nicpp[constants.NIC_LINK])
3161 for instance in self.my_inst_info.values():
3162 for nic in instance.nics:
3163 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3164 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3165 bridges.add(full_nic[constants.NIC_LINK])
3166
3167 if bridges:
3168 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3169
3170
3171 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3172 name=node.name,
3173 vm_capable=node.vm_capable))
3174 for node in node_data_list)
3175
3176
3177 oob_paths = []
3178 for node in self.all_node_info.values():
3179 path = _SupportsOob(self.cfg, node)
3180 if path and path not in oob_paths:
3181 oob_paths.append(path)
3182
3183 if oob_paths:
3184 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3185
3186 for instance in self.my_inst_names:
3187 inst_config = self.my_inst_info[instance]
3188 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3189 i_offline += 1
3190
3191 for nname in inst_config.all_nodes:
3192 if nname not in node_image:
3193 gnode = self.NodeImage(name=nname)
3194 gnode.ghost = (nname not in self.all_node_info)
3195 node_image[nname] = gnode
3196
3197 inst_config.MapLVsByNode(node_vol_should)
3198
3199 pnode = inst_config.primary_node
3200 node_image[pnode].pinst.append(instance)
3201
3202 for snode in inst_config.secondary_nodes:
3203 nimg = node_image[snode]
3204 nimg.sinst.append(instance)
3205 if pnode not in nimg.sbp:
3206 nimg.sbp[pnode] = []
3207 nimg.sbp[pnode].append(instance)
3208
3209
3210
3211
3212
3213
3214
3215
3216 nvinfo_starttime = time.time()
3217 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3218 node_verify_param,
3219 self.cfg.GetClusterName())
3220 nvinfo_endtime = time.time()
3221
3222 if self.extra_lv_nodes and vg_name is not None:
3223 extra_lv_nvinfo = \
3224 self.rpc.call_node_verify(self.extra_lv_nodes,
3225 {constants.NV_LVLIST: vg_name},
3226 self.cfg.GetClusterName())
3227 else:
3228 extra_lv_nvinfo = {}
3229
3230 all_drbd_map = self.cfg.ComputeDRBDMap()
3231
3232 feedback_fn("* Gathering disk information (%s nodes)" %
3233 len(self.my_node_names))
3234 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3235 self.my_inst_info)
3236
3237 feedback_fn("* Verifying configuration file consistency")
3238
3239
3240
3241 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3242 if absent_nodes:
3243 vf_nvinfo = all_nvinfo.copy()
3244 vf_node_info = list(self.my_node_info.values())
3245 additional_nodes = []
3246 if master_node not in self.my_node_info:
3247 additional_nodes.append(master_node)
3248 vf_node_info.append(self.all_node_info[master_node])
3249
3250
3251 for node in absent_nodes:
3252 nodeinfo = self.all_node_info[node]
3253 if (nodeinfo.vm_capable and not nodeinfo.offline and
3254 node != master_node):
3255 additional_nodes.append(node)
3256 vf_node_info.append(self.all_node_info[node])
3257 break
3258 key = constants.NV_FILELIST
3259 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3260 {key: node_verify_param[key]},
3261 self.cfg.GetClusterName()))
3262 else:
3263 vf_nvinfo = all_nvinfo
3264 vf_node_info = self.my_node_info.values()
3265
3266 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3267
3268 feedback_fn("* Verifying node status")
3269
3270 refos_img = None
3271
3272 for node_i in node_data_list:
3273 node = node_i.name
3274 nimg = node_image[node]
3275
3276 if node_i.offline:
3277 if verbose:
3278 feedback_fn("* Skipping offline node %s" % (node,))
3279 n_offline += 1
3280 continue
3281
3282 if node == master_node:
3283 ntype = "master"
3284 elif node_i.master_candidate:
3285 ntype = "master candidate"
3286 elif node_i.drained:
3287 ntype = "drained"
3288 n_drained += 1
3289 else:
3290 ntype = "regular"
3291 if verbose:
3292 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3293
3294 msg = all_nvinfo[node].fail_msg
3295 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3296 msg)
3297 if msg:
3298 nimg.rpc_fail = True
3299 continue
3300
3301 nresult = all_nvinfo[node].payload
3302
3303 nimg.call_ok = self._VerifyNode(node_i, nresult)
3304 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3305 self._VerifyNodeNetwork(node_i, nresult)
3306 self._VerifyNodeUserScripts(node_i, nresult)
3307 self._VerifyOob(node_i, nresult)
3308
3309 if nimg.vm_capable:
3310 self._VerifyNodeLVM(node_i, nresult, vg_name)
3311 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3312 all_drbd_map)
3313
3314 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3315 self._UpdateNodeInstances(node_i, nresult, nimg)
3316 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3317 self._UpdateNodeOS(node_i, nresult, nimg)
3318
3319 if not nimg.os_fail:
3320 if refos_img is None:
3321 refos_img = nimg
3322 self._VerifyNodeOS(node_i, nimg, refos_img)
3323 self._VerifyNodeBridges(node_i, nresult, bridges)
3324
3325
3326
3327
3328 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3329
3330 for inst in non_primary_inst:
3331 test = inst in self.all_inst_info
3332 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3333 "instance should not run on node %s", node_i.name)
3334 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3335 "node is running unknown instance %s", inst)
3336
3337 for node, result in extra_lv_nvinfo.items():
3338 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3339 node_image[node], vg_name)
3340
3341 feedback_fn("* Verifying instance status")
3342 for instance in self.my_inst_names:
3343 if verbose:
3344 feedback_fn("* Verifying instance %s" % instance)
3345 inst_config = self.my_inst_info[instance]
3346 self._VerifyInstance(instance, inst_config, node_image,
3347 instdisk[instance])
3348 inst_nodes_offline = []
3349
3350 pnode = inst_config.primary_node
3351 pnode_img = node_image[pnode]
3352 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3353 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3354 " primary node failed", instance)
3355
3356 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3357 pnode_img.offline,
3358 constants.CV_EINSTANCEBADNODE, instance,
3359 "instance is marked as running and lives on offline node %s",
3360 inst_config.primary_node)
3361
3362
3363
3364 if inst_config.disk_template not in constants.DTS_MIRRORED:
3365 i_non_redundant.append(instance)
3366
3367 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3368 constants.CV_EINSTANCELAYOUT,
3369 instance, "instance has multiple secondary nodes: %s",
3370 utils.CommaJoin(inst_config.secondary_nodes),
3371 code=self.ETYPE_WARNING)
3372
3373 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3374 pnode = inst_config.primary_node
3375 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3376 instance_groups = {}
3377
3378 for node in instance_nodes:
3379 instance_groups.setdefault(self.all_node_info[node].group,
3380 []).append(node)
3381
3382 pretty_list = [
3383 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3384
3385 for group, nodes in sorted(instance_groups.items(),
3386 key=lambda (_, nodes): pnode in nodes,
3387 reverse=True)]
3388
3389 self._ErrorIf(len(instance_groups) > 1,
3390 constants.CV_EINSTANCESPLITGROUPS,
3391 instance, "instance has primary and secondary nodes in"
3392 " different groups: %s", utils.CommaJoin(pretty_list),
3393 code=self.ETYPE_WARNING)
3394
3395 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3396 i_non_a_balanced.append(instance)
3397
3398 for snode in inst_config.secondary_nodes:
3399 s_img = node_image[snode]
3400 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3401 snode, "instance %s, connection to secondary node failed",
3402 instance)
3403
3404 if s_img.offline:
3405 inst_nodes_offline.append(snode)
3406
3407
3408 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3409 "instance has offline secondary node(s) %s",
3410 utils.CommaJoin(inst_nodes_offline))
3411
3412 for node in inst_config.all_nodes:
3413 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3414 instance, "instance lives on ghost node %s", node)
3415 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3416 instance, "instance lives on non-vm_capable node %s", node)
3417
3418 feedback_fn("* Verifying orphan volumes")
3419 reserved = utils.FieldSet(*cluster.reserved_lvs)
3420
3421
3422
3423
3424 for inst in self.all_inst_info.values():
3425 for secondary in inst.secondary_nodes:
3426 if (secondary in self.my_node_info
3427 and inst.name not in self.my_inst_info):
3428 inst.MapLVsByNode(node_vol_should)
3429 break
3430
3431 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3432
3433 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3434 feedback_fn("* Verifying N+1 Memory redundancy")
3435 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3436
3437 feedback_fn("* Other Notes")
3438 if i_non_redundant:
3439 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3440 % len(i_non_redundant))
3441
3442 if i_non_a_balanced:
3443 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3444 % len(i_non_a_balanced))
3445
3446 if i_offline:
3447 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3448
3449 if n_offline:
3450 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3451
3452 if n_drained:
3453 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3454
3455 return not self.bad
3456
3457 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3458 """Analyze the post-hooks' result
3459
3460 This method analyses the hook result, handles it, and sends some
3461 nicely-formatted feedback back to the user.
3462
3463 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3464 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3465 @param hooks_results: the results of the multi-node hooks rpc call
3466 @param feedback_fn: function used send feedback back to the caller
3467 @param lu_result: previous Exec result
3468 @return: the new Exec result, based on the previous result
3469 and hook results
3470
3471 """
3472
3473
3474 if not self.my_node_names:
3475
3476 pass
3477 elif phase == constants.HOOKS_PHASE_POST:
3478
3479 feedback_fn("* Hooks Results")
3480 assert hooks_results, "invalid result from hooks"
3481
3482 for node_name in hooks_results:
3483 res = hooks_results[node_name]
3484 msg = res.fail_msg
3485 test = msg and not res.offline
3486 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3487 "Communication failure in hooks execution: %s", msg)
3488 if res.offline or msg:
3489
3490
3491 continue
3492 for script, hkr, output in res.payload:
3493 test = hkr == constants.HKR_FAIL
3494 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3495 "Script %s failed, output:", script)
3496 if test:
3497 output = self._HOOKS_INDENT_RE.sub(" ", output)
3498 feedback_fn("%s" % output)
3499 lu_result = False
3500
3501 return lu_result
3502
3505 """Verifies the cluster disks status.
3506
3507 """
3508 REQ_BGL = False
3509
3515
3516 - def Exec(self, feedback_fn):
3522
3525 """Verifies the status of all disks in a node group.
3526
3527 """
3528 REQ_BGL = False
3529
3540
3572
3589
3590 - def Exec(self, feedback_fn):
3591 """Verify integrity of cluster disks.
3592
3593 @rtype: tuple of three items
3594 @return: a tuple of (dict of node-to-node_error, list of instances
3595 which need activate-disks, dict of instance: (node, volume) for
3596 missing volumes
3597
3598 """
3599 res_nodes = {}
3600 res_instances = set()
3601 res_missing = {}
3602
3603 nv_dict = _MapInstanceDisksToNodes([inst
3604 for inst in self.instances.values()
3605 if inst.admin_state == constants.ADMINST_UP])
3606
3607 if nv_dict:
3608 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3609 set(self.cfg.GetVmCapableNodeList()))
3610
3611 node_lvs = self.rpc.call_lv_list(nodes, [])
3612
3613 for (node, node_res) in node_lvs.items():
3614 if node_res.offline:
3615 continue
3616
3617 msg = node_res.fail_msg
3618 if msg:
3619 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3620 res_nodes[node] = msg
3621 continue
3622
3623 for lv_name, (_, _, lv_online) in node_res.payload.items():
3624 inst = nv_dict.pop((node, lv_name), None)
3625 if not (lv_online or inst is None):
3626 res_instances.add(inst)
3627
3628
3629
3630 for key, inst in nv_dict.iteritems():
3631 res_missing.setdefault(inst, []).append(list(key))
3632
3633 return (res_nodes, list(res_instances), res_missing)
3634
3637 """Verifies the cluster disks sizes.
3638
3639 """
3640 REQ_BGL = False
3641
3660
3664
3666 """Check prerequisites.
3667
3668 This only checks the optional instance list against the existing names.
3669
3670 """
3671 if self.wanted_names is None:
3672 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3673
3674 self.wanted_instances = \
3675 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3676
3678 """Ensure children of the disk have the needed disk size.
3679
3680 This is valid mainly for DRBD8 and fixes an issue where the
3681 children have smaller disk size.
3682
3683 @param disk: an L{ganeti.objects.Disk} object
3684
3685 """
3686 if disk.dev_type == constants.LD_DRBD8:
3687 assert disk.children, "Empty children for DRBD8?"
3688 fchild = disk.children[0]
3689 mismatch = fchild.size < disk.size
3690 if mismatch:
3691 self.LogInfo("Child disk has size %d, parent %d, fixing",
3692 fchild.size, disk.size)
3693 fchild.size = disk.size
3694
3695
3696 return self._EnsureChildSizes(fchild) or mismatch
3697 else:
3698 return False
3699
3700 - def Exec(self, feedback_fn):
3701 """Verify the size of cluster disks.
3702
3703 """
3704
3705
3706 per_node_disks = {}
3707 for instance in self.wanted_instances:
3708 pnode = instance.primary_node
3709 if pnode not in per_node_disks:
3710 per_node_disks[pnode] = []
3711 for idx, disk in enumerate(instance.disks):
3712 per_node_disks[pnode].append((instance, idx, disk))
3713
3714 assert not (frozenset(per_node_disks.keys()) -
3715 self.owned_locks(locking.LEVEL_NODE_RES)), \
3716 "Not owning correct locks"
3717 assert not self.owned_locks(locking.LEVEL_NODE)
3718
3719 changed = []
3720 for node, dskl in per_node_disks.items():
3721 newl = [v[2].Copy() for v in dskl]
3722 for dsk in newl:
3723 self.cfg.SetDiskID(dsk, node)
3724 result = self.rpc.call_blockdev_getsize(node, newl)
3725 if result.fail_msg:
3726 self.LogWarning("Failure in blockdev_getsize call to node"
3727 " %s, ignoring", node)
3728 continue
3729 if len(result.payload) != len(dskl):
3730 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3731 " result.payload=%s", node, len(dskl), result.payload)
3732 self.LogWarning("Invalid result from node %s, ignoring node results",
3733 node)
3734 continue
3735 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3736 if size is None:
3737 self.LogWarning("Disk %d of instance %s did not return size"
3738 " information, ignoring", idx, instance.name)
3739 continue
3740 if not isinstance(size, (int, long)):
3741 self.LogWarning("Disk %d of instance %s did not return valid"
3742 " size information, ignoring", idx, instance.name)
3743 continue
3744 size = size >> 20
3745 if size != disk.size:
3746 self.LogInfo("Disk %d of instance %s has mismatched size,"
3747 " correcting: recorded %d, actual %d", idx,
3748 instance.name, disk.size, size)
3749 disk.size = size
3750 self.cfg.Update(instance, feedback_fn)
3751 changed.append((instance.name, idx, size))
3752 if self._EnsureChildSizes(disk):
3753 self.cfg.Update(instance, feedback_fn)
3754 changed.append((instance.name, idx, disk.size))
3755 return changed
3756
3759 """Rename the cluster.
3760
3761 """
3762 HPATH = "cluster-rename"
3763 HTYPE = constants.HTYPE_CLUSTER
3764
3766 """Build hooks env.
3767
3768 """
3769 return {
3770 "OP_TARGET": self.cfg.GetClusterName(),
3771 "NEW_NAME": self.op.name,
3772 }
3773
3779
3802
3803 - def Exec(self, feedback_fn):
3804 """Rename the cluster.
3805
3806 """
3807 clustername = self.op.name
3808 new_ip = self.ip
3809
3810
3811 master_params = self.cfg.GetMasterNetworkParameters()
3812 ems = self.cfg.GetUseExternalMipScript()
3813 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3814 master_params, ems)
3815 result.Raise("Could not disable the master role")
3816
3817 try:
3818 cluster = self.cfg.GetClusterInfo()
3819 cluster.cluster_name = clustername
3820 cluster.master_ip = new_ip
3821 self.cfg.Update(cluster, feedback_fn)
3822
3823
3824 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3825 node_list = self.cfg.GetOnlineNodeList()
3826 try:
3827 node_list.remove(master_params.name)
3828 except ValueError:
3829 pass
3830 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3831 finally:
3832 master_params.ip = new_ip
3833 result = self.rpc.call_node_activate_master_ip(master_params.name,
3834 master_params, ems)
3835 msg = result.fail_msg
3836 if msg:
3837 self.LogWarning("Could not re-enable the master role on"
3838 " the master, please restart manually: %s", msg)
3839
3840 return clustername
3841
3862
3865 """Change the parameters of the cluster.
3866
3867 """
3868 HPATH = "cluster-modify"
3869 HTYPE = constants.HTYPE_CLUSTER
3870 REQ_BGL = False
3871
3896
3910
3912 """Build hooks env.
3913
3914 """
3915 return {
3916 "OP_TARGET": self.cfg.GetClusterName(),
3917 "NEW_VG_NAME": self.op.vg_name,
3918 }
3919
3921 """Build hooks nodes.
3922
3923 """
3924 mn = self.cfg.GetMasterNode()
3925 return ([mn], [mn])
3926
3928 """Check prerequisites.
3929
3930 This checks whether the given params don't conflict and
3931 if the given volume group is valid.
3932
3933 """
3934 if self.op.vg_name is not None and not self.op.vg_name:
3935 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3936 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3937 " instances exist", errors.ECODE_INVAL)
3938
3939 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3940 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3941 raise errors.OpPrereqError("Cannot disable drbd helper while"
3942 " drbd-based instances exist",
3943 errors.ECODE_INVAL)
3944
3945 node_list = self.owned_locks(locking.LEVEL_NODE)
3946
3947
3948 if self.op.vg_name:
3949 vglist = self.rpc.call_vg_list(node_list)
3950 for node in node_list:
3951 msg = vglist[node].fail_msg
3952 if msg:
3953
3954 self.LogWarning("Error while gathering data on node %s"
3955 " (ignoring node): %s", node, msg)
3956 continue
3957 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3958 self.op.vg_name,
3959 constants.MIN_VG_SIZE)
3960 if vgstatus:
3961 raise errors.OpPrereqError("Error on node '%s': %s" %
3962 (node, vgstatus), errors.ECODE_ENVIRON)
3963
3964 if self.op.drbd_helper:
3965
3966 helpers = self.rpc.call_drbd_helper(node_list)
3967 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3968 if ninfo.offline:
3969 self.LogInfo("Not checking drbd helper on offline node %s", node)
3970 continue
3971 msg = helpers[node].fail_msg
3972 if msg:
3973 raise errors.OpPrereqError("Error checking drbd helper on node"
3974 " '%s': %s" % (node, msg),
3975 errors.ECODE_ENVIRON)
3976 node_helper = helpers[node].payload
3977 if node_helper != self.op.drbd_helper:
3978 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3979 (node, node_helper), errors.ECODE_ENVIRON)
3980
3981 self.cluster = cluster = self.cfg.GetClusterInfo()
3982
3983 if self.op.beparams:
3984 objects.UpgradeBeParams(self.op.beparams)
3985 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3986 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3987
3988 if self.op.ndparams:
3989 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3990 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3991
3992
3993
3994 if self.new_ndparams["oob_program"] == "":
3995 self.new_ndparams["oob_program"] = \
3996 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3997
3998 if self.op.hv_state:
3999 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4000 self.cluster.hv_state_static)
4001 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4002 for hv, values in new_hv_state.items())
4003
4004 if self.op.disk_state:
4005 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4006 self.cluster.disk_state_static)
4007 self.new_disk_state = \
4008 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4009 for name, values in svalues.items()))
4010 for storage, svalues in new_disk_state.items())
4011
4012 if self.op.ipolicy:
4013 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4014 group_policy=False)
4015
4016 all_instances = self.cfg.GetAllInstancesInfo().values()
4017 violations = set()
4018 for group in self.cfg.GetAllNodeGroupsInfo().values():
4019 instances = frozenset([inst for inst in all_instances
4020 if compat.any(node in group.members
4021 for node in inst.all_nodes)])
4022 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4023 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4024 group),
4025 new_ipolicy, instances)
4026 if new:
4027 violations.update(new)
4028
4029 if violations:
4030 self.LogWarning("After the ipolicy change the following instances"
4031 " violate them: %s",
4032 utils.CommaJoin(utils.NiceSort(violations)))
4033
4034 if self.op.nicparams:
4035 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4036 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4037 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4038 nic_errors = []
4039
4040
4041 for instance in self.cfg.GetAllInstancesInfo().values():
4042 for nic_idx, nic in enumerate(instance.nics):
4043 params_copy = copy.deepcopy(nic.nicparams)
4044 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4045
4046
4047 try:
4048 objects.NIC.CheckParameterSyntax(params_filled)
4049 except errors.ConfigurationError, err:
4050 nic_errors.append("Instance %s, nic/%d: %s" %
4051 (instance.name, nic_idx, err))
4052
4053
4054 target_mode = params_filled[constants.NIC_MODE]
4055 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4056 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4057 " address" % (instance.name, nic_idx))
4058 if nic_errors:
4059 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4060 "\n".join(nic_errors))
4061
4062
4063 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4064 if self.op.hvparams:
4065 for hv_name, hv_dict in self.op.hvparams.items():
4066 if hv_name not in self.new_hvparams:
4067 self.new_hvparams[hv_name] = hv_dict
4068 else:
4069 self.new_hvparams[hv_name].update(hv_dict)
4070
4071
4072 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4073 if self.op.diskparams:
4074 for dt_name, dt_params in self.op.diskparams.items():
4075 if dt_name not in self.op.diskparams:
4076 self.new_diskparams[dt_name] = dt_params
4077 else:
4078 self.new_diskparams[dt_name].update(dt_params)
4079
4080
4081 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4082 if self.op.os_hvp:
4083 for os_name, hvs in self.op.os_hvp.items():
4084 if os_name not in self.new_os_hvp:
4085 self.new_os_hvp[os_name] = hvs
4086 else:
4087 for hv_name, hv_dict in hvs.items():
4088 if hv_dict is None:
4089
4090 self.new_os_hvp[os_name].pop(hv_name, None)
4091 elif hv_name not in self.new_os_hvp[os_name]:
4092 self.new_os_hvp[os_name][hv_name] = hv_dict
4093 else:
4094 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4095
4096
4097 self.new_osp = objects.FillDict(cluster.osparams, {})
4098 if self.op.osparams:
4099 for os_name, osp in self.op.osparams.items():
4100 if os_name not in self.new_osp:
4101 self.new_osp[os_name] = {}
4102
4103 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4104 use_none=True)
4105
4106 if not self.new_osp[os_name]:
4107
4108 del self.new_osp[os_name]
4109 else:
4110
4111 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4112 os_name, self.new_osp[os_name])
4113
4114
4115 if self.op.enabled_hypervisors is not None:
4116 self.hv_list = self.op.enabled_hypervisors
4117 for hv in self.hv_list:
4118
4119
4120
4121
4122
4123 if hv not in new_hvp:
4124 new_hvp[hv] = {}
4125 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4126 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4127 else:
4128 self.hv_list = cluster.enabled_hypervisors
4129
4130 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4131
4132 for hv_name, hv_params in self.new_hvparams.items():
4133 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4134 (self.op.enabled_hypervisors and
4135 hv_name in self.op.enabled_hypervisors)):
4136
4137 hv_class = hypervisor.GetHypervisorClass(hv_name)
4138 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4139 hv_class.CheckParameterSyntax(hv_params)
4140 _CheckHVParams(self, node_list, hv_name, hv_params)
4141
4142 if self.op.os_hvp:
4143
4144
4145 for os_name, os_hvp in self.new_os_hvp.items():
4146 for hv_name, hv_params in os_hvp.items():
4147 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4148
4149 cluster_defaults = self.new_hvparams.get(hv_name, {})
4150 new_osp = objects.FillDict(cluster_defaults, hv_params)
4151 hv_class = hypervisor.GetHypervisorClass(hv_name)
4152 hv_class.CheckParameterSyntax(new_osp)
4153 _CheckHVParams(self, node_list, hv_name, new_osp)
4154
4155 if self.op.default_iallocator:
4156 alloc_script = utils.FindFile(self.op.default_iallocator,
4157 constants.IALLOCATOR_SEARCH_PATH,
4158 os.path.isfile)
4159 if alloc_script is None:
4160 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4161 " specified" % self.op.default_iallocator,
4162 errors.ECODE_INVAL)
4163
4164 - def Exec(self, feedback_fn):
4165 """Change the parameters of the cluster.
4166
4167 """
4168 if self.op.vg_name is not None:
4169 new_volume = self.op.vg_name
4170 if not new_volume:
4171 new_volume = None
4172 if new_volume != self.cfg.GetVGName():
4173 self.cfg.SetVGName(new_volume)
4174 else:
4175 feedback_fn("Cluster LVM configuration already in desired"
4176 " state, not changing")
4177 if self.op.drbd_helper is not None:
4178 new_helper = self.op.drbd_helper
4179 if not new_helper:
4180 new_helper = None
4181 if new_helper != self.cfg.GetDRBDHelper():
4182 self.cfg.SetDRBDHelper(new_helper)
4183 else:
4184 feedback_fn("Cluster DRBD helper already in desired state,"
4185 " not changing")
4186 if self.op.hvparams:
4187 self.cluster.hvparams = self.new_hvparams
4188 if self.op.os_hvp:
4189 self.cluster.os_hvp = self.new_os_hvp
4190 if self.op.enabled_hypervisors is not None:
4191 self.cluster.hvparams = self.new_hvparams
4192 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4193 if self.op.beparams:
4194 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4195 if self.op.nicparams:
4196 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4197 if self.op.ipolicy:
4198 self.cluster.ipolicy = self.new_ipolicy
4199 if self.op.osparams:
4200 self.cluster.osparams = self.new_osp
4201 if self.op.ndparams:
4202 self.cluster.ndparams = self.new_ndparams
4203 if self.op.diskparams:
4204 self.cluster.diskparams = self.new_diskparams
4205 if self.op.hv_state:
4206 self.cluster.hv_state_static = self.new_hv_state
4207 if self.op.disk_state:
4208 self.cluster.disk_state_static = self.new_disk_state
4209
4210 if self.op.candidate_pool_size is not None:
4211 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4212
4213 _AdjustCandidatePool(self, [])
4214
4215 if self.op.maintain_node_health is not None:
4216 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4217 feedback_fn("Note: CONFD was disabled at build time, node health"
4218 " maintenance is not useful (still enabling it)")
4219 self.cluster.maintain_node_health = self.op.maintain_node_health
4220
4221 if self.op.prealloc_wipe_disks is not None:
4222 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4223
4224 if self.op.add_uids is not None:
4225 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4226
4227 if self.op.remove_uids is not None:
4228 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4229
4230 if self.op.uid_pool is not None:
4231 self.cluster.uid_pool = self.op.uid_pool
4232
4233 if self.op.default_iallocator is not None:
4234 self.cluster.default_iallocator = self.op.default_iallocator
4235
4236 if self.op.reserved_lvs is not None:
4237 self.cluster.reserved_lvs = self.op.reserved_lvs
4238
4239 if self.op.use_external_mip_script is not None:
4240 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4241
4242 def helper_os(aname, mods, desc):
4243 desc += " OS list"
4244 lst = getattr(self.cluster, aname)
4245 for key, val in mods:
4246 if key == constants.DDM_ADD:
4247 if val in lst:
4248 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4249 else:
4250 lst.append(val)
4251 elif key == constants.DDM_REMOVE:
4252 if val in lst:
4253 lst.remove(val)
4254 else:
4255 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4256 else:
4257 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4258
4259 if self.op.hidden_os:
4260 helper_os("hidden_os", self.op.hidden_os, "hidden")
4261
4262 if self.op.blacklisted_os:
4263 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4264
4265 if self.op.master_netdev:
4266 master_params = self.cfg.GetMasterNetworkParameters()
4267 ems = self.cfg.GetUseExternalMipScript()
4268 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4269 self.cluster.master_netdev)
4270 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4271 master_params, ems)
4272 result.Raise("Could not disable the master ip")
4273 feedback_fn("Changing master_netdev from %s to %s" %
4274 (master_params.netdev, self.op.master_netdev))
4275 self.cluster.master_netdev = self.op.master_netdev
4276
4277 if self.op.master_netmask:
4278 master_params = self.cfg.GetMasterNetworkParameters()
4279 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4280 result = self.rpc.call_node_change_master_netmask(master_params.name,
4281 master_params.netmask,
4282 self.op.master_netmask,
4283 master_params.ip,
4284 master_params.netdev)
4285 if result.fail_msg:
4286 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4287 feedback_fn(msg)
4288
4289 self.cluster.master_netmask = self.op.master_netmask
4290
4291 self.cfg.Update(self.cluster, feedback_fn)
4292
4293 if self.op.master_netdev:
4294 master_params = self.cfg.GetMasterNetworkParameters()
4295 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4296 self.op.master_netdev)
4297 ems = self.cfg.GetUseExternalMipScript()
4298 result = self.rpc.call_node_activate_master_ip(master_params.name,
4299 master_params, ems)
4300 if result.fail_msg:
4301 self.LogWarning("Could not re-enable the master ip on"
4302 " the master, please restart manually: %s",
4303 result.fail_msg)
4304
4307 """Helper for uploading a file and showing warnings.
4308
4309 """
4310 if os.path.exists(fname):
4311 result = lu.rpc.call_upload_file(nodes, fname)
4312 for to_node, to_result in result.items():
4313 msg = to_result.fail_msg
4314 if msg:
4315 msg = ("Copy of file %s to node %s failed: %s" %
4316 (fname, to_node, msg))
4317 lu.proc.LogWarning(msg)
4318
4321 """Compute files external to Ganeti which need to be consistent.
4322
4323 @type redist: boolean
4324 @param redist: Whether to include files which need to be redistributed
4325
4326 """
4327
4328 files_all = set([
4329 constants.SSH_KNOWN_HOSTS_FILE,
4330 constants.CONFD_HMAC_KEY,
4331 constants.CLUSTER_DOMAIN_SECRET_FILE,
4332 constants.SPICE_CERT_FILE,
4333 constants.SPICE_CACERT_FILE,
4334 constants.RAPI_USERS_FILE,
4335 ])
4336
4337 if not redist:
4338 files_all.update(constants.ALL_CERT_FILES)
4339 files_all.update(ssconf.SimpleStore().GetFileList())
4340 else:
4341
4342 files_all.add(constants.RAPI_CERT_FILE)
4343
4344 if cluster.modify_etc_hosts:
4345 files_all.add(constants.ETC_HOSTS)
4346
4347 if cluster.use_external_mip_script:
4348 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4349
4350
4351
4352
4353 files_opt = set([
4354 constants.RAPI_USERS_FILE,
4355 ])
4356
4357
4358 files_mc = set()
4359
4360 if not redist:
4361 files_mc.add(constants.CLUSTER_CONF_FILE)
4362
4363
4364 files_vm = set(filename
4365 for hv_name in cluster.enabled_hypervisors
4366 for filename in
4367 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4368
4369 files_opt |= set(filename
4370 for hv_name in cluster.enabled_hypervisors
4371 for filename in
4372 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4373
4374
4375 all_files_set = files_all | files_mc | files_vm
4376 assert (len(all_files_set) ==
4377 sum(map(len, [files_all, files_mc, files_vm]))), \
4378 "Found file listed in more than one file list"
4379
4380
4381 assert all_files_set.issuperset(files_opt), \
4382 "Optional file not in a different required list"
4383
4384 return (files_all, files_opt, files_mc, files_vm)
4385
4388 """Distribute additional files which are part of the cluster configuration.
4389
4390 ConfigWriter takes care of distributing the config and ssconf files, but
4391 there are more files which should be distributed to all nodes. This function
4392 makes sure those are copied.
4393
4394 @param lu: calling logical unit
4395 @param additional_nodes: list of nodes not in the config to distribute to
4396 @type additional_vm: boolean
4397 @param additional_vm: whether the additional nodes are vm-capable or not
4398
4399 """
4400
4401 cluster = lu.cfg.GetClusterInfo()
4402 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4403
4404 online_nodes = lu.cfg.GetOnlineNodeList()
4405 online_set = frozenset(online_nodes)
4406 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4407
4408 if additional_nodes is not None:
4409 online_nodes.extend(additional_nodes)
4410 if additional_vm:
4411 vm_nodes.extend(additional_nodes)
4412
4413
4414 for nodelist in [online_nodes, vm_nodes]:
4415 if master_info.name in nodelist:
4416 nodelist.remove(master_info.name)
4417
4418
4419 (files_all, _, files_mc, files_vm) = \
4420 _ComputeAncillaryFiles(cluster, True)
4421
4422
4423 assert not (constants.CLUSTER_CONF_FILE in files_all or
4424 constants.CLUSTER_CONF_FILE in files_vm)
4425 assert not files_mc, "Master candidates not handled in this function"
4426
4427 filemap = [
4428 (online_nodes, files_all),
4429 (vm_nodes, files_vm),
4430 ]
4431
4432
4433 for (node_list, files) in filemap:
4434 for fname in files:
4435 _UploadHelper(lu, node_list, fname)
4436
4439 """Force the redistribution of cluster configuration.
4440
4441 This is a very simple LU.
4442
4443 """
4444 REQ_BGL = False
4445
4451
4452 - def Exec(self, feedback_fn):
4458
4461 """Activate the master IP on the master node.
4462
4463 """
4464 - def Exec(self, feedback_fn):
4473
4476 """Deactivate the master IP on the master node.
4477
4478 """
4479 - def Exec(self, feedback_fn):
4488
4489
4490 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
4491 """Sleep and poll for an instance's disk to sync.
4492
4493 """
4494 if not instance.disks or disks is not None and not disks:
4495 return True
4496
4497 disks = _ExpandCheckDisks(instance, disks)
4498
4499 if not oneshot:
4500 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4501
4502 node = instance.primary_node
4503
4504 for dev in disks:
4505 lu.cfg.SetDiskID(dev, node)
4506
4507
4508
4509 retries = 0
4510 degr_retries = 10
4511 while True:
4512 max_time = 0
4513 done = True
4514 cumul_degraded = False
4515 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4516 msg = rstats.fail_msg
4517 if msg:
4518 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4519 retries += 1
4520 if retries >= 10:
4521 raise errors.RemoteError("Can't contact node %s for mirror data,"
4522 " aborting." % node)
4523 time.sleep(6)
4524 continue
4525 rstats = rstats.payload
4526 retries = 0
4527 for i, mstat in enumerate(rstats):
4528 if mstat is None:
4529 lu.LogWarning("Can't compute data for node %s/%s",
4530 node, disks[i].iv_name)
4531 continue
4532
4533 cumul_degraded = (cumul_degraded or
4534 (mstat.is_degraded and mstat.sync_percent is None))
4535 if mstat.sync_percent is not None:
4536 done = False
4537 if mstat.estimated_time is not None:
4538 rem_time = ("%s remaining (estimated)" %
4539 utils.FormatSeconds(mstat.estimated_time))
4540 max_time = mstat.estimated_time
4541 else:
4542 rem_time = "no time estimate"
4543 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4544 (disks[i].iv_name, mstat.sync_percent, rem_time))
4545
4546
4547
4548
4549 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4550 logging.info("Degraded disks found, %d retries left", degr_retries)
4551 degr_retries -= 1
4552 time.sleep(1)
4553 continue
4554
4555 if done or oneshot:
4556 break
4557
4558 time.sleep(min(60, max_time))
4559
4560 if done:
4561 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4562 return not cumul_degraded
4563
4566 """Wrapper around call_blockdev_find to annotate diskparams.
4567
4568 @param lu: A reference to the lu object
4569 @param node: The node to call out
4570 @param dev: The device to find
4571 @param instance: The instance object the device belongs to
4572 @returns The result of the rpc call
4573
4574 """
4575 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4576 return lu.rpc.call_blockdev_find(node, disk)
4577
4586
4590 """Check that mirrors are not degraded.
4591
4592 @attention: The device has to be annotated already.
4593
4594 The ldisk parameter, if True, will change the test from the
4595 is_degraded attribute (which represents overall non-ok status for
4596 the device(s)) to the ldisk (representing the local storage status).
4597
4598 """
4599 lu.cfg.SetDiskID(dev, node)
4600
4601 result = True
4602
4603 if on_primary or dev.AssembleOnSecondary():
4604 rstats = lu.rpc.call_blockdev_find(node, dev)
4605 msg = rstats.fail_msg
4606 if msg:
4607 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4608 result = False
4609 elif not rstats.payload:
4610 lu.LogWarning("Can't find disk on node %s", node)
4611 result = False
4612 else:
4613 if ldisk:
4614 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4615 else:
4616 result = result and not rstats.payload.is_degraded
4617
4618 if dev.children:
4619 for child in dev.children:
4620 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4621 on_primary)
4622
4623 return result
4624
4627 """Logical unit for OOB handling.
4628
4629 """
4630 REQ_BGL = False
4631 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4632
4634 """Gather locks we need.
4635
4636 """
4637 if self.op.node_names:
4638 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4639 lock_names = self.op.node_names
4640 else:
4641 lock_names = locking.ALL_SET
4642
4643 self.needed_locks = {
4644 locking.LEVEL_NODE: lock_names,
4645 }
4646
4648 """Check prerequisites.
4649
4650 This checks:
4651 - the node exists in the configuration
4652 - OOB is supported
4653
4654 Any errors are signaled by raising errors.OpPrereqError.
4655
4656 """
4657 self.nodes = []
4658 self.master_node = self.cfg.GetMasterNode()
4659
4660 assert self.op.power_delay >= 0.0
4661
4662 if self.op.node_names:
4663 if (self.op.command in self._SKIP_MASTER and
4664 self.master_node in self.op.node_names):
4665 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4666 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4667
4668 if master_oob_handler:
4669 additional_text = ("run '%s %s %s' if you want to operate on the"
4670 " master regardless") % (master_oob_handler,
4671 self.op.command,
4672 self.master_node)
4673 else:
4674 additional_text = "it does not support out-of-band operations"
4675
4676 raise errors.OpPrereqError(("Operating on the master node %s is not"
4677 " allowed for %s; %s") %
4678 (self.master_node, self.op.command,
4679 additional_text), errors.ECODE_INVAL)
4680 else:
4681 self.op.node_names = self.cfg.GetNodeList()
4682 if self.op.command in self._SKIP_MASTER:
4683 self.op.node_names.remove(self.master_node)
4684
4685 if self.op.command in self._SKIP_MASTER:
4686 assert self.master_node not in self.op.node_names
4687
4688 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4689 if node is None:
4690 raise errors.OpPrereqError("Node %s not found" % node_name,
4691 errors.ECODE_NOENT)
4692 else:
4693 self.nodes.append(node)
4694
4695 if (not self.op.ignore_status and
4696 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4697 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4698 " not marked offline") % node_name,
4699 errors.ECODE_STATE)
4700
4701 - def Exec(self, feedback_fn):
4702 """Execute OOB and return result if we expect any.
4703
4704 """
4705 master_node = self.master_node
4706 ret = []
4707
4708 for idx, node in enumerate(utils.NiceSort(self.nodes,
4709 key=lambda node: node.name)):
4710 node_entry = [(constants.RS_NORMAL, node.name)]
4711 ret.append(node_entry)
4712
4713 oob_program = _SupportsOob(self.cfg, node)
4714
4715 if not oob_program:
4716 node_entry.append((constants.RS_UNAVAIL, None))
4717 continue
4718
4719 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4720 self.op.command, oob_program, node.name)
4721 result = self.rpc.call_run_oob(master_node, oob_program,
4722 self.op.command, node.name,
4723 self.op.timeout)
4724
4725 if result.fail_msg:
4726 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4727 node.name, result.fail_msg)
4728 node_entry.append((constants.RS_NODATA, None))
4729 else:
4730 try:
4731 self._CheckPayload(result)
4732 except errors.OpExecError, err:
4733 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4734 node.name, err)
4735 node_entry.append((constants.RS_NODATA, None))
4736 else:
4737 if self.op.command == constants.OOB_HEALTH:
4738
4739 for item, status in result.payload:
4740 if status in [constants.OOB_STATUS_WARNING,
4741 constants.OOB_STATUS_CRITICAL]:
4742 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4743 item, node.name, status)
4744
4745 if self.op.command == constants.OOB_POWER_ON:
4746 node.powered = True
4747 elif self.op.command == constants.OOB_POWER_OFF:
4748 node.powered = False
4749 elif self.op.command == constants.OOB_POWER_STATUS:
4750 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4751 if powered != node.powered:
4752 logging.warning(("Recorded power state (%s) of node '%s' does not"
4753 " match actual power state (%s)"), node.powered,
4754 node.name, powered)
4755
4756
4757 if self.op.command in (constants.OOB_POWER_ON,
4758 constants.OOB_POWER_OFF):
4759 self.cfg.Update(node, feedback_fn)
4760
4761 node_entry.append((constants.RS_NORMAL, result.payload))
4762
4763 if (self.op.command == constants.OOB_POWER_ON and
4764 idx < len(self.nodes) - 1):
4765 time.sleep(self.op.power_delay)
4766
4767 return ret
4768
4770 """Checks if the payload is valid.
4771
4772 @param result: RPC result
4773 @raises errors.OpExecError: If payload is not valid
4774
4775 """
4776 errs = []
4777 if self.op.command == constants.OOB_HEALTH:
4778 if not isinstance(result.payload, list):
4779 errs.append("command 'health' is expected to return a list but got %s" %
4780 type(result.payload))
4781 else:
4782 for item, status in result.payload:
4783 if status not in constants.OOB_STATUSES:
4784 errs.append("health item '%s' has invalid status '%s'" %
4785 (item, status))
4786
4787 if self.op.command == constants.OOB_POWER_STATUS:
4788 if not isinstance(result.payload, dict):
4789 errs.append("power-status is expected to return a dict but got %s" %
4790 type(result.payload))
4791
4792 if self.op.command in [
4793 constants.OOB_POWER_ON,
4794 constants.OOB_POWER_OFF,
4795 constants.OOB_POWER_CYCLE,
4796 ]:
4797 if result.payload is not None:
4798 errs.append("%s is expected to not return payload but got '%s'" %
4799 (self.op.command, result.payload))
4800
4801 if errs:
4802 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4803 utils.CommaJoin(errs))
4804
4807 FIELDS = query.OS_FIELDS
4808
4810
4811
4812
4813 lu.needed_locks = {}
4814
4815
4816
4817
4818 if self.names:
4819 self.wanted = self.names
4820 else:
4821 self.wanted = locking.ALL_SET
4822
4823 self.do_locking = self.use_locking
4824
4827
4828 @staticmethod
4830 """Remaps a per-node return list into an a per-os per-node dictionary
4831
4832 @param rlist: a map with node names as keys and OS objects as values
4833
4834 @rtype: dict
4835 @return: a dictionary with osnames as keys and as value another
4836 map, with nodes as keys and tuples of (path, status, diagnose,
4837 variants, parameters, api_versions) as values, eg::
4838
4839 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4840 (/srv/..., False, "invalid api")],
4841 "node2": [(/srv/..., True, "", [], [])]}
4842 }
4843
4844 """
4845 all_os = {}
4846
4847
4848
4849 good_nodes = [node_name for node_name in rlist
4850 if not rlist[node_name].fail_msg]
4851 for node_name, nr in rlist.items():
4852 if nr.fail_msg or not nr.payload:
4853 continue
4854 for (name, path, status, diagnose, variants,
4855 params, api_versions) in nr.payload:
4856 if name not in all_os:
4857
4858
4859 all_os[name] = {}
4860 for nname in good_nodes:
4861 all_os[name][nname] = []
4862
4863 params = [tuple(v) for v in params]
4864 all_os[name][node_name].append((path, status, diagnose,
4865 variants, params, api_versions))
4866 return all_os
4867
4869 """Computes the list of nodes and their attributes.
4870
4871 """
4872
4873 assert not (compat.any(lu.glm.is_owned(level)
4874 for level in locking.LEVELS
4875 if level != locking.LEVEL_CLUSTER) or
4876 self.do_locking or self.use_locking)
4877
4878 valid_nodes = [node.name
4879 for node in lu.cfg.GetAllNodesInfo().values()
4880 if not node.offline and node.vm_capable]
4881 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4882 cluster = lu.cfg.GetClusterInfo()
4883
4884 data = {}
4885
4886 for (os_name, os_data) in pol.items():
4887 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4888 hidden=(os_name in cluster.hidden_os),
4889 blacklisted=(os_name in cluster.blacklisted_os))
4890
4891 variants = set()
4892 parameters = set()
4893 api_versions = set()
4894
4895 for idx, osl in enumerate(os_data.values()):
4896 info.valid = bool(info.valid and osl and osl[0][1])
4897 if not info.valid:
4898 break
4899
4900 (node_variants, node_params, node_api) = osl[0][3:6]
4901 if idx == 0:
4902
4903 variants.update(node_variants)
4904 parameters.update(node_params)
4905 api_versions.update(node_api)
4906 else:
4907
4908 variants.intersection_update(node_variants)
4909 parameters.intersection_update(node_params)
4910 api_versions.intersection_update(node_api)
4911
4912 info.variants = list(variants)
4913 info.parameters = list(parameters)
4914 info.api_versions = list(api_versions)
4915
4916 data[os_name] = info
4917
4918
4919 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4920 if name in data]
4921
4924 """Logical unit for OS diagnose/query.
4925
4926 """
4927 REQ_BGL = False
4928
4929 @staticmethod
4931 """Builds a filter for querying OSes.
4932
4933 """
4934 name_filter = qlang.MakeSimpleFilter("name", names)
4935
4936
4937
4938 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4939 for fname in ["hidden", "blacklisted"]
4940 if fname not in fields]
4941 if "valid" not in fields:
4942 status_filter.append([qlang.OP_TRUE, "valid"])
4943
4944 if status_filter:
4945 status_filter.insert(0, qlang.OP_AND)
4946 else:
4947 status_filter = None
4948
4949 if name_filter and status_filter:
4950 return [qlang.OP_AND, name_filter, status_filter]
4951 elif name_filter:
4952 return name_filter
4953 else:
4954 return status_filter
4955
4957 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4958 self.op.output_fields, False)
4959
4962
4963 - def Exec(self, feedback_fn):
4965
4968 """Logical unit for removing a node.
4969
4970 """
4971 HPATH = "node-remove"
4972 HTYPE = constants.HTYPE_NODE
4973
4975 """Build hooks env.
4976
4977 """
4978 return {
4979 "OP_TARGET": self.op.node_name,
4980 "NODE_NAME": self.op.node_name,
4981 }
4982
4984 """Build hooks nodes.
4985
4986 This doesn't run on the target node in the pre phase as a failed
4987 node would then be impossible to remove.
4988
4989 """
4990 all_nodes = self.cfg.GetNodeList()
4991 try:
4992 all_nodes.remove(self.op.node_name)
4993 except ValueError:
4994 pass
4995 return (all_nodes, all_nodes)
4996
4998 """Check prerequisites.
4999
5000 This checks:
5001 - the node exists in the configuration
5002 - it does not have primary or secondary instances
5003 - it's not the master
5004
5005 Any errors are signaled by raising errors.OpPrereqError.
5006
5007 """
5008 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5009 node = self.cfg.GetNodeInfo(self.op.node_name)
5010 assert node is not None
5011
5012 masternode = self.cfg.GetMasterNode()
5013 if node.name == masternode:
5014 raise errors.OpPrereqError("Node is the master node, failover to another"
5015 " node is required", errors.ECODE_INVAL)
5016
5017 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5018 if node.name in instance.all_nodes:
5019 raise errors.OpPrereqError("Instance %s is still running on the node,"
5020 " please remove first" % instance_name,
5021 errors.ECODE_INVAL)
5022 self.op.node_name = node.name
5023 self.node = node
5024
5025 - def Exec(self, feedback_fn):
5059
5062 FIELDS = query.NODE_FIELDS
5063
5065 lu.needed_locks = {}
5066 lu.share_locks = _ShareAll()
5067
5068 if self.names:
5069 self.wanted = _GetWantedNodes(lu, self.names)
5070 else:
5071 self.wanted = locking.ALL_SET
5072
5073 self.do_locking = (self.use_locking and
5074 query.NQ_LIVE in self.requested_data)
5075
5076 if self.do_locking:
5077
5078 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5079
5082
5084 """Computes the list of nodes and their attributes.
5085
5086 """
5087 all_info = lu.cfg.GetAllNodesInfo()
5088
5089 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5090
5091
5092 if query.NQ_LIVE in self.requested_data:
5093
5094 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5095
5096 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5097 [lu.cfg.GetHypervisorType()])
5098 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5099 for (name, nresult) in node_data.items()
5100 if not nresult.fail_msg and nresult.payload)
5101 else:
5102 live_data = None
5103
5104 if query.NQ_INST in self.requested_data:
5105 node_to_primary = dict([(name, set()) for name in nodenames])
5106 node_to_secondary = dict([(name, set()) for name in nodenames])
5107
5108 inst_data = lu.cfg.GetAllInstancesInfo()
5109
5110 for inst in inst_data.values():
5111 if inst.primary_node in node_to_primary:
5112 node_to_primary[inst.primary_node].add(inst.name)
5113 for secnode in inst.secondary_nodes:
5114 if secnode in node_to_secondary:
5115 node_to_secondary[secnode].add(inst.name)
5116 else:
5117 node_to_primary = None
5118 node_to_secondary = None
5119
5120 if query.NQ_OOB in self.requested_data:
5121 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5122 for name, node in all_info.iteritems())
5123 else:
5124 oob_support = None
5125
5126 if query.NQ_GROUP in self.requested_data:
5127 groups = lu.cfg.GetAllNodeGroupsInfo()
5128 else:
5129 groups = {}
5130
5131 return query.NodeQueryData([all_info[name] for name in nodenames],
5132 live_data, lu.cfg.GetMasterNode(),
5133 node_to_primary, node_to_secondary, groups,
5134 oob_support, lu.cfg.GetClusterInfo())
5135
5138 """Logical unit for querying nodes.
5139
5140 """
5141
5142 REQ_BGL = False
5143
5147
5150
5153
5154 - def Exec(self, feedback_fn):
5156
5159 """Logical unit for getting volumes on node(s).
5160
5161 """
5162 REQ_BGL = False
5163 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5164 _FIELDS_STATIC = utils.FieldSet("node")
5165
5170
5180
5181 - def Exec(self, feedback_fn):
5182 """Computes the list of nodes and their attributes.
5183
5184 """
5185 nodenames = self.owned_locks(locking.LEVEL_NODE)
5186 volumes = self.rpc.call_node_volumes(nodenames)
5187
5188 ilist = self.cfg.GetAllInstancesInfo()
5189 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5190
5191 output = []
5192 for node in nodenames:
5193 nresult = volumes[node]
5194 if nresult.offline:
5195 continue
5196 msg = nresult.fail_msg
5197 if msg:
5198 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5199 continue
5200
5201 node_vols = sorted(nresult.payload,
5202 key=operator.itemgetter("dev"))
5203
5204 for vol in node_vols:
5205 node_output = []
5206 for field in self.op.output_fields:
5207 if field == "node":
5208 val = node
5209 elif field == "phys":
5210 val = vol["dev"]
5211 elif field == "vg":
5212 val = vol["vg"]
5213 elif field == "name":
5214 val = vol["name"]
5215 elif field == "size":
5216 val = int(float(vol["size"]))
5217 elif field == "instance":
5218 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5219 else:
5220 raise errors.ParameterError(field)
5221 node_output.append(str(val))
5222
5223 output.append(node_output)
5224
5225 return output
5226
5229 """Logical unit for getting information on storage units on node(s).
5230
5231 """
5232 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5233 REQ_BGL = False
5234
5239
5249
5250 - def Exec(self, feedback_fn):
5251 """Computes the list of nodes and their attributes.
5252
5253 """
5254 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5255
5256
5257 if constants.SF_NAME in self.op.output_fields:
5258 fields = self.op.output_fields[:]
5259 else:
5260 fields = [constants.SF_NAME] + self.op.output_fields
5261
5262
5263 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5264 while extra in fields:
5265 fields.remove(extra)
5266
5267 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5268 name_idx = field_idx[constants.SF_NAME]
5269
5270 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5271 data = self.rpc.call_storage_list(self.nodes,
5272 self.op.storage_type, st_args,
5273 self.op.name, fields)
5274
5275 result = []
5276
5277 for node in utils.NiceSort(self.nodes):
5278 nresult = data[node]
5279 if nresult.offline:
5280 continue
5281
5282 msg = nresult.fail_msg
5283 if msg:
5284 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5285 continue
5286
5287 rows = dict([(row[name_idx], row) for row in nresult.payload])
5288
5289 for name in utils.NiceSort(rows.keys()):
5290 row = rows[name]
5291
5292 out = []
5293
5294 for field in self.op.output_fields:
5295 if field == constants.SF_NODE:
5296 val = node
5297 elif field == constants.SF_TYPE:
5298 val = self.op.storage_type
5299 elif field in field_idx:
5300 val = row[field_idx[field]]
5301 else:
5302 raise errors.ParameterError(field)
5303
5304 out.append(val)
5305
5306 result.append(out)
5307
5308 return result
5309
5312 FIELDS = query.INSTANCE_FIELDS
5313
5333
5347
5348 @staticmethod
5356
5358 """Computes the list of instances and their attributes.
5359
5360 """
5361 if self.do_grouplocks:
5362 self._CheckGroupLocks(lu)
5363
5364 cluster = lu.cfg.GetClusterInfo()
5365 all_info = lu.cfg.GetAllInstancesInfo()
5366
5367 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5368
5369 instance_list = [all_info[name] for name in instance_names]
5370 nodes = frozenset(itertools.chain(*(inst.all_nodes
5371 for inst in instance_list)))
5372 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5373 bad_nodes = []
5374 offline_nodes = []
5375 wrongnode_inst = set()
5376
5377
5378 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5379 live_data = {}
5380 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5381 for name in nodes:
5382 result = node_data[name]
5383 if result.offline:
5384
5385 assert result.fail_msg
5386 offline_nodes.append(name)
5387 if result.fail_msg:
5388 bad_nodes.append(name)
5389 elif result.payload:
5390 for inst in result.payload:
5391 if inst in all_info:
5392 if all_info[inst].primary_node == name:
5393 live_data.update(result.payload)
5394 else:
5395 wrongnode_inst.add(inst)
5396 else:
5397
5398
5399 logging.warning("Orphan instance '%s' found on node %s",
5400 inst, name)
5401
5402 else:
5403 live_data = {}
5404
5405 if query.IQ_DISKUSAGE in self.requested_data:
5406 disk_usage = dict((inst.name,
5407 _ComputeDiskSize(inst.disk_template,
5408 [{constants.IDISK_SIZE: disk.size}
5409 for disk in inst.disks]))
5410 for inst in instance_list)
5411 else:
5412 disk_usage = None
5413
5414 if query.IQ_CONSOLE in self.requested_data:
5415 consinfo = {}
5416 for inst in instance_list:
5417 if inst.name in live_data:
5418
5419 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5420 else:
5421 consinfo[inst.name] = None
5422 assert set(consinfo.keys()) == set(instance_names)
5423 else:
5424 consinfo = None
5425
5426 if query.IQ_NODES in self.requested_data:
5427 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5428 instance_list)))
5429 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5430 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5431 for uuid in set(map(operator.attrgetter("group"),
5432 nodes.values())))
5433 else:
5434 nodes = None
5435 groups = None
5436
5437 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5438 disk_usage, offline_nodes, bad_nodes,
5439 live_data, wrongnode_inst, consinfo,
5440 nodes, groups)
5441
5444 """Query for resources/items of a certain kind.
5445
5446 """
5447
5448 REQ_BGL = False
5449
5451 qcls = _GetQueryImplementation(self.op.what)
5452
5453 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5454
5457
5460
5461 - def Exec(self, feedback_fn):
5463
5466 """Query for resources/items of a certain kind.
5467
5468 """
5469
5470 REQ_BGL = False
5471
5474
5476 self.needed_locks = {}
5477
5478 - def Exec(self, feedback_fn):
5480
5483 """Logical unit for modifying a storage volume on a node.
5484
5485 """
5486 REQ_BGL = False
5487
5506
5511
5512 - def Exec(self, feedback_fn):
5513 """Computes the list of nodes and their attributes.
5514
5515 """
5516 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5517 result = self.rpc.call_storage_modify(self.op.node_name,
5518 self.op.storage_type, st_args,
5519 self.op.name, self.op.changes)
5520 result.Raise("Failed to modify storage unit '%s' on %s" %
5521 (self.op.name, self.op.node_name))
5522
5525 """Logical unit for adding node to the cluster.
5526
5527 """
5528 HPATH = "node-add"
5529 HTYPE = constants.HTYPE_NODE
5530 _NFLAGS = ["master_capable", "vm_capable"]
5531
5546
5548 """Build hooks env.
5549
5550 This will run on all nodes before, and on all nodes + the new node after.
5551
5552 """
5553 return {
5554 "OP_TARGET": self.op.node_name,
5555 "NODE_NAME": self.op.node_name,
5556 "NODE_PIP": self.op.primary_ip,
5557 "NODE_SIP": self.op.secondary_ip,
5558 "MASTER_CAPABLE": str(self.op.master_capable),
5559 "VM_CAPABLE": str(self.op.vm_capable),
5560 }
5561
5563 """Build hooks nodes.
5564
5565 """
5566
5567 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5568 post_nodes = pre_nodes + [self.op.node_name, ]
5569
5570 return (pre_nodes, post_nodes)
5571
5573 """Check prerequisites.
5574
5575 This checks:
5576 - the new node is not already in the config
5577 - it is resolvable
5578 - its parameters (single/dual homed) matches the cluster
5579
5580 Any errors are signaled by raising errors.OpPrereqError.
5581
5582 """
5583 cfg = self.cfg
5584 hostname = self.hostname
5585 node = hostname.name
5586 primary_ip = self.op.primary_ip = hostname.ip
5587 if self.op.secondary_ip is None:
5588 if self.primary_ip_family == netutils.IP6Address.family:
5589 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5590 " IPv4 address must be given as secondary",
5591 errors.ECODE_INVAL)
5592 self.op.secondary_ip = primary_ip
5593
5594 secondary_ip = self.op.secondary_ip
5595 if not netutils.IP4Address.IsValid(secondary_ip):
5596 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5597 " address" % secondary_ip, errors.ECODE_INVAL)
5598
5599 node_list = cfg.GetNodeList()
5600 if not self.op.readd and node in node_list:
5601 raise errors.OpPrereqError("Node %s is already in the configuration" %
5602 node, errors.ECODE_EXISTS)
5603 elif self.op.readd and node not in node_list:
5604 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5605 errors.ECODE_NOENT)
5606
5607 self.changed_primary_ip = False
5608
5609 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5610 if self.op.readd and node == existing_node_name:
5611 if existing_node.secondary_ip != secondary_ip:
5612 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5613 " address configuration as before",
5614 errors.ECODE_INVAL)
5615 if existing_node.primary_ip != primary_ip:
5616 self.changed_primary_ip = True
5617
5618 continue
5619
5620 if (existing_node.primary_ip == primary_ip or
5621 existing_node.secondary_ip == primary_ip or
5622 existing_node.primary_ip == secondary_ip or
5623 existing_node.secondary_ip == secondary_ip):
5624 raise errors.OpPrereqError("New node ip address(es) conflict with"
5625 " existing node %s" % existing_node.name,
5626 errors.ECODE_NOTUNIQUE)
5627
5628
5629
5630 if self.op.readd:
5631 old_node = self.cfg.GetNodeInfo(node)
5632 assert old_node is not None, "Can't retrieve locked node %s" % node
5633 for attr in self._NFLAGS:
5634 if getattr(self.op, attr) is None:
5635 setattr(self.op, attr, getattr(old_node, attr))
5636 else:
5637 for attr in self._NFLAGS:
5638 if getattr(self.op, attr) is None:
5639 setattr(self.op, attr, True)
5640
5641 if self.op.readd and not self.op.vm_capable:
5642 pri, sec = cfg.GetNodeInstances(node)
5643 if pri or sec:
5644 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5645 " flag set to false, but it already holds"
5646 " instances" % node,
5647 errors.ECODE_STATE)
5648
5649
5650
5651 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5652 master_singlehomed = myself.secondary_ip == myself.primary_ip
5653 newbie_singlehomed = secondary_ip == primary_ip
5654 if master_singlehomed != newbie_singlehomed:
5655 if master_singlehomed:
5656 raise errors.OpPrereqError("The master has no secondary ip but the"
5657 " new node has one",
5658 errors.ECODE_INVAL)
5659 else:
5660 raise errors.OpPrereqError("The master has a secondary ip but the"
5661 " new node doesn't have one",
5662 errors.ECODE_INVAL)
5663
5664
5665 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5666 raise errors.OpPrereqError("Node not reachable by ping",
5667 errors.ECODE_ENVIRON)
5668
5669 if not newbie_singlehomed:
5670
5671 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5672 source=myself.secondary_ip):
5673 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5674 " based ping to node daemon port",
5675 errors.ECODE_ENVIRON)
5676
5677 if self.op.readd:
5678 exceptions = [node]
5679 else:
5680 exceptions = []
5681
5682 if self.op.master_capable:
5683 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5684 else:
5685 self.master_candidate = False
5686
5687 if self.op.readd:
5688 self.new_node = old_node
5689 else:
5690 node_group = cfg.LookupNodeGroup(self.op.group)
5691 self.new_node = objects.Node(name=node,
5692 primary_ip=primary_ip,
5693 secondary_ip=secondary_ip,
5694 master_candidate=self.master_candidate,
5695 offline=False, drained=False,
5696 group=node_group)
5697
5698 if self.op.ndparams:
5699 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5700
5701 if self.op.hv_state:
5702 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5703
5704 if self.op.disk_state:
5705 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5706
5707
5708
5709 result = rpc.DnsOnlyRunner().call_version([node])[node]
5710 result.Raise("Can't get version information from node %s" % node)
5711 if constants.PROTOCOL_VERSION == result.payload:
5712 logging.info("Communication to node %s fine, sw version %s match",
5713 node, result.payload)
5714 else:
5715 raise errors.OpPrereqError("Version mismatch master version %s,"
5716 " node version %s" %
5717 (constants.PROTOCOL_VERSION, result.payload),
5718 errors.ECODE_ENVIRON)
5719
5720 - def Exec(self, feedback_fn):
5721 """Adds the new node to the cluster.
5722
5723 """
5724 new_node = self.new_node
5725 node = new_node.name
5726
5727 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5728 "Not owning BGL"
5729
5730
5731 new_node.powered = True
5732
5733
5734
5735
5736
5737 if self.op.readd:
5738 new_node.drained = new_node.offline = False
5739 self.LogInfo("Readding a node, the offline/drained flags were reset")
5740
5741 new_node.master_candidate = self.master_candidate
5742 if self.changed_primary_ip:
5743 new_node.primary_ip = self.op.primary_ip
5744
5745
5746 for attr in self._NFLAGS:
5747 setattr(new_node, attr, getattr(self.op, attr))
5748
5749
5750 if new_node.master_candidate:
5751 self.LogInfo("Node will be a master candidate")
5752
5753 if self.op.ndparams:
5754 new_node.ndparams = self.op.ndparams
5755 else:
5756 new_node.ndparams = {}
5757
5758 if self.op.hv_state:
5759 new_node.hv_state_static = self.new_hv_state
5760
5761 if self.op.disk_state:
5762 new_node.disk_state_static = self.new_disk_state
5763
5764
5765 if self.cfg.GetClusterInfo().modify_etc_hosts:
5766 master_node = self.cfg.GetMasterNode()
5767 result = self.rpc.call_etc_hosts_modify(master_node,
5768 constants.ETC_HOSTS_ADD,
5769 self.hostname.name,
5770 self.hostname.ip)
5771 result.Raise("Can't update hosts file with new host data")
5772
5773 if new_node.secondary_ip != new_node.primary_ip:
5774 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5775 False)
5776
5777 node_verify_list = [self.cfg.GetMasterNode()]
5778 node_verify_param = {
5779 constants.NV_NODELIST: ([node], {}),
5780
5781 }
5782
5783 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5784 self.cfg.GetClusterName())
5785 for verifier in node_verify_list:
5786 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5787 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5788 if nl_payload:
5789 for failed in nl_payload:
5790 feedback_fn("ssh/hostname verification failed"
5791 " (checking from %s): %s" %
5792 (verifier, nl_payload[failed]))
5793 raise errors.OpExecError("ssh/hostname verification failed")
5794
5795 if self.op.readd:
5796 _RedistributeAncillaryFiles(self)
5797 self.context.ReaddNode(new_node)
5798
5799 self.cfg.Update(new_node, feedback_fn)
5800
5801 if not new_node.master_candidate:
5802 result = self.rpc.call_node_demote_from_mc(new_node.name)
5803 msg = result.fail_msg
5804 if msg:
5805 self.LogWarning("Node failed to demote itself from master"
5806 " candidate status: %s" % msg)
5807 else:
5808 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5809 additional_vm=self.op.vm_capable)
5810 self.context.AddNode(new_node, self.proc.GetECId())
5811
5814 """Modifies the parameters of a node.
5815
5816 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5817 to the node role (as _ROLE_*)
5818 @cvar _R2F: a dictionary from node role to tuples of flags
5819 @cvar _FLAGS: a list of attribute names corresponding to the flags
5820
5821 """
5822 HPATH = "node-modify"
5823 HTYPE = constants.HTYPE_NODE
5824 REQ_BGL = False
5825 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5826 _F2R = {
5827 (True, False, False): _ROLE_CANDIDATE,
5828 (False, True, False): _ROLE_DRAINED,
5829 (False, False, True): _ROLE_OFFLINE,
5830 (False, False, False): _ROLE_REGULAR,
5831 }
5832 _R2F = dict((v, k) for k, v in _F2R.items())
5833 _FLAGS = ["master_candidate", "drained", "offline"]
5834
5836 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5837 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5838 self.op.master_capable, self.op.vm_capable,
5839 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5840 self.op.disk_state]
5841 if all_mods.count(None) == len(all_mods):
5842 raise errors.OpPrereqError("Please pass at least one modification",
5843 errors.ECODE_INVAL)
5844 if all_mods.count(True) > 1:
5845 raise errors.OpPrereqError("Can't set the node into more than one"
5846 " state at the same time",
5847 errors.ECODE_INVAL)
5848
5849
5850 self.might_demote = (self.op.master_candidate == False or
5851 self.op.offline == True or
5852 self.op.drained == True or
5853 self.op.master_capable == False)
5854
5855 if self.op.secondary_ip:
5856 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5857 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5858 " address" % self.op.secondary_ip,
5859 errors.ECODE_INVAL)
5860
5861 self.lock_all = self.op.auto_promote and self.might_demote
5862 self.lock_instances = self.op.secondary_ip is not None
5863
5870
5890
5892 """Build hooks env.
5893
5894 This runs on the master node.
5895
5896 """
5897 return {
5898 "OP_TARGET": self.op.node_name,
5899 "MASTER_CANDIDATE": str(self.op.master_candidate),
5900 "OFFLINE": str(self.op.offline),
5901 "DRAINED": str(self.op.drained),
5902 "MASTER_CAPABLE": str(self.op.master_capable),
5903 "VM_CAPABLE": str(self.op.vm_capable),
5904 }
5905
5907 """Build hooks nodes.
5908
5909 """
5910 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5911 return (nl, nl)
5912
5914 """Check prerequisites.
5915
5916 This only checks the instance list against the existing names.
5917
5918 """
5919 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5920
5921 if self.lock_instances:
5922 affected_instances = \
5923 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5924
5925
5926 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5927 wanted_instances = frozenset(affected_instances.keys())
5928 if wanted_instances - owned_instances:
5929 raise errors.OpPrereqError("Instances affected by changing node %s's"
5930 " secondary IP address have changed since"
5931 " locks were acquired, wanted '%s', have"
5932 " '%s'; retry the operation" %
5933 (self.op.node_name,
5934 utils.CommaJoin(wanted_instances),
5935 utils.CommaJoin(owned_instances)),
5936 errors.ECODE_STATE)
5937 else:
5938 affected_instances = None
5939
5940 if (self.op.master_candidate is not None or
5941 self.op.drained is not None or
5942 self.op.offline is not None):
5943
5944 if self.op.node_name == self.cfg.GetMasterNode():
5945 raise errors.OpPrereqError("The master role can be changed"
5946 " only via master-failover",
5947 errors.ECODE_INVAL)
5948
5949 if self.op.master_candidate and not node.master_capable:
5950 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5951 " it a master candidate" % node.name,
5952 errors.ECODE_STATE)
5953
5954 if self.op.vm_capable == False:
5955 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5956 if ipri or isec:
5957 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5958 " the vm_capable flag" % node.name,
5959 errors.ECODE_STATE)
5960
5961 if node.master_candidate and self.might_demote and not self.lock_all:
5962 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5963
5964
5965 (mc_remaining, mc_should, _) = \
5966 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5967 if mc_remaining < mc_should:
5968 raise errors.OpPrereqError("Not enough master candidates, please"
5969 " pass auto promote option to allow"
5970 " promotion (--auto-promote or RAPI"
5971 " auto_promote=True)", errors.ECODE_STATE)
5972
5973 self.old_flags = old_flags = (node.master_candidate,
5974 node.drained, node.offline)
5975 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5976 self.old_role = old_role = self._F2R[old_flags]
5977
5978
5979 for attr in self._FLAGS:
5980 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5981 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5982 setattr(self.op, attr, None)
5983
5984
5985
5986
5987
5988 if _SupportsOob(self.cfg, node):
5989 if self.op.offline is False and not (node.powered or
5990 self.op.powered == True):
5991 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5992 " offline status can be reset") %
5993 self.op.node_name)
5994 elif self.op.powered is not None:
5995 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5996 " as it does not support out-of-band"
5997 " handling") % self.op.node_name)
5998
5999
6000 if (self.op.drained == False or self.op.offline == False or
6001 (self.op.master_capable and not node.master_capable)):
6002 if _DecideSelfPromotion(self):
6003 self.op.master_candidate = True
6004 self.LogInfo("Auto-promoting node to master candidate")
6005
6006
6007 if self.op.master_capable == False and node.master_candidate:
6008 self.LogInfo("Demoting from master candidate")
6009 self.op.master_candidate = False
6010
6011
6012 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6013 if self.op.master_candidate:
6014 new_role = self._ROLE_CANDIDATE
6015 elif self.op.drained:
6016 new_role = self._ROLE_DRAINED
6017 elif self.op.offline:
6018 new_role = self._ROLE_OFFLINE
6019 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6020
6021
6022 new_role = self._ROLE_REGULAR
6023 else:
6024 new_role = old_role
6025
6026 self.new_role = new_role
6027
6028 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6029
6030 result = self.rpc.call_version([node.name])[node.name]
6031 if result.fail_msg:
6032 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6033 " to report its version: %s" %
6034 (node.name, result.fail_msg),
6035 errors.ECODE_STATE)
6036 else:
6037 self.LogWarning("Transitioning node from offline to online state"
6038 " without using re-add. Please make sure the node"
6039 " is healthy!")
6040
6041 if self.op.secondary_ip:
6042
6043 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6044 master_singlehomed = master.secondary_ip == master.primary_ip
6045 if master_singlehomed and self.op.secondary_ip:
6046 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6047 " homed cluster", errors.ECODE_INVAL)
6048
6049 assert not (frozenset(affected_instances) -
6050 self.owned_locks(locking.LEVEL_INSTANCE))
6051
6052 if node.offline:
6053 if affected_instances:
6054 raise errors.OpPrereqError("Cannot change secondary IP address:"
6055 " offline node has instances (%s)"
6056 " configured to use it" %
6057 utils.CommaJoin(affected_instances.keys()))
6058 else:
6059
6060
6061 for instance in affected_instances.values():
6062 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6063 msg="cannot change secondary ip")
6064
6065 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6066 if master.name != node.name:
6067
6068 if not netutils.TcpPing(self.op.secondary_ip,
6069 constants.DEFAULT_NODED_PORT,
6070 source=master.secondary_ip):
6071 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6072 " based ping to node daemon port",
6073 errors.ECODE_ENVIRON)
6074
6075 if self.op.ndparams:
6076 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6077 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6078 self.new_ndparams = new_ndparams
6079
6080 if self.op.hv_state:
6081 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6082 self.node.hv_state_static)
6083
6084 if self.op.disk_state:
6085 self.new_disk_state = \
6086 _MergeAndVerifyDiskState(self.op.disk_state,
6087 self.node.disk_state_static)
6088
6089 - def Exec(self, feedback_fn):
6090 """Modifies a node.
6091
6092 """
6093 node = self.node
6094 old_role = self.old_role
6095 new_role = self.new_role
6096
6097 result = []
6098
6099 if self.op.ndparams:
6100 node.ndparams = self.new_ndparams
6101
6102 if self.op.powered is not None:
6103 node.powered = self.op.powered
6104
6105 if self.op.hv_state:
6106 node.hv_state_static = self.new_hv_state
6107
6108 if self.op.disk_state:
6109 node.disk_state_static = self.new_disk_state
6110
6111 for attr in ["master_capable", "vm_capable"]:
6112 val = getattr(self.op, attr)
6113 if val is not None:
6114 setattr(node, attr, val)
6115 result.append((attr, str(val)))
6116
6117 if new_role != old_role:
6118
6119 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6120 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6121 if msg:
6122 self.LogWarning("Node failed to demote itself: %s", msg)
6123
6124 new_flags = self._R2F[new_role]
6125 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6126 if of != nf:
6127 result.append((desc, str(nf)))
6128 (node.master_candidate, node.drained, node.offline) = new_flags
6129
6130
6131 if self.lock_all:
6132 _AdjustCandidatePool(self, [node.name])
6133
6134 if self.op.secondary_ip:
6135 node.secondary_ip = self.op.secondary_ip
6136 result.append(("secondary_ip", self.op.secondary_ip))
6137
6138
6139 self.cfg.Update(node, feedback_fn)
6140
6141
6142
6143 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6144 self.context.ReaddNode(node)
6145
6146 return result
6147
6150 """Powercycles a node.
6151
6152 """
6153 REQ_BGL = False
6154
6161
6163 """Locking for PowercycleNode.
6164
6165 This is a last-resort option and shouldn't block on other
6166 jobs. Therefore, we grab no locks.
6167
6168 """
6169 self.needed_locks = {}
6170
6171 - def Exec(self, feedback_fn):
6179
6182 """Query cluster configuration.
6183
6184 """
6185 REQ_BGL = False
6186
6188 self.needed_locks = {}
6189
6190 - def Exec(self, feedback_fn):
6191 """Return cluster config.
6192
6193 """
6194 cluster = self.cfg.GetClusterInfo()
6195 os_hvp = {}
6196
6197
6198 for os_name, hv_dict in cluster.os_hvp.items():
6199 os_hvp[os_name] = {}
6200 for hv_name, hv_params in hv_dict.items():
6201 if hv_name in cluster.enabled_hypervisors:
6202 os_hvp[os_name][hv_name] = hv_params
6203
6204
6205 primary_ip_version = constants.IP4_VERSION
6206 if cluster.primary_ip_family == netutils.IP6Address.family:
6207 primary_ip_version = constants.IP6_VERSION
6208
6209 result = {
6210 "software_version": constants.RELEASE_VERSION,
6211 "protocol_version": constants.PROTOCOL_VERSION,
6212 "config_version": constants.CONFIG_VERSION,
6213 "os_api_version": max(constants.OS_API_VERSIONS),
6214 "export_version": constants.EXPORT_VERSION,
6215 "architecture": runtime.GetArchInfo(),
6216 "name": cluster.cluster_name,
6217 "master": cluster.master_node,
6218 "default_hypervisor": cluster.primary_hypervisor,
6219 "enabled_hypervisors": cluster.enabled_hypervisors,
6220 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6221 for hypervisor_name in cluster.enabled_hypervisors]),
6222 "os_hvp": os_hvp,
6223 "beparams": cluster.beparams,
6224 "osparams": cluster.osparams,
6225 "ipolicy": cluster.ipolicy,
6226 "nicparams": cluster.nicparams,
6227 "ndparams": cluster.ndparams,
6228 "diskparams": cluster.diskparams,
6229 "candidate_pool_size": cluster.candidate_pool_size,
6230 "master_netdev": cluster.master_netdev,
6231 "master_netmask": cluster.master_netmask,
6232 "use_external_mip_script": cluster.use_external_mip_script,
6233 "volume_group_name": cluster.volume_group_name,
6234 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6235 "file_storage_dir": cluster.file_storage_dir,
6236 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6237 "maintain_node_health": cluster.maintain_node_health,
6238 "ctime": cluster.ctime,
6239 "mtime": cluster.mtime,
6240 "uuid": cluster.uuid,
6241 "tags": list(cluster.GetTags()),
6242 "uid_pool": cluster.uid_pool,
6243 "default_iallocator": cluster.default_iallocator,
6244 "reserved_lvs": cluster.reserved_lvs,
6245 "primary_ip_version": primary_ip_version,
6246 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6247 "hidden_os": cluster.hidden_os,
6248 "blacklisted_os": cluster.blacklisted_os,
6249 }
6250
6251 return result
6252
6255 """Return configuration values.
6256
6257 """
6258 REQ_BGL = False
6259
6262
6265
6268
6269 - def Exec(self, feedback_fn):
6270 result = self.cq.OldStyleQuery(self)
6271
6272 assert len(result) == 1
6273
6274 return result[0]
6275
6323
6326 """Bring up an instance's disks.
6327
6328 """
6329 REQ_BGL = False
6330
6335
6339
6341 """Check prerequisites.
6342
6343 This checks that the instance is in the cluster.
6344
6345 """
6346 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6347 assert self.instance is not None, \
6348 "Cannot retrieve locked instance %s" % self.op.instance_name
6349 _CheckNodeOnline(self, self.instance.primary_node)
6350
6351 - def Exec(self, feedback_fn):
6352 """Activate the disks.
6353
6354 """
6355 disks_ok, disks_info = \
6356 _AssembleInstanceDisks(self, self.instance,
6357 ignore_size=self.op.ignore_size)
6358 if not disks_ok:
6359 raise errors.OpExecError("Cannot activate block devices")
6360
6361 return disks_info
6362
6366 """Prepare the block devices for an instance.
6367
6368 This sets up the block devices on all nodes.
6369
6370 @type lu: L{LogicalUnit}
6371 @param lu: the logical unit on whose behalf we execute
6372 @type instance: L{objects.Instance}
6373 @param instance: the instance for whose disks we assemble
6374 @type disks: list of L{objects.Disk} or None
6375 @param disks: which disks to assemble (or all, if None)
6376 @type ignore_secondaries: boolean
6377 @param ignore_secondaries: if true, errors on secondary nodes
6378 won't result in an error return from the function
6379 @type ignore_size: boolean
6380 @param ignore_size: if true, the current known size of the disk
6381 will not be used during the disk activation, useful for cases
6382 when the size is wrong
6383 @return: False if the operation failed, otherwise a list of
6384 (host, instance_visible_name, node_visible_name)
6385 with the mapping from node devices to instance devices
6386
6387 """
6388 device_info = []
6389 disks_ok = True
6390 iname = instance.name
6391 disks = _ExpandCheckDisks(instance, disks)
6392
6393
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403 for idx, inst_disk in enumerate(disks):
6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 if ignore_size:
6406 node_disk = node_disk.Copy()
6407 node_disk.UnsetSize()
6408 lu.cfg.SetDiskID(node_disk, node)
6409 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6410 False, idx)
6411 msg = result.fail_msg
6412 if msg:
6413 is_offline_secondary = (node in instance.secondary_nodes and
6414 result.offline)
6415 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416 " (is_primary=False, pass=1): %s",
6417 inst_disk.iv_name, node, msg)
6418 if not (ignore_secondaries or is_offline_secondary):
6419 disks_ok = False
6420
6421
6422
6423
6424 for idx, inst_disk in enumerate(disks):
6425 dev_path = None
6426
6427 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6428 if node != instance.primary_node:
6429 continue
6430 if ignore_size:
6431 node_disk = node_disk.Copy()
6432 node_disk.UnsetSize()
6433 lu.cfg.SetDiskID(node_disk, node)
6434 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6435 True, idx)
6436 msg = result.fail_msg
6437 if msg:
6438 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6439 " (is_primary=True, pass=2): %s",
6440 inst_disk.iv_name, node, msg)
6441 disks_ok = False
6442 else:
6443 dev_path = result.payload
6444
6445 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6446
6447
6448
6449
6450 for disk in disks:
6451 lu.cfg.SetDiskID(disk, instance.primary_node)
6452
6453 return disks_ok, device_info
6454
6457 """Start the disks of an instance.
6458
6459 """
6460 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6461 ignore_secondaries=force)
6462 if not disks_ok:
6463 _ShutdownInstanceDisks(lu, instance)
6464 if force is not None and not force:
6465 lu.proc.LogWarning("", hint="If the message above refers to a"
6466 " secondary node,"
6467 " you can retry the operation using '--force'.")
6468 raise errors.OpExecError("Disk consistency error")
6469
6472 """Shutdown an instance's disks.
6473
6474 """
6475 REQ_BGL = False
6476
6481
6485
6487 """Check prerequisites.
6488
6489 This checks that the instance is in the cluster.
6490
6491 """
6492 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6493 assert self.instance is not None, \
6494 "Cannot retrieve locked instance %s" % self.op.instance_name
6495
6496 - def Exec(self, feedback_fn):
6505
6516
6519 """Return the instance disks selected by the disks list
6520
6521 @type disks: list of L{objects.Disk} or None
6522 @param disks: selected disks
6523 @rtype: list of L{objects.Disk}
6524 @return: selected instance disks to act on
6525
6526 """
6527 if disks is None:
6528 return instance.disks
6529 else:
6530 if not set(disks).issubset(instance.disks):
6531 raise errors.ProgrammerError("Can only act on disks belonging to the"
6532 " target instance")
6533 return disks
6534
6537 """Shutdown block devices of an instance.
6538
6539 This does the shutdown on all nodes of the instance.
6540
6541 If the ignore_primary is false, errors on the primary node are
6542 ignored.
6543
6544 """
6545 all_result = True
6546 disks = _ExpandCheckDisks(instance, disks)
6547
6548 for disk in disks:
6549 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6550 lu.cfg.SetDiskID(top_disk, node)
6551 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6552 msg = result.fail_msg
6553 if msg:
6554 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6555 disk.iv_name, node, msg)
6556 if ((node == instance.primary_node and not ignore_primary) or
6557 (node != instance.primary_node and not result.offline)):
6558 all_result = False
6559 return all_result
6560
6563 """Checks if a node has enough free memory.
6564
6565 This function check if a given node has the needed amount of free
6566 memory. In case the node has less memory or we cannot get the
6567 information from the node, this function raise an OpPrereqError
6568 exception.
6569
6570 @type lu: C{LogicalUnit}
6571 @param lu: a logical unit from which we get configuration data
6572 @type node: C{str}
6573 @param node: the node to check
6574 @type reason: C{str}
6575 @param reason: string to use in the error message
6576 @type requested: C{int}
6577 @param requested: the amount of memory in MiB to check for
6578 @type hypervisor_name: C{str}
6579 @param hypervisor_name: the hypervisor to ask for memory stats
6580 @rtype: integer
6581 @return: node current free memory
6582 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6583 we cannot check the node
6584
6585 """
6586 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6587 nodeinfo[node].Raise("Can't get data from node %s" % node,
6588 prereq=True, ecode=errors.ECODE_ENVIRON)
6589 (_, _, (hv_info, )) = nodeinfo[node].payload
6590
6591 free_mem = hv_info.get("memory_free", None)
6592 if not isinstance(free_mem, int):
6593 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6594 " was '%s'" % (node, free_mem),
6595 errors.ECODE_ENVIRON)
6596 if requested > free_mem:
6597 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6598 " needed %s MiB, available %s MiB" %
6599 (node, reason, requested, free_mem),
6600 errors.ECODE_NORES)
6601 return free_mem
6602
6605 """Checks if nodes have enough free disk space in the all VGs.
6606
6607 This function check if all given nodes have the needed amount of
6608 free disk. In case any node has less disk or we cannot get the
6609 information from the node, this function raise an OpPrereqError
6610 exception.
6611
6612 @type lu: C{LogicalUnit}
6613 @param lu: a logical unit from which we get configuration data
6614 @type nodenames: C{list}
6615 @param nodenames: the list of node names to check
6616 @type req_sizes: C{dict}
6617 @param req_sizes: the hash of vg and corresponding amount of disk in
6618 MiB to check for
6619 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6620 or we cannot check the node
6621
6622 """
6623 for vg, req_size in req_sizes.items():
6624 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6625
6628 """Checks if nodes have enough free disk space in the specified VG.
6629
6630 This function check if all given nodes have the needed amount of
6631 free disk. In case any node has less disk or we cannot get the
6632 information from the node, this function raise an OpPrereqError
6633 exception.
6634
6635 @type lu: C{LogicalUnit}
6636 @param lu: a logical unit from which we get configuration data
6637 @type nodenames: C{list}
6638 @param nodenames: the list of node names to check
6639 @type vg: C{str}
6640 @param vg: the volume group to check
6641 @type requested: C{int}
6642 @param requested: the amount of disk in MiB to check for
6643 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6644 or we cannot check the node
6645
6646 """
6647 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6648 for node in nodenames:
6649 info = nodeinfo[node]
6650 info.Raise("Cannot get current information from node %s" % node,
6651 prereq=True, ecode=errors.ECODE_ENVIRON)
6652 (_, (vg_info, ), _) = info.payload
6653 vg_free = vg_info.get("vg_free", None)
6654 if not isinstance(vg_free, int):
6655 raise errors.OpPrereqError("Can't compute free disk space on node"
6656 " %s for vg %s, result was '%s'" %
6657 (node, vg, vg_free), errors.ECODE_ENVIRON)
6658 if requested > vg_free:
6659 raise errors.OpPrereqError("Not enough disk space on target node %s"
6660 " vg %s: required %d MiB, available %d MiB" %
6661 (node, vg, requested, vg_free),
6662 errors.ECODE_NORES)
6663
6666 """Checks if nodes have enough physical CPUs
6667
6668 This function checks if all given nodes have the needed number of
6669 physical CPUs. In case any node has less CPUs or we cannot get the
6670 information from the node, this function raises an OpPrereqError
6671 exception.
6672
6673 @type lu: C{LogicalUnit}
6674 @param lu: a logical unit from which we get configuration data
6675 @type nodenames: C{list}
6676 @param nodenames: the list of node names to check
6677 @type requested: C{int}
6678 @param requested: the minimum acceptable number of physical CPUs
6679 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6680 or we cannot check the node
6681
6682 """
6683 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6684 for node in nodenames:
6685 info = nodeinfo[node]
6686 info.Raise("Cannot get current information from node %s" % node,
6687 prereq=True, ecode=errors.ECODE_ENVIRON)
6688 (_, _, (hv_info, )) = info.payload
6689 num_cpus = hv_info.get("cpu_total", None)
6690 if not isinstance(num_cpus, int):
6691 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6692 " on node %s, result was '%s'" %
6693 (node, num_cpus), errors.ECODE_ENVIRON)
6694 if requested > num_cpus:
6695 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6696 "required" % (node, num_cpus, requested),
6697 errors.ECODE_NORES)
6698
6701 """Starts an instance.
6702
6703 """
6704 HPATH = "instance-start"
6705 HTYPE = constants.HTYPE_INSTANCE
6706 REQ_BGL = False
6707
6714
6718
6722
6724 """Build hooks env.
6725
6726 This runs on master, primary and secondary nodes of the instance.
6727
6728 """
6729 env = {
6730 "FORCE": self.op.force,
6731 }
6732
6733 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6734
6735 return env
6736
6743
6745 """Check prerequisites.
6746
6747 This checks that the instance is in the cluster.
6748
6749 """
6750 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6751 assert self.instance is not None, \
6752 "Cannot retrieve locked instance %s" % self.op.instance_name
6753
6754
6755 if self.op.hvparams:
6756
6757 cluster = self.cfg.GetClusterInfo()
6758 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6759 filled_hvp = cluster.FillHV(instance)
6760 filled_hvp.update(self.op.hvparams)
6761 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
6762 hv_type.CheckParameterSyntax(filled_hvp)
6763 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6764
6765 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6766
6767 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6768
6769 if self.primary_offline and self.op.ignore_offline_nodes:
6770 self.proc.LogWarning("Ignoring offline primary node")
6771
6772 if self.op.hvparams or self.op.beparams:
6773 self.proc.LogWarning("Overridden parameters are ignored")
6774 else:
6775 _CheckNodeOnline(self, instance.primary_node)
6776
6777 bep = self.cfg.GetClusterInfo().FillBE(instance)
6778 bep.update(self.op.beparams)
6779
6780
6781 _CheckInstanceBridgesExist(self, instance)
6782
6783 remote_info = self.rpc.call_instance_info(instance.primary_node,
6784 instance.name,
6785 instance.hypervisor)
6786 remote_info.Raise("Error checking node %s" % instance.primary_node,
6787 prereq=True, ecode=errors.ECODE_ENVIRON)
6788 if not remote_info.payload:
6789 _CheckNodeFreeMemory(self, instance.primary_node,
6790 "starting instance %s" % instance.name,
6791 bep[constants.BE_MINMEM], instance.hypervisor)
6792
6793 - def Exec(self, feedback_fn):
6820
6823 """Reboot an instance.
6824
6825 """
6826 HPATH = "instance-reboot"
6827 HTYPE = constants.HTYPE_INSTANCE
6828 REQ_BGL = False
6829
6832
6834 """Build hooks env.
6835
6836 This runs on master, primary and secondary nodes of the instance.
6837
6838 """
6839 env = {
6840 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6841 "REBOOT_TYPE": self.op.reboot_type,
6842 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6843 }
6844
6845 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6846
6847 return env
6848
6855
6870
6871 - def Exec(self, feedback_fn):
6872 """Reboot the instance.
6873
6874 """
6875 instance = self.instance
6876 ignore_secondaries = self.op.ignore_secondaries
6877 reboot_type = self.op.reboot_type
6878
6879 remote_info = self.rpc.call_instance_info(instance.primary_node,
6880 instance.name,
6881 instance.hypervisor)
6882 remote_info.Raise("Error checking node %s" % instance.primary_node)
6883 instance_running = bool(remote_info.payload)
6884
6885 node_current = instance.primary_node
6886
6887 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6888 constants.INSTANCE_REBOOT_HARD]:
6889 for disk in instance.disks:
6890 self.cfg.SetDiskID(disk, node_current)
6891 result = self.rpc.call_instance_reboot(node_current, instance,
6892 reboot_type,
6893 self.op.shutdown_timeout)
6894 result.Raise("Could not reboot instance")
6895 else:
6896 if instance_running:
6897 result = self.rpc.call_instance_shutdown(node_current, instance,
6898 self.op.shutdown_timeout)
6899 result.Raise("Could not shutdown instance for full reboot")
6900 _ShutdownInstanceDisks(self, instance)
6901 else:
6902 self.LogInfo("Instance %s was already stopped, starting now",
6903 instance.name)
6904 _StartInstanceDisks(self, instance, ignore_secondaries)
6905 result = self.rpc.call_instance_start(node_current,
6906 (instance, None, None), False)
6907 msg = result.fail_msg
6908 if msg:
6909 _ShutdownInstanceDisks(self, instance)
6910 raise errors.OpExecError("Could not start instance for"
6911 " full reboot: %s" % msg)
6912
6913 self.cfg.MarkInstanceUp(instance.name)
6914
6917 """Shutdown an instance.
6918
6919 """
6920 HPATH = "instance-stop"
6921 HTYPE = constants.HTYPE_INSTANCE
6922 REQ_BGL = False
6923
6926
6928 """Build hooks env.
6929
6930 This runs on master, primary and secondary nodes of the instance.
6931
6932 """
6933 env = _BuildInstanceHookEnvByObject(self, self.instance)
6934 env["TIMEOUT"] = self.op.timeout
6935 return env
6936
6943
6945 """Check prerequisites.
6946
6947 This checks that the instance is in the cluster.
6948
6949 """
6950 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6951 assert self.instance is not None, \
6952 "Cannot retrieve locked instance %s" % self.op.instance_name
6953
6954 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6955
6956 self.primary_offline = \
6957 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6958
6959 if self.primary_offline and self.op.ignore_offline_nodes:
6960 self.proc.LogWarning("Ignoring offline primary node")
6961 else:
6962 _CheckNodeOnline(self, self.instance.primary_node)
6963
6964 - def Exec(self, feedback_fn):
6985
6988 """Reinstall an instance.
6989
6990 """
6991 HPATH = "instance-reinstall"
6992 HTYPE = constants.HTYPE_INSTANCE
6993 REQ_BGL = False
6994
6997
7005
7012
7014 """Check prerequisites.
7015
7016 This checks that the instance is in the cluster and is not running.
7017
7018 """
7019 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7020 assert instance is not None, \
7021 "Cannot retrieve locked instance %s" % self.op.instance_name
7022 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7023 " offline, cannot reinstall")
7024
7025 if instance.disk_template == constants.DT_DISKLESS:
7026 raise errors.OpPrereqError("Instance '%s' has no disks" %
7027 self.op.instance_name,
7028 errors.ECODE_INVAL)
7029 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7030
7031 if self.op.os_type is not None:
7032
7033 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7034 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7035 instance_os = self.op.os_type
7036 else:
7037 instance_os = instance.os
7038
7039 nodelist = list(instance.all_nodes)
7040
7041 if self.op.osparams:
7042 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7043 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7044 self.os_inst = i_osdict
7045 else:
7046 self.os_inst = None
7047
7048 self.instance = instance
7049
7050 - def Exec(self, feedback_fn):
7051 """Reinstall the instance.
7052
7053 """
7054 inst = self.instance
7055
7056 if self.op.os_type is not None:
7057 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7058 inst.os = self.op.os_type
7059
7060 self.cfg.Update(inst, feedback_fn)
7061
7062 _StartInstanceDisks(self, inst, None)
7063 try:
7064 feedback_fn("Running the instance OS create scripts...")
7065
7066 result = self.rpc.call_instance_os_add(inst.primary_node,
7067 (inst, self.os_inst), True,
7068 self.op.debug_level)
7069 result.Raise("Could not install OS for instance %s on node %s" %
7070 (inst.name, inst.primary_node))
7071 finally:
7072 _ShutdownInstanceDisks(self, inst)
7073
7076 """Recreate an instance's missing disks.
7077
7078 """
7079 HPATH = "instance-recreate-disks"
7080 HTYPE = constants.HTYPE_INSTANCE
7081 REQ_BGL = False
7082
7083 _MODIFYABLE = frozenset([
7084 constants.IDISK_SIZE,
7085 constants.IDISK_MODE,
7086 ])
7087
7088
7089 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7090 constants.IDISK_ADOPT,
7091
7092
7093 constants.IDISK_VG,
7094 constants.IDISK_METAVG,
7095 ]))
7096
7116
7126
7137
7145
7152
7154 """Check prerequisites.
7155
7156 This checks that the instance is in the cluster and is not running.
7157
7158 """
7159 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7160 assert instance is not None, \
7161 "Cannot retrieve locked instance %s" % self.op.instance_name
7162 if self.op.nodes:
7163 if len(self.op.nodes) != len(instance.all_nodes):
7164 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7165 " %d replacement nodes were specified" %
7166 (instance.name, len(instance.all_nodes),
7167 len(self.op.nodes)),
7168 errors.ECODE_INVAL)
7169 assert instance.disk_template != constants.DT_DRBD8 or \
7170 len(self.op.nodes) == 2
7171 assert instance.disk_template != constants.DT_PLAIN or \
7172 len(self.op.nodes) == 1
7173 primary_node = self.op.nodes[0]
7174 else:
7175 primary_node = instance.primary_node
7176 _CheckNodeOnline(self, primary_node)
7177
7178 if instance.disk_template == constants.DT_DISKLESS:
7179 raise errors.OpPrereqError("Instance '%s' has no disks" %
7180 self.op.instance_name, errors.ECODE_INVAL)
7181
7182
7183
7184 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7185 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7186 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7187 if not (self.op.nodes and old_pnode.offline):
7188 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7189 msg="cannot recreate disks")
7190
7191 if self.op.disks:
7192 self.disks = dict(self.op.disks)
7193 else:
7194 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7195
7196 maxidx = max(self.disks.keys())
7197 if maxidx >= len(instance.disks):
7198 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7199 errors.ECODE_INVAL)
7200
7201 if (self.op.nodes and
7202 sorted(self.disks.keys()) != range(len(instance.disks))):
7203 raise errors.OpPrereqError("Can't recreate disks partially and"
7204 " change the nodes at the same time",
7205 errors.ECODE_INVAL)
7206
7207 self.instance = instance
7208
7209 - def Exec(self, feedback_fn):
7210 """Recreate the disks.
7211
7212 """
7213 instance = self.instance
7214
7215 assert (self.owned_locks(locking.LEVEL_NODE) ==
7216 self.owned_locks(locking.LEVEL_NODE_RES))
7217
7218 to_skip = []
7219 mods = []
7220
7221 for idx, disk in enumerate(instance.disks):
7222 try:
7223 changes = self.disks[idx]
7224 except KeyError:
7225
7226 to_skip.append(idx)
7227 continue
7228
7229
7230 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7231
7232 assert len(self.op.nodes) == 2
7233 assert len(disk.logical_id) == 6
7234
7235 (_, _, old_port, _, _, old_secret) = disk.logical_id
7236 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7237 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7238 new_minors[0], new_minors[1], old_secret)
7239 assert len(disk.logical_id) == len(new_id)
7240 else:
7241 new_id = None
7242
7243 mods.append((idx, new_id, changes))
7244
7245
7246
7247 for idx, new_id, changes in mods:
7248 disk = instance.disks[idx]
7249 if new_id is not None:
7250 assert disk.dev_type == constants.LD_DRBD8
7251 disk.logical_id = new_id
7252 if changes:
7253 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7254 mode=changes.get(constants.IDISK_MODE, None))
7255
7256
7257 if self.op.nodes:
7258 instance.primary_node = self.op.nodes[0]
7259 self.LogWarning("Changing the instance's nodes, you will have to"
7260 " remove any disks left on the older nodes manually")
7261
7262 if self.op.nodes:
7263 self.cfg.Update(instance, feedback_fn)
7264
7265 _CreateDisks(self, instance, to_skip=to_skip)
7266
7269 """Rename an instance.
7270
7271 """
7272 HPATH = "instance-rename"
7273 HTYPE = constants.HTYPE_INSTANCE
7274
7276 """Check arguments.
7277
7278 """
7279 if self.op.ip_check and not self.op.name_check:
7280
7281 raise errors.OpPrereqError("IP address check requires a name check",
7282 errors.ECODE_INVAL)
7283
7285 """Build hooks env.
7286
7287 This runs on master, primary and secondary nodes of the instance.
7288
7289 """
7290 env = _BuildInstanceHookEnvByObject(self, self.instance)
7291 env["INSTANCE_NEW_NAME"] = self.op.new_name
7292 return env
7293
7300
7302 """Check prerequisites.
7303
7304 This checks that the instance is in the cluster and is not running.
7305
7306 """
7307 self.op.instance_name = _ExpandInstanceName(self.cfg,
7308 self.op.instance_name)
7309 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7310 assert instance is not None
7311 _CheckNodeOnline(self, instance.primary_node)
7312 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7313 msg="cannot rename")
7314 self.instance = instance
7315
7316 new_name = self.op.new_name
7317 if self.op.name_check:
7318 hostname = _CheckHostnameSane(self, new_name)
7319 new_name = self.op.new_name = hostname.name
7320 if (self.op.ip_check and
7321 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7322 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7323 (hostname.ip, new_name),
7324 errors.ECODE_NOTUNIQUE)
7325
7326 instance_list = self.cfg.GetInstanceList()
7327 if new_name in instance_list and new_name != instance.name:
7328 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7329 new_name, errors.ECODE_EXISTS)
7330
7331 - def Exec(self, feedback_fn):
7332 """Rename the instance.
7333
7334 """
7335 inst = self.instance
7336 old_name = inst.name
7337
7338 rename_file_storage = False
7339 if (inst.disk_template in constants.DTS_FILEBASED and
7340 self.op.new_name != inst.name):
7341 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7342 rename_file_storage = True
7343
7344 self.cfg.RenameInstance(inst.name, self.op.new_name)
7345
7346
7347 assert self.REQ_BGL
7348 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7349 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7350
7351
7352 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7353
7354 if rename_file_storage:
7355 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7356 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7357 old_file_storage_dir,
7358 new_file_storage_dir)
7359 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7360 " (but the instance has been renamed in Ganeti)" %
7361 (inst.primary_node, old_file_storage_dir,
7362 new_file_storage_dir))
7363
7364 _StartInstanceDisks(self, inst, None)
7365 try:
7366 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7367 old_name, self.op.debug_level)
7368 msg = result.fail_msg
7369 if msg:
7370 msg = ("Could not run OS rename script for instance %s on node %s"
7371 " (but the instance has been renamed in Ganeti): %s" %
7372 (inst.name, inst.primary_node, msg))
7373 self.proc.LogWarning(msg)
7374 finally:
7375 _ShutdownInstanceDisks(self, inst)
7376
7377 return inst.name
7378
7381 """Remove an instance.
7382
7383 """
7384 HPATH = "instance-remove"
7385 HTYPE = constants.HTYPE_INSTANCE
7386 REQ_BGL = False
7387
7393
7401
7403 """Build hooks env.
7404
7405 This runs on master, primary and secondary nodes of the instance.
7406
7407 """
7408 env = _BuildInstanceHookEnvByObject(self, self.instance)
7409 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7410 return env
7411
7413 """Build hooks nodes.
7414
7415 """
7416 nl = [self.cfg.GetMasterNode()]
7417 nl_post = list(self.instance.all_nodes) + nl
7418 return (nl, nl_post)
7419
7421 """Check prerequisites.
7422
7423 This checks that the instance is in the cluster.
7424
7425 """
7426 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7427 assert self.instance is not None, \
7428 "Cannot retrieve locked instance %s" % self.op.instance_name
7429
7430 - def Exec(self, feedback_fn):
7431 """Remove the instance.
7432
7433 """
7434 instance = self.instance
7435 logging.info("Shutting down instance %s on node %s",
7436 instance.name, instance.primary_node)
7437
7438 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7439 self.op.shutdown_timeout)
7440 msg = result.fail_msg
7441 if msg:
7442 if self.op.ignore_failures:
7443 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7444 else:
7445 raise errors.OpExecError("Could not shutdown instance %s on"
7446 " node %s: %s" %
7447 (instance.name, instance.primary_node, msg))
7448
7449 assert (self.owned_locks(locking.LEVEL_NODE) ==
7450 self.owned_locks(locking.LEVEL_NODE_RES))
7451 assert not (set(instance.all_nodes) -
7452 self.owned_locks(locking.LEVEL_NODE)), \
7453 "Not owning correct locks"
7454
7455 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7456
7478
7481 """Logical unit for querying instances.
7482
7483 """
7484
7485 REQ_BGL = False
7486
7490
7493
7496
7497 - def Exec(self, feedback_fn):
7499
7502 """Failover an instance.
7503
7504 """
7505 HPATH = "instance-failover"
7506 HTYPE = constants.HTYPE_INSTANCE
7507 REQ_BGL = False
7508
7510 """Check the arguments.
7511
7512 """
7513 self.iallocator = getattr(self.op, "iallocator", None)
7514 self.target_node = getattr(self.op, "target_node", None)
7515
7537
7554
7556 """Build hooks env.
7557
7558 This runs on master, primary and secondary nodes of the instance.
7559
7560 """
7561 instance = self._migrater.instance
7562 source_node = instance.primary_node
7563 target_node = self.op.target_node
7564 env = {
7565 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7566 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7567 "OLD_PRIMARY": source_node,
7568 "NEW_PRIMARY": target_node,
7569 }
7570
7571 if instance.disk_template in constants.DTS_INT_MIRROR:
7572 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7573 env["NEW_SECONDARY"] = source_node
7574 else:
7575 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7576
7577 env.update(_BuildInstanceHookEnvByObject(self, instance))
7578
7579 return env
7580
7588
7591 """Migrate an instance.
7592
7593 This is migration without shutting down, compared to the failover,
7594 which is done with shutdown.
7595
7596 """
7597 HPATH = "instance-migrate"
7598 HTYPE = constants.HTYPE_INSTANCE
7599 REQ_BGL = False
7600
7621
7638
7640 """Build hooks env.
7641
7642 This runs on master, primary and secondary nodes of the instance.
7643
7644 """
7645 instance = self._migrater.instance
7646 source_node = instance.primary_node
7647 target_node = self.op.target_node
7648 env = _BuildInstanceHookEnvByObject(self, instance)
7649 env.update({
7650 "MIGRATE_LIVE": self._migrater.live,
7651 "MIGRATE_CLEANUP": self.op.cleanup,
7652 "OLD_PRIMARY": source_node,
7653 "NEW_PRIMARY": target_node,
7654 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7655 })
7656
7657 if instance.disk_template in constants.DTS_INT_MIRROR:
7658 env["OLD_SECONDARY"] = target_node
7659 env["NEW_SECONDARY"] = source_node
7660 else:
7661 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7662
7663 return env
7664
7672
7675 """Move an instance by data-copying.
7676
7677 """
7678 HPATH = "instance-move"
7679 HTYPE = constants.HTYPE_INSTANCE
7680 REQ_BGL = False
7681
7689
7697
7699 """Build hooks env.
7700
7701 This runs on master, primary and secondary nodes of the instance.
7702
7703 """
7704 env = {
7705 "TARGET_NODE": self.op.target_node,
7706 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7707 }
7708 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7709 return env
7710
7712 """Build hooks nodes.
7713
7714 """
7715 nl = [
7716 self.cfg.GetMasterNode(),
7717 self.instance.primary_node,
7718 self.op.target_node,
7719 ]
7720 return (nl, nl)
7721
7723 """Check prerequisites.
7724
7725 This checks that the instance is in the cluster.
7726
7727 """
7728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7729 assert self.instance is not None, \
7730 "Cannot retrieve locked instance %s" % self.op.instance_name
7731
7732 node = self.cfg.GetNodeInfo(self.op.target_node)
7733 assert node is not None, \
7734 "Cannot retrieve locked node %s" % self.op.target_node
7735
7736 self.target_node = target_node = node.name
7737
7738 if target_node == instance.primary_node:
7739 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7740 (instance.name, target_node),
7741 errors.ECODE_STATE)
7742
7743 bep = self.cfg.GetClusterInfo().FillBE(instance)
7744
7745 for idx, dsk in enumerate(instance.disks):
7746 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7747 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7748 " cannot copy" % idx, errors.ECODE_STATE)
7749
7750 _CheckNodeOnline(self, target_node)
7751 _CheckNodeNotDrained(self, target_node)
7752 _CheckNodeVmCapable(self, target_node)
7753 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7754 self.cfg.GetNodeGroup(node.group))
7755 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7756 ignore=self.op.ignore_ipolicy)
7757
7758 if instance.admin_state == constants.ADMINST_UP:
7759
7760 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7761 instance.name, bep[constants.BE_MAXMEM],
7762 instance.hypervisor)
7763 else:
7764 self.LogInfo("Not checking memory on the secondary node as"
7765 " instance will not be started")
7766
7767
7768 _CheckInstanceBridgesExist(self, instance, node=target_node)
7769
7770 - def Exec(self, feedback_fn):
7771 """Move an instance.
7772
7773 The move is done by shutting it down on its present node, copying
7774 the data over (slow) and starting it on the new node.
7775
7776 """
7777 instance = self.instance
7778
7779 source_node = instance.primary_node
7780 target_node = self.target_node
7781
7782 self.LogInfo("Shutting down instance %s on source node %s",
7783 instance.name, source_node)
7784
7785 assert (self.owned_locks(locking.LEVEL_NODE) ==
7786 self.owned_locks(locking.LEVEL_NODE_RES))
7787
7788 result = self.rpc.call_instance_shutdown(source_node, instance,
7789 self.op.shutdown_timeout)
7790 msg = result.fail_msg
7791 if msg:
7792 if self.op.ignore_consistency:
7793 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7794 " Proceeding anyway. Please make sure node"
7795 " %s is down. Error details: %s",
7796 instance.name, source_node, source_node, msg)
7797 else:
7798 raise errors.OpExecError("Could not shutdown instance %s on"
7799 " node %s: %s" %
7800 (instance.name, source_node, msg))
7801
7802
7803 try:
7804 _CreateDisks(self, instance, target_node=target_node)
7805 except errors.OpExecError:
7806 self.LogWarning("Device creation failed, reverting...")
7807 try:
7808 _RemoveDisks(self, instance, target_node=target_node)
7809 finally:
7810 self.cfg.ReleaseDRBDMinors(instance.name)
7811 raise
7812
7813 cluster_name = self.cfg.GetClusterInfo().cluster_name
7814
7815 errs = []
7816
7817 for idx, disk in enumerate(instance.disks):
7818 self.LogInfo("Copying data for disk %d", idx)
7819 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7820 instance.name, True, idx)
7821 if result.fail_msg:
7822 self.LogWarning("Can't assemble newly created disk %d: %s",
7823 idx, result.fail_msg)
7824 errs.append(result.fail_msg)
7825 break
7826 dev_path = result.payload
7827 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7828 target_node, dev_path,
7829 cluster_name)
7830 if result.fail_msg:
7831 self.LogWarning("Can't copy data over for disk %d: %s",
7832 idx, result.fail_msg)
7833 errs.append(result.fail_msg)
7834 break
7835
7836 if errs:
7837 self.LogWarning("Some disks failed to copy, aborting")
7838 try:
7839 _RemoveDisks(self, instance, target_node=target_node)
7840 finally:
7841 self.cfg.ReleaseDRBDMinors(instance.name)
7842 raise errors.OpExecError("Errors during disk copy: %s" %
7843 (",".join(errs),))
7844
7845 instance.primary_node = target_node
7846 self.cfg.Update(instance, feedback_fn)
7847
7848 self.LogInfo("Removing the disks on the original node")
7849 _RemoveDisks(self, instance, target_node=source_node)
7850
7851
7852 if instance.admin_state == constants.ADMINST_UP:
7853 self.LogInfo("Starting instance %s on node %s",
7854 instance.name, target_node)
7855
7856 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7857 ignore_secondaries=True)
7858 if not disks_ok:
7859 _ShutdownInstanceDisks(self, instance)
7860 raise errors.OpExecError("Can't activate the instance's disks")
7861
7862 result = self.rpc.call_instance_start(target_node,
7863 (instance, None, None), False)
7864 msg = result.fail_msg
7865 if msg:
7866 _ShutdownInstanceDisks(self, instance)
7867 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7868 (instance.name, target_node, msg))
7869
7872 """Migrate all instances from a node.
7873
7874 """
7875 HPATH = "node-migrate"
7876 HTYPE = constants.HTYPE_NODE
7877 REQ_BGL = False
7878
7881
7889
7891 """Build hooks env.
7892
7893 This runs on the master, the primary and all the secondaries.
7894
7895 """
7896 return {
7897 "NODE_NAME": self.op.node_name,
7898 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7899 }
7900
7902 """Build hooks nodes.
7903
7904 """
7905 nl = [self.cfg.GetMasterNode()]
7906 return (nl, nl)
7907
7910
7911 - def Exec(self, feedback_fn):
7912
7913 allow_runtime_changes = self.op.allow_runtime_changes
7914 jobs = [
7915 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7916 mode=self.op.mode,
7917 live=self.op.live,
7918 iallocator=self.op.iallocator,
7919 target_node=self.op.target_node,
7920 allow_runtime_changes=allow_runtime_changes,
7921 ignore_ipolicy=self.op.ignore_ipolicy)]
7922 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7923 ]
7924
7925
7926
7927
7928
7929
7930 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7931 frozenset([self.op.node_name]))
7932
7933 return ResultWithJobs(jobs)
7934
7937 """Tasklet class for instance migration.
7938
7939 @type live: boolean
7940 @ivar live: whether the migration will be done live or non-live;
7941 this variable is initalized only after CheckPrereq has run
7942 @type cleanup: boolean
7943 @ivar cleanup: Wheater we cleanup from a failed migration
7944 @type iallocator: string
7945 @ivar iallocator: The iallocator used to determine target_node
7946 @type target_node: string
7947 @ivar target_node: If given, the target_node to reallocate the instance to
7948 @type failover: boolean
7949 @ivar failover: Whether operation results in failover or migration
7950 @type fallback: boolean
7951 @ivar fallback: Whether fallback to failover is allowed if migration not
7952 possible
7953 @type ignore_consistency: boolean
7954 @ivar ignore_consistency: Wheter we should ignore consistency between source
7955 and target node
7956 @type shutdown_timeout: int
7957 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7958 @type ignore_ipolicy: bool
7959 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7960
7961 """
7962
7963
7964 _MIGRATION_POLL_INTERVAL = 1
7965 _MIGRATION_FEEDBACK_INTERVAL = 10
7966
7967 - def __init__(self, lu, instance_name, cleanup=False,
7968 failover=False, fallback=False,
7969 ignore_consistency=False,
7970 allow_runtime_changes=True,
7971 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7972 ignore_ipolicy=False):
7973 """Initializes this class.
7974
7975 """
7976 Tasklet.__init__(self, lu)
7977
7978
7979 self.instance_name = instance_name
7980 self.cleanup = cleanup
7981 self.live = False
7982 self.failover = failover
7983 self.fallback = fallback
7984 self.ignore_consistency = ignore_consistency
7985 self.shutdown_timeout = shutdown_timeout
7986 self.ignore_ipolicy = ignore_ipolicy
7987 self.allow_runtime_changes = allow_runtime_changes
7988
7990 """Check prerequisites.
7991
7992 This checks that the instance is in the cluster.
7993
7994 """
7995 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7996 instance = self.cfg.GetInstanceInfo(instance_name)
7997 assert instance is not None
7998 self.instance = instance
7999 cluster = self.cfg.GetClusterInfo()
8000
8001 if (not self.cleanup and
8002 not instance.admin_state == constants.ADMINST_UP and
8003 not self.failover and self.fallback):
8004 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8005 " switching to failover")
8006 self.failover = True
8007
8008 if instance.disk_template not in constants.DTS_MIRRORED:
8009 if self.failover:
8010 text = "failovers"
8011 else:
8012 text = "migrations"
8013 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8014 " %s" % (instance.disk_template, text),
8015 errors.ECODE_STATE)
8016
8017 if instance.disk_template in constants.DTS_EXT_MIRROR:
8018 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8019
8020 if self.lu.op.iallocator:
8021 self._RunAllocator()
8022 else:
8023
8024
8025 self.target_node = self.lu.op.target_node
8026
8027
8028 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8029 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8030 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8031 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8032 ignore=self.ignore_ipolicy)
8033
8034
8035
8036 target_node = self.target_node
8037 if self.target_node == instance.primary_node:
8038 raise errors.OpPrereqError("Cannot migrate instance %s"
8039 " to its primary (%s)" %
8040 (instance.name, instance.primary_node))
8041
8042 if len(self.lu.tasklets) == 1:
8043
8044
8045 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8046 keep=[instance.primary_node, self.target_node])
8047
8048 else:
8049 secondary_nodes = instance.secondary_nodes
8050 if not secondary_nodes:
8051 raise errors.ConfigurationError("No secondary node but using"
8052 " %s disk template" %
8053 instance.disk_template)
8054 target_node = secondary_nodes[0]
8055 if self.lu.op.iallocator or (self.lu.op.target_node and
8056 self.lu.op.target_node != target_node):
8057 if self.failover:
8058 text = "failed over"
8059 else:
8060 text = "migrated"
8061 raise errors.OpPrereqError("Instances with disk template %s cannot"
8062 " be %s to arbitrary nodes"
8063 " (neither an iallocator nor a target"
8064 " node can be passed)" %
8065 (instance.disk_template, text),
8066 errors.ECODE_INVAL)
8067 nodeinfo = self.cfg.GetNodeInfo(target_node)
8068 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8069 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8070 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8071 ignore=self.ignore_ipolicy)
8072
8073 i_be = cluster.FillBE(instance)
8074
8075
8076 if (not self.cleanup and
8077 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8078 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8079 "migrating instance %s" %
8080 instance.name,
8081 i_be[constants.BE_MINMEM],
8082 instance.hypervisor)
8083 else:
8084 self.lu.LogInfo("Not checking memory on the secondary node as"
8085 " instance will not be started")
8086
8087
8088 if (not self.cleanup and not self.failover and
8089 i_be[constants.BE_ALWAYS_FAILOVER]):
8090 self.lu.LogInfo("Instance configured to always failover; fallback"
8091 " to failover")
8092 self.failover = True
8093
8094
8095 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8096
8097 if not self.cleanup:
8098 _CheckNodeNotDrained(self.lu, target_node)
8099 if not self.failover:
8100 result = self.rpc.call_instance_migratable(instance.primary_node,
8101 instance)
8102 if result.fail_msg and self.fallback:
8103 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8104 " failover")
8105 self.failover = True
8106 else:
8107 result.Raise("Can't migrate, please use failover",
8108 prereq=True, ecode=errors.ECODE_STATE)
8109
8110 assert not (self.failover and self.cleanup)
8111
8112 if not self.failover:
8113 if self.lu.op.live is not None and self.lu.op.mode is not None:
8114 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8115 " parameters are accepted",
8116 errors.ECODE_INVAL)
8117 if self.lu.op.live is not None:
8118 if self.lu.op.live:
8119 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8120 else:
8121 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8122
8123
8124 self.lu.op.live = None
8125 elif self.lu.op.mode is None:
8126
8127 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8128 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8129
8130 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8131 else:
8132
8133 self.live = False
8134
8135 if not (self.failover or self.cleanup):
8136 remote_info = self.rpc.call_instance_info(instance.primary_node,
8137 instance.name,
8138 instance.hypervisor)
8139 remote_info.Raise("Error checking instance on node %s" %
8140 instance.primary_node)
8141 instance_running = bool(remote_info.payload)
8142 if instance_running:
8143 self.current_mem = int(remote_info.payload["memory"])
8144
8146 """Run the allocator based on input opcode.
8147
8148 """
8149
8150 ial = IAllocator(self.cfg, self.rpc,
8151 mode=constants.IALLOCATOR_MODE_RELOC,
8152 name=self.instance_name,
8153 relocate_from=[self.instance.primary_node],
8154 )
8155
8156 ial.Run(self.lu.op.iallocator)
8157
8158 if not ial.success:
8159 raise errors.OpPrereqError("Can't compute nodes using"
8160 " iallocator '%s': %s" %
8161 (self.lu.op.iallocator, ial.info),
8162 errors.ECODE_NORES)
8163 if len(ial.result) != ial.required_nodes:
8164 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8165 " of nodes (%s), required %s" %
8166 (self.lu.op.iallocator, len(ial.result),
8167 ial.required_nodes), errors.ECODE_FAULT)
8168 self.target_node = ial.result[0]
8169 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8170 self.instance_name, self.lu.op.iallocator,
8171 utils.CommaJoin(ial.result))
8172
8174 """Poll with custom rpc for disk sync.
8175
8176 This uses our own step-based rpc call.
8177
8178 """
8179 self.feedback_fn("* wait until resync is done")
8180 all_done = False
8181 while not all_done:
8182 all_done = True
8183 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8184 self.nodes_ip,
8185 (self.instance.disks,
8186 self.instance))
8187 min_percent = 100
8188 for node, nres in result.items():
8189 nres.Raise("Cannot resync disks on node %s" % node)
8190 node_done, node_percent = nres.payload
8191 all_done = all_done and node_done
8192 if node_percent is not None:
8193 min_percent = min(min_percent, node_percent)
8194 if not all_done:
8195 if min_percent < 100:
8196 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8197 time.sleep(2)
8198
8200 """Demote a node to secondary.
8201
8202 """
8203 self.feedback_fn("* switching node %s to secondary mode" % node)
8204
8205 for dev in self.instance.disks:
8206 self.cfg.SetDiskID(dev, node)
8207
8208 result = self.rpc.call_blockdev_close(node, self.instance.name,
8209 self.instance.disks)
8210 result.Raise("Cannot change disk to secondary on node %s" % node)
8211
8213 """Disconnect from the network.
8214
8215 """
8216 self.feedback_fn("* changing into standalone mode")
8217 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8218 self.instance.disks)
8219 for node, nres in result.items():
8220 nres.Raise("Cannot disconnect disks node %s" % node)
8221
8223 """Reconnect to the network.
8224
8225 """
8226 if multimaster:
8227 msg = "dual-master"
8228 else:
8229 msg = "single-master"
8230 self.feedback_fn("* changing disks into %s mode" % msg)
8231 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8232 (self.instance.disks, self.instance),
8233 self.instance.name, multimaster)
8234 for node, nres in result.items():
8235 nres.Raise("Cannot change disks config on node %s" % node)
8236
8238 """Try to cleanup after a failed migration.
8239
8240 The cleanup is done by:
8241 - check that the instance is running only on one node
8242 (and update the config if needed)
8243 - change disks on its secondary node to secondary
8244 - wait until disks are fully synchronized
8245 - disconnect from the network
8246 - change disks into single-master mode
8247 - wait again until disks are fully synchronized
8248
8249 """
8250 instance = self.instance
8251 target_node = self.target_node
8252 source_node = self.source_node
8253
8254
8255 self.feedback_fn("* checking where the instance actually runs"
8256 " (if this hangs, the hypervisor might be in"
8257 " a bad state)")
8258 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8259 for node, result in ins_l.items():
8260 result.Raise("Can't contact node %s" % node)
8261
8262 runningon_source = instance.name in ins_l[source_node].payload
8263 runningon_target = instance.name in ins_l[target_node].payload
8264
8265 if runningon_source and runningon_target:
8266 raise errors.OpExecError("Instance seems to be running on two nodes,"
8267 " or the hypervisor is confused; you will have"
8268 " to ensure manually that it runs only on one"
8269 " and restart this operation")
8270
8271 if not (runningon_source or runningon_target):
8272 raise errors.OpExecError("Instance does not seem to be running at all;"
8273 " in this case it's safer to repair by"
8274 " running 'gnt-instance stop' to ensure disk"
8275 " shutdown, and then restarting it")
8276
8277 if runningon_target:
8278
8279 self.feedback_fn("* instance running on secondary node (%s),"
8280 " updating config" % target_node)
8281 instance.primary_node = target_node
8282 self.cfg.Update(instance, self.feedback_fn)
8283 demoted_node = source_node
8284 else:
8285 self.feedback_fn("* instance confirmed to be running on its"
8286 " primary node (%s)" % source_node)
8287 demoted_node = target_node
8288
8289 if instance.disk_template in constants.DTS_INT_MIRROR:
8290 self._EnsureSecondary(demoted_node)
8291 try:
8292 self._WaitUntilSync()
8293 except errors.OpExecError:
8294
8295
8296 pass
8297 self._GoStandalone()
8298 self._GoReconnect(False)
8299 self._WaitUntilSync()
8300
8301 self.feedback_fn("* done")
8302
8320
8322 """Call the hypervisor code to abort a started migration.
8323
8324 """
8325 instance = self.instance
8326 target_node = self.target_node
8327 source_node = self.source_node
8328 migration_info = self.migration_info
8329
8330 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8331 instance,
8332 migration_info,
8333 False)
8334 abort_msg = abort_result.fail_msg
8335 if abort_msg:
8336 logging.error("Aborting migration failed on target node %s: %s",
8337 target_node, abort_msg)
8338
8339
8340
8341 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8342 instance, False, self.live)
8343 abort_msg = abort_result.fail_msg
8344 if abort_msg:
8345 logging.error("Aborting migration failed on source node %s: %s",
8346 source_node, abort_msg)
8347
8349 """Migrate an instance.
8350
8351 The migrate is done by:
8352 - change the disks into dual-master mode
8353 - wait until disks are fully synchronized again
8354 - migrate the instance
8355 - change disks on the new secondary node (the old primary) to secondary
8356 - wait until disks are fully synchronized
8357 - change disks into single-master mode
8358
8359 """
8360 instance = self.instance
8361 target_node = self.target_node
8362 source_node = self.source_node
8363
8364
8365 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8366 None, [self.instance.hypervisor])
8367 for ninfo in nodeinfo.values():
8368 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8369 ninfo.node)
8370 (_, _, (src_info, )) = nodeinfo[source_node].payload
8371 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8372
8373 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8374 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8375 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8376 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8377 if src_version != dst_version:
8378 self.feedback_fn("* warning: hypervisor version mismatch between"
8379 " source (%s) and target (%s) node" %
8380 (src_version, dst_version))
8381
8382 self.feedback_fn("* checking disk consistency between source and target")
8383 for (idx, dev) in enumerate(instance.disks):
8384 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8385 raise errors.OpExecError("Disk %s is degraded or not fully"
8386 " synchronized on target node,"
8387 " aborting migration" % idx)
8388
8389 if self.current_mem > self.tgt_free_mem:
8390 if not self.allow_runtime_changes:
8391 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8392 " free memory to fit instance %s on target"
8393 " node %s (have %dMB, need %dMB)" %
8394 (instance.name, target_node,
8395 self.tgt_free_mem, self.current_mem))
8396 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8397 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8398 instance,
8399 self.tgt_free_mem)
8400 rpcres.Raise("Cannot modify instance runtime memory")
8401
8402
8403 result = self.rpc.call_migration_info(source_node, instance)
8404 msg = result.fail_msg
8405 if msg:
8406 log_err = ("Failed fetching source migration information from %s: %s" %
8407 (source_node, msg))
8408 logging.error(log_err)
8409 raise errors.OpExecError(log_err)
8410
8411 self.migration_info = migration_info = result.payload
8412
8413 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8414
8415 self._EnsureSecondary(target_node)
8416 self._GoStandalone()
8417 self._GoReconnect(True)
8418 self._WaitUntilSync()
8419
8420 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8421 result = self.rpc.call_accept_instance(target_node,
8422 instance,
8423 migration_info,
8424 self.nodes_ip[target_node])
8425
8426 msg = result.fail_msg
8427 if msg:
8428 logging.error("Instance pre-migration failed, trying to revert"
8429 " disk status: %s", msg)
8430 self.feedback_fn("Pre-migration failed, aborting")
8431 self._AbortMigration()
8432 self._RevertDiskStatus()
8433 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8434 (instance.name, msg))
8435
8436 self.feedback_fn("* migrating instance to %s" % target_node)
8437 result = self.rpc.call_instance_migrate(source_node, instance,
8438 self.nodes_ip[target_node],
8439 self.live)
8440 msg = result.fail_msg
8441 if msg:
8442 logging.error("Instance migration failed, trying to revert"
8443 " disk status: %s", msg)
8444 self.feedback_fn("Migration failed, aborting")
8445 self._AbortMigration()
8446 self._RevertDiskStatus()
8447 raise errors.OpExecError("Could not migrate instance %s: %s" %
8448 (instance.name, msg))
8449
8450 self.feedback_fn("* starting memory transfer")
8451 last_feedback = time.time()
8452 while True:
8453 result = self.rpc.call_instance_get_migration_status(source_node,
8454 instance)
8455 msg = result.fail_msg
8456 ms = result.payload
8457 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8458 logging.error("Instance migration failed, trying to revert"
8459 " disk status: %s", msg)
8460 self.feedback_fn("Migration failed, aborting")
8461 self._AbortMigration()
8462 self._RevertDiskStatus()
8463 if not msg:
8464 msg = "hypervisor returned failure"
8465 raise errors.OpExecError("Could not migrate instance %s: %s" %
8466 (instance.name, msg))
8467
8468 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8469 self.feedback_fn("* memory transfer complete")
8470 break
8471
8472 if (utils.TimeoutExpired(last_feedback,
8473 self._MIGRATION_FEEDBACK_INTERVAL) and
8474 ms.transferred_ram is not None):
8475 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8476 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8477 last_feedback = time.time()
8478
8479 time.sleep(self._MIGRATION_POLL_INTERVAL)
8480
8481 result = self.rpc.call_instance_finalize_migration_src(source_node,
8482 instance,
8483 True,
8484 self.live)
8485 msg = result.fail_msg
8486 if msg:
8487 logging.error("Instance migration succeeded, but finalization failed"
8488 " on the source node: %s", msg)
8489 raise errors.OpExecError("Could not finalize instance migration: %s" %
8490 msg)
8491
8492 instance.primary_node = target_node
8493
8494
8495 self.cfg.Update(instance, self.feedback_fn)
8496
8497 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8498 instance,
8499 migration_info,
8500 True)
8501 msg = result.fail_msg
8502 if msg:
8503 logging.error("Instance migration succeeded, but finalization failed"
8504 " on the target node: %s", msg)
8505 raise errors.OpExecError("Could not finalize instance migration: %s" %
8506 msg)
8507
8508 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8509 self._EnsureSecondary(source_node)
8510 self._WaitUntilSync()
8511 self._GoStandalone()
8512 self._GoReconnect(False)
8513 self._WaitUntilSync()
8514
8515
8516
8517 if self.instance.disk_template == constants.DT_RBD:
8518 disks = _ExpandCheckDisks(instance, instance.disks)
8519 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8520 for disk in disks:
8521 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8522 msg = result.fail_msg
8523 if msg:
8524 logging.error("Migration was successful, but couldn't unmap the"
8525 " block device %s on source node %s: %s",
8526 disk.iv_name, source_node, msg)
8527 logging.error("You need to unmap the device %s manually on %s",
8528 disk.iv_name, source_node)
8529
8530 self.feedback_fn("* done")
8531
8533 """Failover an instance.
8534
8535 The failover is done by shutting it down on its present node and
8536 starting it on the secondary.
8537
8538 """
8539 instance = self.instance
8540 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8541
8542 source_node = instance.primary_node
8543 target_node = self.target_node
8544
8545 if instance.admin_state == constants.ADMINST_UP:
8546 self.feedback_fn("* checking disk consistency between source and target")
8547 for (idx, dev) in enumerate(instance.disks):
8548
8549 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8550 False):
8551 if primary_node.offline:
8552 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8553 " target node %s" %
8554 (primary_node.name, idx, target_node))
8555 elif not self.ignore_consistency:
8556 raise errors.OpExecError("Disk %s is degraded on target node,"
8557 " aborting failover" % idx)
8558 else:
8559 self.feedback_fn("* not checking disk consistency as instance is not"
8560 " running")
8561
8562 self.feedback_fn("* shutting down instance on source node")
8563 logging.info("Shutting down instance %s on node %s",
8564 instance.name, source_node)
8565
8566 result = self.rpc.call_instance_shutdown(source_node, instance,
8567 self.shutdown_timeout)
8568 msg = result.fail_msg
8569 if msg:
8570 if self.ignore_consistency or primary_node.offline:
8571 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8572 " proceeding anyway; please make sure node"
8573 " %s is down; error details: %s",
8574 instance.name, source_node, source_node, msg)
8575 else:
8576 raise errors.OpExecError("Could not shutdown instance %s on"
8577 " node %s: %s" %
8578 (instance.name, source_node, msg))
8579
8580 self.feedback_fn("* deactivating the instance's disks on source node")
8581 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8582 raise errors.OpExecError("Can't shut down the instance's disks")
8583
8584 instance.primary_node = target_node
8585
8586 self.cfg.Update(instance, self.feedback_fn)
8587
8588
8589 if instance.admin_state == constants.ADMINST_UP:
8590 self.feedback_fn("* activating the instance's disks on target node %s" %
8591 target_node)
8592 logging.info("Starting instance %s on node %s",
8593 instance.name, target_node)
8594
8595 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8596 ignore_secondaries=True)
8597 if not disks_ok:
8598 _ShutdownInstanceDisks(self.lu, instance)
8599 raise errors.OpExecError("Can't activate the instance's disks")
8600
8601 self.feedback_fn("* starting the instance on the target node %s" %
8602 target_node)
8603 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8604 False)
8605 msg = result.fail_msg
8606 if msg:
8607 _ShutdownInstanceDisks(self.lu, instance)
8608 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8609 (instance.name, target_node, msg))
8610
8611 - def Exec(self, feedback_fn):
8638
8639
8640 -def _CreateBlockDev(lu, node, instance, device, force_create, info,
8641 force_open):
8642 """Wrapper around L{_CreateBlockDevInner}.
8643
8644 This method annotates the root device first.
8645
8646 """
8647 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8648 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8649 force_open)
8650
8654 """Create a tree of block devices on a given node.
8655
8656 If this device type has to be created on secondaries, create it and
8657 all its children.
8658
8659 If not, just recurse to children keeping the same 'force' value.
8660
8661 @attention: The device has to be annotated already.
8662
8663 @param lu: the lu on whose behalf we execute
8664 @param node: the node on which to create the device
8665 @type instance: L{objects.Instance}
8666 @param instance: the instance which owns the device
8667 @type device: L{objects.Disk}
8668 @param device: the device to create
8669 @type force_create: boolean
8670 @param force_create: whether to force creation of this device; this
8671 will be change to True whenever we find a device which has
8672 CreateOnSecondary() attribute
8673 @param info: the extra 'metadata' we should attach to the device
8674 (this will be represented as a LVM tag)
8675 @type force_open: boolean
8676 @param force_open: this parameter will be passes to the
8677 L{backend.BlockdevCreate} function where it specifies
8678 whether we run on primary or not, and it affects both
8679 the child assembly and the device own Open() execution
8680
8681 """
8682 if device.CreateOnSecondary():
8683 force_create = True
8684
8685 if device.children:
8686 for child in device.children:
8687 _CreateBlockDevInner(lu, node, instance, child, force_create,
8688 info, force_open)
8689
8690 if not force_create:
8691 return
8692
8693 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8694
8697 """Create a single block device on a given node.
8698
8699 This will not recurse over children of the device, so they must be
8700 created in advance.
8701
8702 @param lu: the lu on whose behalf we execute
8703 @param node: the node on which to create the device
8704 @type instance: L{objects.Instance}
8705 @param instance: the instance which owns the device
8706 @type device: L{objects.Disk}
8707 @param device: the device to create
8708 @param info: the extra 'metadata' we should attach to the device
8709 (this will be represented as a LVM tag)
8710 @type force_open: boolean
8711 @param force_open: this parameter will be passes to the
8712 L{backend.BlockdevCreate} function where it specifies
8713 whether we run on primary or not, and it affects both
8714 the child assembly and the device own Open() execution
8715
8716 """
8717 lu.cfg.SetDiskID(device, node)
8718 result = lu.rpc.call_blockdev_create(node, device, device.size,
8719 instance.name, force_open, info)
8720 result.Raise("Can't create block device %s on"
8721 " node %s for instance %s" % (device, node, instance.name))
8722 if device.physical_id is None:
8723 device.physical_id = result.payload
8724
8727 """Generate a suitable LV name.
8728
8729 This will generate a logical volume name for the given instance.
8730
8731 """
8732 results = []
8733 for val in exts:
8734 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8735 results.append("%s%s" % (new_id, val))
8736 return results
8737
8738
8739 -def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8740 iv_name, p_minor, s_minor):
8741 """Generate a drbd8 device complete with its children.
8742
8743 """
8744 assert len(vgnames) == len(names) == 2
8745 port = lu.cfg.AllocatePort()
8746 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8747
8748 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8749 logical_id=(vgnames[0], names[0]),
8750 params={})
8751 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8752 logical_id=(vgnames[1], names[1]),
8753 params={})
8754 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8755 logical_id=(primary, secondary, port,
8756 p_minor, s_minor,
8757 shared_secret),
8758 children=[dev_data, dev_meta],
8759 iv_name=iv_name, params={})
8760 return drbd_dev
8761
8762
8763 _DISK_TEMPLATE_NAME_PREFIX = {
8764 constants.DT_PLAIN: "",
8765 constants.DT_RBD: ".rbd",
8766 }
8767
8768
8769 _DISK_TEMPLATE_DEVICE_TYPE = {
8770 constants.DT_PLAIN: constants.LD_LV,
8771 constants.DT_FILE: constants.LD_FILE,
8772 constants.DT_SHARED_FILE: constants.LD_FILE,
8773 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8774 constants.DT_RBD: constants.LD_RBD,
8775 }
8776
8777
8778 -def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8779 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8780 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8781 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8782 """Generate the entire disk layout for a given template type.
8783
8784 """
8785
8786
8787 vgname = lu.cfg.GetVGName()
8788 disk_count = len(disk_info)
8789 disks = []
8790
8791 if template_name == constants.DT_DISKLESS:
8792 pass
8793 elif template_name == constants.DT_DRBD8:
8794 if len(secondary_nodes) != 1:
8795 raise errors.ProgrammerError("Wrong template configuration")
8796 remote_node = secondary_nodes[0]
8797 minors = lu.cfg.AllocateDRBDMinor(
8798 [primary_node, remote_node] * len(disk_info), instance_name)
8799
8800 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8801 full_disk_params)
8802 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8803
8804 names = []
8805 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8806 for i in range(disk_count)]):
8807 names.append(lv_prefix + "_data")
8808 names.append(lv_prefix + "_meta")
8809 for idx, disk in enumerate(disk_info):
8810 disk_index = idx + base_index
8811 data_vg = disk.get(constants.IDISK_VG, vgname)
8812 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8813 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8814 disk[constants.IDISK_SIZE],
8815 [data_vg, meta_vg],
8816 names[idx * 2:idx * 2 + 2],
8817 "disk/%d" % disk_index,
8818 minors[idx * 2], minors[idx * 2 + 1])
8819 disk_dev.mode = disk[constants.IDISK_MODE]
8820 disks.append(disk_dev)
8821 else:
8822 if secondary_nodes:
8823 raise errors.ProgrammerError("Wrong template configuration")
8824
8825 if template_name == constants.DT_FILE:
8826 _req_file_storage()
8827 elif template_name == constants.DT_SHARED_FILE:
8828 _req_shr_file_storage()
8829
8830 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8831 if name_prefix is None:
8832 names = None
8833 else:
8834 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8835 (name_prefix, base_index + i)
8836 for i in range(disk_count)])
8837
8838 if template_name == constants.DT_PLAIN:
8839 def logical_id_fn(idx, _, disk):
8840 vg = disk.get(constants.IDISK_VG, vgname)
8841 return (vg, names[idx])
8842 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8843 logical_id_fn = \
8844 lambda _, disk_index, disk: (file_driver,
8845 "%s/disk%d" % (file_storage_dir,
8846 disk_index))
8847 elif template_name == constants.DT_BLOCK:
8848 logical_id_fn = \
8849 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8850 disk[constants.IDISK_ADOPT])
8851 elif template_name == constants.DT_RBD:
8852 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8853 else:
8854 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8855
8856 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8857
8858 for idx, disk in enumerate(disk_info):
8859 disk_index = idx + base_index
8860 size = disk[constants.IDISK_SIZE]
8861 feedback_fn("* disk %s, size %s" %
8862 (disk_index, utils.FormatUnit(size, "h")))
8863 disks.append(objects.Disk(dev_type=dev_type, size=size,
8864 logical_id=logical_id_fn(idx, disk_index, disk),
8865 iv_name="disk/%d" % disk_index,
8866 mode=disk[constants.IDISK_MODE],
8867 params={}))
8868
8869 return disks
8870
8871
8872 -def _GetInstanceInfoText(instance):
8873 """Compute that text that should be added to the disk's metadata.
8874
8875 """
8876 return "originstname+%s" % instance.name
8877
8878
8879 -def _CalcEta(time_taken, written, total_size):
8880 """Calculates the ETA based on size written and total size.
8881
8882 @param time_taken: The time taken so far
8883 @param written: amount written so far
8884 @param total_size: The total size of data to be written
8885 @return: The remaining time in seconds
8886
8887 """
8888 avg_time = time_taken / float(written)
8889 return (total_size - written) * avg_time
8890
8893 """Wipes instance disks.
8894
8895 @type lu: L{LogicalUnit}
8896 @param lu: the logical unit on whose behalf we execute
8897 @type instance: L{objects.Instance}
8898 @param instance: the instance whose disks we should create
8899 @return: the success of the wipe
8900
8901 """
8902 node = instance.primary_node
8903
8904 for device in instance.disks:
8905 lu.cfg.SetDiskID(device, node)
8906
8907 logging.info("Pause sync of instance %s disks", instance.name)
8908 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8909 (instance.disks, instance),
8910 True)
8911 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8912
8913 for idx, success in enumerate(result.payload):
8914 if not success:
8915 logging.warn("pause-sync of instance %s for disks %d failed",
8916 instance.name, idx)
8917
8918 try:
8919 for idx, device in enumerate(instance.disks):
8920
8921
8922 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8923 constants.MIN_WIPE_CHUNK_PERCENT)
8924
8925
8926 wipe_chunk_size = int(wipe_chunk_size)
8927
8928 lu.LogInfo("* Wiping disk %d", idx)
8929 logging.info("Wiping disk %d for instance %s, node %s using"
8930 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8931
8932 offset = 0
8933 size = device.size
8934 last_output = 0
8935 start_time = time.time()
8936
8937 while offset < size:
8938 wipe_size = min(wipe_chunk_size, size - offset)
8939 logging.debug("Wiping disk %d, offset %s, chunk %s",
8940 idx, offset, wipe_size)
8941 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8942 wipe_size)
8943 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8944 (idx, offset, wipe_size))
8945 now = time.time()
8946 offset += wipe_size
8947 if now - last_output >= 60:
8948 eta = _CalcEta(now - start_time, offset, size)
8949 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8950 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8951 last_output = now
8952 finally:
8953 logging.info("Resume sync of instance %s disks", instance.name)
8954
8955 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8956 (instance.disks, instance),
8957 False)
8958
8959 if result.fail_msg:
8960 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8961 " please have a look at the status and troubleshoot"
8962 " the issue: %s", node, result.fail_msg)
8963 else:
8964 for idx, success in enumerate(result.payload):
8965 if not success:
8966 lu.LogWarning("Resume sync of disk %d failed, please have a"
8967 " look at the status and troubleshoot the issue", idx)
8968 logging.warn("resume-sync of instance %s for disks %d failed",
8969 instance.name, idx)
8970
8971
8972 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8973 """Create all disks for an instance.
8974
8975 This abstracts away some work from AddInstance.
8976
8977 @type lu: L{LogicalUnit}
8978 @param lu: the logical unit on whose behalf we execute
8979 @type instance: L{objects.Instance}
8980 @param instance: the instance whose disks we should create
8981 @type to_skip: list
8982 @param to_skip: list of indices to skip
8983 @type target_node: string
8984 @param target_node: if passed, overrides the target node for creation
8985 @rtype: boolean
8986 @return: the success of the creation
8987
8988 """
8989 info = _GetInstanceInfoText(instance)
8990 if target_node is None:
8991 pnode = instance.primary_node
8992 all_nodes = instance.all_nodes
8993 else:
8994 pnode = target_node
8995 all_nodes = [pnode]
8996
8997 if instance.disk_template in constants.DTS_FILEBASED:
8998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8999 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9000
9001 result.Raise("Failed to create directory '%s' on"
9002 " node %s" % (file_storage_dir, pnode))
9003
9004
9005
9006 for idx, device in enumerate(instance.disks):
9007 if to_skip and idx in to_skip:
9008 continue
9009 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9010
9011 for node in all_nodes:
9012 f_create = node == pnode
9013 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9014
9015
9016 -def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9017 """Remove all disks for an instance.
9018
9019 This abstracts away some work from `AddInstance()` and
9020 `RemoveInstance()`. Note that in case some of the devices couldn't
9021 be removed, the removal will continue with the other ones (compare
9022 with `_CreateDisks()`).
9023
9024 @type lu: L{LogicalUnit}
9025 @param lu: the logical unit on whose behalf we execute
9026 @type instance: L{objects.Instance}
9027 @param instance: the instance whose disks we should remove
9028 @type target_node: string
9029 @param target_node: used to override the node on which to remove the disks
9030 @rtype: boolean
9031 @return: the success of the removal
9032
9033 """
9034 logging.info("Removing block devices for instance %s", instance.name)
9035
9036 all_result = True
9037 ports_to_release = set()
9038 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9039 for (idx, device) in enumerate(anno_disks):
9040 if target_node:
9041 edata = [(target_node, device)]
9042 else:
9043 edata = device.ComputeNodeTree(instance.primary_node)
9044 for node, disk in edata:
9045 lu.cfg.SetDiskID(disk, node)
9046 result = lu.rpc.call_blockdev_remove(node, disk)
9047 if result.fail_msg:
9048 lu.LogWarning("Could not remove disk %s on node %s,"
9049 " continuing anyway: %s", idx, node, result.fail_msg)
9050 if not (result.offline and node != instance.primary_node):
9051 all_result = False
9052
9053
9054 if device.dev_type in constants.LDS_DRBD:
9055 ports_to_release.add(device.logical_id[2])
9056
9057 if all_result or ignore_failures:
9058 for port in ports_to_release:
9059 lu.cfg.AddTcpUdpPort(port)
9060
9061 if instance.disk_template in constants.DTS_FILEBASED:
9062 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9063 if target_node:
9064 tgt = target_node
9065 else:
9066 tgt = instance.primary_node
9067 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9068 if result.fail_msg:
9069 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9070 file_storage_dir, instance.primary_node, result.fail_msg)
9071 all_result = False
9072
9073 return all_result
9074
9077 """Compute disk size requirements in the volume group
9078
9079 """
9080 def _compute(disks, payload):
9081 """Universal algorithm.
9082
9083 """
9084 vgs = {}
9085 for disk in disks:
9086 vgs[disk[constants.IDISK_VG]] = \
9087 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9088
9089 return vgs
9090
9091
9092 req_size_dict = {
9093 constants.DT_DISKLESS: {},
9094 constants.DT_PLAIN: _compute(disks, 0),
9095
9096 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9097 constants.DT_FILE: {},
9098 constants.DT_SHARED_FILE: {},
9099 }
9100
9101 if disk_template not in req_size_dict:
9102 raise errors.ProgrammerError("Disk template '%s' size requirement"
9103 " is unknown" % disk_template)
9104
9105 return req_size_dict[disk_template]
9106
9130
9133 """Filters out non-vm_capable nodes from a list.
9134
9135 @type lu: L{LogicalUnit}
9136 @param lu: the logical unit for which we check
9137 @type nodenames: list
9138 @param nodenames: the list of nodes on which we should check
9139 @rtype: list
9140 @return: the list of vm-capable nodes
9141
9142 """
9143 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9144 return [name for name in nodenames if name not in vm_nodes]
9145
9148 """Hypervisor parameter validation.
9149
9150 This function abstract the hypervisor parameter validation to be
9151 used in both instance create and instance modify.
9152
9153 @type lu: L{LogicalUnit}
9154 @param lu: the logical unit for which we check
9155 @type nodenames: list
9156 @param nodenames: the list of nodes on which we should check
9157 @type hvname: string
9158 @param hvname: the name of the hypervisor we should use
9159 @type hvparams: dict
9160 @param hvparams: the parameters which we need to check
9161 @raise errors.OpPrereqError: if the parameters are not valid
9162
9163 """
9164 nodenames = _FilterVmNodes(lu, nodenames)
9165
9166 cluster = lu.cfg.GetClusterInfo()
9167 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9168
9169 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9170 for node in nodenames:
9171 info = hvinfo[node]
9172 if info.offline:
9173 continue
9174 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9175
9178 """OS parameters validation.
9179
9180 @type lu: L{LogicalUnit}
9181 @param lu: the logical unit for which we check
9182 @type required: boolean
9183 @param required: whether the validation should fail if the OS is not
9184 found
9185 @type nodenames: list
9186 @param nodenames: the list of nodes on which we should check
9187 @type osname: string
9188 @param osname: the name of the hypervisor we should use
9189 @type osparams: dict
9190 @param osparams: the parameters which we need to check
9191 @raise errors.OpPrereqError: if the parameters are not valid
9192
9193 """
9194 nodenames = _FilterVmNodes(lu, nodenames)
9195 result = lu.rpc.call_os_validate(nodenames, required, osname,
9196 [constants.OS_VALIDATE_PARAMETERS],
9197 osparams)
9198 for node, nres in result.items():
9199
9200
9201 nres.Raise("OS Parameters validation failed on node %s" % node)
9202 if not nres.payload:
9203 lu.LogInfo("OS %s not found on node %s, validation skipped",
9204 osname, node)
9205
9208 """Create an instance.
9209
9210 """
9211 HPATH = "instance-add"
9212 HTYPE = constants.HTYPE_INSTANCE
9213 REQ_BGL = False
9214
9216 """Check arguments.
9217
9218 """
9219
9220
9221 if self.op.no_install and self.op.start:
9222 self.LogInfo("No-installation mode selected, disabling startup")
9223 self.op.start = False
9224
9225 self.op.instance_name = \
9226 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9227
9228 if self.op.ip_check and not self.op.name_check:
9229
9230 raise errors.OpPrereqError("Cannot do IP address check without a name"
9231 " check", errors.ECODE_INVAL)
9232
9233
9234 for nic in self.op.nics:
9235 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9236
9237
9238 has_adopt = has_no_adopt = False
9239 for disk in self.op.disks:
9240 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9241 if constants.IDISK_ADOPT in disk:
9242 has_adopt = True
9243 else:
9244 has_no_adopt = True
9245 if has_adopt and has_no_adopt:
9246 raise errors.OpPrereqError("Either all disks are adopted or none is",
9247 errors.ECODE_INVAL)
9248 if has_adopt:
9249 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9250 raise errors.OpPrereqError("Disk adoption is not supported for the"
9251 " '%s' disk template" %
9252 self.op.disk_template,
9253 errors.ECODE_INVAL)
9254 if self.op.iallocator is not None:
9255 raise errors.OpPrereqError("Disk adoption not allowed with an"
9256 " iallocator script", errors.ECODE_INVAL)
9257 if self.op.mode == constants.INSTANCE_IMPORT:
9258 raise errors.OpPrereqError("Disk adoption not allowed for"
9259 " instance import", errors.ECODE_INVAL)
9260 else:
9261 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9262 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9263 " but no 'adopt' parameter given" %
9264 self.op.disk_template,
9265 errors.ECODE_INVAL)
9266
9267 self.adopt_disks = has_adopt
9268
9269
9270 if self.op.name_check:
9271 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9272 self.op.instance_name = self.hostname1.name
9273
9274 self.check_ip = self.hostname1.ip
9275 else:
9276 self.check_ip = None
9277
9278
9279 if (self.op.file_driver and
9280 not self.op.file_driver in constants.FILE_DRIVER):
9281 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9282 self.op.file_driver, errors.ECODE_INVAL)
9283
9284 if self.op.disk_template == constants.DT_FILE:
9285 opcodes.RequireFileStorage()
9286 elif self.op.disk_template == constants.DT_SHARED_FILE:
9287 opcodes.RequireSharedFileStorage()
9288
9289
9290 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9291
9292 if self.op.pnode is not None:
9293 if self.op.disk_template in constants.DTS_INT_MIRROR:
9294 if self.op.snode is None:
9295 raise errors.OpPrereqError("The networked disk templates need"
9296 " a mirror node", errors.ECODE_INVAL)
9297 elif self.op.snode:
9298 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9299 " template")
9300 self.op.snode = None
9301
9302 self._cds = _GetClusterDomainSecret()
9303
9304 if self.op.mode == constants.INSTANCE_IMPORT:
9305
9306
9307
9308 self.op.force_variant = True
9309
9310 if self.op.no_install:
9311 self.LogInfo("No-installation mode has no effect during import")
9312
9313 elif self.op.mode == constants.INSTANCE_CREATE:
9314 if self.op.os_type is None:
9315 raise errors.OpPrereqError("No guest OS specified",
9316 errors.ECODE_INVAL)
9317 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9318 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9319 " installation" % self.op.os_type,
9320 errors.ECODE_STATE)
9321 if self.op.disk_template is None:
9322 raise errors.OpPrereqError("No disk template specified",
9323 errors.ECODE_INVAL)
9324
9325 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9326
9327 src_handshake = self.op.source_handshake
9328 if not src_handshake:
9329 raise errors.OpPrereqError("Missing source handshake",
9330 errors.ECODE_INVAL)
9331
9332 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9333 src_handshake)
9334 if errmsg:
9335 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9336 errors.ECODE_INVAL)
9337
9338
9339 self.source_x509_ca_pem = self.op.source_x509_ca
9340 if not self.source_x509_ca_pem:
9341 raise errors.OpPrereqError("Missing source X509 CA",
9342 errors.ECODE_INVAL)
9343
9344 try:
9345 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9346 self._cds)
9347 except OpenSSL.crypto.Error, err:
9348 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9349 (err, ), errors.ECODE_INVAL)
9350
9351 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9352 if errcode is not None:
9353 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9354 errors.ECODE_INVAL)
9355
9356 self.source_x509_ca = cert
9357
9358 src_instance_name = self.op.source_instance_name
9359 if not src_instance_name:
9360 raise errors.OpPrereqError("Missing source instance name",
9361 errors.ECODE_INVAL)
9362
9363 self.source_instance_name = \
9364 netutils.GetHostname(name=src_instance_name).name
9365
9366 else:
9367 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9368 self.op.mode, errors.ECODE_INVAL)
9369
9426
9428 """Run the allocator based on input opcode.
9429
9430 """
9431 nics = [n.ToDict() for n in self.nics]
9432 ial = IAllocator(self.cfg, self.rpc,
9433 mode=constants.IALLOCATOR_MODE_ALLOC,
9434 name=self.op.instance_name,
9435 disk_template=self.op.disk_template,
9436 tags=self.op.tags,
9437 os=self.op.os_type,
9438 vcpus=self.be_full[constants.BE_VCPUS],
9439 memory=self.be_full[constants.BE_MAXMEM],
9440 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9441 disks=self.disks,
9442 nics=nics,
9443 hypervisor=self.op.hypervisor,
9444 )
9445
9446 ial.Run(self.op.iallocator)
9447
9448 if not ial.success:
9449 raise errors.OpPrereqError("Can't compute nodes using"
9450 " iallocator '%s': %s" %
9451 (self.op.iallocator, ial.info),
9452 errors.ECODE_NORES)
9453 if len(ial.result) != ial.required_nodes:
9454 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9455 " of nodes (%s), required %s" %
9456 (self.op.iallocator, len(ial.result),
9457 ial.required_nodes), errors.ECODE_FAULT)
9458 self.op.pnode = ial.result[0]
9459 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9460 self.op.instance_name, self.op.iallocator,
9461 utils.CommaJoin(ial.result))
9462 if ial.required_nodes == 2:
9463 self.op.snode = ial.result[1]
9464
9466 """Build hooks env.
9467
9468 This runs on master, primary and secondary nodes of the instance.
9469
9470 """
9471 env = {
9472 "ADD_MODE": self.op.mode,
9473 }
9474 if self.op.mode == constants.INSTANCE_IMPORT:
9475 env["SRC_NODE"] = self.op.src_node
9476 env["SRC_PATH"] = self.op.src_path
9477 env["SRC_IMAGES"] = self.src_images
9478
9479 env.update(_BuildInstanceHookEnv(
9480 name=self.op.instance_name,
9481 primary_node=self.op.pnode,
9482 secondary_nodes=self.secondaries,
9483 status=self.op.start,
9484 os_type=self.op.os_type,
9485 minmem=self.be_full[constants.BE_MINMEM],
9486 maxmem=self.be_full[constants.BE_MAXMEM],
9487 vcpus=self.be_full[constants.BE_VCPUS],
9488 nics=_NICListToTuple(self, self.nics),
9489 disk_template=self.op.disk_template,
9490 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9491 for d in self.disks],
9492 bep=self.be_full,
9493 hvp=self.hv_full,
9494 hypervisor_name=self.op.hypervisor,
9495 tags=self.op.tags,
9496 ))
9497
9498 return env
9499
9501 """Build hooks nodes.
9502
9503 """
9504 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9505 return nl, nl
9506
9553
9555 """Use export parameters as defaults.
9556
9557 In case the opcode doesn't specify (as in override) some instance
9558 parameters, then try to use them from the export information, if
9559 that declares them.
9560
9561 """
9562 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9563
9564 if self.op.disk_template is None:
9565 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9566 self.op.disk_template = einfo.get(constants.INISECT_INS,
9567 "disk_template")
9568 if self.op.disk_template not in constants.DISK_TEMPLATES:
9569 raise errors.OpPrereqError("Disk template specified in configuration"
9570 " file is not one of the allowed values:"
9571 " %s" % " ".join(constants.DISK_TEMPLATES))
9572 else:
9573 raise errors.OpPrereqError("No disk template specified and the export"
9574 " is missing the disk_template information",
9575 errors.ECODE_INVAL)
9576
9577 if not self.op.disks:
9578 disks = []
9579
9580 for idx in range(constants.MAX_DISKS):
9581 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9582 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9583 disks.append({constants.IDISK_SIZE: disk_sz})
9584 self.op.disks = disks
9585 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9586 raise errors.OpPrereqError("No disk info specified and the export"
9587 " is missing the disk information",
9588 errors.ECODE_INVAL)
9589
9590 if not self.op.nics:
9591 nics = []
9592 for idx in range(constants.MAX_NICS):
9593 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9594 ndict = {}
9595 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9596 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9597 ndict[name] = v
9598 nics.append(ndict)
9599 else:
9600 break
9601 self.op.nics = nics
9602
9603 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9604 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9605
9606 if (self.op.hypervisor is None and
9607 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9608 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9609
9610 if einfo.has_section(constants.INISECT_HYP):
9611
9612
9613 for name, value in einfo.items(constants.INISECT_HYP):
9614 if name not in self.op.hvparams:
9615 self.op.hvparams[name] = value
9616
9617 if einfo.has_section(constants.INISECT_BEP):
9618
9619 for name, value in einfo.items(constants.INISECT_BEP):
9620 if name not in self.op.beparams:
9621 self.op.beparams[name] = value
9622
9623 if name == constants.BE_MEMORY:
9624 if constants.BE_MAXMEM not in self.op.beparams:
9625 self.op.beparams[constants.BE_MAXMEM] = value
9626 if constants.BE_MINMEM not in self.op.beparams:
9627 self.op.beparams[constants.BE_MINMEM] = value
9628 else:
9629
9630 for name in constants.BES_PARAMETERS:
9631 if (name not in self.op.beparams and
9632 einfo.has_option(constants.INISECT_INS, name)):
9633 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9634
9635 if einfo.has_section(constants.INISECT_OSP):
9636
9637 for name, value in einfo.items(constants.INISECT_OSP):
9638 if name not in self.op.osparams:
9639 self.op.osparams[name] = value
9640
9642 """Revert the instance parameters to the default values.
9643
9644 """
9645
9646 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9647 for name in self.op.hvparams.keys():
9648 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9649 del self.op.hvparams[name]
9650
9651 be_defs = cluster.SimpleFillBE({})
9652 for name in self.op.beparams.keys():
9653 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9654 del self.op.beparams[name]
9655
9656 nic_defs = cluster.SimpleFillNIC({})
9657 for nic in self.op.nics:
9658 for name in constants.NICS_PARAMETERS:
9659 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9660 del nic[name]
9661
9662 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9663 for name in self.op.osparams.keys():
9664 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9665 del self.op.osparams[name]
9666
9668 """Calculate final instance file storage dir.
9669
9670 """
9671
9672 self.instance_file_storage_dir = None
9673 if self.op.disk_template in constants.DTS_FILEBASED:
9674
9675 joinargs = []
9676
9677 if self.op.disk_template == constants.DT_SHARED_FILE:
9678 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9679 else:
9680 get_fsd_fn = self.cfg.GetFileStorageDir
9681
9682 cfg_storagedir = get_fsd_fn()
9683 if not cfg_storagedir:
9684 raise errors.OpPrereqError("Cluster file storage dir not defined")
9685 joinargs.append(cfg_storagedir)
9686
9687 if self.op.file_storage_dir is not None:
9688 joinargs.append(self.op.file_storage_dir)
9689
9690 joinargs.append(self.op.instance_name)
9691
9692
9693 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9694
9696 """Check prerequisites.
9697
9698 """
9699 self._CalculateFileStorageDir()
9700
9701 if self.op.mode == constants.INSTANCE_IMPORT:
9702 export_info = self._ReadExportInfo()
9703 self._ReadExportParams(export_info)
9704 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9705 else:
9706 self._old_instance_name = None
9707
9708 if (not self.cfg.GetVGName() and
9709 self.op.disk_template not in constants.DTS_NOT_LVM):
9710 raise errors.OpPrereqError("Cluster does not support lvm-based"
9711 " instances", errors.ECODE_STATE)
9712
9713 if (self.op.hypervisor is None or
9714 self.op.hypervisor == constants.VALUE_AUTO):
9715 self.op.hypervisor = self.cfg.GetHypervisorType()
9716
9717 cluster = self.cfg.GetClusterInfo()
9718 enabled_hvs = cluster.enabled_hypervisors
9719 if self.op.hypervisor not in enabled_hvs:
9720 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9721 " cluster (%s)" % (self.op.hypervisor,
9722 ",".join(enabled_hvs)),
9723 errors.ECODE_STATE)
9724
9725
9726 for tag in self.op.tags:
9727 objects.TaggableObject.ValidateTag(tag)
9728
9729
9730 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9731 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9732 self.op.hvparams)
9733 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
9734 hv_type.CheckParameterSyntax(filled_hvp)
9735 self.hv_full = filled_hvp
9736
9737 _CheckGlobalHvParams(self.op.hvparams)
9738
9739
9740 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9741 for param, value in self.op.beparams.iteritems():
9742 if value == constants.VALUE_AUTO:
9743 self.op.beparams[param] = default_beparams[param]
9744 objects.UpgradeBeParams(self.op.beparams)
9745 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9746 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9747
9748
9749 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9750
9751
9752
9753 if self.op.identify_defaults:
9754 self._RevertToDefaults(cluster)
9755
9756
9757 self.nics = []
9758 for idx, nic in enumerate(self.op.nics):
9759 nic_mode_req = nic.get(constants.INIC_MODE, None)
9760 nic_mode = nic_mode_req
9761 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9762 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9763
9764
9765 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9766 default_ip_mode = constants.VALUE_AUTO
9767 else:
9768 default_ip_mode = constants.VALUE_NONE
9769
9770
9771 ip = nic.get(constants.INIC_IP, default_ip_mode)
9772 if ip is None or ip.lower() == constants.VALUE_NONE:
9773 nic_ip = None
9774 elif ip.lower() == constants.VALUE_AUTO:
9775 if not self.op.name_check:
9776 raise errors.OpPrereqError("IP address set to auto but name checks"
9777 " have been skipped",
9778 errors.ECODE_INVAL)
9779 nic_ip = self.hostname1.ip
9780 else:
9781 if not netutils.IPAddress.IsValid(ip):
9782 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9783 errors.ECODE_INVAL)
9784 nic_ip = ip
9785
9786
9787 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9788 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9789 errors.ECODE_INVAL)
9790
9791
9792 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9793 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9794 mac = utils.NormalizeAndValidateMac(mac)
9795
9796 try:
9797 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9798 except errors.ReservationError:
9799 raise errors.OpPrereqError("MAC address %s already in use"
9800 " in cluster" % mac,
9801 errors.ECODE_NOTUNIQUE)
9802
9803
9804 link = nic.get(constants.INIC_LINK, None)
9805 if link == constants.VALUE_AUTO:
9806 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9807 nicparams = {}
9808 if nic_mode_req:
9809 nicparams[constants.NIC_MODE] = nic_mode
9810 if link:
9811 nicparams[constants.NIC_LINK] = link
9812
9813 check_params = cluster.SimpleFillNIC(nicparams)
9814 objects.NIC.CheckParameterSyntax(check_params)
9815 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9816
9817
9818 default_vg = self.cfg.GetVGName()
9819 self.disks = []
9820 for disk in self.op.disks:
9821 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9822 if mode not in constants.DISK_ACCESS_SET:
9823 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9824 mode, errors.ECODE_INVAL)
9825 size = disk.get(constants.IDISK_SIZE, None)
9826 if size is None:
9827 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9828 try:
9829 size = int(size)
9830 except (TypeError, ValueError):
9831 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9832 errors.ECODE_INVAL)
9833
9834 data_vg = disk.get(constants.IDISK_VG, default_vg)
9835 new_disk = {
9836 constants.IDISK_SIZE: size,
9837 constants.IDISK_MODE: mode,
9838 constants.IDISK_VG: data_vg,
9839 }
9840 if constants.IDISK_METAVG in disk:
9841 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9842 if constants.IDISK_ADOPT in disk:
9843 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9844 self.disks.append(new_disk)
9845
9846 if self.op.mode == constants.INSTANCE_IMPORT:
9847 disk_images = []
9848 for idx in range(len(self.disks)):
9849 option = "disk%d_dump" % idx
9850 if export_info.has_option(constants.INISECT_INS, option):
9851
9852 export_name = export_info.get(constants.INISECT_INS, option)
9853 image = utils.PathJoin(self.op.src_path, export_name)
9854 disk_images.append(image)
9855 else:
9856 disk_images.append(False)
9857
9858 self.src_images = disk_images
9859
9860 if self.op.instance_name == self._old_instance_name:
9861 for idx, nic in enumerate(self.nics):
9862 if nic.mac == constants.VALUE_AUTO:
9863 nic_mac_ini = "nic%d_mac" % idx
9864 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9865
9866
9867
9868
9869 if self.op.ip_check:
9870 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9871 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9872 (self.check_ip, self.op.instance_name),
9873 errors.ECODE_NOTUNIQUE)
9874
9875
9876
9877
9878
9879
9880
9881
9882
9883 for nic in self.nics:
9884 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9885 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9886
9887
9888
9889 if self.op.iallocator is not None:
9890 self._RunAllocator()
9891
9892
9893 _ReleaseLocks(self, locking.LEVEL_NODE,
9894 keep=filter(None, [self.op.pnode, self.op.snode,
9895 self.op.src_node]))
9896 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9897 keep=filter(None, [self.op.pnode, self.op.snode,
9898 self.op.src_node]))
9899
9900
9901
9902
9903 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9904 assert self.pnode is not None, \
9905 "Cannot retrieve locked node %s" % self.op.pnode
9906 if pnode.offline:
9907 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9908 pnode.name, errors.ECODE_STATE)
9909 if pnode.drained:
9910 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9911 pnode.name, errors.ECODE_STATE)
9912 if not pnode.vm_capable:
9913 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9914 " '%s'" % pnode.name, errors.ECODE_STATE)
9915
9916 self.secondaries = []
9917
9918
9919 if self.op.disk_template in constants.DTS_INT_MIRROR:
9920 if self.op.snode == pnode.name:
9921 raise errors.OpPrereqError("The secondary node cannot be the"
9922 " primary node", errors.ECODE_INVAL)
9923 _CheckNodeOnline(self, self.op.snode)
9924 _CheckNodeNotDrained(self, self.op.snode)
9925 _CheckNodeVmCapable(self, self.op.snode)
9926 self.secondaries.append(self.op.snode)
9927
9928 snode = self.cfg.GetNodeInfo(self.op.snode)
9929 if pnode.group != snode.group:
9930 self.LogWarning("The primary and secondary nodes are in two"
9931 " different node groups; the disk parameters"
9932 " from the first disk's node group will be"
9933 " used")
9934
9935 nodenames = [pnode.name] + self.secondaries
9936
9937 if not self.adopt_disks:
9938 if self.op.disk_template == constants.DT_RBD:
9939
9940
9941
9942 _CheckRADOSFreeSpace()
9943 else:
9944
9945 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9946 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9947
9948 elif self.op.disk_template == constants.DT_PLAIN:
9949 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9950 disk[constants.IDISK_ADOPT])
9951 for disk in self.disks])
9952 if len(all_lvs) != len(self.disks):
9953 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9954 errors.ECODE_INVAL)
9955 for lv_name in all_lvs:
9956 try:
9957
9958
9959 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9960 except errors.ReservationError:
9961 raise errors.OpPrereqError("LV named %s used by another instance" %
9962 lv_name, errors.ECODE_NOTUNIQUE)
9963
9964 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9965 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9966
9967 node_lvs = self.rpc.call_lv_list([pnode.name],
9968 vg_names.payload.keys())[pnode.name]
9969 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9970 node_lvs = node_lvs.payload
9971
9972 delta = all_lvs.difference(node_lvs.keys())
9973 if delta:
9974 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9975 utils.CommaJoin(delta),
9976 errors.ECODE_INVAL)
9977 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9978 if online_lvs:
9979 raise errors.OpPrereqError("Online logical volumes found, cannot"
9980 " adopt: %s" % utils.CommaJoin(online_lvs),
9981 errors.ECODE_STATE)
9982
9983 for dsk in self.disks:
9984 dsk[constants.IDISK_SIZE] = \
9985 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9986 dsk[constants.IDISK_ADOPT])][0]))
9987
9988 elif self.op.disk_template == constants.DT_BLOCK:
9989
9990 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9991 for disk in self.disks])
9992 if len(all_disks) != len(self.disks):
9993 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9994 errors.ECODE_INVAL)
9995 baddisks = [d for d in all_disks
9996 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9997 if baddisks:
9998 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9999 " cannot be adopted" %
10000 (", ".join(baddisks),
10001 constants.ADOPTABLE_BLOCKDEV_ROOT),
10002 errors.ECODE_INVAL)
10003
10004 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10005 list(all_disks))[pnode.name]
10006 node_disks.Raise("Cannot get block device information from node %s" %
10007 pnode.name)
10008 node_disks = node_disks.payload
10009 delta = all_disks.difference(node_disks.keys())
10010 if delta:
10011 raise errors.OpPrereqError("Missing block device(s): %s" %
10012 utils.CommaJoin(delta),
10013 errors.ECODE_INVAL)
10014 for dsk in self.disks:
10015 dsk[constants.IDISK_SIZE] = \
10016 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10017
10018
10019 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10020 ispec = {
10021 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10022 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10023 constants.ISPEC_DISK_COUNT: len(self.disks),
10024 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10025 for disk in self.disks],
10026 constants.ISPEC_NIC_COUNT: len(self.nics),
10027 constants.ISPEC_SPINDLE_USE: spindle_use,
10028 }
10029
10030 group_info = self.cfg.GetNodeGroup(pnode.group)
10031 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10032 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10033 if not self.op.ignore_ipolicy and res:
10034 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10035 " policy: %s") % (pnode.group,
10036 utils.CommaJoin(res)),
10037 errors.ECODE_INVAL)
10038
10039 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10040
10041 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10042
10043 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10044
10045 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10046
10047
10048
10049 if self.op.start:
10050 _CheckNodeFreeMemory(self, self.pnode.name,
10051 "creating instance %s" % self.op.instance_name,
10052 self.be_full[constants.BE_MAXMEM],
10053 self.op.hypervisor)
10054
10055 self.dry_run_result = list(nodenames)
10056
10057 - def Exec(self, feedback_fn):
10058 """Create and add the instance to the cluster.
10059
10060 """
10061 instance = self.op.instance_name
10062 pnode_name = self.pnode.name
10063
10064 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10065 self.owned_locks(locking.LEVEL_NODE)), \
10066 "Node locks differ from node resource locks"
10067
10068 ht_kind = self.op.hypervisor
10069 if ht_kind in constants.HTS_REQ_PORT:
10070 network_port = self.cfg.AllocatePort()
10071 else:
10072 network_port = None
10073
10074
10075
10076
10077 node = self.cfg.GetNodeInfo(pnode_name)
10078 nodegroup = self.cfg.GetNodeGroup(node.group)
10079 disks = _GenerateDiskTemplate(self,
10080 self.op.disk_template,
10081 instance, pnode_name,
10082 self.secondaries,
10083 self.disks,
10084 self.instance_file_storage_dir,
10085 self.op.file_driver,
10086 0,
10087 feedback_fn,
10088 self.cfg.GetGroupDiskParams(nodegroup))
10089
10090 iobj = objects.Instance(name=instance, os=self.op.os_type,
10091 primary_node=pnode_name,
10092 nics=self.nics, disks=disks,
10093 disk_template=self.op.disk_template,
10094 admin_state=constants.ADMINST_DOWN,
10095 network_port=network_port,
10096 beparams=self.op.beparams,
10097 hvparams=self.op.hvparams,
10098 hypervisor=self.op.hypervisor,
10099 osparams=self.op.osparams,
10100 )
10101
10102 if self.op.tags:
10103 for tag in self.op.tags:
10104 iobj.AddTag(tag)
10105
10106 if self.adopt_disks:
10107 if self.op.disk_template == constants.DT_PLAIN:
10108
10109
10110 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10111 rename_to = []
10112 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10113 rename_to.append(t_dsk.logical_id)
10114 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10115 self.cfg.SetDiskID(t_dsk, pnode_name)
10116 result = self.rpc.call_blockdev_rename(pnode_name,
10117 zip(tmp_disks, rename_to))
10118 result.Raise("Failed to rename adoped LVs")
10119 else:
10120 feedback_fn("* creating instance disks...")
10121 try:
10122 _CreateDisks(self, iobj)
10123 except errors.OpExecError:
10124 self.LogWarning("Device creation failed, reverting...")
10125 try:
10126 _RemoveDisks(self, iobj)
10127 finally:
10128 self.cfg.ReleaseDRBDMinors(instance)
10129 raise
10130
10131 feedback_fn("adding instance %s to cluster config" % instance)
10132
10133 self.cfg.AddInstance(iobj, self.proc.GetECId())
10134
10135
10136
10137 del self.remove_locks[locking.LEVEL_INSTANCE]
10138
10139 if self.op.mode == constants.INSTANCE_IMPORT:
10140
10141 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10142 else:
10143
10144 _ReleaseLocks(self, locking.LEVEL_NODE)
10145
10146 disk_abort = False
10147 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10148 feedback_fn("* wiping instance disks...")
10149 try:
10150 _WipeDisks(self, iobj)
10151 except errors.OpExecError, err:
10152 logging.exception("Wiping disks failed")
10153 self.LogWarning("Wiping instance disks failed (%s)", err)
10154 disk_abort = True
10155
10156 if disk_abort:
10157
10158 pass
10159 elif self.op.wait_for_sync:
10160 disk_abort = not _WaitForSync(self, iobj)
10161 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10162
10163 feedback_fn("* checking mirrors status")
10164 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10165 else:
10166 disk_abort = False
10167
10168 if disk_abort:
10169 _RemoveDisks(self, iobj)
10170 self.cfg.RemoveInstance(iobj.name)
10171
10172 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10173 raise errors.OpExecError("There are some degraded disks for"
10174 " this instance")
10175
10176
10177 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10178
10179 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10180
10181
10182
10183 for disk in iobj.disks:
10184 self.cfg.SetDiskID(disk, pnode_name)
10185 if self.op.mode == constants.INSTANCE_CREATE:
10186 if not self.op.no_install:
10187 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10188 not self.op.wait_for_sync)
10189 if pause_sync:
10190 feedback_fn("* pausing disk sync to install instance OS")
10191 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10192 (iobj.disks,
10193 iobj), True)
10194 for idx, success in enumerate(result.payload):
10195 if not success:
10196 logging.warn("pause-sync of instance %s for disk %d failed",
10197 instance, idx)
10198
10199 feedback_fn("* running the instance OS create scripts...")
10200
10201 os_add_result = \
10202 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10203 self.op.debug_level)
10204 if pause_sync:
10205 feedback_fn("* resuming disk sync")
10206 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10207 (iobj.disks,
10208 iobj), False)
10209 for idx, success in enumerate(result.payload):
10210 if not success:
10211 logging.warn("resume-sync of instance %s for disk %d failed",
10212 instance, idx)
10213
10214 os_add_result.Raise("Could not add os for instance %s"
10215 " on node %s" % (instance, pnode_name))
10216
10217 else:
10218 if self.op.mode == constants.INSTANCE_IMPORT:
10219 feedback_fn("* running the instance OS import scripts...")
10220
10221 transfers = []
10222
10223 for idx, image in enumerate(self.src_images):
10224 if not image:
10225 continue
10226
10227
10228 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10229 constants.IEIO_FILE, (image, ),
10230 constants.IEIO_SCRIPT,
10231 (iobj.disks[idx], idx),
10232 None)
10233 transfers.append(dt)
10234
10235 import_result = \
10236 masterd.instance.TransferInstanceData(self, feedback_fn,
10237 self.op.src_node, pnode_name,
10238 self.pnode.secondary_ip,
10239 iobj, transfers)
10240 if not compat.all(import_result):
10241 self.LogWarning("Some disks for instance %s on node %s were not"
10242 " imported successfully" % (instance, pnode_name))
10243
10244 rename_from = self._old_instance_name
10245
10246 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10247 feedback_fn("* preparing remote import...")
10248
10249
10250
10251
10252 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10253 self.op.source_shutdown_timeout)
10254 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10255
10256 assert iobj.primary_node == self.pnode.name
10257 disk_results = \
10258 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10259 self.source_x509_ca,
10260 self._cds, timeouts)
10261 if not compat.all(disk_results):
10262
10263
10264 self.LogWarning("Some disks for instance %s on node %s were not"
10265 " imported successfully" % (instance, pnode_name))
10266
10267 rename_from = self.source_instance_name
10268
10269 else:
10270
10271 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10272 % self.op.mode)
10273
10274
10275 assert iobj.name == instance
10276 feedback_fn("Running rename script for %s" % instance)
10277 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10278 rename_from,
10279 self.op.debug_level)
10280 if result.fail_msg:
10281 self.LogWarning("Failed to run rename script for %s on node"
10282 " %s: %s" % (instance, pnode_name, result.fail_msg))
10283
10284 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10285
10286 if self.op.start:
10287 iobj.admin_state = constants.ADMINST_UP
10288 self.cfg.Update(iobj, feedback_fn)
10289 logging.info("Starting instance %s on node %s", instance, pnode_name)
10290 feedback_fn("* starting instance...")
10291 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10292 False)
10293 result.Raise("Could not start instance")
10294
10295 return list(iobj.all_nodes)
10296
10299 """Compute disk size requirements inside the RADOS cluster.
10300
10301 """
10302
10303 pass
10304
10307 """Connect to an instance's console.
10308
10309 This is somewhat special in that it returns the command line that
10310 you need to run on the master node in order to connect to the
10311 console.
10312
10313 """
10314 REQ_BGL = False
10315
10319
10321 """Check prerequisites.
10322
10323 This checks that the instance is in the cluster.
10324
10325 """
10326 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10327 assert self.instance is not None, \
10328 "Cannot retrieve locked instance %s" % self.op.instance_name
10329 _CheckNodeOnline(self, self.instance.primary_node)
10330
10331 - def Exec(self, feedback_fn):
10355
10376
10379 """Replace the disks of an instance.
10380
10381 """
10382 HPATH = "mirrors-replace"
10383 HTYPE = constants.HTYPE_INSTANCE
10384 REQ_BGL = False
10385
10389
10391 self._ExpandAndLockInstance()
10392
10393 assert locking.LEVEL_NODE not in self.needed_locks
10394 assert locking.LEVEL_NODE_RES not in self.needed_locks
10395 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10396
10397 assert self.op.iallocator is None or self.op.remote_node is None, \
10398 "Conflicting options"
10399
10400 if self.op.remote_node is not None:
10401 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10402
10403
10404
10405
10406
10407 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10408 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10409 else:
10410 self.needed_locks[locking.LEVEL_NODE] = []
10411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10412
10413 if self.op.iallocator is not None:
10414
10415 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10416
10417 self.needed_locks[locking.LEVEL_NODE_RES] = []
10418
10419 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10420 self.op.iallocator, self.op.remote_node,
10421 self.op.disks, False, self.op.early_release,
10422 self.op.ignore_ipolicy)
10423
10424 self.tasklets = [self.replacer]
10425
10453
10455 """Build hooks env.
10456
10457 This runs on the master, the primary and all the secondaries.
10458
10459 """
10460 instance = self.replacer.instance
10461 env = {
10462 "MODE": self.op.mode,
10463 "NEW_SECONDARY": self.op.remote_node,
10464 "OLD_SECONDARY": instance.secondary_nodes[0],
10465 }
10466 env.update(_BuildInstanceHookEnvByObject(self, instance))
10467 return env
10468
10470 """Build hooks nodes.
10471
10472 """
10473 instance = self.replacer.instance
10474 nl = [
10475 self.cfg.GetMasterNode(),
10476 instance.primary_node,
10477 ]
10478 if self.op.remote_node is not None:
10479 nl.append(self.op.remote_node)
10480 return nl, nl
10481
10495
10498 """Replaces disks for an instance.
10499
10500 Note: Locking is not within the scope of this class.
10501
10502 """
10503 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10504 disks, delay_iallocator, early_release, ignore_ipolicy):
10505 """Initializes this class.
10506
10507 """
10508 Tasklet.__init__(self, lu)
10509
10510
10511 self.instance_name = instance_name
10512 self.mode = mode
10513 self.iallocator_name = iallocator_name
10514 self.remote_node = remote_node
10515 self.disks = disks
10516 self.delay_iallocator = delay_iallocator
10517 self.early_release = early_release
10518 self.ignore_ipolicy = ignore_ipolicy
10519
10520
10521 self.instance = None
10522 self.new_node = None
10523 self.target_node = None
10524 self.other_node = None
10525 self.remote_node_info = None
10526 self.node_secondary_ip = None
10527
10528 @staticmethod
10530 """Helper function for users of this class.
10531
10532 """
10533
10534 if mode == constants.REPLACE_DISK_CHG:
10535 if remote_node is None and iallocator is None:
10536 raise errors.OpPrereqError("When changing the secondary either an"
10537 " iallocator script must be used or the"
10538 " new node given", errors.ECODE_INVAL)
10539
10540 if remote_node is not None and iallocator is not None:
10541 raise errors.OpPrereqError("Give either the iallocator or the new"
10542 " secondary, not both", errors.ECODE_INVAL)
10543
10544 elif remote_node is not None or iallocator is not None:
10545
10546 raise errors.OpPrereqError("The iallocator and new node options can"
10547 " only be used when changing the"
10548 " secondary node", errors.ECODE_INVAL)
10549
10550 @staticmethod
10551 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10552 """Compute a new secondary node using an IAllocator.
10553
10554 """
10555 ial = IAllocator(lu.cfg, lu.rpc,
10556 mode=constants.IALLOCATOR_MODE_RELOC,
10557 name=instance_name,
10558 relocate_from=list(relocate_from))
10559
10560 ial.Run(iallocator_name)
10561
10562 if not ial.success:
10563 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10564 " %s" % (iallocator_name, ial.info),
10565 errors.ECODE_NORES)
10566
10567 if len(ial.result) != ial.required_nodes:
10568 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10569 " of nodes (%s), required %s" %
10570 (iallocator_name,
10571 len(ial.result), ial.required_nodes),
10572 errors.ECODE_FAULT)
10573
10574 remote_node_name = ial.result[0]
10575
10576 lu.LogInfo("Selected new secondary for instance '%s': %s",
10577 instance_name, remote_node_name)
10578
10579 return remote_node_name
10580
10587
10589 """Checks if the instance disks are activated.
10590
10591 @param instance: The instance to check disks
10592 @return: True if they are activated, False otherwise
10593
10594 """
10595 nodes = instance.all_nodes
10596
10597 for idx, dev in enumerate(instance.disks):
10598 for node in nodes:
10599 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10600 self.cfg.SetDiskID(dev, node)
10601
10602 result = _BlockdevFind(self, node, dev, instance)
10603
10604 if result.offline:
10605 continue
10606 elif result.fail_msg or not result.payload:
10607 return False
10608
10609 return True
10610
10633
10635 """Check prerequisites, second part.
10636
10637 This function should always be part of CheckPrereq. It was separated and is
10638 now called from Exec because during node evacuation iallocator was only
10639 called with an unmodified cluster model, not taking planned changes into
10640 account.
10641
10642 """
10643 instance = self.instance
10644 secondary_node = instance.secondary_nodes[0]
10645
10646 if self.iallocator_name is None:
10647 remote_node = self.remote_node
10648 else:
10649 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10650 instance.name, instance.secondary_nodes)
10651
10652 if remote_node is None:
10653 self.remote_node_info = None
10654 else:
10655 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10656 "Remote node '%s' is not locked" % remote_node
10657
10658 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10659 assert self.remote_node_info is not None, \
10660 "Cannot retrieve locked node %s" % remote_node
10661
10662 if remote_node == self.instance.primary_node:
10663 raise errors.OpPrereqError("The specified node is the primary node of"
10664 " the instance", errors.ECODE_INVAL)
10665
10666 if remote_node == secondary_node:
10667 raise errors.OpPrereqError("The specified node is already the"
10668 " secondary node of the instance",
10669 errors.ECODE_INVAL)
10670
10671 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10672 constants.REPLACE_DISK_CHG):
10673 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10674 errors.ECODE_INVAL)
10675
10676 if self.mode == constants.REPLACE_DISK_AUTO:
10677 if not self._CheckDisksActivated(instance):
10678 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10679 " first" % self.instance_name,
10680 errors.ECODE_STATE)
10681 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10682 faulty_secondary = self._FindFaultyDisks(secondary_node)
10683
10684 if faulty_primary and faulty_secondary:
10685 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10686 " one node and can not be repaired"
10687 " automatically" % self.instance_name,
10688 errors.ECODE_STATE)
10689
10690 if faulty_primary:
10691 self.disks = faulty_primary
10692 self.target_node = instance.primary_node
10693 self.other_node = secondary_node
10694 check_nodes = [self.target_node, self.other_node]
10695 elif faulty_secondary:
10696 self.disks = faulty_secondary
10697 self.target_node = secondary_node
10698 self.other_node = instance.primary_node
10699 check_nodes = [self.target_node, self.other_node]
10700 else:
10701 self.disks = []
10702 check_nodes = []
10703
10704 else:
10705
10706 if self.mode == constants.REPLACE_DISK_PRI:
10707 self.target_node = instance.primary_node
10708 self.other_node = secondary_node
10709 check_nodes = [self.target_node, self.other_node]
10710
10711 elif self.mode == constants.REPLACE_DISK_SEC:
10712 self.target_node = secondary_node
10713 self.other_node = instance.primary_node
10714 check_nodes = [self.target_node, self.other_node]
10715
10716 elif self.mode == constants.REPLACE_DISK_CHG:
10717 self.new_node = remote_node
10718 self.other_node = instance.primary_node
10719 self.target_node = secondary_node
10720 check_nodes = [self.new_node, self.other_node]
10721
10722 _CheckNodeNotDrained(self.lu, remote_node)
10723 _CheckNodeVmCapable(self.lu, remote_node)
10724
10725 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10726 assert old_node_info is not None
10727 if old_node_info.offline and not self.early_release:
10728
10729 self.early_release = True
10730 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10731 " early-release mode", secondary_node)
10732
10733 else:
10734 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10735 self.mode)
10736
10737
10738 if not self.disks:
10739 self.disks = range(len(self.instance.disks))
10740
10741
10742
10743 if self.remote_node_info:
10744
10745 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10746 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10747 new_group_info)
10748 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10749 ignore=self.ignore_ipolicy)
10750
10751 for node in check_nodes:
10752 _CheckNodeOnline(self.lu, node)
10753
10754 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10755 self.other_node,
10756 self.target_node]
10757 if node_name is not None)
10758
10759
10760 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10761 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10762
10763
10764 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10765 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10766
10767
10768 for disk_idx in self.disks:
10769 instance.FindDisk(disk_idx)
10770
10771
10772 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10773 in self.cfg.GetMultiNodeInfo(touched_nodes))
10774
10775 - def Exec(self, feedback_fn):
10776 """Execute disk replacement.
10777
10778 This dispatches the disk replacement to the appropriate handler.
10779
10780 """
10781 if self.delay_iallocator:
10782 self._CheckPrereq2()
10783
10784 if __debug__:
10785
10786 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10787 assert set(owned_nodes) == set(self.node_secondary_ip), \
10788 ("Incorrect node locks, owning %s, expected %s" %
10789 (owned_nodes, self.node_secondary_ip.keys()))
10790 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10791 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10792
10793 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10794 assert list(owned_instances) == [self.instance_name], \
10795 "Instance '%s' not locked" % self.instance_name
10796
10797 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10798 "Should not own any node group lock at this point"
10799
10800 if not self.disks:
10801 feedback_fn("No disks need replacement for instance '%s'" %
10802 self.instance.name)
10803 return
10804
10805 feedback_fn("Replacing disk(s) %s for instance '%s'" %
10806 (utils.CommaJoin(self.disks), self.instance.name))
10807 feedback_fn("Current primary node: %s", self.instance.primary_node)
10808 feedback_fn("Current seconary node: %s",
10809 utils.CommaJoin(self.instance.secondary_nodes))
10810
10811 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10812
10813
10814 if activate_disks:
10815 _StartInstanceDisks(self.lu, self.instance, True)
10816
10817 try:
10818
10819 if self.new_node is not None:
10820 fn = self._ExecDrbd8Secondary
10821 else:
10822 fn = self._ExecDrbd8DiskOnly
10823
10824 result = fn(feedback_fn)
10825 finally:
10826
10827
10828 if activate_disks:
10829 _SafeShutdownInstanceDisks(self.lu, self.instance)
10830
10831 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10832
10833 if __debug__:
10834
10835 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10836 nodes = frozenset(self.node_secondary_ip)
10837 assert ((self.early_release and not owned_nodes) or
10838 (not self.early_release and not (set(owned_nodes) - nodes))), \
10839 ("Not owning the correct locks, early_release=%s, owned=%r,"
10840 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10841
10842 return result
10843
10845 self.lu.LogInfo("Checking volume groups")
10846
10847 vgname = self.cfg.GetVGName()
10848
10849
10850 results = self.rpc.call_vg_list(nodes)
10851 if not results:
10852 raise errors.OpExecError("Can't list volume groups on the nodes")
10853
10854 for node in nodes:
10855 res = results[node]
10856 res.Raise("Error checking node %s" % node)
10857 if vgname not in res.payload:
10858 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10859 (vgname, node))
10860
10862
10863 for idx, dev in enumerate(self.instance.disks):
10864 if idx not in self.disks:
10865 continue
10866
10867 for node in nodes:
10868 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10869 self.cfg.SetDiskID(dev, node)
10870
10871 result = _BlockdevFind(self, node, dev, self.instance)
10872
10873 msg = result.fail_msg
10874 if msg or not result.payload:
10875 if not msg:
10876 msg = "disk not found"
10877 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10878 (idx, node, msg))
10879
10881 for idx, dev in enumerate(self.instance.disks):
10882 if idx not in self.disks:
10883 continue
10884
10885 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10886 (idx, node_name))
10887
10888 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10889 on_primary, ldisk=ldisk):
10890 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10891 " replace disks for instance %s" %
10892 (node_name, self.instance.name))
10893
10895 """Create new storage on the primary or secondary node.
10896
10897 This is only used for same-node replaces, not for changing the
10898 secondary node, hence we don't want to modify the existing disk.
10899
10900 """
10901 iv_names = {}
10902
10903 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10904 for idx, dev in enumerate(disks):
10905 if idx not in self.disks:
10906 continue
10907
10908 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10909
10910 self.cfg.SetDiskID(dev, node_name)
10911
10912 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10913 names = _GenerateUniqueNames(self.lu, lv_names)
10914
10915 (data_disk, meta_disk) = dev.children
10916 vg_data = data_disk.logical_id[0]
10917 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10918 logical_id=(vg_data, names[0]),
10919 params=data_disk.params)
10920 vg_meta = meta_disk.logical_id[0]
10921 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10922 logical_id=(vg_meta, names[1]),
10923 params=meta_disk.params)
10924
10925 new_lvs = [lv_data, lv_meta]
10926 old_lvs = [child.Copy() for child in dev.children]
10927 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10928
10929
10930 for new_lv in new_lvs:
10931 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10932 _GetInstanceInfoText(self.instance), False)
10933
10934 return iv_names
10935
10937 for name, (dev, _, _) in iv_names.iteritems():
10938 self.cfg.SetDiskID(dev, node_name)
10939
10940 result = _BlockdevFind(self, node_name, dev, self.instance)
10941
10942 msg = result.fail_msg
10943 if msg or not result.payload:
10944 if not msg:
10945 msg = "disk not found"
10946 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10947 (name, msg))
10948
10949 if result.payload.is_degraded:
10950 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10951
10953 for name, (_, old_lvs, _) in iv_names.iteritems():
10954 self.lu.LogInfo("Remove logical volumes for %s" % name)
10955
10956 for lv in old_lvs:
10957 self.cfg.SetDiskID(lv, node_name)
10958
10959 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10960 if msg:
10961 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10962 hint="remove unused LVs manually")
10963
10965 """Replace a disk on the primary or secondary for DRBD 8.
10966
10967 The algorithm for replace is quite complicated:
10968
10969 1. for each disk to be replaced:
10970
10971 1. create new LVs on the target node with unique names
10972 1. detach old LVs from the drbd device
10973 1. rename old LVs to name_replaced.<time_t>
10974 1. rename new LVs to old LVs
10975 1. attach the new LVs (with the old names now) to the drbd device
10976
10977 1. wait for sync across all devices
10978
10979 1. for each modified disk:
10980
10981 1. remove old LVs (which have the name name_replaces.<time_t>)
10982
10983 Failures are not very well handled.
10984
10985 """
10986 steps_total = 6
10987
10988
10989 self.lu.LogStep(1, steps_total, "Check device existence")
10990 self._CheckDisksExistence([self.other_node, self.target_node])
10991 self._CheckVolumeGroup([self.target_node, self.other_node])
10992
10993
10994 self.lu.LogStep(2, steps_total, "Check peer consistency")
10995 self._CheckDisksConsistency(self.other_node,
10996 self.other_node == self.instance.primary_node,
10997 False)
10998
10999
11000 self.lu.LogStep(3, steps_total, "Allocate new storage")
11001 iv_names = self._CreateNewStorage(self.target_node)
11002
11003
11004 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11005 for dev, old_lvs, new_lvs in iv_names.itervalues():
11006 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11007
11008 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11009 old_lvs)
11010 result.Raise("Can't detach drbd from local storage on node"
11011 " %s for device %s" % (self.target_node, dev.iv_name))
11012
11013
11014
11015
11016
11017
11018
11019
11020
11021
11022 temp_suffix = int(time.time())
11023 ren_fn = lambda d, suff: (d.physical_id[0],
11024 d.physical_id[1] + "_replaced-%s" % suff)
11025
11026
11027 rename_old_to_new = []
11028 for to_ren in old_lvs:
11029 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11030 if not result.fail_msg and result.payload:
11031
11032 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11033
11034 self.lu.LogInfo("Renaming the old LVs on the target node")
11035 result = self.rpc.call_blockdev_rename(self.target_node,
11036 rename_old_to_new)
11037 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11038
11039
11040 self.lu.LogInfo("Renaming the new LVs on the target node")
11041 rename_new_to_old = [(new, old.physical_id)
11042 for old, new in zip(old_lvs, new_lvs)]
11043 result = self.rpc.call_blockdev_rename(self.target_node,
11044 rename_new_to_old)
11045 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11046
11047
11048 for old, new in zip(old_lvs, new_lvs):
11049 new.logical_id = old.logical_id
11050 self.cfg.SetDiskID(new, self.target_node)
11051
11052
11053
11054
11055 for disk in old_lvs:
11056 disk.logical_id = ren_fn(disk, temp_suffix)
11057 self.cfg.SetDiskID(disk, self.target_node)
11058
11059
11060 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11061 result = self.rpc.call_blockdev_addchildren(self.target_node,
11062 (dev, self.instance), new_lvs)
11063 msg = result.fail_msg
11064 if msg:
11065 for new_lv in new_lvs:
11066 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11067 new_lv).fail_msg
11068 if msg2:
11069 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11070 hint=("cleanup manually the unused logical"
11071 "volumes"))
11072 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11073
11074 cstep = itertools.count(5)
11075
11076 if self.early_release:
11077 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11078 self._RemoveOldStorage(self.target_node, iv_names)
11079
11080 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11081 else:
11082
11083 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11084 keep=self.node_secondary_ip.keys())
11085
11086
11087 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11088
11089
11090
11091
11092
11093
11094
11095 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11096 _WaitForSync(self.lu, self.instance)
11097
11098
11099 self._CheckDevices(self.instance.primary_node, iv_names)
11100
11101
11102 if not self.early_release:
11103 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11104 self._RemoveOldStorage(self.target_node, iv_names)
11105
11107 """Replace the secondary node for DRBD 8.
11108
11109 The algorithm for replace is quite complicated:
11110 - for all disks of the instance:
11111 - create new LVs on the new node with same names
11112 - shutdown the drbd device on the old secondary
11113 - disconnect the drbd network on the primary
11114 - create the drbd device on the new secondary
11115 - network attach the drbd on the primary, using an artifice:
11116 the drbd code for Attach() will connect to the network if it
11117 finds a device which is connected to the good local disks but
11118 not network enabled
11119 - wait for sync across all devices
11120 - remove all disks from the old secondary
11121
11122 Failures are not very well handled.
11123
11124 """
11125 steps_total = 6
11126
11127 pnode = self.instance.primary_node
11128
11129
11130 self.lu.LogStep(1, steps_total, "Check device existence")
11131 self._CheckDisksExistence([self.instance.primary_node])
11132 self._CheckVolumeGroup([self.instance.primary_node])
11133
11134
11135 self.lu.LogStep(2, steps_total, "Check peer consistency")
11136 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11137
11138
11139 self.lu.LogStep(3, steps_total, "Allocate new storage")
11140 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11141 for idx, dev in enumerate(disks):
11142 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11143 (self.new_node, idx))
11144
11145 for new_lv in dev.children:
11146 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11147 True, _GetInstanceInfoText(self.instance), False)
11148
11149
11150
11151
11152 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11153 minors = self.cfg.AllocateDRBDMinor([self.new_node
11154 for dev in self.instance.disks],
11155 self.instance.name)
11156 logging.debug("Allocated minors %r", minors)
11157
11158 iv_names = {}
11159 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11160 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11161 (self.new_node, idx))
11162
11163
11164
11165
11166 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11167 if self.instance.primary_node == o_node1:
11168 p_minor = o_minor1
11169 else:
11170 assert self.instance.primary_node == o_node2, "Three-node instance?"
11171 p_minor = o_minor2
11172
11173 new_alone_id = (self.instance.primary_node, self.new_node, None,
11174 p_minor, new_minor, o_secret)
11175 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11176 p_minor, new_minor, o_secret)
11177
11178 iv_names[idx] = (dev, dev.children, new_net_id)
11179 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11180 new_net_id)
11181 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11182 logical_id=new_alone_id,
11183 children=dev.children,
11184 size=dev.size,
11185 params={})
11186 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11187 self.cfg)
11188 try:
11189 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11190 anno_new_drbd,
11191 _GetInstanceInfoText(self.instance), False)
11192 except errors.GenericError:
11193 self.cfg.ReleaseDRBDMinors(self.instance.name)
11194 raise
11195
11196
11197 for idx, dev in enumerate(self.instance.disks):
11198 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11199 self.cfg.SetDiskID(dev, self.target_node)
11200 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11201 (dev, self.instance)).fail_msg
11202 if msg:
11203 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11204 "node: %s" % (idx, msg),
11205 hint=("Please cleanup this device manually as"
11206 " soon as possible"))
11207
11208 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11209 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11210 self.instance.disks)[pnode]
11211
11212 msg = result.fail_msg
11213 if msg:
11214
11215 self.cfg.ReleaseDRBDMinors(self.instance.name)
11216 raise errors.OpExecError("Can't detach the disks from the network on"
11217 " old node: %s" % (msg,))
11218
11219
11220
11221 self.lu.LogInfo("Updating instance configuration")
11222 for dev, _, new_logical_id in iv_names.itervalues():
11223 dev.logical_id = new_logical_id
11224 self.cfg.SetDiskID(dev, self.instance.primary_node)
11225
11226 self.cfg.Update(self.instance, feedback_fn)
11227
11228
11229 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11230
11231
11232 self.lu.LogInfo("Attaching primary drbds to new secondary"
11233 " (standalone => connected)")
11234 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11235 self.new_node],
11236 self.node_secondary_ip,
11237 (self.instance.disks, self.instance),
11238 self.instance.name,
11239 False)
11240 for to_node, to_result in result.items():
11241 msg = to_result.fail_msg
11242 if msg:
11243 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11244 to_node, msg,
11245 hint=("please do a gnt-instance info to see the"
11246 " status of disks"))
11247
11248 cstep = itertools.count(5)
11249
11250 if self.early_release:
11251 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11252 self._RemoveOldStorage(self.target_node, iv_names)
11253
11254 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11255 else:
11256
11257 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11258 keep=self.node_secondary_ip.keys())
11259
11260
11261
11262
11263
11264
11265
11266 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11267 _WaitForSync(self.lu, self.instance)
11268
11269
11270 self._CheckDevices(self.instance.primary_node, iv_names)
11271
11272
11273 if not self.early_release:
11274 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11275 self._RemoveOldStorage(self.target_node, iv_names)
11276
11279 """Repairs the volume group on a node.
11280
11281 """
11282 REQ_BGL = False
11283
11294
11299
11313
11315 """Check prerequisites.
11316
11317 """
11318
11319 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11320 if inst.admin_state != constants.ADMINST_UP:
11321 continue
11322 check_nodes = set(inst.all_nodes)
11323 check_nodes.discard(self.op.node_name)
11324 for inst_node_name in check_nodes:
11325 self._CheckFaultyDisks(inst, inst_node_name)
11326
11327 - def Exec(self, feedback_fn):
11328 feedback_fn("Repairing storage unit '%s' on %s ..." %
11329 (self.op.name, self.op.node_name))
11330
11331 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11332 result = self.rpc.call_storage_execute(self.op.node_name,
11333 self.op.storage_type, st_args,
11334 self.op.name,
11335 constants.SO_FIX_CONSISTENCY)
11336 result.Raise("Failed to repair storage unit '%s' on %s" %
11337 (self.op.name, self.op.node_name))
11338
11341 """Evacuates instances off a list of nodes.
11342
11343 """
11344 REQ_BGL = False
11345
11346 _MODE2IALLOCATOR = {
11347 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11348 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11349 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11350 }
11351 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11352 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11353 constants.IALLOCATOR_NEVAC_MODES)
11354
11357
11385
11387 """Gets the list of nodes to operate on.
11388
11389 """
11390 if self.op.remote_node is None:
11391
11392 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11393 else:
11394 group_nodes = frozenset([self.op.remote_node])
11395
11396
11397 return set([self.op.node_name]) | group_nodes
11398
11427
11443
11445
11446 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11447 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11448 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11449
11450 need_nodes = self._DetermineNodes()
11451
11452 if not owned_nodes.issuperset(need_nodes):
11453 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11454 " locks were acquired, current nodes are"
11455 " are '%s', used to be '%s'; retry the"
11456 " operation" %
11457 (self.op.node_name,
11458 utils.CommaJoin(need_nodes),
11459 utils.CommaJoin(owned_nodes)),
11460 errors.ECODE_STATE)
11461
11462 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11463 if owned_groups != wanted_groups:
11464 raise errors.OpExecError("Node groups changed since locks were acquired,"
11465 " current groups are '%s', used to be '%s';"
11466 " retry the operation" %
11467 (utils.CommaJoin(wanted_groups),
11468 utils.CommaJoin(owned_groups)))
11469
11470
11471 self.instances = self._DetermineInstances()
11472 self.instance_names = [i.name for i in self.instances]
11473
11474 if set(self.instance_names) != owned_instances:
11475 raise errors.OpExecError("Instances on node '%s' changed since locks"
11476 " were acquired, current instances are '%s',"
11477 " used to be '%s'; retry the operation" %
11478 (self.op.node_name,
11479 utils.CommaJoin(self.instance_names),
11480 utils.CommaJoin(owned_instances)))
11481
11482 if self.instance_names:
11483 self.LogInfo("Evacuating instances from node '%s': %s",
11484 self.op.node_name,
11485 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11486 else:
11487 self.LogInfo("No instances to evacuate from node '%s'",
11488 self.op.node_name)
11489
11490 if self.op.remote_node is not None:
11491 for i in self.instances:
11492 if i.primary_node == self.op.remote_node:
11493 raise errors.OpPrereqError("Node %s is the primary node of"
11494 " instance %s, cannot use it as"
11495 " secondary" %
11496 (self.op.remote_node, i.name),
11497 errors.ECODE_INVAL)
11498
11499 - def Exec(self, feedback_fn):
11500 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11501
11502 if not self.instance_names:
11503
11504 jobs = []
11505
11506 elif self.op.iallocator is not None:
11507
11508 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11509 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11510 instances=list(self.instance_names))
11511
11512 ial.Run(self.op.iallocator)
11513
11514 if not ial.success:
11515 raise errors.OpPrereqError("Can't compute node evacuation using"
11516 " iallocator '%s': %s" %
11517 (self.op.iallocator, ial.info),
11518 errors.ECODE_NORES)
11519
11520 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11521
11522 elif self.op.remote_node is not None:
11523 assert self.op.mode == constants.NODE_EVAC_SEC
11524 jobs = [
11525 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11526 remote_node=self.op.remote_node,
11527 disks=[],
11528 mode=constants.REPLACE_DISK_CHG,
11529 early_release=self.op.early_release)]
11530 for instance_name in self.instance_names
11531 ]
11532
11533 else:
11534 raise errors.ProgrammerError("No iallocator or remote node")
11535
11536 return ResultWithJobs(jobs)
11537
11540 """Sets C{early_release} flag on opcodes if available.
11541
11542 """
11543 try:
11544 op.early_release = early_release
11545 except AttributeError:
11546 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11547
11548 return op
11549
11552 """Returns group or nodes depending on caller's choice.
11553
11554 """
11555 if use_nodes:
11556 return utils.CommaJoin(nodes)
11557 else:
11558 return group
11559
11562 """Unpacks the result of change-group and node-evacuate iallocator requests.
11563
11564 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11565 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11566
11567 @type lu: L{LogicalUnit}
11568 @param lu: Logical unit instance
11569 @type alloc_result: tuple/list
11570 @param alloc_result: Result from iallocator
11571 @type early_release: bool
11572 @param early_release: Whether to release locks early if possible
11573 @type use_nodes: bool
11574 @param use_nodes: Whether to display node names instead of groups
11575
11576 """
11577 (moved, failed, jobs) = alloc_result
11578
11579 if failed:
11580 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11581 for (name, reason) in failed)
11582 lu.LogWarning("Unable to evacuate instances %s", failreason)
11583 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11584
11585 if moved:
11586 lu.LogInfo("Instances to be moved: %s",
11587 utils.CommaJoin("%s (to %s)" %
11588 (name, _NodeEvacDest(use_nodes, group, nodes))
11589 for (name, group, nodes) in moved))
11590
11591 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11592 map(opcodes.OpCode.LoadOpCode, ops))
11593 for ops in jobs]
11594
11597 """Grow a disk of an instance.
11598
11599 """
11600 HPATH = "disk-grow"
11601 HTYPE = constants.HTYPE_INSTANCE
11602 REQ_BGL = False
11603
11610
11618
11620 """Build hooks env.
11621
11622 This runs on the master, the primary and all the secondaries.
11623
11624 """
11625 env = {
11626 "DISK": self.op.disk,
11627 "AMOUNT": self.op.amount,
11628 "ABSOLUTE": self.op.absolute,
11629 }
11630 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11631 return env
11632
11639
11641 """Check prerequisites.
11642
11643 This checks that the instance is in the cluster.
11644
11645 """
11646 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11647 assert instance is not None, \
11648 "Cannot retrieve locked instance %s" % self.op.instance_name
11649 nodenames = list(instance.all_nodes)
11650 for node in nodenames:
11651 _CheckNodeOnline(self, node)
11652
11653 self.instance = instance
11654
11655 if instance.disk_template not in constants.DTS_GROWABLE:
11656 raise errors.OpPrereqError("Instance's disk layout does not support"
11657 " growing", errors.ECODE_INVAL)
11658
11659 self.disk = instance.FindDisk(self.op.disk)
11660
11661 if self.op.absolute:
11662 self.target = self.op.amount
11663 self.delta = self.target - self.disk.size
11664 if self.delta < 0:
11665 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11666 "current disk size (%s)" %
11667 (utils.FormatUnit(self.target, "h"),
11668 utils.FormatUnit(self.disk.size, "h")),
11669 errors.ECODE_STATE)
11670 else:
11671 self.delta = self.op.amount
11672 self.target = self.disk.size + self.delta
11673 if self.delta < 0:
11674 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11675 utils.FormatUnit(self.delta, "h"),
11676 errors.ECODE_INVAL)
11677
11678 if instance.disk_template not in (constants.DT_FILE,
11679 constants.DT_SHARED_FILE,
11680 constants.DT_RBD):
11681
11682
11683 _CheckNodesFreeDiskPerVG(self, nodenames,
11684 self.disk.ComputeGrowth(self.delta))
11685
11686 - def Exec(self, feedback_fn):
11687 """Execute disk grow.
11688
11689 """
11690 instance = self.instance
11691 disk = self.disk
11692
11693 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11694 assert (self.owned_locks(locking.LEVEL_NODE) ==
11695 self.owned_locks(locking.LEVEL_NODE_RES))
11696
11697 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11698 if not disks_ok:
11699 raise errors.OpExecError("Cannot activate block device to grow")
11700
11701 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11702 (self.op.disk, instance.name,
11703 utils.FormatUnit(self.delta, "h"),
11704 utils.FormatUnit(self.target, "h")))
11705
11706
11707 for node in instance.all_nodes:
11708 self.cfg.SetDiskID(disk, node)
11709 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11710 True)
11711 result.Raise("Grow request failed to node %s" % node)
11712
11713
11714
11715 for node in instance.all_nodes:
11716 self.cfg.SetDiskID(disk, node)
11717 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11718 False)
11719 result.Raise("Grow request failed to node %s" % node)
11720
11721
11722
11723
11724
11725
11726 time.sleep(5)
11727
11728 disk.RecordGrow(self.delta)
11729 self.cfg.Update(instance, feedback_fn)
11730
11731
11732 _ReleaseLocks(self, locking.LEVEL_NODE)
11733
11734
11735 self.glm.downgrade(locking.LEVEL_INSTANCE)
11736
11737 if self.op.wait_for_sync:
11738 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11739 if disk_abort:
11740 self.proc.LogWarning("Disk sync-ing has not returned a good"
11741 " status; please check the instance")
11742 if instance.admin_state != constants.ADMINST_UP:
11743 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11744 elif instance.admin_state != constants.ADMINST_UP:
11745 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11746 " not supposed to be running because no wait for"
11747 " sync mode was requested")
11748
11749 assert self.owned_locks(locking.LEVEL_NODE_RES)
11750 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11751
11754 """Query runtime instance data.
11755
11756 """
11757 REQ_BGL = False
11758
11785
11801
11803 """Check prerequisites.
11804
11805 This only checks the optional instance list against the existing names.
11806
11807 """
11808 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11809 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11810 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11811
11812 if self.wanted_names is None:
11813 assert self.op.use_locking, "Locking was not used"
11814 self.wanted_names = owned_instances
11815
11816 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11817
11818 if self.op.use_locking:
11819 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11820 None)
11821 else:
11822 assert not (owned_instances or owned_groups or owned_nodes)
11823
11824 self.wanted_instances = instances.values()
11825
11827 """Returns the status of a block device
11828
11829 """
11830 if self.op.static or not node:
11831 return None
11832
11833 self.cfg.SetDiskID(dev, node)
11834
11835 result = self.rpc.call_blockdev_find(node, dev)
11836 if result.offline:
11837 return None
11838
11839 result.Raise("Can't compute disk status for %s" % instance.name)
11840
11841 status = result.payload
11842 if status is None:
11843 return None
11844
11845 return (status.dev_path, status.major, status.minor,
11846 status.sync_percent, status.estimated_time,
11847 status.is_degraded, status.ldisk_status)
11848
11856
11858 """Compute block device status.
11859
11860 @attention: The device has to be annotated already.
11861
11862 """
11863 if dev.dev_type in constants.LDS_DRBD:
11864
11865 if dev.logical_id[0] == instance.primary_node:
11866 snode = dev.logical_id[1]
11867 else:
11868 snode = dev.logical_id[0]
11869
11870 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11871 instance, dev)
11872 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11873
11874 if dev.children:
11875 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11876 instance, snode),
11877 dev.children)
11878 else:
11879 dev_children = []
11880
11881 return {
11882 "iv_name": dev.iv_name,
11883 "dev_type": dev.dev_type,
11884 "logical_id": dev.logical_id,
11885 "physical_id": dev.physical_id,
11886 "pstatus": dev_pstatus,
11887 "sstatus": dev_sstatus,
11888 "children": dev_children,
11889 "mode": dev.mode,
11890 "size": dev.size,
11891 }
11892
11893 - def Exec(self, feedback_fn):
11894 """Gather and return data"""
11895 result = {}
11896
11897 cluster = self.cfg.GetClusterInfo()
11898
11899 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11900 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11901
11902 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11903 for node in nodes.values()))
11904
11905 group2name_fn = lambda uuid: groups[uuid].name
11906
11907 for instance in self.wanted_instances:
11908 pnode = nodes[instance.primary_node]
11909
11910 if self.op.static or pnode.offline:
11911 remote_state = None
11912 if pnode.offline:
11913 self.LogWarning("Primary node %s is marked offline, returning static"
11914 " information only for instance %s" %
11915 (pnode.name, instance.name))
11916 else:
11917 remote_info = self.rpc.call_instance_info(instance.primary_node,
11918 instance.name,
11919 instance.hypervisor)
11920 remote_info.Raise("Error checking node %s" % instance.primary_node)
11921 remote_info = remote_info.payload
11922 if remote_info and "state" in remote_info:
11923 remote_state = "up"
11924 else:
11925 if instance.admin_state == constants.ADMINST_UP:
11926 remote_state = "down"
11927 else:
11928 remote_state = instance.admin_state
11929
11930 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11931 instance.disks)
11932
11933 snodes_group_uuids = [nodes[snode_name].group
11934 for snode_name in instance.secondary_nodes]
11935
11936 result[instance.name] = {
11937 "name": instance.name,
11938 "config_state": instance.admin_state,
11939 "run_state": remote_state,
11940 "pnode": instance.primary_node,
11941 "pnode_group_uuid": pnode.group,
11942 "pnode_group_name": group2name_fn(pnode.group),
11943 "snodes": instance.secondary_nodes,
11944 "snodes_group_uuids": snodes_group_uuids,
11945 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11946 "os": instance.os,
11947
11948 "nics": _NICListToTuple(self, instance.nics),
11949 "disk_template": instance.disk_template,
11950 "disks": disks,
11951 "hypervisor": instance.hypervisor,
11952 "network_port": instance.network_port,
11953 "hv_instance": instance.hvparams,
11954 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11955 "be_instance": instance.beparams,
11956 "be_actual": cluster.FillBE(instance),
11957 "os_instance": instance.osparams,
11958 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11959 "serial_no": instance.serial_no,
11960 "mtime": instance.mtime,
11961 "ctime": instance.ctime,
11962 "uuid": instance.uuid,
11963 }
11964
11965 return result
11966
11969 """Prepares a list of container modifications by adding a private data field.
11970
11971 @type mods: list of tuples; (operation, index, parameters)
11972 @param mods: List of modifications
11973 @type private_fn: callable or None
11974 @param private_fn: Callable for constructing a private data field for a
11975 modification
11976 @rtype: list
11977
11978 """
11979 if private_fn is None:
11980 fn = lambda: None
11981 else:
11982 fn = private_fn
11983
11984 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11985
11986
11987
11988
11989 _TApplyContModsCbChanges = \
11990 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11991 ht.TNonEmptyString,
11992 ht.TAny,
11993 ])))
11994
11995
11996 -def ApplyContainerMods(kind, container, chgdesc, mods,
11997 create_fn, modify_fn, remove_fn):
11998 """Applies descriptions in C{mods} to C{container}.
11999
12000 @type kind: string
12001 @param kind: One-word item description
12002 @type container: list
12003 @param container: Container to modify
12004 @type chgdesc: None or list
12005 @param chgdesc: List of applied changes
12006 @type mods: list
12007 @param mods: Modifications as returned by L{PrepareContainerMods}
12008 @type create_fn: callable
12009 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12010 receives absolute item index, parameters and private data object as added
12011 by L{PrepareContainerMods}, returns tuple containing new item and changes
12012 as list
12013 @type modify_fn: callable
12014 @param modify_fn: Callback for modifying an existing item
12015 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12016 and private data object as added by L{PrepareContainerMods}, returns
12017 changes as list
12018 @type remove_fn: callable
12019 @param remove_fn: Callback on removing item; receives absolute item index,
12020 item and private data object as added by L{PrepareContainerMods}
12021
12022 """
12023 for (op, idx, params, private) in mods:
12024 if idx == -1:
12025
12026 absidx = len(container) - 1
12027 elif idx < 0:
12028 raise IndexError("Not accepting negative indices other than -1")
12029 elif idx > len(container):
12030 raise IndexError("Got %s index %s, but there are only %s" %
12031 (kind, idx, len(container)))
12032 else:
12033 absidx = idx
12034
12035 changes = None
12036
12037 if op == constants.DDM_ADD:
12038
12039 if idx == -1:
12040 addidx = len(container)
12041 else:
12042 addidx = idx
12043
12044 if create_fn is None:
12045 item = params
12046 else:
12047 (item, changes) = create_fn(addidx, params, private)
12048
12049 if idx == -1:
12050 container.append(item)
12051 else:
12052 assert idx >= 0
12053 assert idx <= len(container)
12054
12055 container.insert(idx, item)
12056 else:
12057
12058 try:
12059 item = container[absidx]
12060 except IndexError:
12061 raise IndexError("Invalid %s index %s" % (kind, idx))
12062
12063 if op == constants.DDM_REMOVE:
12064 assert not params
12065
12066 if remove_fn is not None:
12067 remove_fn(absidx, item, private)
12068
12069 changes = [("%s/%s" % (kind, absidx), "remove")]
12070
12071 assert container[absidx] == item
12072 del container[absidx]
12073 elif op == constants.DDM_MODIFY:
12074 if modify_fn is not None:
12075 changes = modify_fn(absidx, item, params, private)
12076 else:
12077 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12078
12079 assert _TApplyContModsCbChanges(changes)
12080
12081 if not (chgdesc is None or changes is None):
12082 chgdesc.extend(changes)
12083
12086 """Updates the C{iv_name} attribute of disks.
12087
12088 @type disks: list of L{objects.Disk}
12089
12090 """
12091 for (idx, disk) in enumerate(disks):
12092 disk.iv_name = "disk/%s" % (base_index + idx, )
12093
12096 """Data structure for network interface modifications.
12097
12098 Used by L{LUInstanceSetParams}.
12099
12100 """
12102 self.params = None
12103 self.filled = None
12104
12107 """Modifies an instances's parameters.
12108
12109 """
12110 HPATH = "instance-modify"
12111 HTYPE = constants.HTYPE_INSTANCE
12112 REQ_BGL = False
12113
12114 @staticmethod
12116 assert ht.TList(mods)
12117 assert not mods or len(mods[0]) in (2, 3)
12118
12119 if mods and len(mods[0]) == 2:
12120 result = []
12121
12122 addremove = 0
12123 for op, params in mods:
12124 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12125 result.append((op, -1, params))
12126 addremove += 1
12127
12128 if addremove > 1:
12129 raise errors.OpPrereqError("Only one %s add or remove operation is"
12130 " supported at a time" % kind,
12131 errors.ECODE_INVAL)
12132 else:
12133 result.append((constants.DDM_MODIFY, op, params))
12134
12135 assert verify_fn(result)
12136 else:
12137 result = mods
12138
12139 return result
12140
12141 @staticmethod
12142 - def _CheckMods(kind, mods, key_types, item_fn):
12160
12161 @staticmethod
12188
12189 @staticmethod
12228
12230 if not (self.op.nics or self.op.disks or self.op.disk_template or
12231 self.op.hvparams or self.op.beparams or self.op.os_name or
12232 self.op.offline is not None or self.op.runtime_mem):
12233 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12234
12235 if self.op.hvparams:
12236 _CheckGlobalHvParams(self.op.hvparams)
12237
12238 self.op.disks = \
12239 self._UpgradeDiskNicMods("disk", self.op.disks,
12240 opcodes.OpInstanceSetParams.TestDiskModifications)
12241 self.op.nics = \
12242 self._UpgradeDiskNicMods("NIC", self.op.nics,
12243 opcodes.OpInstanceSetParams.TestNicModifications)
12244
12245
12246 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12247 self._VerifyDiskModification)
12248
12249 if self.op.disks and self.op.disk_template is not None:
12250 raise errors.OpPrereqError("Disk template conversion and other disk"
12251 " changes not supported at the same time",
12252 errors.ECODE_INVAL)
12253
12254 if (self.op.disk_template and
12255 self.op.disk_template in constants.DTS_INT_MIRROR and
12256 self.op.remote_node is None):
12257 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12258 " one requires specifying a secondary node",
12259 errors.ECODE_INVAL)
12260
12261
12262 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12263 self._VerifyNicModification)
12264
12272
12284
12319
12326
12329 update_params_dict = dict([(key, params[key])
12330 for key in constants.NICS_PARAMETERS
12331 if key in params])
12332
12333 if "bridge" in params:
12334 update_params_dict[constants.NIC_LINK] = params["bridge"]
12335
12336 new_params = _GetUpdatedParams(old_params, update_params_dict)
12337 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12338
12339 new_filled_params = cluster.SimpleFillNIC(new_params)
12340 objects.NIC.CheckParameterSyntax(new_filled_params)
12341
12342 new_mode = new_filled_params[constants.NIC_MODE]
12343 if new_mode == constants.NIC_MODE_BRIDGED:
12344 bridge = new_filled_params[constants.NIC_LINK]
12345 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12346 if msg:
12347 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12348 if self.op.force:
12349 self.warn.append(msg)
12350 else:
12351 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12352
12353 elif new_mode == constants.NIC_MODE_ROUTED:
12354 ip = params.get(constants.INIC_IP, old_ip)
12355 if ip is None:
12356 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12357 " on a routed NIC", errors.ECODE_INVAL)
12358
12359 if constants.INIC_MAC in params:
12360 mac = params[constants.INIC_MAC]
12361 if mac is None:
12362 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12363 errors.ECODE_INVAL)
12364 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12365
12366 params[constants.INIC_MAC] = \
12367 self.cfg.GenerateMAC(self.proc.GetECId())
12368 else:
12369
12370 try:
12371 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12372 except errors.ReservationError:
12373 raise errors.OpPrereqError("MAC address '%s' already in use"
12374 " in cluster" % mac,
12375 errors.ECODE_NOTUNIQUE)
12376
12377 private.params = new_params
12378 private.filled = new_filled_params
12379
12381 """Check prerequisites.
12382
12383 This only checks the instance list against the existing names.
12384
12385 """
12386
12387
12388 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12389 cluster = self.cluster = self.cfg.GetClusterInfo()
12390 assert self.instance is not None, \
12391 "Cannot retrieve locked instance %s" % self.op.instance_name
12392 pnode = instance.primary_node
12393 nodelist = list(instance.all_nodes)
12394 pnode_info = self.cfg.GetNodeInfo(pnode)
12395 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12396
12397
12398 self.diskmod = PrepareContainerMods(self.op.disks, None)
12399 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12400
12401
12402 if self.op.os_name and not self.op.force:
12403 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12404 self.op.force_variant)
12405 instance_os = self.op.os_name
12406 else:
12407 instance_os = instance.os
12408
12409 assert not (self.op.disk_template and self.op.disks), \
12410 "Can't modify disk template and apply disk changes at the same time"
12411
12412 if self.op.disk_template:
12413 if instance.disk_template == self.op.disk_template:
12414 raise errors.OpPrereqError("Instance already has disk template %s" %
12415 instance.disk_template, errors.ECODE_INVAL)
12416
12417 if (instance.disk_template,
12418 self.op.disk_template) not in self._DISK_CONVERSIONS:
12419 raise errors.OpPrereqError("Unsupported disk template conversion from"
12420 " %s to %s" % (instance.disk_template,
12421 self.op.disk_template),
12422 errors.ECODE_INVAL)
12423 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12424 msg="cannot change disk template")
12425 if self.op.disk_template in constants.DTS_INT_MIRROR:
12426 if self.op.remote_node == pnode:
12427 raise errors.OpPrereqError("Given new secondary node %s is the same"
12428 " as the primary node of the instance" %
12429 self.op.remote_node, errors.ECODE_STATE)
12430 _CheckNodeOnline(self, self.op.remote_node)
12431 _CheckNodeNotDrained(self, self.op.remote_node)
12432
12433 assert instance.disk_template == constants.DT_PLAIN
12434 disks = [{constants.IDISK_SIZE: d.size,
12435 constants.IDISK_VG: d.logical_id[0]}
12436 for d in instance.disks]
12437 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12438 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12439
12440 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12441 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12442 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12443 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12444 ignore=self.op.ignore_ipolicy)
12445 if pnode_info.group != snode_info.group:
12446 self.LogWarning("The primary and secondary nodes are in two"
12447 " different node groups; the disk parameters"
12448 " from the first disk's node group will be"
12449 " used")
12450
12451
12452 if self.op.hvparams:
12453 hv_type = instance.hypervisor
12454 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12455 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12456 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12457
12458
12459 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
12460 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12461 self.hv_proposed = self.hv_new = hv_new
12462 self.hv_inst = i_hvdict
12463 else:
12464 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12465 instance.hvparams)
12466 self.hv_new = self.hv_inst = {}
12467
12468
12469 if self.op.beparams:
12470 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12471 use_none=True)
12472 objects.UpgradeBeParams(i_bedict)
12473 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12474 be_new = cluster.SimpleFillBE(i_bedict)
12475 self.be_proposed = self.be_new = be_new
12476 self.be_inst = i_bedict
12477 else:
12478 self.be_new = self.be_inst = {}
12479 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12480 be_old = cluster.FillBE(instance)
12481
12482
12483
12484
12485 if (constants.BE_VCPUS in self.be_proposed and
12486 constants.HV_CPU_MASK in self.hv_proposed):
12487 cpu_list = \
12488 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12489
12490
12491
12492 if (len(cpu_list) > 1 and
12493 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12494 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12495 " CPU mask [%s]" %
12496 (self.be_proposed[constants.BE_VCPUS],
12497 self.hv_proposed[constants.HV_CPU_MASK]),
12498 errors.ECODE_INVAL)
12499
12500
12501 if constants.HV_CPU_MASK in self.hv_new:
12502
12503 max_requested_cpu = max(map(max, cpu_list))
12504
12505
12506 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12507 max_requested_cpu + 1, instance.hypervisor)
12508
12509
12510 if self.op.osparams:
12511 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12512 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12513 self.os_inst = i_osdict
12514 else:
12515 self.os_inst = {}
12516
12517 self.warn = []
12518
12519
12520 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12521 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12522 mem_check_list = [pnode]
12523 if be_new[constants.BE_AUTO_BALANCE]:
12524
12525 mem_check_list.extend(instance.secondary_nodes)
12526 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12527 instance.hypervisor)
12528 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12529 [instance.hypervisor])
12530 pninfo = nodeinfo[pnode]
12531 msg = pninfo.fail_msg
12532 if msg:
12533
12534 self.warn.append("Can't get info from primary node %s: %s" %
12535 (pnode, msg))
12536 else:
12537 (_, _, (pnhvinfo, )) = pninfo.payload
12538 if not isinstance(pnhvinfo.get("memory_free", None), int):
12539 self.warn.append("Node data from primary node %s doesn't contain"
12540 " free memory information" % pnode)
12541 elif instance_info.fail_msg:
12542 self.warn.append("Can't get instance runtime information: %s" %
12543 instance_info.fail_msg)
12544 else:
12545 if instance_info.payload:
12546 current_mem = int(instance_info.payload["memory"])
12547 else:
12548
12549
12550
12551
12552 current_mem = 0
12553
12554 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12555 pnhvinfo["memory_free"])
12556 if miss_mem > 0:
12557 raise errors.OpPrereqError("This change will prevent the instance"
12558 " from starting, due to %d MB of memory"
12559 " missing on its primary node" %
12560 miss_mem,
12561 errors.ECODE_NORES)
12562
12563 if be_new[constants.BE_AUTO_BALANCE]:
12564 for node, nres in nodeinfo.items():
12565 if node not in instance.secondary_nodes:
12566 continue
12567 nres.Raise("Can't get info from secondary node %s" % node,
12568 prereq=True, ecode=errors.ECODE_STATE)
12569 (_, _, (nhvinfo, )) = nres.payload
12570 if not isinstance(nhvinfo.get("memory_free", None), int):
12571 raise errors.OpPrereqError("Secondary node %s didn't return free"
12572 " memory information" % node,
12573 errors.ECODE_STATE)
12574
12575 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12576 raise errors.OpPrereqError("This change will prevent the instance"
12577 " from failover to its secondary node"
12578 " %s, due to not enough memory" % node,
12579 errors.ECODE_STATE)
12580
12581 if self.op.runtime_mem:
12582 remote_info = self.rpc.call_instance_info(instance.primary_node,
12583 instance.name,
12584 instance.hypervisor)
12585 remote_info.Raise("Error checking node %s" % instance.primary_node)
12586 if not remote_info.payload:
12587 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12588 errors.ECODE_STATE)
12589
12590 current_memory = remote_info.payload["memory"]
12591 if (not self.op.force and
12592 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12593 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12594 raise errors.OpPrereqError("Instance %s must have memory between %d"
12595 " and %d MB of memory unless --force is"
12596 " given" % (instance.name,
12597 self.be_proposed[constants.BE_MINMEM],
12598 self.be_proposed[constants.BE_MAXMEM]),
12599 errors.ECODE_INVAL)
12600
12601 delta = self.op.runtime_mem - current_memory
12602 if delta > 0:
12603 _CheckNodeFreeMemory(self, instance.primary_node,
12604 "ballooning memory for instance %s" %
12605 instance.name, delta, instance.hypervisor)
12606
12607 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12608 raise errors.OpPrereqError("Disk operations not supported for"
12609 " diskless instances",
12610 errors.ECODE_INVAL)
12611
12612 def _PrepareNicCreate(_, params, private):
12613 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12614 return (None, None)
12615
12616 def _PrepareNicMod(_, nic, params, private):
12617 self._PrepareNicModification(params, private, nic.ip,
12618 nic.nicparams, cluster, pnode)
12619 return None
12620
12621
12622 nics = instance.nics[:]
12623 ApplyContainerMods("NIC", nics, None, self.nicmod,
12624 _PrepareNicCreate, _PrepareNicMod, None)
12625 if len(nics) > constants.MAX_NICS:
12626 raise errors.OpPrereqError("Instance has too many network interfaces"
12627 " (%d), cannot add more" % constants.MAX_NICS,
12628 errors.ECODE_STATE)
12629
12630
12631 disks = instance.disks[:]
12632 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12633 if len(disks) > constants.MAX_DISKS:
12634 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12635 " more" % constants.MAX_DISKS,
12636 errors.ECODE_STATE)
12637
12638 if self.op.offline is not None:
12639 if self.op.offline:
12640 msg = "can't change to offline"
12641 else:
12642 msg = "can't change to online"
12643 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12644
12645
12646 self._nic_chgdesc = []
12647 if self.nicmod:
12648
12649 nics = [nic.Copy() for nic in instance.nics]
12650 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12651 self._CreateNewNic, self._ApplyNicMods, None)
12652 self._new_nics = nics
12653 else:
12654 self._new_nics = None
12655
12657 """Converts an instance from plain to drbd.
12658
12659 """
12660 feedback_fn("Converting template to drbd")
12661 instance = self.instance
12662 pnode = instance.primary_node
12663 snode = self.op.remote_node
12664
12665 assert instance.disk_template == constants.DT_PLAIN
12666
12667
12668 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12669 constants.IDISK_VG: d.logical_id[0]}
12670 for d in instance.disks]
12671 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12672 instance.name, pnode, [snode],
12673 disk_info, None, None, 0, feedback_fn,
12674 self.diskparams)
12675 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12676 self.diskparams)
12677 info = _GetInstanceInfoText(instance)
12678 feedback_fn("Creating additional volumes...")
12679
12680 for disk in anno_disks:
12681
12682 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12683 info, True)
12684 for child in disk.children:
12685 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12686
12687
12688 feedback_fn("Renaming original volumes...")
12689 rename_list = [(o, n.children[0].logical_id)
12690 for (o, n) in zip(instance.disks, new_disks)]
12691 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12692 result.Raise("Failed to rename original LVs")
12693
12694 feedback_fn("Initializing DRBD devices...")
12695
12696 for disk in anno_disks:
12697 for node in [pnode, snode]:
12698 f_create = node == pnode
12699 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12700
12701
12702 instance.disk_template = constants.DT_DRBD8
12703 instance.disks = new_disks
12704 self.cfg.Update(instance, feedback_fn)
12705
12706
12707 _ReleaseLocks(self, locking.LEVEL_NODE)
12708
12709
12710 disk_abort = not _WaitForSync(self, instance,
12711 oneshot=not self.op.wait_for_sync)
12712 if disk_abort:
12713 raise errors.OpExecError("There are some degraded disks for"
12714 " this instance, please cleanup manually")
12715
12716
12717
12719 """Converts an instance from drbd to plain.
12720
12721 """
12722 instance = self.instance
12723
12724 assert len(instance.secondary_nodes) == 1
12725 assert instance.disk_template == constants.DT_DRBD8
12726
12727 pnode = instance.primary_node
12728 snode = instance.secondary_nodes[0]
12729 feedback_fn("Converting template to plain")
12730
12731 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12732 new_disks = [d.children[0] for d in instance.disks]
12733
12734
12735 for parent, child in zip(old_disks, new_disks):
12736 child.size = parent.size
12737 child.mode = parent.mode
12738
12739
12740
12741 for disk in old_disks:
12742 tcp_port = disk.logical_id[2]
12743 self.cfg.AddTcpUdpPort(tcp_port)
12744
12745
12746 instance.disks = new_disks
12747 instance.disk_template = constants.DT_PLAIN
12748 self.cfg.Update(instance, feedback_fn)
12749
12750
12751 _ReleaseLocks(self, locking.LEVEL_NODE)
12752
12753 feedback_fn("Removing volumes on the secondary node...")
12754 for disk in old_disks:
12755 self.cfg.SetDiskID(disk, snode)
12756 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12757 if msg:
12758 self.LogWarning("Could not remove block device %s on node %s,"
12759 " continuing anyway: %s", disk.iv_name, snode, msg)
12760
12761 feedback_fn("Removing unneeded volumes on the primary node...")
12762 for idx, disk in enumerate(old_disks):
12763 meta = disk.children[1]
12764 self.cfg.SetDiskID(meta, pnode)
12765 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12766 if msg:
12767 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12768 " continuing anyway: %s", idx, pnode, msg)
12769
12771 """Creates a new disk.
12772
12773 """
12774 instance = self.instance
12775
12776
12777 if instance.disk_template in constants.DTS_FILEBASED:
12778 (file_driver, file_path) = instance.disks[0].logical_id
12779 file_path = os.path.dirname(file_path)
12780 else:
12781 file_driver = file_path = None
12782
12783 disk = \
12784 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12785 instance.primary_node, instance.secondary_nodes,
12786 [params], file_path, file_driver, idx,
12787 self.Log, self.diskparams)[0]
12788
12789 info = _GetInstanceInfoText(instance)
12790
12791 logging.info("Creating volume %s for instance %s",
12792 disk.iv_name, instance.name)
12793
12794
12795 for node in instance.all_nodes:
12796 f_create = (node == instance.primary_node)
12797 try:
12798 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12799 except errors.OpExecError, err:
12800 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12801 disk.iv_name, disk, node, err)
12802
12803 return (disk, [
12804 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12805 ])
12806
12807 @staticmethod
12809 """Modifies a disk.
12810
12811 """
12812 disk.mode = params[constants.IDISK_MODE]
12813
12814 return [
12815 ("disk.mode/%d" % idx, disk.mode),
12816 ]
12817
12819 """Removes a disk.
12820
12821 """
12822 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12823 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12824 self.cfg.SetDiskID(disk, node)
12825 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12826 if msg:
12827 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12828 " continuing anyway", idx, node, msg)
12829
12830
12831 if root.dev_type in constants.LDS_DRBD:
12832 self.cfg.AddTcpUdpPort(root.logical_id[2])
12833
12834 @staticmethod
12836 """Creates data structure for a new network interface.
12837
12838 """
12839 mac = params[constants.INIC_MAC]
12840 ip = params.get(constants.INIC_IP, None)
12841 nicparams = private.params
12842
12843 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12844 ("nic.%d" % idx,
12845 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12846 (mac, ip, private.filled[constants.NIC_MODE],
12847 private.filled[constants.NIC_LINK])),
12848 ])
12849
12850 @staticmethod
12852 """Modifies a network interface.
12853
12854 """
12855 changes = []
12856
12857 for key in [constants.INIC_MAC, constants.INIC_IP]:
12858 if key in params:
12859 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12860 setattr(nic, key, params[key])
12861
12862 if private.params:
12863 nic.nicparams = private.params
12864
12865 for (key, val) in params.items():
12866 changes.append(("nic.%s/%d" % (key, idx), val))
12867
12868 return changes
12869
12870 - def Exec(self, feedback_fn):
12871 """Modifies an instance.
12872
12873 All parameters take effect only at the next restart of the instance.
12874
12875 """
12876
12877
12878
12879 for warn in self.warn:
12880 feedback_fn("WARNING: %s" % warn)
12881
12882 assert ((self.op.disk_template is None) ^
12883 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12884 "Not owning any node resource locks"
12885
12886 result = []
12887 instance = self.instance
12888
12889
12890 if self.op.runtime_mem:
12891 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12892 instance,
12893 self.op.runtime_mem)
12894 rpcres.Raise("Cannot modify instance runtime memory")
12895 result.append(("runtime_memory", self.op.runtime_mem))
12896
12897
12898 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12899 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12900 _UpdateIvNames(0, instance.disks)
12901
12902 if self.op.disk_template:
12903 if __debug__:
12904 check_nodes = set(instance.all_nodes)
12905 if self.op.remote_node:
12906 check_nodes.add(self.op.remote_node)
12907 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12908 owned = self.owned_locks(level)
12909 assert not (check_nodes - owned), \
12910 ("Not owning the correct locks, owning %r, expected at least %r" %
12911 (owned, check_nodes))
12912
12913 r_shut = _ShutdownInstanceDisks(self, instance)
12914 if not r_shut:
12915 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12916 " proceed with disk template conversion")
12917 mode = (instance.disk_template, self.op.disk_template)
12918 try:
12919 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12920 except:
12921 self.cfg.ReleaseDRBDMinors(instance.name)
12922 raise
12923 result.append(("disk_template", self.op.disk_template))
12924
12925 assert instance.disk_template == self.op.disk_template, \
12926 ("Expected disk template '%s', found '%s'" %
12927 (self.op.disk_template, instance.disk_template))
12928
12929
12930
12931 _ReleaseLocks(self, locking.LEVEL_NODE)
12932 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12933
12934
12935 if self._new_nics is not None:
12936 instance.nics = self._new_nics
12937 result.extend(self._nic_chgdesc)
12938
12939
12940 if self.op.hvparams:
12941 instance.hvparams = self.hv_inst
12942 for key, val in self.op.hvparams.iteritems():
12943 result.append(("hv/%s" % key, val))
12944
12945
12946 if self.op.beparams:
12947 instance.beparams = self.be_inst
12948 for key, val in self.op.beparams.iteritems():
12949 result.append(("be/%s" % key, val))
12950
12951
12952 if self.op.os_name:
12953 instance.os = self.op.os_name
12954
12955
12956 if self.op.osparams:
12957 instance.osparams = self.os_inst
12958 for key, val in self.op.osparams.iteritems():
12959 result.append(("os/%s" % key, val))
12960
12961 if self.op.offline is None:
12962
12963 pass
12964 elif self.op.offline:
12965
12966 self.cfg.MarkInstanceOffline(instance.name)
12967 result.append(("admin_state", constants.ADMINST_OFFLINE))
12968 else:
12969
12970 self.cfg.MarkInstanceDown(instance.name)
12971 result.append(("admin_state", constants.ADMINST_DOWN))
12972
12973 self.cfg.Update(instance, feedback_fn)
12974
12975 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12976 self.owned_locks(locking.LEVEL_NODE)), \
12977 "All node locks should have been released by now"
12978
12979 return result
12980
12981 _DISK_CONVERSIONS = {
12982 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12983 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12984 }
12985
12988 HPATH = "instance-change-group"
12989 HTYPE = constants.HTYPE_INSTANCE
12990 REQ_BGL = False
12991
13008
13042
13044 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13045 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13046 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13047
13048 assert (self.req_target_uuids is None or
13049 owned_groups.issuperset(self.req_target_uuids))
13050 assert owned_instances == set([self.op.instance_name])
13051
13052
13053 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13054
13055
13056 assert owned_nodes.issuperset(self.instance.all_nodes), \
13057 ("Instance %s's nodes changed while we kept the lock" %
13058 self.op.instance_name)
13059
13060 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13061 owned_groups)
13062
13063 if self.req_target_uuids:
13064
13065 self.target_uuids = frozenset(self.req_target_uuids)
13066 else:
13067
13068 self.target_uuids = owned_groups - inst_groups
13069
13070 conflicting_groups = self.target_uuids & inst_groups
13071 if conflicting_groups:
13072 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13073 " used by the instance '%s'" %
13074 (utils.CommaJoin(conflicting_groups),
13075 self.op.instance_name),
13076 errors.ECODE_INVAL)
13077
13078 if not self.target_uuids:
13079 raise errors.OpPrereqError("There are no possible target groups",
13080 errors.ECODE_INVAL)
13081
13083 """Build hooks env.
13084
13085 """
13086 assert self.target_uuids
13087
13088 env = {
13089 "TARGET_GROUPS": " ".join(self.target_uuids),
13090 }
13091
13092 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13093
13094 return env
13095
13097 """Build hooks nodes.
13098
13099 """
13100 mn = self.cfg.GetMasterNode()
13101 return ([mn], [mn])
13102
13103 - def Exec(self, feedback_fn):
13104 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13105
13106 assert instances == [self.op.instance_name], "Instance not locked"
13107
13108 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13109 instances=instances, target_groups=list(self.target_uuids))
13110
13111 ial.Run(self.op.iallocator)
13112
13113 if not ial.success:
13114 raise errors.OpPrereqError("Can't compute solution for changing group of"
13115 " instance '%s' using iallocator '%s': %s" %
13116 (self.op.instance_name, self.op.iallocator,
13117 ial.info),
13118 errors.ECODE_NORES)
13119
13120 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13121
13122 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13123 " instance '%s'", len(jobs), self.op.instance_name)
13124
13125 return ResultWithJobs(jobs)
13126
13129 """Query the exports list
13130
13131 """
13132 REQ_BGL = False
13133
13137
13140
13143
13144 - def Exec(self, feedback_fn):
13145 result = {}
13146
13147 for (node, expname) in self.expq.OldStyleQuery(self):
13148 if expname is None:
13149 result[node] = False
13150 else:
13151 result.setdefault(node, []).append(expname)
13152
13153 return result
13154
13157 FIELDS = query.EXPORT_FIELDS
13158
13159
13160 SORT_FIELD = "node"
13161
13163 lu.needed_locks = {}
13164
13165
13166 if self.names:
13167 self.wanted = _GetWantedNodes(lu, self.names)
13168 else:
13169 self.wanted = locking.ALL_SET
13170
13171 self.do_locking = self.use_locking
13172
13173 if self.do_locking:
13174 lu.share_locks = _ShareAll()
13175 lu.needed_locks = {
13176 locking.LEVEL_NODE: self.wanted,
13177 }
13178
13181
13183 """Computes the list of nodes and their attributes.
13184
13185 """
13186
13187
13188 assert not (compat.any(lu.glm.is_owned(level)
13189 for level in locking.LEVELS
13190 if level != locking.LEVEL_CLUSTER) or
13191 self.do_locking or self.use_locking)
13192
13193 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13194
13195 result = []
13196
13197 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13198 if nres.fail_msg:
13199 result.append((node, None))
13200 else:
13201 result.extend((node, expname) for expname in nres.payload)
13202
13203 return result
13204
13207 """Prepares an instance for an export and returns useful information.
13208
13209 """
13210 REQ_BGL = False
13211
13214
13227
13228 - def Exec(self, feedback_fn):
13229 """Prepares an instance for an export.
13230
13231 """
13232 instance = self.instance
13233
13234 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13235 salt = utils.GenerateSecret(8)
13236
13237 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13238 result = self.rpc.call_x509_cert_create(instance.primary_node,
13239 constants.RIE_CERT_VALIDITY)
13240 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13241
13242 (name, cert_pem) = result.payload
13243
13244 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13245 cert_pem)
13246
13247 return {
13248 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13249 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13250 salt),
13251 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13252 }
13253
13254 return None
13255
13258 """Export an instance to an image in the cluster.
13259
13260 """
13261 HPATH = "instance-export"
13262 HTYPE = constants.HTYPE_INSTANCE
13263 REQ_BGL = False
13264
13280
13295
13297 """Last minute lock declaration."""
13298
13299
13301 """Build hooks env.
13302
13303 This will run on the master, primary node and target node.
13304
13305 """
13306 env = {
13307 "EXPORT_MODE": self.op.mode,
13308 "EXPORT_NODE": self.op.target_node,
13309 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13310 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13311
13312 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13313 }
13314
13315 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13316
13317 return env
13318
13329
13331 """Check prerequisites.
13332
13333 This checks that the instance and node names are valid.
13334
13335 """
13336 instance_name = self.op.instance_name
13337
13338 self.instance = self.cfg.GetInstanceInfo(instance_name)
13339 assert self.instance is not None, \
13340 "Cannot retrieve locked instance %s" % self.op.instance_name
13341 _CheckNodeOnline(self, self.instance.primary_node)
13342
13343 if (self.op.remove_instance and
13344 self.instance.admin_state == constants.ADMINST_UP and
13345 not self.op.shutdown):
13346 raise errors.OpPrereqError("Can not remove instance without shutting it"
13347 " down before")
13348
13349 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13350 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13351 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13352 assert self.dst_node is not None
13353
13354 _CheckNodeOnline(self, self.dst_node.name)
13355 _CheckNodeNotDrained(self, self.dst_node.name)
13356
13357 self._cds = None
13358 self.dest_disk_info = None
13359 self.dest_x509_ca = None
13360
13361 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13362 self.dst_node = None
13363
13364 if len(self.op.target_node) != len(self.instance.disks):
13365 raise errors.OpPrereqError(("Received destination information for %s"
13366 " disks, but instance %s has %s disks") %
13367 (len(self.op.target_node), instance_name,
13368 len(self.instance.disks)),
13369 errors.ECODE_INVAL)
13370
13371 cds = _GetClusterDomainSecret()
13372
13373
13374 try:
13375 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13376 except (TypeError, ValueError), err:
13377 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13378
13379 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13380 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13381 errors.ECODE_INVAL)
13382
13383
13384 try:
13385 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13386 except OpenSSL.crypto.Error, err:
13387 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13388 (err, ), errors.ECODE_INVAL)
13389
13390 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13391 if errcode is not None:
13392 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13393 (msg, ), errors.ECODE_INVAL)
13394
13395 self.dest_x509_ca = cert
13396
13397
13398 disk_info = []
13399 for idx, disk_data in enumerate(self.op.target_node):
13400 try:
13401 (host, port, magic) = \
13402 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13403 except errors.GenericError, err:
13404 raise errors.OpPrereqError("Target info for disk %s: %s" %
13405 (idx, err), errors.ECODE_INVAL)
13406
13407 disk_info.append((host, port, magic))
13408
13409 assert len(disk_info) == len(self.op.target_node)
13410 self.dest_disk_info = disk_info
13411
13412 else:
13413 raise errors.ProgrammerError("Unhandled export mode %r" %
13414 self.op.mode)
13415
13416
13417
13418 for disk in self.instance.disks:
13419 if disk.dev_type == constants.LD_FILE:
13420 raise errors.OpPrereqError("Export not supported for instances with"
13421 " file-based disks", errors.ECODE_INVAL)
13422
13424 """Removes exports of current instance from all other nodes.
13425
13426 If an instance in a cluster with nodes A..D was exported to node C, its
13427 exports will be removed from the nodes A, B and D.
13428
13429 """
13430 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13431
13432 nodelist = self.cfg.GetNodeList()
13433 nodelist.remove(self.dst_node.name)
13434
13435
13436
13437
13438 iname = self.instance.name
13439 if nodelist:
13440 feedback_fn("Removing old exports for instance %s" % iname)
13441 exportlist = self.rpc.call_export_list(nodelist)
13442 for node in exportlist:
13443 if exportlist[node].fail_msg:
13444 continue
13445 if iname in exportlist[node].payload:
13446 msg = self.rpc.call_export_remove(node, iname).fail_msg
13447 if msg:
13448 self.LogWarning("Could not remove older export for instance %s"
13449 " on node %s: %s", iname, node, msg)
13450
13451 - def Exec(self, feedback_fn):
13452 """Export an instance to an image in the cluster.
13453
13454 """
13455 assert self.op.mode in constants.EXPORT_MODES
13456
13457 instance = self.instance
13458 src_node = instance.primary_node
13459
13460 if self.op.shutdown:
13461
13462 feedback_fn("Shutting down instance %s" % instance.name)
13463 result = self.rpc.call_instance_shutdown(src_node, instance,
13464 self.op.shutdown_timeout)
13465
13466 result.Raise("Could not shutdown instance %s on"
13467 " node %s" % (instance.name, src_node))
13468
13469
13470
13471 for disk in instance.disks:
13472 self.cfg.SetDiskID(disk, src_node)
13473
13474 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13475
13476 if activate_disks:
13477
13478 feedback_fn("Activating disks for %s" % instance.name)
13479 _StartInstanceDisks(self, instance, None)
13480
13481 try:
13482 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13483 instance)
13484
13485 helper.CreateSnapshots()
13486 try:
13487 if (self.op.shutdown and
13488 instance.admin_state == constants.ADMINST_UP and
13489 not self.op.remove_instance):
13490 assert not activate_disks
13491 feedback_fn("Starting instance %s" % instance.name)
13492 result = self.rpc.call_instance_start(src_node,
13493 (instance, None, None), False)
13494 msg = result.fail_msg
13495 if msg:
13496 feedback_fn("Failed to start instance: %s" % msg)
13497 _ShutdownInstanceDisks(self, instance)
13498 raise errors.OpExecError("Could not start instance: %s" % msg)
13499
13500 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13501 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13502 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13503 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13504 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13505
13506 (key_name, _, _) = self.x509_key_name
13507
13508 dest_ca_pem = \
13509 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13510 self.dest_x509_ca)
13511
13512 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13513 key_name, dest_ca_pem,
13514 timeouts)
13515 finally:
13516 helper.Cleanup()
13517
13518
13519 assert len(dresults) == len(instance.disks)
13520 assert compat.all(isinstance(i, bool) for i in dresults), \
13521 "Not all results are boolean: %r" % dresults
13522
13523 finally:
13524 if activate_disks:
13525 feedback_fn("Deactivating disks for %s" % instance.name)
13526 _ShutdownInstanceDisks(self, instance)
13527
13528 if not (compat.all(dresults) and fin_resu):
13529 failures = []
13530 if not fin_resu:
13531 failures.append("export finalization")
13532 if not compat.all(dresults):
13533 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13534 if not dsk)
13535 failures.append("disk export: disk(s) %s" % fdsk)
13536
13537 raise errors.OpExecError("Export failed, errors in %s" %
13538 utils.CommaJoin(failures))
13539
13540
13541
13542
13543 if self.op.remove_instance:
13544 feedback_fn("Removing instance %s" % instance.name)
13545 _RemoveInstance(self, feedback_fn, instance,
13546 self.op.ignore_remove_failures)
13547
13548 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13549 self._CleanupExports(feedback_fn)
13550
13551 return fin_resu, dresults
13552
13555 """Remove exports related to the named instance.
13556
13557 """
13558 REQ_BGL = False
13559
13566
13567 - def Exec(self, feedback_fn):
13568 """Remove any export.
13569
13570 """
13571 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13572
13573
13574 fqdn_warn = False
13575 if not instance_name:
13576 fqdn_warn = True
13577 instance_name = self.op.instance_name
13578
13579 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13580 exportlist = self.rpc.call_export_list(locked_nodes)
13581 found = False
13582 for node in exportlist:
13583 msg = exportlist[node].fail_msg
13584 if msg:
13585 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13586 continue
13587 if instance_name in exportlist[node].payload:
13588 found = True
13589 result = self.rpc.call_export_remove(node, instance_name)
13590 msg = result.fail_msg
13591 if msg:
13592 logging.error("Could not remove export for instance %s"
13593 " on node %s: %s", instance_name, node, msg)
13594
13595 if fqdn_warn and not found:
13596 feedback_fn("Export not found. If trying to remove an export belonging"
13597 " to a deleted instance please use its Fully Qualified"
13598 " Domain Name.")
13599
13602 """Logical unit for creating node groups.
13603
13604 """
13605 HPATH = "group-add"
13606 HTYPE = constants.HTYPE_GROUP
13607 REQ_BGL = False
13608
13616
13618 """Check prerequisites.
13619
13620 This checks that the given group name is not an existing node group
13621 already.
13622
13623 """
13624 try:
13625 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13626 except errors.OpPrereqError:
13627 pass
13628 else:
13629 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13630 " node group (UUID: %s)" %
13631 (self.op.group_name, existing_uuid),
13632 errors.ECODE_EXISTS)
13633
13634 if self.op.ndparams:
13635 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13636
13637 if self.op.hv_state:
13638 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13639 else:
13640 self.new_hv_state = None
13641
13642 if self.op.disk_state:
13643 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13644 else:
13645 self.new_disk_state = None
13646
13647 if self.op.diskparams:
13648 for templ in constants.DISK_TEMPLATES:
13649 if templ in self.op.diskparams:
13650 utils.ForceDictType(self.op.diskparams[templ],
13651 constants.DISK_DT_TYPES)
13652 self.new_diskparams = self.op.diskparams
13653 try:
13654 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13655 except errors.OpPrereqError, err:
13656 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13657 errors.ECODE_INVAL)
13658 else:
13659 self.new_diskparams = {}
13660
13661 if self.op.ipolicy:
13662 cluster = self.cfg.GetClusterInfo()
13663 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13664 try:
13665 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13666 except errors.ConfigurationError, err:
13667 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13668 errors.ECODE_INVAL)
13669
13671 """Build hooks env.
13672
13673 """
13674 return {
13675 "GROUP_NAME": self.op.group_name,
13676 }
13677
13679 """Build hooks nodes.
13680
13681 """
13682 mn = self.cfg.GetMasterNode()
13683 return ([mn], [mn])
13684
13685 - def Exec(self, feedback_fn):
13686 """Add the node group to the cluster.
13687
13688 """
13689 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13690 uuid=self.group_uuid,
13691 alloc_policy=self.op.alloc_policy,
13692 ndparams=self.op.ndparams,
13693 diskparams=self.new_diskparams,
13694 ipolicy=self.op.ipolicy,
13695 hv_state_static=self.new_hv_state,
13696 disk_state_static=self.new_disk_state)
13697
13698 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13699 del self.remove_locks[locking.LEVEL_NODEGROUP]
13700
13703 """Logical unit for assigning nodes to groups.
13704
13705 """
13706 REQ_BGL = False
13707
13720
13730
13732 """Check prerequisites.
13733
13734 """
13735 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13736 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13737 frozenset(self.op.nodes))
13738
13739 expected_locks = (set([self.group_uuid]) |
13740 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13741 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13742 if actual_locks != expected_locks:
13743 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13744 " current groups are '%s', used to be '%s'" %
13745 (utils.CommaJoin(expected_locks),
13746 utils.CommaJoin(actual_locks)))
13747
13748 self.node_data = self.cfg.GetAllNodesInfo()
13749 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13750 instance_data = self.cfg.GetAllInstancesInfo()
13751
13752 if self.group is None:
13753 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13754 (self.op.group_name, self.group_uuid))
13755
13756 (new_splits, previous_splits) = \
13757 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13758 for node in self.op.nodes],
13759 self.node_data, instance_data)
13760
13761 if new_splits:
13762 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13763
13764 if not self.op.force:
13765 raise errors.OpExecError("The following instances get split by this"
13766 " change and --force was not given: %s" %
13767 fmt_new_splits)
13768 else:
13769 self.LogWarning("This operation will split the following instances: %s",
13770 fmt_new_splits)
13771
13772 if previous_splits:
13773 self.LogWarning("In addition, these already-split instances continue"
13774 " to be split across groups: %s",
13775 utils.CommaJoin(utils.NiceSort(previous_splits)))
13776
13777 - def Exec(self, feedback_fn):
13778 """Assign nodes to a new group.
13779
13780 """
13781 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13782
13783 self.cfg.AssignGroupNodes(mods)
13784
13785 @staticmethod
13787 """Check for split instances after a node assignment.
13788
13789 This method considers a series of node assignments as an atomic operation,
13790 and returns information about split instances after applying the set of
13791 changes.
13792
13793 In particular, it returns information about newly split instances, and
13794 instances that were already split, and remain so after the change.
13795
13796 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13797 considered.
13798
13799 @type changes: list of (node_name, new_group_uuid) pairs.
13800 @param changes: list of node assignments to consider.
13801 @param node_data: a dict with data for all nodes
13802 @param instance_data: a dict with all instances to consider
13803 @rtype: a two-tuple
13804 @return: a list of instances that were previously okay and result split as a
13805 consequence of this change, and a list of instances that were previously
13806 split and this change does not fix.
13807
13808 """
13809 changed_nodes = dict((node, group) for node, group in changes
13810 if node_data[node].group != group)
13811
13812 all_split_instances = set()
13813 previously_split_instances = set()
13814
13815 def InstanceNodes(instance):
13816 return [instance.primary_node] + list(instance.secondary_nodes)
13817
13818 for inst in instance_data.values():
13819 if inst.disk_template not in constants.DTS_INT_MIRROR:
13820 continue
13821
13822 instance_nodes = InstanceNodes(inst)
13823
13824 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13825 previously_split_instances.add(inst.name)
13826
13827 if len(set(changed_nodes.get(node, node_data[node].group)
13828 for node in instance_nodes)) > 1:
13829 all_split_instances.add(inst.name)
13830
13831 return (list(all_split_instances - previously_split_instances),
13832 list(previously_split_instances & all_split_instances))
13833
13836 FIELDS = query.GROUP_FIELDS
13837
13839 lu.needed_locks = {}
13840
13841 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13842 self._cluster = lu.cfg.GetClusterInfo()
13843 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13844
13845 if not self.names:
13846 self.wanted = [name_to_uuid[name]
13847 for name in utils.NiceSort(name_to_uuid.keys())]
13848 else:
13849
13850 missing = []
13851 self.wanted = []
13852 all_uuid = frozenset(self._all_groups.keys())
13853
13854 for name in self.names:
13855 if name in all_uuid:
13856 self.wanted.append(name)
13857 elif name in name_to_uuid:
13858 self.wanted.append(name_to_uuid[name])
13859 else:
13860 missing.append(name)
13861
13862 if missing:
13863 raise errors.OpPrereqError("Some groups do not exist: %s" %
13864 utils.CommaJoin(missing),
13865 errors.ECODE_NOENT)
13866
13869
13871 """Computes the list of node groups and their attributes.
13872
13873 """
13874 do_nodes = query.GQ_NODE in self.requested_data
13875 do_instances = query.GQ_INST in self.requested_data
13876
13877 group_to_nodes = None
13878 group_to_instances = None
13879
13880
13881
13882
13883
13884
13885 if do_nodes or do_instances:
13886 all_nodes = lu.cfg.GetAllNodesInfo()
13887 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13888 node_to_group = {}
13889
13890 for node in all_nodes.values():
13891 if node.group in group_to_nodes:
13892 group_to_nodes[node.group].append(node.name)
13893 node_to_group[node.name] = node.group
13894
13895 if do_instances:
13896 all_instances = lu.cfg.GetAllInstancesInfo()
13897 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13898
13899 for instance in all_instances.values():
13900 node = instance.primary_node
13901 if node in node_to_group:
13902 group_to_instances[node_to_group[node]].append(instance.name)
13903
13904 if not do_nodes:
13905
13906 group_to_nodes = None
13907
13908 return query.GroupQueryData(self._cluster,
13909 [self._all_groups[uuid]
13910 for uuid in self.wanted],
13911 group_to_nodes, group_to_instances,
13912 query.GQ_DISKPARAMS in self.requested_data)
13913
13916 """Logical unit for querying node groups.
13917
13918 """
13919 REQ_BGL = False
13920
13924
13927
13930
13931 - def Exec(self, feedback_fn):
13933
13936 """Modifies the parameters of a node group.
13937
13938 """
13939 HPATH = "group-modify"
13940 HTYPE = constants.HTYPE_GROUP
13941 REQ_BGL = False
13942
13944 all_changes = [
13945 self.op.ndparams,
13946 self.op.diskparams,
13947 self.op.alloc_policy,
13948 self.op.hv_state,
13949 self.op.disk_state,
13950 self.op.ipolicy,
13951 ]
13952
13953 if all_changes.count(None) == len(all_changes):
13954 raise errors.OpPrereqError("Please pass at least one modification",
13955 errors.ECODE_INVAL)
13956
13967
13976
13977 @staticmethod
13985
13987 """Check prerequisites.
13988
13989 """
13990 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13991
13992
13993 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13994
13995 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13996 cluster = self.cfg.GetClusterInfo()
13997
13998 if self.group is None:
13999 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14000 (self.op.group_name, self.group_uuid))
14001
14002 if self.op.ndparams:
14003 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14004 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14005 self.new_ndparams = new_ndparams
14006
14007 if self.op.diskparams:
14008 diskparams = self.group.diskparams
14009 uavdp = self._UpdateAndVerifyDiskParams
14010
14011 new_diskparams = dict((dt,
14012 uavdp(diskparams.get(dt, {}),
14013 self.op.diskparams[dt]))
14014 for dt in constants.DISK_TEMPLATES
14015 if dt in self.op.diskparams)
14016
14017
14018 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14019 try:
14020 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14021 except errors.OpPrereqError, err:
14022 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14023 errors.ECODE_INVAL)
14024
14025 if self.op.hv_state:
14026 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14027 self.group.hv_state_static)
14028
14029 if self.op.disk_state:
14030 self.new_disk_state = \
14031 _MergeAndVerifyDiskState(self.op.disk_state,
14032 self.group.disk_state_static)
14033
14034 if self.op.ipolicy:
14035 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14036 self.op.ipolicy,
14037 group_policy=True)
14038
14039 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14040 inst_filter = lambda inst: inst.name in owned_instances
14041 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14042 violations = \
14043 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14044 self.group),
14045 new_ipolicy, instances)
14046
14047 if violations:
14048 self.LogWarning("After the ipolicy change the following instances"
14049 " violate them: %s",
14050 utils.CommaJoin(violations))
14051
14053 """Build hooks env.
14054
14055 """
14056 return {
14057 "GROUP_NAME": self.op.group_name,
14058 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14059 }
14060
14062 """Build hooks nodes.
14063
14064 """
14065 mn = self.cfg.GetMasterNode()
14066 return ([mn], [mn])
14067
14068 - def Exec(self, feedback_fn):
14069 """Modifies the node group.
14070
14071 """
14072 result = []
14073
14074 if self.op.ndparams:
14075 self.group.ndparams = self.new_ndparams
14076 result.append(("ndparams", str(self.group.ndparams)))
14077
14078 if self.op.diskparams:
14079 self.group.diskparams = self.new_diskparams
14080 result.append(("diskparams", str(self.group.diskparams)))
14081
14082 if self.op.alloc_policy:
14083 self.group.alloc_policy = self.op.alloc_policy
14084
14085 if self.op.hv_state:
14086 self.group.hv_state_static = self.new_hv_state
14087
14088 if self.op.disk_state:
14089 self.group.disk_state_static = self.new_disk_state
14090
14091 if self.op.ipolicy:
14092 self.group.ipolicy = self.new_ipolicy
14093
14094 self.cfg.Update(self.group, feedback_fn)
14095 return result
14096
14099 HPATH = "group-remove"
14100 HTYPE = constants.HTYPE_GROUP
14101 REQ_BGL = False
14102
14109
14111 """Check prerequisites.
14112
14113 This checks that the given group name exists as a node group, that is
14114 empty (i.e., contains no nodes), and that is not the last group of the
14115 cluster.
14116
14117 """
14118
14119 group_nodes = [node.name
14120 for node in self.cfg.GetAllNodesInfo().values()
14121 if node.group == self.group_uuid]
14122
14123 if group_nodes:
14124 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14125 " nodes: %s" %
14126 (self.op.group_name,
14127 utils.CommaJoin(utils.NiceSort(group_nodes))),
14128 errors.ECODE_STATE)
14129
14130
14131 if len(self.cfg.GetNodeGroupList()) == 1:
14132 raise errors.OpPrereqError("Group '%s' is the only group,"
14133 " cannot be removed" %
14134 self.op.group_name,
14135 errors.ECODE_STATE)
14136
14138 """Build hooks env.
14139
14140 """
14141 return {
14142 "GROUP_NAME": self.op.group_name,
14143 }
14144
14146 """Build hooks nodes.
14147
14148 """
14149 mn = self.cfg.GetMasterNode()
14150 return ([mn], [mn])
14151
14152 - def Exec(self, feedback_fn):
14163
14166 HPATH = "group-rename"
14167 HTYPE = constants.HTYPE_GROUP
14168 REQ_BGL = False
14169
14177
14179 """Check prerequisites.
14180
14181 Ensures requested new name is not yet used.
14182
14183 """
14184 try:
14185 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14186 except errors.OpPrereqError:
14187 pass
14188 else:
14189 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14190 " node group (UUID: %s)" %
14191 (self.op.new_name, new_name_uuid),
14192 errors.ECODE_EXISTS)
14193
14195 """Build hooks env.
14196
14197 """
14198 return {
14199 "OLD_NAME": self.op.group_name,
14200 "NEW_NAME": self.op.new_name,
14201 }
14202
14204 """Build hooks nodes.
14205
14206 """
14207 mn = self.cfg.GetMasterNode()
14208
14209 all_nodes = self.cfg.GetAllNodesInfo()
14210 all_nodes.pop(mn, None)
14211
14212 run_nodes = [mn]
14213 run_nodes.extend(node.name for node in all_nodes.values()
14214 if node.group == self.group_uuid)
14215
14216 return (run_nodes, run_nodes)
14217
14218 - def Exec(self, feedback_fn):
14219 """Rename the node group.
14220
14221 """
14222 group = self.cfg.GetNodeGroup(self.group_uuid)
14223
14224 if group is None:
14225 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14226 (self.op.group_name, self.group_uuid))
14227
14228 group.name = self.op.new_name
14229 self.cfg.Update(group, feedback_fn)
14230
14231 return self.op.new_name
14232
14235 HPATH = "group-evacuate"
14236 HTYPE = constants.HTYPE_GROUP
14237 REQ_BGL = False
14238
14264
14306
14308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14311
14312 assert owned_groups.issuperset(self.req_target_uuids)
14313 assert self.group_uuid in owned_groups
14314
14315
14316 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14317
14318
14319 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14320
14321
14322 _CheckInstancesNodeGroups(self.cfg, self.instances,
14323 owned_groups, owned_nodes, self.group_uuid)
14324
14325 if self.req_target_uuids:
14326
14327 self.target_uuids = self.req_target_uuids
14328 else:
14329
14330 self.target_uuids = [group_uuid for group_uuid in owned_groups
14331 if group_uuid != self.group_uuid]
14332
14333 if not self.target_uuids:
14334 raise errors.OpPrereqError("There are no possible target groups",
14335 errors.ECODE_INVAL)
14336
14338 """Build hooks env.
14339
14340 """
14341 return {
14342 "GROUP_NAME": self.op.group_name,
14343 "TARGET_GROUPS": " ".join(self.target_uuids),
14344 }
14345
14347 """Build hooks nodes.
14348
14349 """
14350 mn = self.cfg.GetMasterNode()
14351
14352 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14353
14354 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14355
14356 return (run_nodes, run_nodes)
14357
14358 - def Exec(self, feedback_fn):
14359 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14360
14361 assert self.group_uuid not in self.target_uuids
14362
14363 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14364 instances=instances, target_groups=self.target_uuids)
14365
14366 ial.Run(self.op.iallocator)
14367
14368 if not ial.success:
14369 raise errors.OpPrereqError("Can't compute group evacuation using"
14370 " iallocator '%s': %s" %
14371 (self.op.iallocator, ial.info),
14372 errors.ECODE_NORES)
14373
14374 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14375
14376 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14377 len(jobs), self.op.group_name)
14378
14379 return ResultWithJobs(jobs)
14380
14383 """Generic tags LU.
14384
14385 This is an abstract class which is the parent of all the other tags LUs.
14386
14387 """
14410
14411
14412
14413
14429
14448
14489
14517
14551
14554 """Sleep for a specified amount of time.
14555
14556 This LU sleeps on the master and/or nodes for a specified amount of
14557 time.
14558
14559 """
14560 REQ_BGL = False
14561
14563 """Expand names and set required locks.
14564
14565 This expands the node list, if any.
14566
14567 """
14568 self.needed_locks = {}
14569 if self.op.on_nodes:
14570
14571
14572
14573 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14574 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14575
14577 """Do the actual sleep.
14578
14579 """
14580 if self.op.on_master:
14581 if not utils.TestDelay(self.op.duration):
14582 raise errors.OpExecError("Error during master delay test")
14583 if self.op.on_nodes:
14584 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14585 for node, node_result in result.items():
14586 node_result.Raise("Failure during rpc call to node %s" % node)
14587
14588 - def Exec(self, feedback_fn):
14589 """Execute the test delay opcode, with the wanted repetitions.
14590
14591 """
14592 if self.op.repeat == 0:
14593 self._TestDelay()
14594 else:
14595 top_value = self.op.repeat - 1
14596 for i in range(self.op.repeat):
14597 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14598 self._TestDelay()
14599
14602 """Utility LU to test some aspects of the job queue.
14603
14604 """
14605 REQ_BGL = False
14606
14607
14608
14609 _CLIENT_CONNECT_TIMEOUT = 20.0
14610 _CLIENT_CONFIRM_TIMEOUT = 60.0
14611
14612 @classmethod
14614 """Opens a Unix socket and waits for another program to connect.
14615
14616 @type cb: callable
14617 @param cb: Callback to send socket name to client
14618 @type errcls: class
14619 @param errcls: Exception class to use for errors
14620
14621 """
14622
14623
14624
14625 tmpdir = tempfile.mkdtemp()
14626 try:
14627 tmpsock = utils.PathJoin(tmpdir, "sock")
14628
14629 logging.debug("Creating temporary socket at %s", tmpsock)
14630 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14631 try:
14632 sock.bind(tmpsock)
14633 sock.listen(1)
14634
14635
14636 cb(tmpsock)
14637
14638
14639 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14640 try:
14641 (conn, _) = sock.accept()
14642 except socket.error, err:
14643 raise errcls("Client didn't connect in time (%s)" % err)
14644 finally:
14645 sock.close()
14646 finally:
14647
14648 shutil.rmtree(tmpdir)
14649
14650
14651 try:
14652 try:
14653
14654
14655 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14656 conn.recv(1)
14657 except socket.error, err:
14658 raise errcls("Client failed to confirm notification (%s)" % err)
14659 finally:
14660 conn.close()
14661
14663 """Sends a notification to the client.
14664
14665 @type test: string
14666 @param test: Test name
14667 @param arg: Test argument (depends on test)
14668 @type sockname: string
14669 @param sockname: Socket path
14670
14671 """
14672 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14673
14674 - def _Notify(self, prereq, test, arg):
14675 """Notifies the client of a test.
14676
14677 @type prereq: bool
14678 @param prereq: Whether this is a prereq-phase test
14679 @type test: string
14680 @param test: Test name
14681 @param arg: Test argument (depends on test)
14682
14683 """
14684 if prereq:
14685 errcls = errors.OpPrereqError
14686 else:
14687 errcls = errors.OpExecError
14688
14689 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14690 test, arg),
14691 errcls)
14692
14694 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14695 self.expandnames_calls = 0
14696
14713
14714 - def Exec(self, feedback_fn):
14735
14738 """IAllocator framework.
14739
14740 An IAllocator instance has three sets of attributes:
14741 - cfg that is needed to query the cluster
14742 - input data (all members of the _KEYS class attribute are required)
14743 - four buffer attributes (in|out_data|text), that represent the
14744 input (to the external script) in text and data structure format,
14745 and the output from it, again in two formats
14746 - the result variables from the script (success, info, nodes) for
14747 easy usage
14748
14749 """
14750
14751
14752
14753 - def __init__(self, cfg, rpc_runner, mode, **kwargs):
14754 self.cfg = cfg
14755 self.rpc = rpc_runner
14756
14757 self.in_text = self.out_text = self.in_data = self.out_data = None
14758
14759 self.mode = mode
14760 self.memory = self.disks = self.disk_template = self.spindle_use = None
14761 self.os = self.tags = self.nics = self.vcpus = None
14762 self.hypervisor = None
14763 self.relocate_from = None
14764 self.name = None
14765 self.instances = None
14766 self.evac_mode = None
14767 self.target_groups = []
14768
14769 self.required_nodes = None
14770
14771 self.success = self.info = self.result = None
14772
14773 try:
14774 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14775 except KeyError:
14776 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14777 " IAllocator" % self.mode)
14778
14779 keyset = [n for (n, _) in keydata]
14780
14781 for key in kwargs:
14782 if key not in keyset:
14783 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14784 " IAllocator" % key)
14785 setattr(self, key, kwargs[key])
14786
14787 for key in keyset:
14788 if key not in kwargs:
14789 raise errors.ProgrammerError("Missing input parameter '%s' to"
14790 " IAllocator" % key)
14791 self._BuildInputData(compat.partial(fn, self), keydata)
14792
14794 """Compute the generic allocator input data.
14795
14796 This is the data that is independent of the actual operation.
14797
14798 """
14799 cfg = self.cfg
14800 cluster_info = cfg.GetClusterInfo()
14801
14802 data = {
14803 "version": constants.IALLOCATOR_VERSION,
14804 "cluster_name": cfg.GetClusterName(),
14805 "cluster_tags": list(cluster_info.GetTags()),
14806 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14807 "ipolicy": cluster_info.ipolicy,
14808 }
14809 ninfo = cfg.GetAllNodesInfo()
14810 iinfo = cfg.GetAllInstancesInfo().values()
14811 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14812
14813
14814 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14815
14816 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14817 hypervisor_name = self.hypervisor
14818 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14819 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14820 else:
14821 hypervisor_name = cluster_info.primary_hypervisor
14822
14823 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14824 [hypervisor_name])
14825 node_iinfo = \
14826 self.rpc.call_all_instances_info(node_list,
14827 cluster_info.enabled_hypervisors)
14828
14829 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14830
14831 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14832 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14833 i_list, config_ndata)
14834 assert len(data["nodes"]) == len(ninfo), \
14835 "Incomplete node data computed"
14836
14837 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14838
14839 self.in_data = data
14840
14841 @staticmethod
14843 """Compute node groups data.
14844
14845 """
14846 cluster = cfg.GetClusterInfo()
14847 ng = dict((guuid, {
14848 "name": gdata.name,
14849 "alloc_policy": gdata.alloc_policy,
14850 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14851 })
14852 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14853
14854 return ng
14855
14856 @staticmethod
14858 """Compute global node data.
14859
14860 @rtype: dict
14861 @returns: a dict of name: (node dict, node config)
14862
14863 """
14864
14865 node_results = dict((ninfo.name, {
14866 "tags": list(ninfo.GetTags()),
14867 "primary_ip": ninfo.primary_ip,
14868 "secondary_ip": ninfo.secondary_ip,
14869 "offline": ninfo.offline,
14870 "drained": ninfo.drained,
14871 "master_candidate": ninfo.master_candidate,
14872 "group": ninfo.group,
14873 "master_capable": ninfo.master_capable,
14874 "vm_capable": ninfo.vm_capable,
14875 "ndparams": cfg.GetNdParams(ninfo),
14876 })
14877 for ninfo in node_cfg.values())
14878
14879 return node_results
14880
14881 @staticmethod
14884 """Compute global node data.
14885
14886 @param node_results: the basic node structures as filled from the config
14887
14888 """
14889
14890
14891 node_results = dict(node_results)
14892 for nname, nresult in node_data.items():
14893 assert nname in node_results, "Missing basic data for node %s" % nname
14894 ninfo = node_cfg[nname]
14895
14896 if not (ninfo.offline or ninfo.drained):
14897 nresult.Raise("Can't get data for node %s" % nname)
14898 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14899 nname)
14900 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14901
14902 for attr in ["memory_total", "memory_free", "memory_dom0",
14903 "vg_size", "vg_free", "cpu_total"]:
14904 if attr not in remote_info:
14905 raise errors.OpExecError("Node '%s' didn't return attribute"
14906 " '%s'" % (nname, attr))
14907 if not isinstance(remote_info[attr], int):
14908 raise errors.OpExecError("Node '%s' returned invalid value"
14909 " for '%s': %s" %
14910 (nname, attr, remote_info[attr]))
14911
14912 i_p_mem = i_p_up_mem = 0
14913 for iinfo, beinfo in i_list:
14914 if iinfo.primary_node == nname:
14915 i_p_mem += beinfo[constants.BE_MAXMEM]
14916 if iinfo.name not in node_iinfo[nname].payload:
14917 i_used_mem = 0
14918 else:
14919 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14920 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14921 remote_info["memory_free"] -= max(0, i_mem_diff)
14922
14923 if iinfo.admin_state == constants.ADMINST_UP:
14924 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14925
14926
14927 pnr_dyn = {
14928 "total_memory": remote_info["memory_total"],
14929 "reserved_memory": remote_info["memory_dom0"],
14930 "free_memory": remote_info["memory_free"],
14931 "total_disk": remote_info["vg_size"],
14932 "free_disk": remote_info["vg_free"],
14933 "total_cpus": remote_info["cpu_total"],
14934 "i_pri_memory": i_p_mem,
14935 "i_pri_up_memory": i_p_up_mem,
14936 }
14937 pnr_dyn.update(node_results[nname])
14938 node_results[nname] = pnr_dyn
14939
14940 return node_results
14941
14942 @staticmethod
14944 """Compute global instance data.
14945
14946 """
14947 instance_data = {}
14948 for iinfo, beinfo in i_list:
14949 nic_data = []
14950 for nic in iinfo.nics:
14951 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14952 nic_dict = {
14953 "mac": nic.mac,
14954 "ip": nic.ip,
14955 "mode": filled_params[constants.NIC_MODE],
14956 "link": filled_params[constants.NIC_LINK],
14957 }
14958 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14959 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14960 nic_data.append(nic_dict)
14961 pir = {
14962 "tags": list(iinfo.GetTags()),
14963 "admin_state": iinfo.admin_state,
14964 "vcpus": beinfo[constants.BE_VCPUS],
14965 "memory": beinfo[constants.BE_MAXMEM],
14966 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14967 "os": iinfo.os,
14968 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14969 "nics": nic_data,
14970 "disks": [{constants.IDISK_SIZE: dsk.size,
14971 constants.IDISK_MODE: dsk.mode}
14972 for dsk in iinfo.disks],
14973 "disk_template": iinfo.disk_template,
14974 "hypervisor": iinfo.hypervisor,
14975 }
14976 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14977 pir["disks"])
14978 instance_data[iinfo.name] = pir
14979
14980 return instance_data
14981
14983 """Add new instance data to allocator structure.
14984
14985 This in combination with _AllocatorGetClusterData will create the
14986 correct structure needed as input for the allocator.
14987
14988 The checks for the completeness of the opcode must have already been
14989 done.
14990
14991 """
14992 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14993
14994 if self.disk_template in constants.DTS_INT_MIRROR:
14995 self.required_nodes = 2
14996 else:
14997 self.required_nodes = 1
14998
14999 request = {
15000 "name": self.name,
15001 "disk_template": self.disk_template,
15002 "tags": self.tags,
15003 "os": self.os,
15004 "vcpus": self.vcpus,
15005 "memory": self.memory,
15006 "spindle_use": self.spindle_use,
15007 "disks": self.disks,
15008 "disk_space_total": disk_space,
15009 "nics": self.nics,
15010 "required_nodes": self.required_nodes,
15011 "hypervisor": self.hypervisor,
15012 }
15013
15014 return request
15015
15051
15053 """Get data for node-evacuate requests.
15054
15055 """
15056 return {
15057 "instances": self.instances,
15058 "evac_mode": self.evac_mode,
15059 }
15060
15062 """Get data for node-evacuate requests.
15063
15064 """
15065 return {
15066 "instances": self.instances,
15067 "target_groups": self.target_groups,
15068 }
15069
15090
15091 _STRING_LIST = ht.TListOf(ht.TString)
15092 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15093
15094
15095 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15096 opcodes.OpInstanceMigrate.OP_ID,
15097 opcodes.OpInstanceReplaceDisks.OP_ID])
15098 })))
15099
15100 _NEVAC_MOVED = \
15101 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15102 ht.TItems([ht.TNonEmptyString,
15103 ht.TNonEmptyString,
15104 ht.TListOf(ht.TNonEmptyString),
15105 ])))
15106 _NEVAC_FAILED = \
15107 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15108 ht.TItems([ht.TNonEmptyString,
15109 ht.TMaybeString,
15110 ])))
15111 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15112 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15113
15114 _MODE_DATA = {
15115 constants.IALLOCATOR_MODE_ALLOC:
15116 (_AddNewInstance,
15117 [
15118 ("name", ht.TString),
15119 ("memory", ht.TInt),
15120 ("spindle_use", ht.TInt),
15121 ("disks", ht.TListOf(ht.TDict)),
15122 ("disk_template", ht.TString),
15123 ("os", ht.TString),
15124 ("tags", _STRING_LIST),
15125 ("nics", ht.TListOf(ht.TDict)),
15126 ("vcpus", ht.TInt),
15127 ("hypervisor", ht.TString),
15128 ], ht.TList),
15129 constants.IALLOCATOR_MODE_RELOC:
15130 (_AddRelocateInstance,
15131 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15132 ht.TList),
15133 constants.IALLOCATOR_MODE_NODE_EVAC:
15134 (_AddNodeEvacuate, [
15135 ("instances", _STRING_LIST),
15136 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15137 ], _NEVAC_RESULT),
15138 constants.IALLOCATOR_MODE_CHG_GROUP:
15139 (_AddChangeGroup, [
15140 ("instances", _STRING_LIST),
15141 ("target_groups", _STRING_LIST),
15142 ], _NEVAC_RESULT),
15143 }
15144
15145 - def Run(self, name, validate=True, call_fn=None):
15146 """Run an instance allocator and return the results.
15147
15148 """
15149 if call_fn is None:
15150 call_fn = self.rpc.call_iallocator_runner
15151
15152 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15153 result.Raise("Failure while running the iallocator script")
15154
15155 self.out_text = result.payload
15156 if validate:
15157 self._ValidateResult()
15158
15160 """Process the allocator results.
15161
15162 This will process and if successful save the result in
15163 self.out_data and the other parameters.
15164
15165 """
15166 try:
15167 rdict = serializer.Load(self.out_text)
15168 except Exception, err:
15169 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15170
15171 if not isinstance(rdict, dict):
15172 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15173
15174
15175 if "nodes" in rdict and "result" not in rdict:
15176 rdict["result"] = rdict["nodes"]
15177 del rdict["nodes"]
15178
15179 for key in "success", "info", "result":
15180 if key not in rdict:
15181 raise errors.OpExecError("Can't parse iallocator results:"
15182 " missing key '%s'" % key)
15183 setattr(self, key, rdict[key])
15184
15185 if not self._result_check(self.result):
15186 raise errors.OpExecError("Iallocator returned invalid result,"
15187 " expected %s, got %s" %
15188 (self._result_check, self.result),
15189 errors.ECODE_INVAL)
15190
15191 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15192 assert self.relocate_from is not None
15193 assert self.required_nodes == 1
15194
15195 node2group = dict((name, ndata["group"])
15196 for (name, ndata) in self.in_data["nodes"].items())
15197
15198 fn = compat.partial(self._NodesToGroups, node2group,
15199 self.in_data["nodegroups"])
15200
15201 instance = self.cfg.GetInstanceInfo(self.name)
15202 request_groups = fn(self.relocate_from + [instance.primary_node])
15203 result_groups = fn(rdict["result"] + [instance.primary_node])
15204
15205 if self.success and not set(result_groups).issubset(request_groups):
15206 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15207 " differ from original groups (%s)" %
15208 (utils.CommaJoin(result_groups),
15209 utils.CommaJoin(request_groups)))
15210
15211 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15212 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15213
15214 self.out_data = rdict
15215
15216 @staticmethod
15218 """Returns a list of unique group names for a list of nodes.
15219
15220 @type node2group: dict
15221 @param node2group: Map from node name to group UUID
15222 @type groups: dict
15223 @param groups: Group information
15224 @type nodes: list
15225 @param nodes: Node names
15226
15227 """
15228 result = set()
15229
15230 for node in nodes:
15231 try:
15232 group_uuid = node2group[node]
15233 except KeyError:
15234
15235 pass
15236 else:
15237 try:
15238 group = groups[group_uuid]
15239 except KeyError:
15240
15241 group_name = group_uuid
15242 else:
15243 group_name = group["name"]
15244
15245 result.add(group_name)
15246
15247 return sorted(result)
15248
15251 """Run allocator tests.
15252
15253 This LU runs the allocator tests
15254
15255 """
15257 """Check prerequisites.
15258
15259 This checks the opcode parameters depending on the director and mode test.
15260
15261 """
15262 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15263 for attr in ["memory", "disks", "disk_template",
15264 "os", "tags", "nics", "vcpus"]:
15265 if not hasattr(self.op, attr):
15266 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15267 attr, errors.ECODE_INVAL)
15268 iname = self.cfg.ExpandInstanceName(self.op.name)
15269 if iname is not None:
15270 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15271 iname, errors.ECODE_EXISTS)
15272 if not isinstance(self.op.nics, list):
15273 raise errors.OpPrereqError("Invalid parameter 'nics'",
15274 errors.ECODE_INVAL)
15275 if not isinstance(self.op.disks, list):
15276 raise errors.OpPrereqError("Invalid parameter 'disks'",
15277 errors.ECODE_INVAL)
15278 for row in self.op.disks:
15279 if (not isinstance(row, dict) or
15280 constants.IDISK_SIZE not in row or
15281 not isinstance(row[constants.IDISK_SIZE], int) or
15282 constants.IDISK_MODE not in row or
15283 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15284 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15285 " parameter", errors.ECODE_INVAL)
15286 if self.op.hypervisor is None:
15287 self.op.hypervisor = self.cfg.GetHypervisorType()
15288 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15289 fname = _ExpandInstanceName(self.cfg, self.op.name)
15290 self.op.name = fname
15291 self.relocate_from = \
15292 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15293 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15294 constants.IALLOCATOR_MODE_NODE_EVAC):
15295 if not self.op.instances:
15296 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15297 self.op.instances = _GetWantedInstances(self, self.op.instances)
15298 else:
15299 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15300 self.op.mode, errors.ECODE_INVAL)
15301
15302 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15303 if self.op.allocator is None:
15304 raise errors.OpPrereqError("Missing allocator name",
15305 errors.ECODE_INVAL)
15306 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15307 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15308 self.op.direction, errors.ECODE_INVAL)
15309
15310 - def Exec(self, feedback_fn):
15311 """Run the allocator test.
15312
15313 """
15314 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15315 ial = IAllocator(self.cfg, self.rpc,
15316 mode=self.op.mode,
15317 name=self.op.name,
15318 memory=self.op.memory,
15319 disks=self.op.disks,
15320 disk_template=self.op.disk_template,
15321 os=self.op.os,
15322 tags=self.op.tags,
15323 nics=self.op.nics,
15324 vcpus=self.op.vcpus,
15325 hypervisor=self.op.hypervisor,
15326 spindle_use=self.op.spindle_use,
15327 )
15328 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15329 ial = IAllocator(self.cfg, self.rpc,
15330 mode=self.op.mode,
15331 name=self.op.name,
15332 relocate_from=list(self.relocate_from),
15333 )
15334 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15335 ial = IAllocator(self.cfg, self.rpc,
15336 mode=self.op.mode,
15337 instances=self.op.instances,
15338 target_groups=self.op.target_groups)
15339 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15340 ial = IAllocator(self.cfg, self.rpc,
15341 mode=self.op.mode,
15342 instances=self.op.instances,
15343 evac_mode=self.op.evac_mode)
15344 else:
15345 raise errors.ProgrammerError("Uncatched mode %s in"
15346 " LUTestAllocator.Exec", self.op.mode)
15347
15348 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15349 result = ial.in_text
15350 else:
15351 ial.Run(self.op.allocator, validate=False)
15352 result = ial.out_text
15353 return result
15354
15355
15356
15357 _QUERY_IMPL = {
15358 constants.QR_CLUSTER: _ClusterQuery,
15359 constants.QR_INSTANCE: _InstanceQuery,
15360 constants.QR_NODE: _NodeQuery,
15361 constants.QR_GROUP: _GroupQuery,
15362 constants.QR_OS: _OsQuery,
15363 constants.QR_EXPORT: _ExportQuery,
15364 }
15365
15366 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15370 """Returns the implemtnation for a query type.
15371
15372 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15373
15374 """
15375 try:
15376 return _QUERY_IMPL[name]
15377 except KeyError:
15378 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15379 errors.ECODE_INVAL)
15380