22 """Module implementing the master-side code."""
23
24
25
26
27
28
29
30
31 import os
32 import os.path
33 import time
34 import re
35 import platform
36 import logging
37 import copy
38 import OpenSSL
39 import socket
40 import tempfile
41 import shutil
42 import itertools
43 import operator
44
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
61
62 import ganeti.masterd.instance
66 """Tells if node supports OOB.
67
68 @type cfg: L{config.ConfigWriter}
69 @param cfg: The cluster configuration
70 @type node: L{objects.Node}
71 @param node: The node
72 @return: The OOB script if supported or an empty string otherwise
73
74 """
75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76
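# Usage sketch (ours, not part of the original module; the helper above is
# assumed to be Ganeti's _SupportsOob): callers typically treat the returned
# OOB program path as a truthy guard, e.g.:
#
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported by node %s" % node.name,
#                                errors.ECODE_ENVIRON)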
80 """Logical Unit base class.
81
82 Subclasses must follow these rules:
83 - implement ExpandNames
84 - implement CheckPrereq (except when tasklets are used)
85 - implement Exec (except when tasklets are used)
86 - implement BuildHooksEnv
87 - redefine HPATH and HTYPE
88 - optionally redefine their run requirements:
89 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
90
91 Note that all commands require root permissions.
92
93 @ivar dry_run_result: the value (if any) that will be returned to the caller
94 in dry-run mode (signalled by opcode dry_run parameter)
95
96 """
97 HPATH = None
98 HTYPE = None
99 REQ_BGL = True
100
101 def __init__(self, processor, op, context, rpc):
102 """Constructor for LogicalUnit.
103
104 This needs to be overridden in derived classes in order to check op
105 validity.
106
107 """
108 self.proc = processor
109 self.op = op
110 self.cfg = context.cfg
111 self.context = context
112 self.rpc = rpc
113
114 self.needed_locks = None
115 self.acquired_locks = {}
116 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
117 self.add_locks = {}
118 self.remove_locks = {}
119
120 self.recalculate_locks = {}
121 self.__ssh = None
122
123 self.Log = processor.Log
124 self.LogWarning = processor.LogWarning
125 self.LogInfo = processor.LogInfo
126 self.LogStep = processor.LogStep
127
128 self.dry_run_result = None
129
130 if (not hasattr(self.op, "debug_level") or
131 not isinstance(self.op.debug_level, int)):
132 self.op.debug_level = 0
133
134
135 self.tasklets = None
136
137
138 self.op.Validate(True)
139
140 self.CheckArguments()
141
143 """Returns the SshRunner object
144
145 """
146 if not self.__ssh:
147 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
148 return self.__ssh
149
150 ssh = property(fget=__GetSSH)
151
153 """Check syntactic validity for the opcode arguments.
154
155 This method is for doing a simple syntactic check to ensure the
156 validity of opcode parameters, without any cluster-related
157 checks. While the same can be accomplished in ExpandNames and/or
158 CheckPrereq, doing these separately is better because:
159
160 - ExpandNames is left as a purely lock-related function
161 - CheckPrereq is run after we have acquired locks (and possibly
162 waited for them)
163
164 The function is allowed to change the self.op attribute so that
165 later methods no longer need to worry about missing parameters.
166
167 """
168 pass
169
171 """Expand names for this LU.
172
173 This method is called before starting to execute the opcode, and it should
174 update all the parameters of the opcode to their canonical form (e.g. a
175 short node name must be fully expanded after this method has successfully
176 completed). This way locking, hooks, logging, etc. can work correctly.
177
178 LUs which implement this method must also populate the self.needed_locks
179 member, as a dict with lock levels as keys, and a list of needed lock names
180 as values. Rules:
181
182 - use an empty dict if you don't need any lock
183 - if you don't need any lock at a particular level omit that level
184 - don't put anything for the BGL level
185 - if you want all locks at a level use locking.ALL_SET as a value
186
187 If you need to share locks (rather than acquire them exclusively) at one
188 level you can modify self.share_locks, setting a true value (usually 1) for
189 that level. By default locks are not shared.
190
191 This function can also define a list of tasklets, which then will be
192 executed in order instead of the usual LU-level CheckPrereq and Exec
193 functions, if those are not defined by the LU.
194
195 Examples::
196
197 # Acquire all nodes and one instance
198 self.needed_locks = {
199 locking.LEVEL_NODE: locking.ALL_SET,
200 locking.LEVEL_INSTANCE: ['instance1.example.com'],
201 }
202 # Acquire just two nodes
203 self.needed_locks = {
204 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
205 }
206 # Acquire no locks
207 self.needed_locks = {} # No, you can't leave it to the default value None
208
209 """
210
211
212
213 if self.REQ_BGL:
214 self.needed_locks = {}
215 else:
216 raise NotImplementedError
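# Additional sketch (ours): acquiring all node locks in shared mode, as
# described above for self.share_locks.
#
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#   self.share_locks[locking.LEVEL_NODE] = 1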
217
219 """Declare LU locking needs for a level
220
221 While most LUs can just declare their locking needs at ExpandNames time,
222 sometimes there's the need to calculate some locks after having acquired
223 the ones before. This function is called just before acquiring locks at a
224 particular level, but after acquiring the ones at lower levels, and permits
225 such calculations. It can be used to modify self.needed_locks, and by
226 default it does nothing.
227
228 This function is only called if you have something already set in
229 self.needed_locks for the level.
230
231 @param level: Locking level which is going to be locked
232 @type level: member of ganeti.locking.LEVELS
233
234 """
235
237 """Check prerequisites for this LU.
238
239 This method should check that the prerequisites for the execution
240 of this LU are fulfilled. It can do internode communication, but
241 it should be idempotent - no cluster or system changes are
242 allowed.
243
244 The method should raise errors.OpPrereqError in case something is
245 not fulfilled. Its return value is ignored.
246
247 This method should also update all the parameters of the opcode to
248 their canonical form if it hasn't been done by ExpandNames before.
249
250 """
251 if self.tasklets is not None:
252 for (idx, tl) in enumerate(self.tasklets):
253 logging.debug("Checking prerequisites for tasklet %s/%s",
254 idx + 1, len(self.tasklets))
255 tl.CheckPrereq()
256 else:
257 pass
258
259 def Exec(self, feedback_fn):
260 """Execute the LU.
261
262 This method should implement the actual work. It should raise
263 errors.OpExecError for failures that are somewhat dealt with in
264 code, or expected.
265
266 """
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
270 tl.Exec(feedback_fn)
271 else:
272 raise NotImplementedError
273
275 """Build hooks environment for this LU.
276
277 This method should return a three-element tuple consisting of: a dict
278 containing the environment that will be used for running the
279 specific hook for this LU, a list of node names on which the hook
280 should run before the execution, and a list of node names on which
281 the hook should run after the execution.
282
283 The keys of the dict must not be prefixed with 'GANETI_', as that
284 prefix is handled by the hooks runner. Also note that additional keys
285 will be added by the hooks runner. If the LU doesn't define any
286 environment, an empty dict (and not None) should be returned.
287
288 If the hook should run on no nodes, an empty list (and not None) should be returned.
289
290 Note that if the HPATH for a LU class is None, this function will
291 not be called.
292
293 """
294 raise NotImplementedError
295
296 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
297 """Notify the LU about the results of its hooks.
298
299 This method is called every time a hooks phase is executed, and notifies
300 the Logical Unit about the hooks' result. The LU can then use it to alter
301 its result based on the hooks. By default the method does nothing and the
302 previous result is passed back unchanged but any LU can define it if it
303 wants to use the local cluster hook-scripts somehow.
304
305 @param phase: one of L{constants.HOOKS_PHASE_POST} or
306 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
307 @param hook_results: the results of the multi-node hooks rpc call
308 @param feedback_fn: function used to send feedback back to the caller
309 @param lu_result: the previous Exec result this LU had, or None
310 in the PRE phase
311 @return: the new Exec result, based on the previous result
312 and hook results
313
314 """
315
316
317
318 return lu_result
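# Illustrative override (ours, hypothetical LU): report how many nodes ran the
# post-phase hooks while passing the Exec result through unchanged.
#
#   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
#     if phase == constants.HOOKS_PHASE_POST:
#       feedback_fn("Post-hooks ran on %d node(s)" % len(hook_results))
#     return lu_result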
319
321 """Helper function to expand and lock an instance.
322
323 Many LUs that work on an instance take its name in self.op.instance_name
324 and need to expand it and then declare the expanded name for locking. This
325 function does it, and then updates self.op.instance_name to the expanded
326 name. It also initializes needed_locks as a dict, if this hasn't been done
327 before.
328
329 """
330 if self.needed_locks is None:
331 self.needed_locks = {}
332 else:
333 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
334 "_ExpandAndLockInstance called with instance-level locks set"
335 self.op.instance_name = _ExpandInstanceName(self.cfg,
336 self.op.instance_name)
337 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338
340 """Helper function to declare instances' nodes for locking.
341
342 This function should be called after locking one or more instances to lock
343 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
344 with all primary or secondary nodes for instances already locked and
345 present in self.needed_locks[locking.LEVEL_INSTANCE].
346
347 It should be called from DeclareLocks, and for safety only works if
348 self.recalculate_locks[locking.LEVEL_NODE] is set.
349
350 In the future it may grow parameters to lock only some instances' nodes, or
351 to lock only primary or secondary nodes, if needed.
352
353 It should be called in DeclareLocks in a way similar to::
354
355 if level == locking.LEVEL_NODE:
356 self._LockInstancesNodes()
357
358 @type primary_only: boolean
359 @param primary_only: only lock primary nodes of locked instances
360
361 """
362 assert locking.LEVEL_NODE in self.recalculate_locks, \
363 "_LockInstancesNodes helper function called with no nodes to recalculate"
364
365
366
367
368
369
370 wanted_nodes = []
371 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
372 instance = self.context.cfg.GetInstanceInfo(instance_name)
373 wanted_nodes.append(instance.primary_node)
374 if not primary_only:
375 wanted_nodes.extend(instance.secondary_nodes)
376
377 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
378 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
379 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
380 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
381
382 del self.recalculate_locks[locking.LEVEL_NODE]
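# Typical usage pattern (sketch, ours): ExpandNames requests the instance lock
# and marks node locks for recalculation, then DeclareLocks delegates to
# _LockInstancesNodes once the instance lock is held.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()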
383
386 """Simple LU which runs no hooks.
387
388 This LU is intended as a parent for other LogicalUnits which will
389 run no hooks, in order to reduce duplicate code.
390
391 """
392 HPATH = None
393 HTYPE = None
394
396 """Empty BuildHooksEnv for NoHooksLu.
397
398 This just raises an error.
399
400 """
401 assert False, "BuildHooksEnv called for NoHooksLUs"
402
405 """Tasklet base class.
406
407 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
408 they can mix legacy code with tasklets. Locking needs to be done in the LU,
409 tasklets know nothing about locks.
410
411 Subclasses must follow these rules:
412 - Implement CheckPrereq
413 - Implement Exec
414
415 """
417 self.lu = lu
418
419
420 self.cfg = lu.cfg
421 self.rpc = lu.rpc
422
424 """Check prerequisites for this tasklets.
425
426 This method should check whether the prerequisites for the execution of
427 this tasklet are fulfilled. It can do internode communication, but it
428 should be idempotent - no cluster or system changes are allowed.
429
430 The method should raise errors.OpPrereqError in case something is not
431 fulfilled. Its return value is ignored.
432
433 This method should also update all parameters to their canonical form if it
434 hasn't been done before.
435
436 """
437 pass
438
439 def Exec(self, feedback_fn):
440 """Execute the tasklet.
441
442 This method should implement the actual work. It should raise
443 errors.OpExecError for failures that are somewhat dealt with in code, or
444 expected.
445
446 """
447 raise NotImplementedError
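# Illustrative sketch (ours): a minimal tasklet; the name TLExampleCheck is
# hypothetical and locking is left entirely to the owning LU.
#
#   class TLExampleCheck(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name, errors.ECODE_NOENT)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Instance %s looks sane" % self.instance_name)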
448
451 """Base for query utility classes.
452
453 """
454
455 FIELDS = None
456
457 def __init__(self, names, fields, use_locking):
458 """Initializes this class.
459
460 """
461 self.names = names
462 self.use_locking = use_locking
463
464 self.query = query.Query(self.FIELDS, fields)
465 self.requested_data = self.query.RequestedData()
466
467 self.do_locking = None
468 self.wanted = None
469
470 def _GetNames(self, lu, all_names, lock_level):
471 """Helper function to determine names asked for in the query.
472
473 """
474 if self.do_locking:
475 names = lu.acquired_locks[lock_level]
476 else:
477 names = all_names
478
479 if self.wanted == locking.ALL_SET:
480 assert not self.names
481
482 return utils.NiceSort(names)
483
484
485 assert self.names
486 assert not self.do_locking or lu.acquired_locks[lock_level]
487
488 missing = set(self.wanted).difference(names)
489 if missing:
490 raise errors.OpExecError("Some items were removed before retrieving"
491 " their data: %s" % missing)
492
493
494 return self.wanted
495
496 @classmethod
498 """Returns list of available fields.
499
500 @return: List of L{objects.QueryFieldDefinition}
501
502 """
503 return query.QueryFields(cls.FIELDS, fields)
504
506 """Expand names for this query.
507
508 See L{LogicalUnit.ExpandNames}.
509
510 """
511 raise NotImplementedError()
512
514 """Declare locks for this query.
515
516 See L{LogicalUnit.DeclareLocks}.
517
518 """
519 raise NotImplementedError()
520
522 """Collects all data for this query.
523
524 @return: Query data object
525
526 """
527 raise NotImplementedError()
528
534
540
543 """Returns list of checked and expanded node names.
544
545 @type lu: L{LogicalUnit}
546 @param lu: the logical unit on whose behalf we execute
547 @type nodes: list
548 @param nodes: list of node names or None for all nodes
549 @rtype: list
550 @return: the list of nodes, sorted
551 @raise errors.ProgrammerError: if the nodes parameter is wrong type
552
553 """
554 if nodes:
555 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
556
557 return utils.NiceSort(lu.cfg.GetNodeList())
558
561 """Returns list of checked and expanded instance names.
562
563 @type lu: L{LogicalUnit}
564 @param lu: the logical unit on whose behalf we execute
565 @type instances: list
566 @param instances: list of instance names or None for all instances
567 @rtype: list
568 @return: the list of instances, sorted
569 @raise errors.OpPrereqError: if the instances parameter is wrong type
570 @raise errors.OpPrereqError: if any of the passed instances is not found
571
572 """
573 if instances:
574 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
575 else:
576 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
577 return wanted
578
579
580 def _GetUpdatedParams(old_params, update_dict,
581 use_default=True, use_none=False):
582 """Return the new version of a parameter dictionary.
583
584 @type old_params: dict
585 @param old_params: old parameters
586 @type update_dict: dict
587 @param update_dict: dict containing new parameter values, or
588 constants.VALUE_DEFAULT to reset the parameter to its default
589 value
590 @type use_default: boolean
591 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
592 values as 'to be deleted' values
593 @type use_none: boolean
594 @param use_none: whether to recognise C{None} values as 'to be
595 deleted' values
596 @rtype: dict
597 @return: the new parameter dictionary
598
599 """
600 params_copy = copy.deepcopy(old_params)
601 for key, val in update_dict.iteritems():
602 if ((use_default and val == constants.VALUE_DEFAULT) or
603 (use_none and val is None)):
604 try:
605 del params_copy[key]
606 except KeyError:
607 pass
608 else:
609 params_copy[key] = val
610 return params_copy
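# Worked example (ours, not part of Ganeti; the helper name below is
# hypothetical): VALUE_DEFAULT entries delete keys, other entries overwrite or
# add them.
def _ExampleGetUpdatedParams():
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  # Returns {"root_path": "/dev/vda1", "serial_console": True}
  return _GetUpdatedParams(old, update)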
611
614 """Checks whether all selected fields are valid.
615
616 @type static: L{utils.FieldSet}
617 @param static: static fields set
618 @type dynamic: L{utils.FieldSet}
619 @param dynamic: dynamic fields set
620
621 """
622 f = utils.FieldSet()
623 f.Extend(static)
624 f.Extend(dynamic)
625
626 delta = f.NonMatching(selected)
627 if delta:
628 raise errors.OpPrereqError("Unknown output fields selected: %s"
629 % ",".join(delta), errors.ECODE_INVAL)
630
633 """Validates that given hypervisor params are not global ones.
634
635 This will ensure that instances don't get customised versions of
636 global params.
637
638 """
639 used_globals = constants.HVC_GLOBALS.intersection(params)
640 if used_globals:
641 msg = ("The following hypervisor parameters are global and cannot"
642 " be customized at instance level, please modify them at"
643 " cluster level: %s" % utils.CommaJoin(used_globals))
644 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
645
648 """Ensure that a given node is online.
649
650 @param lu: the LU on behalf of which we make the check
651 @param node: the node to check
652 @param msg: if passed, should be a message to replace the default one
653 @raise errors.OpPrereqError: if the node is offline
654
655 """
656 if msg is None:
657 msg = "Can't use offline node"
658 if lu.cfg.GetNodeInfo(node).offline:
659 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
660
663 """Ensure that a given node is not drained.
664
665 @param lu: the LU on behalf of which we make the check
666 @param node: the node to check
667 @raise errors.OpPrereqError: if the node is drained
668
669 """
670 if lu.cfg.GetNodeInfo(node).drained:
671 raise errors.OpPrereqError("Can't use drained node %s" % node,
672 errors.ECODE_STATE)
673
676 """Ensure that a given node is vm capable.
677
678 @param lu: the LU on behalf of which we make the check
679 @param node: the node to check
680 @raise errors.OpPrereqError: if the node is not vm capable
681
682 """
683 if not lu.cfg.GetNodeInfo(node).vm_capable:
684 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
685 errors.ECODE_STATE)
686
689 """Ensure that a node supports a given OS.
690
691 @param lu: the LU on behalf of which we make the check
692 @param node: the node to check
693 @param os_name: the OS to query about
694 @param force_variant: whether to ignore variant errors
695 @raise errors.OpPrereqError: if the node is not supporting the OS
696
697 """
698 result = lu.rpc.call_os_get(node, os_name)
699 result.Raise("OS '%s' not in supported OS list for node %s" %
700 (os_name, node),
701 prereq=True, ecode=errors.ECODE_INVAL)
702 if not force_variant:
703 _CheckOSVariant(result.payload, os_name)
704
707 """Ensure that a node has the given secondary ip.
708
709 @type lu: L{LogicalUnit}
710 @param lu: the LU on behalf of which we make the check
711 @type node: string
712 @param node: the node to check
713 @type secondary_ip: string
714 @param secondary_ip: the ip to check
715 @type prereq: boolean
716 @param prereq: whether to throw a prerequisite or an execute error
717 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
718 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
719
720 """
721 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
722 result.Raise("Failure checking secondary ip on node %s" % node,
723 prereq=prereq, ecode=errors.ECODE_ENVIRON)
724 if not result.payload:
725 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
726 " please fix and re-run this command" % secondary_ip)
727 if prereq:
728 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
729 else:
730 raise errors.OpExecError(msg)
731
734 """Reads the cluster domain secret.
735
736 """
737 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
738 strict=True)
739
755
758 """Expand an item name.
759
760 @param fn: the function to use for expansion
761 @param name: requested item name
762 @param kind: text description ('Node' or 'Instance')
763 @return: the resolved (full) name
764 @raise errors.OpPrereqError: if the item is not found
765
766 """
767 full_name = fn(name)
768 if full_name is None:
769 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
770 errors.ECODE_NOENT)
771 return full_name
772
777
782
783
784 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
785 memory, vcpus, nics, disk_template, disks,
786 bep, hvp, hypervisor_name):
787 """Builds instance related env variables for hooks
788
789 This builds the hook environment from individual variables.
790
791 @type name: string
792 @param name: the name of the instance
793 @type primary_node: string
794 @param primary_node: the name of the instance's primary node
795 @type secondary_nodes: list
796 @param secondary_nodes: list of secondary nodes as strings
797 @type os_type: string
798 @param os_type: the name of the instance's OS
799 @type status: boolean
800 @param status: the should_run status of the instance
801 @type memory: string
802 @param memory: the memory size of the instance
803 @type vcpus: string
804 @param vcpus: the count of VCPUs the instance has
805 @type nics: list
806 @param nics: list of tuples (ip, mac, mode, link) representing
807 the NICs the instance has
808 @type disk_template: string
809 @param disk_template: the disk template of the instance
810 @type disks: list
811 @param disks: the list of (size, mode) pairs
812 @type bep: dict
813 @param bep: the backend parameters for the instance
814 @type hvp: dict
815 @param hvp: the hypervisor parameters for the instance
816 @type hypervisor_name: string
817 @param hypervisor_name: the hypervisor for the instance
818 @rtype: dict
819 @return: the hook environment for this instance
820
821 """
822 if status:
823 str_status = "up"
824 else:
825 str_status = "down"
826 env = {
827 "OP_TARGET": name,
828 "INSTANCE_NAME": name,
829 "INSTANCE_PRIMARY": primary_node,
830 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
831 "INSTANCE_OS_TYPE": os_type,
832 "INSTANCE_STATUS": str_status,
833 "INSTANCE_MEMORY": memory,
834 "INSTANCE_VCPUS": vcpus,
835 "INSTANCE_DISK_TEMPLATE": disk_template,
836 "INSTANCE_HYPERVISOR": hypervisor_name,
837 }
838
839 if nics:
840 nic_count = len(nics)
841 for idx, (ip, mac, mode, link) in enumerate(nics):
842 if ip is None:
843 ip = ""
844 env["INSTANCE_NIC%d_IP" % idx] = ip
845 env["INSTANCE_NIC%d_MAC" % idx] = mac
846 env["INSTANCE_NIC%d_MODE" % idx] = mode
847 env["INSTANCE_NIC%d_LINK" % idx] = link
848 if mode == constants.NIC_MODE_BRIDGED:
849 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
850 else:
851 nic_count = 0
852
853 env["INSTANCE_NIC_COUNT"] = nic_count
854
855 if disks:
856 disk_count = len(disks)
857 for idx, (size, mode) in enumerate(disks):
858 env["INSTANCE_DISK%d_SIZE" % idx] = size
859 env["INSTANCE_DISK%d_MODE" % idx] = mode
860 else:
861 disk_count = 0
862
863 env["INSTANCE_DISK_COUNT"] = disk_count
864
865 for source, kind in [(bep, "BE"), (hvp, "HV")]:
866 for key, value in source.items():
867 env["INSTANCE_%s_%s" % (kind, key)] = value
868
869 return env
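# Worked example (ours; all values are made up, and "drbd"/"xen-pvm" are passed
# as plain strings to avoid assuming constant names): the call yields, among
# others, INSTANCE_NAME, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC,
# INSTANCE_DISK_COUNT=1 and INSTANCE_DISK0_SIZE entries.
def _ExampleInstanceHookEnv():
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debian-image", True,
                               512, 1,
                               [("198.51.100.10", "aa:00:00:11:22:33",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               "drbd", [(10240, "rw")], {}, {}, "xen-pvm")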
870
873 """Build a list of nic information tuples.
874
875 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
876 value in LUInstanceQueryData.
877
878 @type lu: L{LogicalUnit}
879 @param lu: the logical unit on whose behalf we execute
880 @type nics: list of L{objects.NIC}
881 @param nics: list of nics to convert to hooks tuples
882
883 """
884 hooks_nics = []
885 cluster = lu.cfg.GetClusterInfo()
886 for nic in nics:
887 ip = nic.ip
888 mac = nic.mac
889 filled_params = cluster.SimpleFillNIC(nic.nicparams)
890 mode = filled_params[constants.NIC_MODE]
891 link = filled_params[constants.NIC_LINK]
892 hooks_nics.append((ip, mac, mode, link))
893 return hooks_nics
894
897 """Builds instance related env variables for hooks from an object.
898
899 @type lu: L{LogicalUnit}
900 @param lu: the logical unit on whose behalf we execute
901 @type instance: L{objects.Instance}
902 @param instance: the instance for which we should build the
903 environment
904 @type override: dict
905 @param override: dictionary with key/values that will override
906 our values
907 @rtype: dict
908 @return: the hook environment dictionary
909
910 """
911 cluster = lu.cfg.GetClusterInfo()
912 bep = cluster.FillBE(instance)
913 hvp = cluster.FillHV(instance)
914 args = {
915 'name': instance.name,
916 'primary_node': instance.primary_node,
917 'secondary_nodes': instance.secondary_nodes,
918 'os_type': instance.os,
919 'status': instance.admin_up,
920 'memory': bep[constants.BE_MEMORY],
921 'vcpus': bep[constants.BE_VCPUS],
922 'nics': _NICListToTuple(lu, instance.nics),
923 'disk_template': instance.disk_template,
924 'disks': [(disk.size, disk.mode) for disk in instance.disks],
925 'bep': bep,
926 'hvp': hvp,
927 'hypervisor_name': instance.hypervisor,
928 }
929 if override:
930 args.update(override)
931 return _BuildInstanceHookEnv(**args)
932
935 """Adjust the candidate pool after node operations.
936
937 """
938 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
939 if mod_list:
940 lu.LogInfo("Promoted nodes to master candidate role: %s",
941 utils.CommaJoin(node.name for node in mod_list))
942 for name in mod_list:
943 lu.context.ReaddNode(name)
944 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
945 if mc_now > mc_max:
946 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
947 (mc_now, mc_max))
948
959
973
982
985 """Check whether an OS name conforms to the os variants specification.
986
987 @type os_obj: L{objects.OS}
988 @param os_obj: OS object to check
989 @type name: string
990 @param name: OS name passed by the user, to check for validity
991
992 """
993 if not os_obj.supported_variants:
994 return
995 variant = objects.OS.GetVariant(name)
996 if not variant:
997 raise errors.OpPrereqError("OS name must include a variant",
998 errors.ECODE_INVAL)
999
1000 if variant not in os_obj.supported_variants:
1001 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
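# Worked example (ours): Ganeti OS names use the "os+variant" form, so for an
# OS object whose supported_variants list is ["wheezy"]:
#
#   _CheckOSVariant(os_obj, "debian+wheezy")   # passes
#   _CheckOSVariant(os_obj, "debian")          # raises: name must include a variant
#   _CheckOSVariant(os_obj, "debian+squeeze")  # raises: unsupported variant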
1002
1006
1009 """Returns a list of all primary and secondary instances on a node.
1010
1011 """
1012
1013 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1014
1017 """Returns primary instances on a node.
1018
1019 """
1020 return _GetNodeInstancesInner(cfg,
1021 lambda inst: node_name == inst.primary_node)
1022
1030
1033 """Returns the arguments for a storage type.
1034
1035 """
1036
1037 if storage_type == constants.ST_FILE:
1038
1039 return [[cfg.GetFileStorageDir()]]
1040
1041 return []
1042
1045 faulty = []
1046
1047 for dev in instance.disks:
1048 cfg.SetDiskID(dev, node_name)
1049
1050 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1051 result.Raise("Failed to get disk status from node %s" % node_name,
1052 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1053
1054 for idx, bdev_status in enumerate(result.payload):
1055 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1056 faulty.append(idx)
1057
1058 return faulty
1059
1062 """Check the sanity of iallocator and node arguments and use the
1063 cluster-wide iallocator if appropriate.
1064
1065 Check that at most one of (iallocator, node) is specified. If none is
1066 specified, then the LU's opcode's iallocator slot is filled with the
1067 cluster-wide default iallocator.
1068
1069 @type iallocator_slot: string
1070 @param iallocator_slot: the name of the opcode iallocator slot
1071 @type node_slot: string
1072 @param node_slot: the name of the opcode target node slot
1073
1074 """
1075 node = getattr(lu.op, node_slot, None)
1076 iallocator = getattr(lu.op, iallocator_slot, None)
1077
1078 if node is not None and iallocator is not None:
1079 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1080 errors.ECODE_INVAL)
1081 elif node is None and iallocator is None:
1082 default_iallocator = lu.cfg.GetDefaultIAllocator()
1083 if default_iallocator:
1084 setattr(lu.op, iallocator_slot, default_iallocator)
1085 else:
1086 raise errors.OpPrereqError("No iallocator or node given and no"
1087 " cluster-wide default iallocator found."
1088 " Please specify either an iallocator or a"
1089 " node, or set a cluster-wide default"
1090 " iallocator.")
1091
1092
1093 class LUClusterPostInit(LogicalUnit):
1094 """Logical unit for running hooks after cluster initialization.
1095
1096 """
1097 HPATH = "cluster-init"
1098 HTYPE = constants.HTYPE_CLUSTER
1099
1100 def BuildHooksEnv(self):
1101 """Build hooks env.
1102
1103 """
1104 env = {"OP_TARGET": self.cfg.GetClusterName()}
1105 mn = self.cfg.GetMasterNode()
1106 return env, [], [mn]
1107
1108 def Exec(self, feedback_fn):
1109 """Nothing to do.
1110
1111 """
1112 return True
1113
1116 """Logical unit for destroying the cluster.
1117
1118 """
1119 HPATH = "cluster-destroy"
1120 HTYPE = constants.HTYPE_CLUSTER
1121
1123 """Build hooks env.
1124
1125 """
1126 env = {"OP_TARGET": self.cfg.GetClusterName()}
1127 return env, [], []
1128
1130 """Check prerequisites.
1131
1132 This checks whether the cluster is empty.
1133
1134 Any errors are signaled by raising errors.OpPrereqError.
1135
1136 """
1137 master = self.cfg.GetMasterNode()
1138
1139 nodelist = self.cfg.GetNodeList()
1140 if len(nodelist) != 1 or nodelist[0] != master:
1141 raise errors.OpPrereqError("There are still %d node(s) in"
1142 " this cluster." % (len(nodelist) - 1),
1143 errors.ECODE_INVAL)
1144 instancelist = self.cfg.GetInstanceList()
1145 if instancelist:
1146 raise errors.OpPrereqError("There are still %d instance(s) in"
1147 " this cluster." % len(instancelist),
1148 errors.ECODE_INVAL)
1149
1150 def Exec(self, feedback_fn):
1168
1171 """Verifies a certificate for LUClusterVerify.
1172
1173 @type filename: string
1174 @param filename: Path to PEM file
1175
1176 """
1177 try:
1178 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1179 utils.ReadFile(filename))
1180 except Exception, err:
1181 return (LUClusterVerify.ETYPE_ERROR,
1182 "Failed to load X509 certificate %s: %s" % (filename, err))
1183
1184 (errcode, msg) = \
1185 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1186 constants.SSL_CERT_EXPIRATION_ERROR)
1187
1188 if msg:
1189 fnamemsg = "While verifying %s: %s" % (filename, msg)
1190 else:
1191 fnamemsg = None
1192
1193 if errcode is None:
1194 return (None, fnamemsg)
1195 elif errcode == utils.CERT_WARNING:
1196 return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1197 elif errcode == utils.CERT_ERROR:
1198 return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1199
1200 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1201
1204 """Verifies the cluster status.
1205
1206 """
1207 HPATH = "cluster-verify"
1208 HTYPE = constants.HTYPE_CLUSTER
1209 REQ_BGL = False
1210
1211 TCLUSTER = "cluster"
1212 TNODE = "node"
1213 TINSTANCE = "instance"
1214
1215 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1216 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1217 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1218 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1219 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1220 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1221 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1222 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1223 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1224 ENODEDRBD = (TNODE, "ENODEDRBD")
1225 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1226 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1227 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1228 ENODEHV = (TNODE, "ENODEHV")
1229 ENODELVM = (TNODE, "ENODELVM")
1230 ENODEN1 = (TNODE, "ENODEN1")
1231 ENODENET = (TNODE, "ENODENET")
1232 ENODEOS = (TNODE, "ENODEOS")
1233 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1234 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1235 ENODERPC = (TNODE, "ENODERPC")
1236 ENODESSH = (TNODE, "ENODESSH")
1237 ENODEVERSION = (TNODE, "ENODEVERSION")
1238 ENODESETUP = (TNODE, "ENODESETUP")
1239 ENODETIME = (TNODE, "ENODETIME")
1240 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1241
1242 ETYPE_FIELD = "code"
1243 ETYPE_ERROR = "ERROR"
1244 ETYPE_WARNING = "WARNING"
1245
1246 _HOOKS_INDENT_RE = re.compile("^", re.M)
1247
1249 """A class representing the logical and physical status of a node.
1250
1251 @type name: string
1252 @ivar name: the node name to which this object refers
1253 @ivar volumes: a structure as returned from
1254 L{ganeti.backend.GetVolumeList} (runtime)
1255 @ivar instances: a list of running instances (runtime)
1256 @ivar pinst: list of configured primary instances (config)
1257 @ivar sinst: list of configured secondary instances (config)
1258 @ivar sbp: dictionary of {primary-node: list of instances} of all peers
1259 of this node (config)
1260 @ivar mfree: free memory, as reported by hypervisor (runtime)
1261 @ivar dfree: free disk, as reported by the node (runtime)
1262 @ivar offline: the offline status (config)
1263 @type rpc_fail: boolean
1264 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1265 not whether the individual keys were correct) (runtime)
1266 @type lvm_fail: boolean
1267 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1268 @type hyp_fail: boolean
1269 @ivar hyp_fail: whether the RPC call didn't return the instance list
1270 @type ghost: boolean
1271 @ivar ghost: whether this is a known node or not (config)
1272 @type os_fail: boolean
1273 @ivar os_fail: whether the RPC call didn't return valid OS data
1274 @type oslist: list
1275 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1276 @type vm_capable: boolean
1277 @ivar vm_capable: whether the node can host instances
1278
1279 """
1280 def __init__(self, offline=False, name=None, vm_capable=True):
1281 self.name = name
1282 self.volumes = {}
1283 self.instances = []
1284 self.pinst = []
1285 self.sinst = []
1286 self.sbp = {}
1287 self.mfree = 0
1288 self.dfree = 0
1289 self.offline = offline
1290 self.vm_capable = vm_capable
1291 self.rpc_fail = False
1292 self.lvm_fail = False
1293 self.hyp_fail = False
1294 self.ghost = False
1295 self.os_fail = False
1296 self.oslist = {}
1297
1304
1305 def _Error(self, ecode, item, msg, *args, **kwargs):
1306 """Format an error message.
1307
1308 Based on the opcode's error_codes parameter, either format a
1309 parseable error code, or a simpler error string.
1310
1311 This must be called only from Exec and functions called from Exec.
1312
1313 """
1314 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1315 itype, etxt = ecode
1316
1317 if args:
1318 msg = msg % args
1319
1320 if self.op.error_codes:
1321 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1322 else:
1323 if item:
1324 item = " " + item
1325 else:
1326 item = ""
1327 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1328
1329 self._feedback_fn(" - %s" % msg)
1330
1331 def _ErrorIf(self, cond, *args, **kwargs):
1332 """Log an error message if the passed condition is True.
1333
1334 """
1335 cond = bool(cond) or self.op.debug_simulate_errors
1336 if cond:
1337 self._Error(*args, **kwargs)
1338
1339 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1340 self.bad = self.bad or cond
1341
1343 """Perform some basic validation on data returned from a node.
1344
1345 - check the result data structure is well formed and has all the
1346 mandatory fields
1347 - check ganeti version
1348
1349 @type ninfo: L{objects.Node}
1350 @param ninfo: the node to check
1351 @param nresult: the results from the node
1352 @rtype: boolean
1353 @return: whether overall this call was successful (and we can expect
1354 reasonable values in the respose)
1355
1356 """
1357 node = ninfo.name
1358 _ErrorIf = self._ErrorIf
1359
1360
1361 test = not nresult or not isinstance(nresult, dict)
1362 _ErrorIf(test, self.ENODERPC, node,
1363 "unable to verify node: no data returned")
1364 if test:
1365 return False
1366
1367
1368 local_version = constants.PROTOCOL_VERSION
1369 remote_version = nresult.get("version", None)
1370 test = not (remote_version and
1371 isinstance(remote_version, (list, tuple)) and
1372 len(remote_version) == 2)
1373 _ErrorIf(test, self.ENODERPC, node,
1374 "connection to node returned invalid data")
1375 if test:
1376 return False
1377
1378 test = local_version != remote_version[0]
1379 _ErrorIf(test, self.ENODEVERSION, node,
1380 "incompatible protocol versions: master %s,"
1381 " node %s", local_version, remote_version[0])
1382 if test:
1383 return False
1384
1385
1386
1387
1388 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1389 self.ENODEVERSION, node,
1390 "software version mismatch: master %s, node %s",
1391 constants.RELEASE_VERSION, remote_version[1],
1392 code=self.ETYPE_WARNING)
1393
1394 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1395 if ninfo.vm_capable and isinstance(hyp_result, dict):
1396 for hv_name, hv_result in hyp_result.iteritems():
1397 test = hv_result is not None
1398 _ErrorIf(test, self.ENODEHV, node,
1399 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1400
1401 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1402 if ninfo.vm_capable and isinstance(hvp_result, list):
1403 for item, hv_name, hv_result in hvp_result:
1404 _ErrorIf(True, self.ENODEHV, node,
1405 "hypervisor %s parameter verify failure (source %s): %s",
1406 hv_name, item, hv_result)
1407
1408 test = nresult.get(constants.NV_NODESETUP,
1409 ["Missing NODESETUP results"])
1410 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1411 "; ".join(test))
1412
1413 return True
1414
1415 def _VerifyNodeTime(self, ninfo, nresult,
1416 nvinfo_starttime, nvinfo_endtime):
1417 """Check the node time.
1418
1419 @type ninfo: L{objects.Node}
1420 @param ninfo: the node to check
1421 @param nresult: the remote results for the node
1422 @param nvinfo_starttime: the start time of the RPC call
1423 @param nvinfo_endtime: the end time of the RPC call
1424
1425 """
1426 node = ninfo.name
1427 _ErrorIf = self._ErrorIf
1428
1429 ntime = nresult.get(constants.NV_TIME, None)
1430 try:
1431 ntime_merged = utils.MergeTime(ntime)
1432 except (ValueError, TypeError):
1433 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1434 return
1435
1436 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1437 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1438 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1439 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1440 else:
1441 ntime_diff = None
1442
1443 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1444 "Node time diverges by at least %s from master node time",
1445 ntime_diff)
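# Worked numeric example (ours; assuming NODE_MAX_CLOCK_SKEW were 150 seconds):
# a verify RPC started at t=1000.0 and finished at t=1001.0 tolerates node
# clocks between 850.0 and 1151.0; a node reporting 1200.0 would be flagged
# with a divergence of "199.0s" (1200.0 - 1001.0).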
1446
1448 """Check the node LVM results.
1449
1450 @type ninfo: L{objects.Node}
1451 @param ninfo: the node to check
1452 @param nresult: the remote results for the node
1453 @param vg_name: the configured VG name
1454
1455 """
1456 if vg_name is None:
1457 return
1458
1459 node = ninfo.name
1460 _ErrorIf = self._ErrorIf
1461
1462
1463 vglist = nresult.get(constants.NV_VGLIST, None)
1464 test = not vglist
1465 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1466 if not test:
1467 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1468 constants.MIN_VG_SIZE)
1469 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1470
1471
1472 pvlist = nresult.get(constants.NV_PVLIST, None)
1473 test = pvlist is None
1474 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1475 if not test:
1476
1477
1478
1479 for _, pvname, owner_vg in pvlist:
1480 test = ":" in pvname
1481 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1482 " '%s' of VG '%s'", pvname, owner_vg)
1483
1485 """Check the node bridges.
1486
1487 @type ninfo: L{objects.Node}
1488 @param ninfo: the node to check
1489 @param nresult: the remote results for the node
1490 @param bridges: the expected list of bridges
1491
1492 """
1493 if not bridges:
1494 return
1495
1496 node = ninfo.name
1497 _ErrorIf = self._ErrorIf
1498
1499 missing = nresult.get(constants.NV_BRIDGES, None)
1500 test = not isinstance(missing, list)
1501 _ErrorIf(test, self.ENODENET, node,
1502 "did not return valid bridge information")
1503 if not test:
1504 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1505 utils.CommaJoin(sorted(missing)))
1506
1508 """Check the node network connectivity results.
1509
1510 @type ninfo: L{objects.Node}
1511 @param ninfo: the node to check
1512 @param nresult: the remote results for the node
1513
1514 """
1515 node = ninfo.name
1516 _ErrorIf = self._ErrorIf
1517
1518 test = constants.NV_NODELIST not in nresult
1519 _ErrorIf(test, self.ENODESSH, node,
1520 "node hasn't returned node ssh connectivity data")
1521 if not test:
1522 if nresult[constants.NV_NODELIST]:
1523 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1524 _ErrorIf(True, self.ENODESSH, node,
1525 "ssh communication with node '%s': %s", a_node, a_msg)
1526
1527 test = constants.NV_NODENETTEST not in nresult
1528 _ErrorIf(test, self.ENODENET, node,
1529 "node hasn't returned node tcp connectivity data")
1530 if not test:
1531 if nresult[constants.NV_NODENETTEST]:
1532 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1533 for anode in nlist:
1534 _ErrorIf(True, self.ENODENET, node,
1535 "tcp communication with node '%s': %s",
1536 anode, nresult[constants.NV_NODENETTEST][anode])
1537
1538 test = constants.NV_MASTERIP not in nresult
1539 _ErrorIf(test, self.ENODENET, node,
1540 "node hasn't returned node master IP reachability data")
1541 if not test:
1542 if not nresult[constants.NV_MASTERIP]:
1543 if node == self.master_node:
1544 msg = "the master node cannot reach the master IP (not configured?)"
1545 else:
1546 msg = "cannot reach the master IP"
1547 _ErrorIf(True, self.ENODENET, node, msg)
1548
1549 def _VerifyInstance(self, instance, instanceconfig, node_image,
1550 diskstatus):
1551 """Verify an instance.
1552
1553 This function checks to see if the required block devices are
1554 available on the instance's node.
1555
1556 """
1557 _ErrorIf = self._ErrorIf
1558 node_current = instanceconfig.primary_node
1559
1560 node_vol_should = {}
1561 instanceconfig.MapLVsByNode(node_vol_should)
1562
1563 for node in node_vol_should:
1564 n_img = node_image[node]
1565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566
1567 continue
1568 for volume in node_vol_should[node]:
1569 test = volume not in n_img.volumes
1570 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1571 "volume %s missing on node %s", volume, node)
1572
1573 if instanceconfig.admin_up:
1574 pri_img = node_image[node_current]
1575 test = instance not in pri_img.instances and not pri_img.offline
1576 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1577 "instance not running on its primary node %s",
1578 node_current)
1579
1580 for node, n_img in node_image.items():
1581 if node != node_current:
1582 test = instance in n_img.instances
1583 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1584 "instance should not run on node %s", node)
1585
1586 diskdata = [(nname, success, status, idx)
1587 for (nname, disks) in diskstatus.items()
1588 for idx, (success, status) in enumerate(disks)]
1589
1590 for nname, success, bdev_status, idx in diskdata:
1591
1592
1593 snode = node_image[nname]
1594 bad_snode = snode.ghost or snode.offline
1595 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1596 self.EINSTANCEFAULTYDISK, instance,
1597 "couldn't retrieve status for disk/%s on %s: %s",
1598 idx, nname, bdev_status)
1599 _ErrorIf((instanceconfig.admin_up and success and
1600 bdev_status.ldisk_status == constants.LDS_FAULTY),
1601 self.EINSTANCEFAULTYDISK, instance,
1602 "disk/%s on %s is faulty", idx, nname)
1603
1605 """Verify if there are any unknown volumes in the cluster.
1606
1607 The .os, .swap and backup volumes are ignored. All other volumes are
1608 reported as unknown.
1609
1610 @type reserved: L{ganeti.utils.FieldSet}
1611 @param reserved: a FieldSet of reserved volume names
1612
1613 """
1614 for node, n_img in node_image.items():
1615 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1616
1617 continue
1618 for volume in n_img.volumes:
1619 test = ((node not in node_vol_should or
1620 volume not in node_vol_should[node]) and
1621 not reserved.Matches(volume))
1622 self._ErrorIf(test, self.ENODEORPHANLV, node,
1623 "volume %s is unknown", volume)
1624
1626 """Verify the list of running instances.
1627
1628 This checks what instances are running but unknown to the cluster.
1629
1630 """
1631 for node, n_img in node_image.items():
1632 for o_inst in n_img.instances:
1633 test = o_inst not in instancelist
1634 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1635 "instance %s on node %s should not exist", o_inst, node)
1636
1638 """Verify N+1 Memory Resilience.
1639
1640 Check that if one single node dies we can still start all the
1641 instances it was primary for.
1642
1643 """
1644 for node, n_img in node_image.items():
1645
1646
1647
1648
1649
1650
1651
1652
1653 if n_img.offline:
1654
1655
1656
1657
1658 continue
1659 for prinode, instances in n_img.sbp.items():
1660 needed_mem = 0
1661 for instance in instances:
1662 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1663 if bep[constants.BE_AUTO_BALANCE]:
1664 needed_mem += bep[constants.BE_MEMORY]
1665 test = n_img.mfree < needed_mem
1666 self._ErrorIf(test, self.ENODEN1, node,
1667 "not enough memory to accomodate instance failovers"
1668 " should node %s fail (%dMiB needed, %dMiB available)",
1669 prinode, needed_mem, n_img.mfree)
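# Worked example (ours): if node A is primary for two auto-balanced instances
# with BE_MEMORY of 2048 and 1024 MiB whose secondaries live on node B, node B
# must report mfree >= 3072 MiB to pass this N+1 check; instances with
# BE_AUTO_BALANCE disabled are not counted.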
1670
1671 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1672 master_files):
1673 """Verifies and computes the node required file checksums.
1674
1675 @type ninfo: L{objects.Node}
1676 @param ninfo: the node to check
1677 @param nresult: the remote results for the node
1678 @param file_list: required list of files
1679 @param local_cksum: dictionary of local files and their checksums
1680 @param master_files: list of files that only masters should have
1681
1682 """
1683 node = ninfo.name
1684 _ErrorIf = self._ErrorIf
1685
1686 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1687 test = not isinstance(remote_cksum, dict)
1688 _ErrorIf(test, self.ENODEFILECHECK, node,
1689 "node hasn't returned file checksum data")
1690 if test:
1691 return
1692
1693 for file_name in file_list:
1694 node_is_mc = ninfo.master_candidate
1695 must_have = (file_name not in master_files) or node_is_mc
1696
1697 test1 = file_name not in remote_cksum
1698
1699 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1700
1701 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1702 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1703 "file '%s' missing", file_name)
1704 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1705 "file '%s' has wrong checksum", file_name)
1706
1707 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1708 "file '%s' should not exist on non master"
1709 " candidates (and the file is outdated)", file_name)
1710
1711 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1712 "file '%s' should not exist"
1713 " on non master candidates", file_name)
1714
1715 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1716 drbd_map):
1717 """Verifies and the node DRBD status.
1718
1719 @type ninfo: L{objects.Node}
1720 @param ninfo: the node to check
1721 @param nresult: the remote results for the node
1722 @param instanceinfo: the dict of instances
1723 @param drbd_helper: the configured DRBD usermode helper
1724 @param drbd_map: the DRBD map as returned by
1725 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1726
1727 """
1728 node = ninfo.name
1729 _ErrorIf = self._ErrorIf
1730
1731 if drbd_helper:
1732 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1733 test = (helper_result is None)
1734 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1735 "no drbd usermode helper returned")
1736 if helper_result:
1737 status, payload = helper_result
1738 test = not status
1739 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1740 "drbd usermode helper check unsuccessful: %s", payload)
1741 test = status and (payload != drbd_helper)
1742 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1743 "wrong drbd usermode helper: %s", payload)
1744
1745
1746 node_drbd = {}
1747 for minor, instance in drbd_map[node].items():
1748 test = instance not in instanceinfo
1749 _ErrorIf(test, self.ECLUSTERCFG, None,
1750 "ghost instance '%s' in temporary DRBD map", instance)
1751
1752
1753
1754 if test:
1755 node_drbd[minor] = (instance, False)
1756 else:
1757 instance = instanceinfo[instance]
1758 node_drbd[minor] = (instance.name, instance.admin_up)
1759
1760
1761 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1762 test = not isinstance(used_minors, (tuple, list))
1763 _ErrorIf(test, self.ENODEDRBD, node,
1764 "cannot parse drbd status file: %s", str(used_minors))
1765 if test:
1766
1767 return
1768
1769 for minor, (iname, must_exist) in node_drbd.items():
1770 test = minor not in used_minors and must_exist
1771 _ErrorIf(test, self.ENODEDRBD, node,
1772 "drbd minor %d of instance %s is not active", minor, iname)
1773 for minor in used_minors:
1774 test = minor not in node_drbd
1775 _ErrorIf(test, self.ENODEDRBD, node,
1776 "unallocated drbd minor %d is in use", minor)
1777
1779 """Builds the node OS structures.
1780
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param nimg: the node image object
1785
1786 """
1787 node = ninfo.name
1788 _ErrorIf = self._ErrorIf
1789
1790 remote_os = nresult.get(constants.NV_OSLIST, None)
1791 test = (not isinstance(remote_os, list) or
1792 not compat.all(isinstance(v, list) and len(v) == 7
1793 for v in remote_os))
1794
1795 _ErrorIf(test, self.ENODEOS, node,
1796 "node hasn't returned valid OS data")
1797
1798 nimg.os_fail = test
1799
1800 if test:
1801 return
1802
1803 os_dict = {}
1804
1805 for (name, os_path, status, diagnose,
1806 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1807
1808 if name not in os_dict:
1809 os_dict[name] = []
1810
1811
1812
1813 parameters = [tuple(v) for v in parameters]
1814 os_dict[name].append((os_path, status, diagnose,
1815 set(variants), set(parameters), set(api_ver)))
1816
1817 nimg.oslist = os_dict
1818
1820 """Verifies the node OS list.
1821
1822 @type ninfo: L{objects.Node}
1823 @param ninfo: the node to check
1824 @param nimg: the node image object
1825 @param base: the 'template' node we match against (e.g. from the master)
1826
1827 """
1828 node = ninfo.name
1829 _ErrorIf = self._ErrorIf
1830
1831 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1832
1833 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1834 for os_name, os_data in nimg.oslist.items():
1835 assert os_data, "Empty OS status for OS %s?!" % os_name
1836 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1837 _ErrorIf(not f_status, self.ENODEOS, node,
1838 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1839 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1840 "OS '%s' has multiple entries (first one shadows the rest): %s",
1841 os_name, utils.CommaJoin([v[0] for v in os_data]))
1842
1843 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1844 and not f_var, self.ENODEOS, node,
1845 "OS %s with API at least %d does not declare any variant",
1846 os_name, constants.OS_API_V15)
1847
1848 test = os_name not in base.oslist
1849 _ErrorIf(test, self.ENODEOS, node,
1850 "Extra OS %s not present on reference node (%s)",
1851 os_name, base.name)
1852 if test:
1853 continue
1854 assert base.oslist[os_name], "Base node has empty OS status?"
1855 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1856 if not b_status:
1857
1858 continue
1859 for kind, a, b in [("API version", f_api, b_api),
1860 ("variants list", f_var, b_var),
1861 ("parameters", beautify_params(f_param),
1862 beautify_params(b_param))]:
1863 _ErrorIf(a != b, self.ENODEOS, node,
1864 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1865 kind, os_name, base.name,
1866 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1867
1868
1869 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1870 _ErrorIf(missing, self.ENODEOS, node,
1871 "OSes present on reference node %s but missing on this node: %s",
1872 base.name, utils.CommaJoin(missing))
1873
1875 """Verifies out of band functionality of a node.
1876
1877 @type ninfo: L{objects.Node}
1878 @param ninfo: the node to check
1879 @param nresult: the remote results for the node
1880
1881 """
1882 node = ninfo.name
1883
1884
1885 if ((ninfo.master_candidate or ninfo.master_capable) and
1886 constants.NV_OOB_PATHS in nresult):
1887 for path_result in nresult[constants.NV_OOB_PATHS]:
1888 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1889
1891 """Verifies and updates the node volume data.
1892
1893 This function will update a L{NodeImage}'s internal structures
1894 with data from the remote call.
1895
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param nimg: the node image object
1900 @param vg_name: the configured VG name
1901
1902 """
1903 node = ninfo.name
1904 _ErrorIf = self._ErrorIf
1905
1906 nimg.lvm_fail = True
1907 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1908 if vg_name is None:
1909 pass
1910 elif isinstance(lvdata, basestring):
1911 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1912 utils.SafeEncode(lvdata))
1913 elif not isinstance(lvdata, dict):
1914 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1915 else:
1916 nimg.volumes = lvdata
1917 nimg.lvm_fail = False
1918
1920 """Verifies and updates the node instance list.
1921
1922 If the listing was successful, then updates this node's instance
1923 list. Otherwise, it marks the RPC call as failed for the instance
1924 list key.
1925
1926 @type ninfo: L{objects.Node}
1927 @param ninfo: the node to check
1928 @param nresult: the remote results for the node
1929 @param nimg: the node image object
1930
1931 """
1932 idata = nresult.get(constants.NV_INSTANCELIST, None)
1933 test = not isinstance(idata, list)
1934 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1935 " (instancelist): %s", utils.SafeEncode(str(idata)))
1936 if test:
1937 nimg.hyp_fail = True
1938 else:
1939 nimg.instances = idata
1940
1942 """Verifies and computes a node information map
1943
1944 @type ninfo: L{objects.Node}
1945 @param ninfo: the node to check
1946 @param nresult: the remote results for the node
1947 @param nimg: the node image object
1948 @param vg_name: the configured VG name
1949
1950 """
1951 node = ninfo.name
1952 _ErrorIf = self._ErrorIf
1953
1954
1955 hv_info = nresult.get(constants.NV_HVINFO, None)
1956 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1957 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1958 if not test:
1959 try:
1960 nimg.mfree = int(hv_info["memory_free"])
1961 except (ValueError, TypeError):
1962 _ErrorIf(True, self.ENODERPC, node,
1963 "node returned invalid nodeinfo, check hypervisor")
1964
1965
1966 if vg_name is not None:
1967 test = (constants.NV_VGLIST not in nresult or
1968 vg_name not in nresult[constants.NV_VGLIST])
1969 _ErrorIf(test, self.ENODELVM, node,
1970 "node didn't return data for the volume group '%s'"
1971 " - it is either missing or broken", vg_name)
1972 if not test:
1973 try:
1974 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1975 except (ValueError, TypeError):
1976 _ErrorIf(True, self.ENODERPC, node,
1977 "node returned invalid LVM info, check LVM status")
1978
1980 """Gets per-disk status information for all instances.
1981
1982 @type nodelist: list of strings
1983 @param nodelist: Node names
1984 @type node_image: dict of (name, L{NodeImage})
1985 @param node_image: Node image objects
1986 @type instanceinfo: dict of (name, L{objects.Instance})
1987 @param instanceinfo: Instance objects
1988 @rtype: {instance: {node: [(success, payload)]}}
1989 @return: a dictionary of per-instance dictionaries with nodes as
1990 keys and disk information as values; the disk information is a
1991 list of tuples (success, payload)
1992
1993 """
1994 _ErrorIf = self._ErrorIf
1995
1996 node_disks = {}
1997 node_disks_devonly = {}
1998 diskless_instances = set()
1999 diskless = constants.DT_DISKLESS
2000
2001 for nname in nodelist:
2002 node_instances = list(itertools.chain(node_image[nname].pinst,
2003 node_image[nname].sinst))
2004 diskless_instances.update(inst for inst in node_instances
2005 if instanceinfo[inst].disk_template == diskless)
2006 disks = [(inst, disk)
2007 for inst in node_instances
2008 for disk in instanceinfo[inst].disks]
2009
2010 if not disks:
2011
2012 continue
2013
2014 node_disks[nname] = disks
2015
2016
2017
2018 devonly = [dev.Copy() for (_, dev) in disks]
2019
2020 for dev in devonly:
2021 self.cfg.SetDiskID(dev, nname)
2022
2023 node_disks_devonly[nname] = devonly
2024
2025 assert len(node_disks) == len(node_disks_devonly)
2026
2027
2028 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2029 node_disks_devonly)
2030
2031 assert len(result) == len(node_disks)
2032
2033 instdisk = {}
2034
2035 for (nname, nres) in result.items():
2036 disks = node_disks[nname]
2037
2038 if nres.offline:
2039
2040 data = len(disks) * [(False, "node offline")]
2041 else:
2042 msg = nres.fail_msg
2043 _ErrorIf(msg, self.ENODERPC, nname,
2044 "while getting disk information: %s", msg)
2045 if msg:
2046
2047 data = len(disks) * [(False, msg)]
2048 else:
2049 data = []
2050 for idx, i in enumerate(nres.payload):
2051 if isinstance(i, (tuple, list)) and len(i) == 2:
2052 data.append(i)
2053 else:
2054 logging.warning("Invalid result from node %s, entry %d: %s",
2055 nname, idx, i)
2056 data.append((False, "Invalid result from the remote node"))
2057
2058 for ((inst, _), status) in zip(disks, data):
2059 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2060
2061
2062 for inst in diskless_instances:
2063 assert inst not in instdisk
2064 instdisk[inst] = {}
2065
2066 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2067 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2068 compat.all(isinstance(s, (tuple, list)) and
2069 len(s) == 2 for s in statuses)
2070 for inst, nnames in instdisk.items()
2071 for nname, statuses in nnames.items())
2072 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2073
2074 return instdisk
2075
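# --- Editorial sketch, not part of the original module -----------------------
# Illustration of the mapping returned by _CollectDiskInfo above; instance and
# node names are hypothetical, and "payload_diskN" stands for whatever
# call_blockdev_getmirrorstatus_multi returned for that disk:
#
#   {"inst1.example.com": {"node1.example.com": [(True, payload_disk0),
#                                                (True, payload_disk1)],
#                          "node2.example.com": [(False, "node offline"),
#                                                (False, "node offline")]},
#    "diskless1.example.com": {}}
# ------------------------------------------------------------------------------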
2089
2090
2092 """Build hooks env.
2093
2094 Cluster-Verify hooks only run in the post phase; if they fail, their
2095 output is logged in the verify output and the verification fails.
2096
2097 """
2098 all_nodes = self.cfg.GetNodeList()
2099 env = {
2100 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2101 }
2102 for node in self.cfg.GetAllNodesInfo().values():
2103 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2104
2105 return env, [], all_nodes
2106
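# --- Editorial sketch, not part of the original module -----------------------
# For a two-node cluster the hooks environment built above would look roughly
# like the following (tag values and node names are hypothetical):
#
#   {"CLUSTER_TAGS": "prod web",
#    "NODE_TAGS_node1.example.com": "rack1",
#    "NODE_TAGS_node2.example.com": ""}
#
# It is returned together with the empty pre-hook node list and the full node
# list as the second and third elements of the tuple.
# ------------------------------------------------------------------------------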
2107 - def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes.
2109
2110 """
2111
2112 self.bad = False
2113 _ErrorIf = self._ErrorIf
2114 verbose = self.op.verbose
2115 self._feedback_fn = feedback_fn
2116 feedback_fn("* Verifying global settings")
2117 for msg in self.cfg.VerifyConfig():
2118 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2119
2120
2121 for cert_filename in constants.ALL_CERT_FILES:
2122 (errcode, msg) = _VerifyCertificate(cert_filename)
2123 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2124
2125 vg_name = self.cfg.GetVGName()
2126 drbd_helper = self.cfg.GetDRBDHelper()
2127 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2128 cluster = self.cfg.GetClusterInfo()
2129 nodeinfo_byname = self.cfg.GetAllNodesInfo()
2130 nodelist = utils.NiceSort(nodeinfo_byname.keys())
2131 nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
2132 instanceinfo = self.cfg.GetAllInstancesInfo()
2133 instancelist = utils.NiceSort(instanceinfo.keys())
2134 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2135 i_non_redundant = []
2136 i_non_a_balanced = []
2137 n_offline = 0
2138 n_drained = 0
2139 node_vol_should = {}
2140
2141
2142
2143 master_files = [constants.CLUSTER_CONF_FILE]
2144 master_node = self.master_node = self.cfg.GetMasterNode()
2145 master_ip = self.cfg.GetMasterIP()
2146
2147 file_names = ssconf.SimpleStore().GetFileList()
2148 file_names.extend(constants.ALL_CERT_FILES)
2149 file_names.extend(master_files)
2150 if cluster.modify_etc_hosts:
2151 file_names.append(constants.ETC_HOSTS)
2152
2153 local_checksums = utils.FingerprintFiles(file_names)
2154
2155
2156 hvp_data = []
2157 for hv_name in hypervisors:
2158 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2159 for os_name, os_hvp in cluster.os_hvp.items():
2160 for hv_name, hv_params in os_hvp.items():
2161 if not hv_params:
2162 continue
2163 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2164 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2165
2166 for instance in instanceinfo.values():
2167 if not instance.hvparams:
2168 continue
2169 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2170 cluster.FillHV(instance)))
2171
2172 self._VerifyHVP(hvp_data)
2173
2174 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2175 node_verify_param = {
2176 constants.NV_FILELIST: file_names,
2177 constants.NV_NODELIST: [node.name for node in nodeinfo
2178 if not node.offline],
2179 constants.NV_HYPERVISOR: hypervisors,
2180 constants.NV_HVPARAMS: hvp_data,
2181 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2182 node.secondary_ip) for node in nodeinfo
2183 if not node.offline],
2184 constants.NV_INSTANCELIST: hypervisors,
2185 constants.NV_VERSION: None,
2186 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2187 constants.NV_NODESETUP: None,
2188 constants.NV_TIME: None,
2189 constants.NV_MASTERIP: (master_node, master_ip),
2190 constants.NV_OSLIST: None,
2191 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2192 }
2193
2194 if vg_name is not None:
2195 node_verify_param[constants.NV_VGLIST] = None
2196 node_verify_param[constants.NV_LVLIST] = vg_name
2197 node_verify_param[constants.NV_PVLIST] = [vg_name]
2198 node_verify_param[constants.NV_DRBDLIST] = None
2199
2200 if drbd_helper:
2201 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2202
2203
2204
2205 bridges = set()
2206 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2207 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2208 bridges.add(default_nicpp[constants.NIC_LINK])
2209 for instance in instanceinfo.values():
2210 for nic in instance.nics:
2211 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2212 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2213 bridges.add(full_nic[constants.NIC_LINK])
2214
2215 if bridges:
2216 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2217
2218
2219 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2220 name=node.name,
2221 vm_capable=node.vm_capable))
2222 for node in nodeinfo)
2223
2224
2225 oob_paths = []
2226 for node in nodeinfo:
2227 path = _SupportsOob(self.cfg, node)
2228 if path and path not in oob_paths:
2229 oob_paths.append(path)
2230
2231 if oob_paths:
2232 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2233
2234 for instance in instancelist:
2235 inst_config = instanceinfo[instance]
2236
2237 for nname in inst_config.all_nodes:
2238 if nname not in node_image:
2239
2240 gnode = self.NodeImage(name=nname)
2241 gnode.ghost = True
2242 node_image[nname] = gnode
2243
2244 inst_config.MapLVsByNode(node_vol_should)
2245
2246 pnode = inst_config.primary_node
2247 node_image[pnode].pinst.append(instance)
2248
2249 for snode in inst_config.secondary_nodes:
2250 nimg = node_image[snode]
2251 nimg.sinst.append(instance)
2252 if pnode not in nimg.sbp:
2253 nimg.sbp[pnode] = []
2254 nimg.sbp[pnode].append(instance)
2255
2256
2257
2258
2259
2260
2261
2262
2263 nvinfo_starttime = time.time()
2264 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2265 self.cfg.GetClusterName())
2266 nvinfo_endtime = time.time()
2267
2268 all_drbd_map = self.cfg.ComputeDRBDMap()
2269
2270 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2271 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2272
2273 feedback_fn("* Verifying node status")
2274
2275 refos_img = None
2276
2277 for node_i in nodeinfo:
2278 node = node_i.name
2279 nimg = node_image[node]
2280
2281 if node_i.offline:
2282 if verbose:
2283 feedback_fn("* Skipping offline node %s" % (node,))
2284 n_offline += 1
2285 continue
2286
2287 if node == master_node:
2288 ntype = "master"
2289 elif node_i.master_candidate:
2290 ntype = "master candidate"
2291 elif node_i.drained:
2292 ntype = "drained"
2293 n_drained += 1
2294 else:
2295 ntype = "regular"
2296 if verbose:
2297 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2298
2299 msg = all_nvinfo[node].fail_msg
2300 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2301 if msg:
2302 nimg.rpc_fail = True
2303 continue
2304
2305 nresult = all_nvinfo[node].payload
2306
2307 nimg.call_ok = self._VerifyNode(node_i, nresult)
2308 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2309 self._VerifyNodeNetwork(node_i, nresult)
2310 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2311 master_files)
2312
2313 self._VerifyOob(node_i, nresult)
2314
2315 if nimg.vm_capable:
2316 self._VerifyNodeLVM(node_i, nresult, vg_name)
2317 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2318 all_drbd_map)
2319
2320 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2321 self._UpdateNodeInstances(node_i, nresult, nimg)
2322 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2323 self._UpdateNodeOS(node_i, nresult, nimg)
2324 if not nimg.os_fail:
2325 if refos_img is None:
2326 refos_img = nimg
2327 self._VerifyNodeOS(node_i, nimg, refos_img)
2328 self._VerifyNodeBridges(node_i, nresult, bridges)
2329
2330 feedback_fn("* Verifying instance status")
2331 for instance in instancelist:
2332 if verbose:
2333 feedback_fn("* Verifying instance %s" % instance)
2334 inst_config = instanceinfo[instance]
2335 self._VerifyInstance(instance, inst_config, node_image,
2336 instdisk[instance])
2337 inst_nodes_offline = []
2338
2339 pnode = inst_config.primary_node
2340 pnode_img = node_image[pnode]
2341 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2342 self.ENODERPC, pnode, "instance %s, connection to"
2343 " primary node failed", instance)
2344
2345 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2346 "instance lives on offline node %s", inst_config.primary_node)
2347
2348
2349
2350
2351
2352
2353 if not inst_config.secondary_nodes:
2354 i_non_redundant.append(instance)
2355
2356 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2357 instance, "instance has multiple secondary nodes: %s",
2358 utils.CommaJoin(inst_config.secondary_nodes),
2359 code=self.ETYPE_WARNING)
2360
2361 if inst_config.disk_template in constants.DTS_NET_MIRROR:
2362 pnode = inst_config.primary_node
2363 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2364 instance_groups = {}
2365
2366 for node in instance_nodes:
2367 instance_groups.setdefault(nodeinfo_byname[node].group,
2368 []).append(node)
2369
2370 pretty_list = [
2371 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2372
2373 for group, nodes in sorted(instance_groups.items(),
2374 key=lambda (_, nodes): pnode in nodes,
2375 reverse=True)]
2376
2377 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2378 instance, "instance has primary and secondary nodes in"
2379 " different groups: %s", utils.CommaJoin(pretty_list),
2380 code=self.ETYPE_WARNING)
2381
2382 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2383 i_non_a_balanced.append(instance)
2384
2385 for snode in inst_config.secondary_nodes:
2386 s_img = node_image[snode]
2387 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2388 "instance %s, connection to secondary node failed", instance)
2389
2390 if s_img.offline:
2391 inst_nodes_offline.append(snode)
2392
2393
2394 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2395 "instance has offline secondary node(s) %s",
2396 utils.CommaJoin(inst_nodes_offline))
2397
2398 for node in inst_config.all_nodes:
2399 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2400 "instance lives on ghost node %s", node)
2401 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2402 instance, "instance lives on non-vm_capable node %s", node)
2403
2404 feedback_fn("* Verifying orphan volumes")
2405 reserved = utils.FieldSet(*cluster.reserved_lvs)
2406 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2407
2408 feedback_fn("* Verifying orphan instances")
2409 self._VerifyOrphanInstances(instancelist, node_image)
2410
2411 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2412 feedback_fn("* Verifying N+1 Memory redundancy")
2413 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2414
2415 feedback_fn("* Other Notes")
2416 if i_non_redundant:
2417 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2418 % len(i_non_redundant))
2419
2420 if i_non_a_balanced:
2421 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2422 % len(i_non_a_balanced))
2423
2424 if n_offline:
2425 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2426
2427 if n_drained:
2428 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2429
2430 return not self.bad
2431
2432 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2433 """Analyze the post-hooks' result
2434
2435 This method analyses the hook result, handles it, and sends some
2436 nicely-formatted feedback back to the user.
2437
2438 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2439 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2440 @param hooks_results: the results of the multi-node hooks rpc call
2441 @param feedback_fn: function used to send feedback back to the caller
2442 @param lu_result: previous Exec result
2443 @return: the new Exec result, based on the previous result
2444 and hook results
2445
2446 """
2447
2448
2449 if phase == constants.HOOKS_PHASE_POST:
2450
2451 feedback_fn("* Hooks Results")
2452 assert hooks_results, "invalid result from hooks"
2453
2454 for node_name in hooks_results:
2455 res = hooks_results[node_name]
2456 msg = res.fail_msg
2457 test = msg and not res.offline
2458 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2459 "Communication failure in hooks execution: %s", msg)
2460 if res.offline or msg:
2461
2462
2463
2464 lu_result = 1
2465 continue
2466 for script, hkr, output in res.payload:
2467 test = hkr == constants.HKR_FAIL
2468 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2469 "Script %s failed, output:", script)
2470 if test:
2471 output = self._HOOKS_INDENT_RE.sub(' ', output)
2472 feedback_fn("%s" % output)
2473 lu_result = 0
2474
2475 return lu_result
2476
2479 """Verifies the cluster disks status.
2480
2481 """
2482 REQ_BGL = False
2483
2490
2491 - def Exec(self, feedback_fn):
2492 """Verify integrity of cluster disks.
2493
2494 @rtype: tuple of three items
2495 @return: a tuple of (dict of node-to-node_error, list of instances
2496 which need activate-disks, dict of instance: (node, volume) for
2497 missing volumes)
2498
2499 """
2500 result = res_nodes, res_instances, res_missing = {}, [], {}
2501
2502 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2503 instances = self.cfg.GetAllInstancesInfo().values()
2504
2505 nv_dict = {}
2506 for inst in instances:
2507 inst_lvs = {}
2508 if not inst.admin_up:
2509 continue
2510 inst.MapLVsByNode(inst_lvs)
2511
2512 for node, vol_list in inst_lvs.iteritems():
2513 for vol in vol_list:
2514 nv_dict[(node, vol)] = inst
2515
2516 if not nv_dict:
2517 return result
2518
2519 node_lvs = self.rpc.call_lv_list(nodes, [])
2520 for node, node_res in node_lvs.items():
2521 if node_res.offline:
2522 continue
2523 msg = node_res.fail_msg
2524 if msg:
2525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2526 res_nodes[node] = msg
2527 continue
2528
2529 lvs = node_res.payload
2530 for lv_name, (_, _, lv_online) in lvs.items():
2531 inst = nv_dict.pop((node, lv_name), None)
2532 if (not lv_online and inst is not None
2533 and inst.name not in res_instances):
2534 res_instances.append(inst.name)
2535
2536
2537
2538 for key, inst in nv_dict.iteritems():
2539 if inst.name not in res_missing:
2540 res_missing[inst.name] = []
2541 res_missing[inst.name].append(key)
2542
2543 return result
2544
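# --- Editorial sketch, not part of the original module -----------------------
# Hypothetical example of the (res_nodes, res_instances, res_missing) tuple
# returned above; all node, instance and LV names are made up:
#
#   ({"node3.example.com": "Error message from the lv_list RPC"},
#    ["inst1.example.com"],            # has offline LVs, needs activate-disks
#    {"inst2.example.com": [("node1.example.com", "xenvg/inst2-disk0")]})
# ------------------------------------------------------------------------------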
2547 """Verifies the cluster disks sizes.
2548
2549 """
2550 REQ_BGL = False
2551
2570
2574
2576 """Check prerequisites.
2577
2578 This only checks the optional instance list against the existing names.
2579
2580 """
2581 if self.wanted_names is None:
2582 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2583
2584 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2585 in self.wanted_names]
2586
2588 """Ensure children of the disk have the needed disk size.
2589
2590 This is valid mainly for DRBD8 and fixes an issue where the
2591 children have a smaller disk size.
2592
2593 @param disk: an L{ganeti.objects.Disk} object
2594
2595 """
2596 if disk.dev_type == constants.LD_DRBD8:
2597 assert disk.children, "Empty children for DRBD8?"
2598 fchild = disk.children[0]
2599 mismatch = fchild.size < disk.size
2600 if mismatch:
2601 self.LogInfo("Child disk has size %d, parent %d, fixing",
2602 fchild.size, disk.size)
2603 fchild.size = disk.size
2604
2605
2606 return self._EnsureChildSizes(fchild) or mismatch
2607 else:
2608 return False
2609
2610 - def Exec(self, feedback_fn):
2611 """Verify the size of cluster disks.
2612
2613 """
2614
2615
2616 per_node_disks = {}
2617 for instance in self.wanted_instances:
2618 pnode = instance.primary_node
2619 if pnode not in per_node_disks:
2620 per_node_disks[pnode] = []
2621 for idx, disk in enumerate(instance.disks):
2622 per_node_disks[pnode].append((instance, idx, disk))
2623
2624 changed = []
2625 for node, dskl in per_node_disks.items():
2626 newl = [v[2].Copy() for v in dskl]
2627 for dsk in newl:
2628 self.cfg.SetDiskID(dsk, node)
2629 result = self.rpc.call_blockdev_getsize(node, newl)
2630 if result.fail_msg:
2631 self.LogWarning("Failure in blockdev_getsize call to node"
2632 " %s, ignoring", node)
2633 continue
2634 if len(result.payload) != len(dskl):
2635 logging.warning("Invalid result from node %s: len(dksl)=%d,"
2636 " result.payload=%s", node, len(dskl), result.payload)
2637 self.LogWarning("Invalid result from node %s, ignoring node results",
2638 node)
2639 continue
2640 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2641 if size is None:
2642 self.LogWarning("Disk %d of instance %s did not return size"
2643 " information, ignoring", idx, instance.name)
2644 continue
2645 if not isinstance(size, (int, long)):
2646 self.LogWarning("Disk %d of instance %s did not return valid"
2647 " size information, ignoring", idx, instance.name)
2648 continue
2649 size = size >> 20
2650 if size != disk.size:
2651 self.LogInfo("Disk %d of instance %s has mismatched size,"
2652 " correcting: recorded %d, actual %d", idx,
2653 instance.name, disk.size, size)
2654 disk.size = size
2655 self.cfg.Update(instance, feedback_fn)
2656 changed.append((instance.name, idx, size))
2657 if self._EnsureChildSizes(disk):
2658 self.cfg.Update(instance, feedback_fn)
2659 changed.append((instance.name, idx, disk.size))
2660 return changed
2661
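# --- Editorial note, not part of the original module -------------------------
# The "size = size >> 20" step above converts the value reported by
# call_blockdev_getsize (bytes, as the shift by 20 suggests) into MiB, the
# unit in which disk.size is recorded; e.g. a 10 GiB block device of
# 10737418240 bytes gives 10737418240 >> 20 == 10240 MiB.
# ------------------------------------------------------------------------------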
2739
2742 """Change the parameters of the cluster.
2743
2744 """
2745 HPATH = "cluster-modify"
2746 HTYPE = constants.HTYPE_CLUSTER
2747 REQ_BGL = False
2748
2761
2769
2771 """Build hooks env.
2772
2773 """
2774 env = {
2775 "OP_TARGET": self.cfg.GetClusterName(),
2776 "NEW_VG_NAME": self.op.vg_name,
2777 }
2778 mn = self.cfg.GetMasterNode()
2779 return env, [mn], [mn]
2780
2782 """Check prerequisites.
2783
2784 This checks that the given parameters do not conflict and
2785 that the given volume group is valid.
2786
2787 """
2788 if self.op.vg_name is not None and not self.op.vg_name:
2789 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2790 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2791 " instances exist", errors.ECODE_INVAL)
2792
2793 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2794 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2795 raise errors.OpPrereqError("Cannot disable drbd helper while"
2796 " drbd-based instances exist",
2797 errors.ECODE_INVAL)
2798
2799 node_list = self.acquired_locks[locking.LEVEL_NODE]
2800
2801
2802 if self.op.vg_name:
2803 vglist = self.rpc.call_vg_list(node_list)
2804 for node in node_list:
2805 msg = vglist[node].fail_msg
2806 if msg:
2807
2808 self.LogWarning("Error while gathering data on node %s"
2809 " (ignoring node): %s", node, msg)
2810 continue
2811 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2812 self.op.vg_name,
2813 constants.MIN_VG_SIZE)
2814 if vgstatus:
2815 raise errors.OpPrereqError("Error on node '%s': %s" %
2816 (node, vgstatus), errors.ECODE_ENVIRON)
2817
2818 if self.op.drbd_helper:
2819
2820 helpers = self.rpc.call_drbd_helper(node_list)
2821 for node in node_list:
2822 ninfo = self.cfg.GetNodeInfo(node)
2823 if ninfo.offline:
2824 self.LogInfo("Not checking drbd helper on offline node %s", node)
2825 continue
2826 msg = helpers[node].fail_msg
2827 if msg:
2828 raise errors.OpPrereqError("Error checking drbd helper on node"
2829 " '%s': %s" % (node, msg),
2830 errors.ECODE_ENVIRON)
2831 node_helper = helpers[node].payload
2832 if node_helper != self.op.drbd_helper:
2833 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2834 (node, node_helper), errors.ECODE_ENVIRON)
2835
2836 self.cluster = cluster = self.cfg.GetClusterInfo()
2837
2838 if self.op.beparams:
2839 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2840 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2841
2842 if self.op.ndparams:
2843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2844 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2845
2846
2847
2848 if self.new_ndparams["oob_program"] == "":
2849 self.new_ndparams["oob_program"] = \
2850 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2851
2852 if self.op.nicparams:
2853 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2854 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2855 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2856 nic_errors = []
2857
2858
2859 for instance in self.cfg.GetAllInstancesInfo().values():
2860 for nic_idx, nic in enumerate(instance.nics):
2861 params_copy = copy.deepcopy(nic.nicparams)
2862 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2863
2864
2865 try:
2866 objects.NIC.CheckParameterSyntax(params_filled)
2867 except errors.ConfigurationError, err:
2868 nic_errors.append("Instance %s, nic/%d: %s" %
2869 (instance.name, nic_idx, err))
2870
2871
2872 target_mode = params_filled[constants.NIC_MODE]
2873 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2874 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
2875 " address" % (instance.name, nic_idx))
2876 if nic_errors:
2877 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2878 "\n".join(nic_errors))
2879
2880
2881 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2882 if self.op.hvparams:
2883 for hv_name, hv_dict in self.op.hvparams.items():
2884 if hv_name not in self.new_hvparams:
2885 self.new_hvparams[hv_name] = hv_dict
2886 else:
2887 self.new_hvparams[hv_name].update(hv_dict)
2888
2889
2890 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2891 if self.op.os_hvp:
2892 for os_name, hvs in self.op.os_hvp.items():
2893 if os_name not in self.new_os_hvp:
2894 self.new_os_hvp[os_name] = hvs
2895 else:
2896 for hv_name, hv_dict in hvs.items():
2897 if hv_name not in self.new_os_hvp[os_name]:
2898 self.new_os_hvp[os_name][hv_name] = hv_dict
2899 else:
2900 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2901
2902
2903 self.new_osp = objects.FillDict(cluster.osparams, {})
2904 if self.op.osparams:
2905 for os_name, osp in self.op.osparams.items():
2906 if os_name not in self.new_osp:
2907 self.new_osp[os_name] = {}
2908
2909 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2910 use_none=True)
2911
2912 if not self.new_osp[os_name]:
2913
2914 del self.new_osp[os_name]
2915 else:
2916
2917 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2918 os_name, self.new_osp[os_name])
2919
2920
2921 if self.op.enabled_hypervisors is not None:
2922 self.hv_list = self.op.enabled_hypervisors
2923 for hv in self.hv_list:
2924
2925
2926
2927
2928
2929 if hv not in new_hvp:
2930 new_hvp[hv] = {}
2931 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2932 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2933 else:
2934 self.hv_list = cluster.enabled_hypervisors
2935
2936 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2937
2938 for hv_name, hv_params in self.new_hvparams.items():
2939 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2940 (self.op.enabled_hypervisors and
2941 hv_name in self.op.enabled_hypervisors)):
2942
2943 hv_class = hypervisor.GetHypervisor(hv_name)
2944 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2945 hv_class.CheckParameterSyntax(hv_params)
2946 _CheckHVParams(self, node_list, hv_name, hv_params)
2947
2948 if self.op.os_hvp:
2949
2950
2951 for os_name, os_hvp in self.new_os_hvp.items():
2952 for hv_name, hv_params in os_hvp.items():
2953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2954
2955 cluster_defaults = self.new_hvparams.get(hv_name, {})
2956 new_osp = objects.FillDict(cluster_defaults, hv_params)
2957 hv_class = hypervisor.GetHypervisor(hv_name)
2958 hv_class.CheckParameterSyntax(new_osp)
2959 _CheckHVParams(self, node_list, hv_name, new_osp)
2960
2961 if self.op.default_iallocator:
2962 alloc_script = utils.FindFile(self.op.default_iallocator,
2963 constants.IALLOCATOR_SEARCH_PATH,
2964 os.path.isfile)
2965 if alloc_script is None:
2966 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2967 " specified" % self.op.default_iallocator,
2968 errors.ECODE_INVAL)
2969
2970 - def Exec(self, feedback_fn):
2971 """Change the parameters of the cluster.
2972
2973 """
2974 if self.op.vg_name is not None:
2975 new_volume = self.op.vg_name
2976 if not new_volume:
2977 new_volume = None
2978 if new_volume != self.cfg.GetVGName():
2979 self.cfg.SetVGName(new_volume)
2980 else:
2981 feedback_fn("Cluster LVM configuration already in desired"
2982 " state, not changing")
2983 if self.op.drbd_helper is not None:
2984 new_helper = self.op.drbd_helper
2985 if not new_helper:
2986 new_helper = None
2987 if new_helper != self.cfg.GetDRBDHelper():
2988 self.cfg.SetDRBDHelper(new_helper)
2989 else:
2990 feedback_fn("Cluster DRBD helper already in desired state,"
2991 " not changing")
2992 if self.op.hvparams:
2993 self.cluster.hvparams = self.new_hvparams
2994 if self.op.os_hvp:
2995 self.cluster.os_hvp = self.new_os_hvp
2996 if self.op.enabled_hypervisors is not None:
2997 self.cluster.hvparams = self.new_hvparams
2998 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2999 if self.op.beparams:
3000 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3001 if self.op.nicparams:
3002 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3003 if self.op.osparams:
3004 self.cluster.osparams = self.new_osp
3005 if self.op.ndparams:
3006 self.cluster.ndparams = self.new_ndparams
3007
3008 if self.op.candidate_pool_size is not None:
3009 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3010
3011 _AdjustCandidatePool(self, [])
3012
3013 if self.op.maintain_node_health is not None:
3014 self.cluster.maintain_node_health = self.op.maintain_node_health
3015
3016 if self.op.prealloc_wipe_disks is not None:
3017 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3018
3019 if self.op.add_uids is not None:
3020 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3021
3022 if self.op.remove_uids is not None:
3023 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3024
3025 if self.op.uid_pool is not None:
3026 self.cluster.uid_pool = self.op.uid_pool
3027
3028 if self.op.default_iallocator is not None:
3029 self.cluster.default_iallocator = self.op.default_iallocator
3030
3031 if self.op.reserved_lvs is not None:
3032 self.cluster.reserved_lvs = self.op.reserved_lvs
3033
3034 def helper_os(aname, mods, desc):
3035 desc += " OS list"
3036 lst = getattr(self.cluster, aname)
3037 for key, val in mods:
3038 if key == constants.DDM_ADD:
3039 if val in lst:
3040 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3041 else:
3042 lst.append(val)
3043 elif key == constants.DDM_REMOVE:
3044 if val in lst:
3045 lst.remove(val)
3046 else:
3047 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3048 else:
3049 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3050
3051 if self.op.hidden_os:
3052 helper_os("hidden_os", self.op.hidden_os, "hidden")
3053
3054 if self.op.blacklisted_os:
3055 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3056
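# --- Editorial sketch, not part of the original module -----------------------
# The hidden_os/blacklisted_os opcode parameters handled above are lists of
# (action, os_name) pairs; a hypothetical value would be
#
#   [(constants.DDM_ADD, "debian-image"), (constants.DDM_REMOVE, "lenny-image")]
#
# which helper_os() applies by appending to or removing from the corresponding
# cluster OS list.
# ------------------------------------------------------------------------------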
3057 if self.op.master_netdev:
3058 master = self.cfg.GetMasterNode()
3059 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3060 self.cluster.master_netdev)
3061 result = self.rpc.call_node_stop_master(master, False)
3062 result.Raise("Could not disable the master ip")
3063 feedback_fn("Changing master_netdev from %s to %s" %
3064 (self.cluster.master_netdev, self.op.master_netdev))
3065 self.cluster.master_netdev = self.op.master_netdev
3066
3067 self.cfg.Update(self.cluster, feedback_fn)
3068
3069 if self.op.master_netdev:
3070 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3071 self.op.master_netdev)
3072 result = self.rpc.call_node_start_master(master, False, False)
3073 if result.fail_msg:
3074 self.LogWarning("Could not re-enable the master ip on"
3075 " the master, please restart manually: %s",
3076 result.fail_msg)
3077
3080 """Helper for uploading a file and showing warnings.
3081
3082 """
3083 if os.path.exists(fname):
3084 result = lu.rpc.call_upload_file(nodes, fname)
3085 for to_node, to_result in result.items():
3086 msg = to_result.fail_msg
3087 if msg:
3088 msg = ("Copy of file %s to node %s failed: %s" %
3089 (fname, to_node, msg))
3090 lu.proc.LogWarning(msg)
3091
3094 """Distribute additional files which are part of the cluster configuration.
3095
3096 ConfigWriter takes care of distributing the config and ssconf files, but
3097 there are more files which should be distributed to all nodes. This function
3098 makes sure those are copied.
3099
3100 @param lu: calling logical unit
3101 @param additional_nodes: list of nodes not in the config to distribute to
3102 @type additional_vm: boolean
3103 @param additional_vm: whether the additional nodes are vm-capable or not
3104
3105 """
3106
3107 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3108 dist_nodes = lu.cfg.GetOnlineNodeList()
3109 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3110 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3111 if additional_nodes is not None:
3112 dist_nodes.extend(additional_nodes)
3113 if additional_vm:
3114 vm_nodes.extend(additional_nodes)
3115 if myself.name in dist_nodes:
3116 dist_nodes.remove(myself.name)
3117 if myself.name in vm_nodes:
3118 vm_nodes.remove(myself.name)
3119
3120
3121 dist_files = set([constants.ETC_HOSTS,
3122 constants.SSH_KNOWN_HOSTS_FILE,
3123 constants.RAPI_CERT_FILE,
3124 constants.RAPI_USERS_FILE,
3125 constants.CONFD_HMAC_KEY,
3126 constants.CLUSTER_DOMAIN_SECRET_FILE,
3127 ])
3128
3129 vm_files = set()
3130 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3131 for hv_name in enabled_hypervisors:
3132 hv_class = hypervisor.GetHypervisor(hv_name)
3133 vm_files.update(hv_class.GetAncillaryFiles())
3134
3135
3136 for fname in dist_files:
3137 _UploadHelper(lu, dist_nodes, fname)
3138 for fname in vm_files:
3139 _UploadHelper(lu, vm_nodes, fname)
3140
3143 """Force the redistribution of cluster configuration.
3144
3145 This is a very simple LU.
3146
3147 """
3148 REQ_BGL = False
3149
3155
3156 - def Exec(self, feedback_fn):
3162
3163
3164 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3165 """Sleep and poll for an instance's disk to sync.
3166
3167 """
3168 if not instance.disks or disks is not None and not disks:
3169 return True
3170
3171 disks = _ExpandCheckDisks(instance, disks)
3172
3173 if not oneshot:
3174 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3175
3176 node = instance.primary_node
3177
3178 for dev in disks:
3179 lu.cfg.SetDiskID(dev, node)
3180
3181
3182
3183 retries = 0
3184 degr_retries = 10
3185 while True:
3186 max_time = 0
3187 done = True
3188 cumul_degraded = False
3189 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3190 msg = rstats.fail_msg
3191 if msg:
3192 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3193 retries += 1
3194 if retries >= 10:
3195 raise errors.RemoteError("Can't contact node %s for mirror data,"
3196 " aborting." % node)
3197 time.sleep(6)
3198 continue
3199 rstats = rstats.payload
3200 retries = 0
3201 for i, mstat in enumerate(rstats):
3202 if mstat is None:
3203 lu.LogWarning("Can't compute data for node %s/%s",
3204 node, disks[i].iv_name)
3205 continue
3206
3207 cumul_degraded = (cumul_degraded or
3208 (mstat.is_degraded and mstat.sync_percent is None))
3209 if mstat.sync_percent is not None:
3210 done = False
3211 if mstat.estimated_time is not None:
3212 rem_time = ("%s remaining (estimated)" %
3213 utils.FormatSeconds(mstat.estimated_time))
3214 max_time = mstat.estimated_time
3215 else:
3216 rem_time = "no time estimate"
3217 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3218 (disks[i].iv_name, mstat.sync_percent, rem_time))
3219
3220
3221
3222
3223 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3224 logging.info("Degraded disks found, %d retries left", degr_retries)
3225 degr_retries -= 1
3226 time.sleep(1)
3227 continue
3228
3229 if done or oneshot:
3230 break
3231
3232 time.sleep(min(60, max_time))
3233
3234 if done:
3235 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3236 return not cumul_degraded
3237
3240 """Check that mirrors are not degraded.
3241
3242 The ldisk parameter, if True, will change the test from the
3243 is_degraded attribute (which represents overall non-ok status for
3244 the device(s)) to the ldisk (representing the local storage status).
3245
3246 """
3247 lu.cfg.SetDiskID(dev, node)
3248
3249 result = True
3250
3251 if on_primary or dev.AssembleOnSecondary():
3252 rstats = lu.rpc.call_blockdev_find(node, dev)
3253 msg = rstats.fail_msg
3254 if msg:
3255 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3256 result = False
3257 elif not rstats.payload:
3258 lu.LogWarning("Can't find disk on node %s", node)
3259 result = False
3260 else:
3261 if ldisk:
3262 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3263 else:
3264 result = result and not rstats.payload.is_degraded
3265
3266 if dev.children:
3267 for child in dev.children:
3268 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3269
3270 return result
3271
3274 """Logical unit for OOB handling.
3275
3276 """
3277 REQ_BGL = False
3278
3280 """Check prerequisites.
3281
3282 This checks:
3283 - the node exists in the configuration
3284 - OOB is supported
3285
3286 Any errors are signaled by raising errors.OpPrereqError.
3287
3288 """
3289 self.nodes = []
3290 for node_name in self.op.node_names:
3291 node = self.cfg.GetNodeInfo(node_name)
3292
3293 if node is None:
3294 raise errors.OpPrereqError("Node %s not found" % node_name,
3295 errors.ECODE_NOENT)
3296 else:
3297 self.nodes.append(node)
3298
3299 if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3300 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3301 " not marked offline") % node_name,
3302 errors.ECODE_STATE)
3303
3305 """Gather locks we need.
3306
3307 """
3308 if self.op.node_names:
3309 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3310 for name in self.op.node_names]
3311 else:
3312 self.op.node_names = self.cfg.GetNodeList()
3313
3314 self.needed_locks = {
3315 locking.LEVEL_NODE: self.op.node_names,
3316 }
3317
3318 - def Exec(self, feedback_fn):
3319 """Execute OOB and return result if we expect any.
3320
3321 """
3322 master_node = self.cfg.GetMasterNode()
3323 ret = []
3324
3325 for node in self.nodes:
3326 node_entry = [(constants.RS_NORMAL, node.name)]
3327 ret.append(node_entry)
3328
3329 oob_program = _SupportsOob(self.cfg, node)
3330
3331 if not oob_program:
3332 node_entry.append((constants.RS_UNAVAIL, None))
3333 continue
3334
3335 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3336 self.op.command, oob_program, node.name)
3337 result = self.rpc.call_run_oob(master_node, oob_program,
3338 self.op.command, node.name,
3339 self.op.timeout)
3340
3341 if result.fail_msg:
3342 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3343 node.name, result.fail_msg)
3344 node_entry.append((constants.RS_NODATA, None))
3345 else:
3346 try:
3347 self._CheckPayload(result)
3348 except errors.OpExecError, err:
3349 self.LogWarning("The payload returned by '%s' is not valid: %s",
3350 node.name, err)
3351 node_entry.append((constants.RS_NODATA, None))
3352 else:
3353 if self.op.command == constants.OOB_HEALTH:
3354
3355 for item, status in result.payload:
3356 if status in [constants.OOB_STATUS_WARNING,
3357 constants.OOB_STATUS_CRITICAL]:
3358 self.LogWarning("On node '%s' item '%s' has status '%s'",
3359 node.name, item, status)
3360
3361 if self.op.command == constants.OOB_POWER_ON:
3362 node.powered = True
3363 elif self.op.command == constants.OOB_POWER_OFF:
3364 node.powered = False
3365 elif self.op.command == constants.OOB_POWER_STATUS:
3366 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3367 if powered != node.powered:
3368 logging.warning(("Recorded power state (%s) of node '%s' does not"
3369 " match actual power state (%s)"), node.powered,
3370 node.name, powered)
3371
3372
3373 if self.op.command in (constants.OOB_POWER_ON,
3374 constants.OOB_POWER_OFF):
3375 self.cfg.Update(node, feedback_fn)
3376
3377 node_entry.append((constants.RS_NORMAL, result.payload))
3378
3379 return ret
3380
3382 """Checks if the payload is valid.
3383
3384 @param result: RPC result
3385 @raises errors.OpExecError: If payload is not valid
3386
3387 """
3388 errs = []
3389 if self.op.command == constants.OOB_HEALTH:
3390 if not isinstance(result.payload, list):
3391 errs.append("command 'health' is expected to return a list but got %s" %
3392 type(result.payload))
3393 else:
3394 for item, status in result.payload:
3395 if status not in constants.OOB_STATUSES:
3396 errs.append("health item '%s' has invalid status '%s'" %
3397 (item, status))
3398
3399 if self.op.command == constants.OOB_POWER_STATUS:
3400 if not isinstance(result.payload, dict):
3401 errs.append("power-status is expected to return a dict but got %s" %
3402 type(result.payload))
3403
3404 if self.op.command in [
3405 constants.OOB_POWER_ON,
3406 constants.OOB_POWER_OFF,
3407 constants.OOB_POWER_CYCLE,
3408 ]:
3409 if result.payload is not None:
3410 errs.append("%s is expected to not return payload but got '%s'" %
3411 (self.op.command, result.payload))
3412
3413 if errs:
3414 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3415 utils.CommaJoin(errs))
3416
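# --- Editorial sketch, not part of the original module -----------------------
# Hypothetical payloads that would pass the checks above (item names made up):
#
#   OOB_HEALTH:        [("PSU0", constants.OOB_STATUS_WARNING),
#                       ("FAN2", constants.OOB_STATUS_CRITICAL)]
#   OOB_POWER_STATUS:  {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: None (these commands must not return a payload)
# ------------------------------------------------------------------------------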
3420 """Logical unit for OS diagnose/query.
3421
3422 """
3423 REQ_BGL = False
3424 _HID = "hidden"
3425 _BLK = "blacklisted"
3426 _VLD = "valid"
3427 _FIELDS_STATIC = utils.FieldSet()
3428 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3429 "parameters", "api_versions", _HID, _BLK)
3430
3439
3441
3442
3443
3444 self.needed_locks = {}
3445
3446
3447
3448 @staticmethod
3450 """Remaps a per-node return list into an a per-os per-node dictionary
3451
3452 @param rlist: a map with node names as keys and OS objects as values
3453
3454 @rtype: dict
3455 @return: a dictionary with osnames as keys and as value another
3456 map, with nodes as keys and tuples of (path, status, diagnose,
3457 variants, parameters, api_versions) as values, eg::
3458
3459 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3460 (/srv/..., False, "invalid api")],
3461 "node2": [(/srv/..., True, "", [], [])]}
3462 }
3463
3464 """
3465 all_os = {}
3466
3467
3468
3469 good_nodes = [node_name for node_name in rlist
3470 if not rlist[node_name].fail_msg]
3471 for node_name, nr in rlist.items():
3472 if nr.fail_msg or not nr.payload:
3473 continue
3474 for (name, path, status, diagnose, variants,
3475 params, api_versions) in nr.payload:
3476 if name not in all_os:
3477
3478
3479 all_os[name] = {}
3480 for nname in good_nodes:
3481 all_os[name][nname] = []
3482
3483 params = [tuple(v) for v in params]
3484 all_os[name][node_name].append((path, status, diagnose,
3485 variants, params, api_versions))
3486 return all_os
3487
3488 - def Exec(self, feedback_fn):
3489 """Compute the list of OSes.
3490
3491 """
3492 valid_nodes = [node.name
3493 for node in self.cfg.GetAllNodesInfo().values()
3494 if not node.offline and node.vm_capable]
3495 node_data = self.rpc.call_os_diagnose(valid_nodes)
3496 pol = self._DiagnoseByOS(node_data)
3497 output = []
3498 cluster = self.cfg.GetClusterInfo()
3499
3500 for os_name in utils.NiceSort(pol.keys()):
3501 os_data = pol[os_name]
3502 row = []
3503 valid = True
3504 (variants, params, api_versions) = null_state = (set(), set(), set())
3505 for idx, osl in enumerate(os_data.values()):
3506 valid = bool(valid and osl and osl[0][1])
3507 if not valid:
3508 (variants, params, api_versions) = null_state
3509 break
3510 node_variants, node_params, node_api = osl[0][3:6]
3511 if idx == 0:
3512 variants = set(node_variants)
3513 params = set(node_params)
3514 api_versions = set(node_api)
3515 else:
3516 variants.intersection_update(node_variants)
3517 params.intersection_update(node_params)
3518 api_versions.intersection_update(node_api)
3519
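# --- Editorial note, not part of the original module -------------------------
# The intersection_update() calls above keep only the variants, parameters and
# API versions common to every node; e.g. with hypothetical data, node1
# reporting variants {"squeeze", "wheezy"} and node2 reporting {"wheezy"}
# leaves {"wheezy"} for the OS as a whole.
# ------------------------------------------------------------------------------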
3520 is_hid = os_name in cluster.hidden_os
3521 is_blk = os_name in cluster.blacklisted_os
3522 if ((self._HID not in self.op.output_fields and is_hid) or
3523 (self._BLK not in self.op.output_fields and is_blk) or
3524 (self._VLD not in self.op.output_fields and not valid)):
3525 continue
3526
3527 for field in self.op.output_fields:
3528 if field == "name":
3529 val = os_name
3530 elif field == self._VLD:
3531 val = valid
3532 elif field == "node_status":
3533
3534 val = {}
3535 for node_name, nos_list in os_data.items():
3536 val[node_name] = nos_list
3537 elif field == "variants":
3538 val = utils.NiceSort(list(variants))
3539 elif field == "parameters":
3540 val = list(params)
3541 elif field == "api_versions":
3542 val = list(api_versions)
3543 elif field == self._HID:
3544 val = is_hid
3545 elif field == self._BLK:
3546 val = is_blk
3547 else:
3548 raise errors.ParameterError(field)
3549 row.append(val)
3550 output.append(row)
3551
3552 return output
3553
3556 """Logical unit for removing a node.
3557
3558 """
3559 HPATH = "node-remove"
3560 HTYPE = constants.HTYPE_NODE
3561
3563 """Build hooks env.
3564
3565 This doesn't run on the target node in the pre phase as a failed
3566 node would then be impossible to remove.
3567
3568 """
3569 env = {
3570 "OP_TARGET": self.op.node_name,
3571 "NODE_NAME": self.op.node_name,
3572 }
3573 all_nodes = self.cfg.GetNodeList()
3574 try:
3575 all_nodes.remove(self.op.node_name)
3576 except ValueError:
3577 logging.warning("Node %s which is about to be removed not found"
3578 " in the all nodes list", self.op.node_name)
3579 return env, all_nodes, all_nodes
3580
3582 """Check prerequisites.
3583
3584 This checks:
3585 - the node exists in the configuration
3586 - it does not have primary or secondary instances
3587 - it's not the master
3588
3589 Any errors are signaled by raising errors.OpPrereqError.
3590
3591 """
3592 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3593 node = self.cfg.GetNodeInfo(self.op.node_name)
3594 assert node is not None
3595
3596 instance_list = self.cfg.GetInstanceList()
3597
3598 masternode = self.cfg.GetMasterNode()
3599 if node.name == masternode:
3600 raise errors.OpPrereqError("Node is the master node,"
3601 " you need to failover first.",
3602 errors.ECODE_INVAL)
3603
3604 for instance_name in instance_list:
3605 instance = self.cfg.GetInstanceInfo(instance_name)
3606 if node.name in instance.all_nodes:
3607 raise errors.OpPrereqError("Instance %s is still running on the node,"
3608 " please remove first." % instance_name,
3609 errors.ECODE_INVAL)
3610 self.op.node_name = node.name
3611 self.node = node
3612
3613 - def Exec(self, feedback_fn):
3649
3652 FIELDS = query.NODE_FIELDS
3653
3654 - def ExpandNames(self, lu):
3655 lu.needed_locks = {}
3656 lu.share_locks[locking.LEVEL_NODE] = 1
3657
3658 if self.names:
3659 self.wanted = _GetWantedNodes(lu, self.names)
3660 else:
3661 self.wanted = locking.ALL_SET
3662
3663 self.do_locking = (self.use_locking and
3664 query.NQ_LIVE in self.requested_data)
3665
3666 if self.do_locking:
3667
3668 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3669
3672
3674 """Computes the list of nodes and their attributes.
3675
3676 """
3677 all_info = lu.cfg.GetAllNodesInfo()
3678
3679 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3680
3681
3682 if query.NQ_LIVE in self.requested_data:
3683
3684 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3685
3686 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3687 lu.cfg.GetHypervisorType())
3688 live_data = dict((name, nresult.payload)
3689 for (name, nresult) in node_data.items()
3690 if not nresult.fail_msg and nresult.payload)
3691 else:
3692 live_data = None
3693
3694 if query.NQ_INST in self.requested_data:
3695 node_to_primary = dict([(name, set()) for name in nodenames])
3696 node_to_secondary = dict([(name, set()) for name in nodenames])
3697
3698 inst_data = lu.cfg.GetAllInstancesInfo()
3699
3700 for inst in inst_data.values():
3701 if inst.primary_node in node_to_primary:
3702 node_to_primary[inst.primary_node].add(inst.name)
3703 for secnode in inst.secondary_nodes:
3704 if secnode in node_to_secondary:
3705 node_to_secondary[secnode].add(inst.name)
3706 else:
3707 node_to_primary = None
3708 node_to_secondary = None
3709
3710 if query.NQ_OOB in self.requested_data:
3711 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3712 for name, node in all_info.iteritems())
3713 else:
3714 oob_support = None
3715
3716 if query.NQ_GROUP in self.requested_data:
3717 groups = lu.cfg.GetAllNodeGroupsInfo()
3718 else:
3719 groups = {}
3720
3721 return query.NodeQueryData([all_info[name] for name in nodenames],
3722 live_data, lu.cfg.GetMasterNode(),
3723 node_to_primary, node_to_secondary, groups,
3724 oob_support, lu.cfg.GetClusterInfo())
3725
3728 """Logical unit for querying nodes.
3729
3730 """
3731
3732 REQ_BGL = False
3733
3734 - def CheckArguments(self):
3735 self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3736 self.op.use_locking)
3737
3740
3741 - def Exec(self, feedback_fn):
3743
3746 """Logical unit for getting volumes on node(s).
3747
3748 """
3749 REQ_BGL = False
3750 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3751 _FIELDS_STATIC = utils.FieldSet("node")
3752
3757
3766
3767 - def Exec(self, feedback_fn):
3768 """Computes the list of nodes and their attributes.
3769
3770 """
3771 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3772 volumes = self.rpc.call_node_volumes(nodenames)
3773
3774 ilist = self.cfg.GetAllInstancesInfo()
3775
3776 vol2inst = dict(((node, vol), inst.name)
3777 for inst in ilist.values()
3778 for (node, vols) in inst.MapLVsByNode().items()
3779 for vol in vols)
3780
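# --- Editorial sketch, not part of the original module -----------------------
# vol2inst maps (node, "vg/lv") pairs to instance names, matching the
# vol["vg"] + "/" + vol["name"] lookup used below; a hypothetical entry:
#
#   {("node1.example.com", "xenvg/inst1-disk0"): "inst1.example.com"}
# ------------------------------------------------------------------------------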
3781 output = []
3782 for node in nodenames:
3783 nresult = volumes[node]
3784 if nresult.offline:
3785 continue
3786 msg = nresult.fail_msg
3787 if msg:
3788 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3789 continue
3790
3791 node_vols = sorted(nresult.payload,
3792 key=operator.itemgetter("dev"))
3793
3794 for vol in node_vols:
3795 node_output = []
3796 for field in self.op.output_fields:
3797 if field == "node":
3798 val = node
3799 elif field == "phys":
3800 val = vol['dev']
3801 elif field == "vg":
3802 val = vol['vg']
3803 elif field == "name":
3804 val = vol['name']
3805 elif field == "size":
3806 val = int(float(vol['size']))
3807 elif field == "instance":
3808 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
3809 else:
3810 raise errors.ParameterError(field)
3811 node_output.append(str(val))
3812
3813 output.append(node_output)
3814
3815 return output
3816
3819 """Logical unit for getting information on storage units on node(s).
3820
3821 """
3822 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3823 REQ_BGL = False
3824
3829
3839
3840 - def Exec(self, feedback_fn):
3841 """Computes the list of nodes and their attributes.
3842
3843 """
3844 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3845
3846
3847 if constants.SF_NAME in self.op.output_fields:
3848 fields = self.op.output_fields[:]
3849 else:
3850 fields = [constants.SF_NAME] + self.op.output_fields
3851
3852
3853 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3854 while extra in fields:
3855 fields.remove(extra)
3856
3857 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3858 name_idx = field_idx[constants.SF_NAME]
3859
3860 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3861 data = self.rpc.call_storage_list(self.nodes,
3862 self.op.storage_type, st_args,
3863 self.op.name, fields)
3864
3865 result = []
3866
3867 for node in utils.NiceSort(self.nodes):
3868 nresult = data[node]
3869 if nresult.offline:
3870 continue
3871
3872 msg = nresult.fail_msg
3873 if msg:
3874 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3875 continue
3876
3877 rows = dict([(row[name_idx], row) for row in nresult.payload])
3878
3879 for name in utils.NiceSort(rows.keys()):
3880 row = rows[name]
3881
3882 out = []
3883
3884 for field in self.op.output_fields:
3885 if field == constants.SF_NODE:
3886 val = node
3887 elif field == constants.SF_TYPE:
3888 val = self.op.storage_type
3889 elif field in field_idx:
3890 val = row[field_idx[field]]
3891 else:
3892 raise errors.ParameterError(field)
3893
3894 out.append(val)
3895
3896 result.append(out)
3897
3898 return result
3899
3902 FIELDS = query.INSTANCE_FIELDS
3903
3920
3924
3926 """Computes the list of instances and their attributes.
3927
3928 """
3929 cluster = lu.cfg.GetClusterInfo()
3930 all_info = lu.cfg.GetAllInstancesInfo()
3931
3932 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3933
3934 instance_list = [all_info[name] for name in instance_names]
3935 nodes = frozenset(itertools.chain(*(inst.all_nodes
3936 for inst in instance_list)))
3937 hv_list = list(set([inst.hypervisor for inst in instance_list]))
3938 bad_nodes = []
3939 offline_nodes = []
3940 wrongnode_inst = set()
3941
3942
3943 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3944 live_data = {}
3945 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3946 for name in nodes:
3947 result = node_data[name]
3948 if result.offline:
3949
3950 assert result.fail_msg
3951 offline_nodes.append(name)
3952 if result.fail_msg:
3953 bad_nodes.append(name)
3954 elif result.payload:
3955 for inst in result.payload:
3956 if inst in all_info:
3957 if all_info[inst].primary_node == name:
3958 live_data.update(result.payload)
3959 else:
3960 wrongnode_inst.add(inst)
3961 else:
3962
3963
3964 logging.warning("Orphan instance '%s' found on node %s",
3965 inst, name)
3966
3967 else:
3968 live_data = {}
3969
3970 if query.IQ_DISKUSAGE in self.requested_data:
3971 disk_usage = dict((inst.name,
3972 _ComputeDiskSize(inst.disk_template,
3973 [{"size": disk.size}
3974 for disk in inst.disks]))
3975 for inst in instance_list)
3976 else:
3977 disk_usage = None
3978
3979 if query.IQ_CONSOLE in self.requested_data:
3980 consinfo = {}
3981 for inst in instance_list:
3982 if inst.name in live_data:
3983
3984 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3985 else:
3986 consinfo[inst.name] = None
3987 assert set(consinfo.keys()) == set(instance_names)
3988 else:
3989 consinfo = None
3990
3991 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3992 disk_usage, offline_nodes, bad_nodes,
3993 live_data, wrongnode_inst, consinfo)
3994
3997 """Query for resources/items of a certain kind.
3998
3999 """
4000
4001 REQ_BGL = False
4002
4008
4011
4014
4015 - def Exec(self, feedback_fn):
4017
4020 """Query for resources/items of a certain kind.
4021
4022 """
4023
4024 REQ_BGL = False
4025
4028
4030 self.needed_locks = {}
4031
4032 - def Exec(self, feedback_fn):
4034
4037 """Logical unit for modifying a storage volume on a node.
4038
4039 """
4040 REQ_BGL = False
4041
4060
4065
4066 - def Exec(self, feedback_fn):
4067 """Computes the list of nodes and their attributes.
4068
4069 """
4070 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4071 result = self.rpc.call_storage_modify(self.op.node_name,
4072 self.op.storage_type, st_args,
4073 self.op.name, self.op.changes)
4074 result.Raise("Failed to modify storage unit '%s' on %s" %
4075 (self.op.name, self.op.node_name))
4076
4079 """Logical unit for adding node to the cluster.
4080
4081 """
4082 HPATH = "node-add"
4083 HTYPE = constants.HTYPE_NODE
4084 _NFLAGS = ["master_capable", "vm_capable"]
4085
4100
4102 """Build hooks env.
4103
4104 This will run on all nodes before, and on all nodes + the new node after.
4105
4106 """
4107 env = {
4108 "OP_TARGET": self.op.node_name,
4109 "NODE_NAME": self.op.node_name,
4110 "NODE_PIP": self.op.primary_ip,
4111 "NODE_SIP": self.op.secondary_ip,
4112 "MASTER_CAPABLE": str(self.op.master_capable),
4113 "VM_CAPABLE": str(self.op.vm_capable),
4114 }
4115 nodes_0 = self.cfg.GetNodeList()
4116 nodes_1 = nodes_0 + [self.op.node_name, ]
4117 return env, nodes_0, nodes_1
4118
4120 """Check prerequisites.
4121
4122 This checks:
4123 - the new node is not already in the config
4124 - it is resolvable
4125 - its parameters (single/dual homed) matches the cluster
4126
4127 Any errors are signaled by raising errors.OpPrereqError.
4128
4129 """
4130 cfg = self.cfg
4131 hostname = self.hostname
4132 node = hostname.name
4133 primary_ip = self.op.primary_ip = hostname.ip
4134 if self.op.secondary_ip is None:
4135 if self.primary_ip_family == netutils.IP6Address.family:
4136 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4137 " IPv4 address must be given as secondary",
4138 errors.ECODE_INVAL)
4139 self.op.secondary_ip = primary_ip
4140
4141 secondary_ip = self.op.secondary_ip
4142 if not netutils.IP4Address.IsValid(secondary_ip):
4143 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4144 " address" % secondary_ip, errors.ECODE_INVAL)
4145
4146 node_list = cfg.GetNodeList()
4147 if not self.op.readd and node in node_list:
4148 raise errors.OpPrereqError("Node %s is already in the configuration" %
4149 node, errors.ECODE_EXISTS)
4150 elif self.op.readd and node not in node_list:
4151 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4152 errors.ECODE_NOENT)
4153
4154 self.changed_primary_ip = False
4155
4156 for existing_node_name in node_list:
4157 existing_node = cfg.GetNodeInfo(existing_node_name)
4158
4159 if self.op.readd and node == existing_node_name:
4160 if existing_node.secondary_ip != secondary_ip:
4161 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4162 " address configuration as before",
4163 errors.ECODE_INVAL)
4164 if existing_node.primary_ip != primary_ip:
4165 self.changed_primary_ip = True
4166
4167 continue
4168
4169 if (existing_node.primary_ip == primary_ip or
4170 existing_node.secondary_ip == primary_ip or
4171 existing_node.primary_ip == secondary_ip or
4172 existing_node.secondary_ip == secondary_ip):
4173 raise errors.OpPrereqError("New node ip address(es) conflict with"
4174 " existing node %s" % existing_node.name,
4175 errors.ECODE_NOTUNIQUE)
4176
4177
4178
4179 if self.op.readd:
4180 old_node = self.cfg.GetNodeInfo(node)
4181 assert old_node is not None, "Can't retrieve locked node %s" % node
4182 for attr in self._NFLAGS:
4183 if getattr(self.op, attr) is None:
4184 setattr(self.op, attr, getattr(old_node, attr))
4185 else:
4186 for attr in self._NFLAGS:
4187 if getattr(self.op, attr) is None:
4188 setattr(self.op, attr, True)
4189
4190 if self.op.readd and not self.op.vm_capable:
4191 pri, sec = cfg.GetNodeInstances(node)
4192 if pri or sec:
4193 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4194 " flag set to false, but it already holds"
4195 " instances" % node,
4196 errors.ECODE_STATE)
4197
4198
4199
4200 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4201 master_singlehomed = myself.secondary_ip == myself.primary_ip
4202 newbie_singlehomed = secondary_ip == primary_ip
4203 if master_singlehomed != newbie_singlehomed:
4204 if master_singlehomed:
4205 raise errors.OpPrereqError("The master has no secondary ip but the"
4206 " new node has one",
4207 errors.ECODE_INVAL)
4208 else:
4209 raise errors.OpPrereqError("The master has a secondary ip but the"
4210 " new node doesn't have one",
4211 errors.ECODE_INVAL)
4212
4213
4214 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4215 raise errors.OpPrereqError("Node not reachable by ping",
4216 errors.ECODE_ENVIRON)
4217
4218 if not newbie_singlehomed:
4219
4220 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4221 source=myself.secondary_ip):
4222 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4223 " based ping to node daemon port",
4224 errors.ECODE_ENVIRON)
4225
4226 if self.op.readd:
4227 exceptions = [node]
4228 else:
4229 exceptions = []
4230
4231 if self.op.master_capable:
4232 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4233 else:
4234 self.master_candidate = False
4235
4236 if self.op.readd:
4237 self.new_node = old_node
4238 else:
4239 node_group = cfg.LookupNodeGroup(self.op.group)
4240 self.new_node = objects.Node(name=node,
4241 primary_ip=primary_ip,
4242 secondary_ip=secondary_ip,
4243 master_candidate=self.master_candidate,
4244 offline=False, drained=False,
4245 group=node_group)
4246
4247 if self.op.ndparams:
4248 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4249
4250 - def Exec(self, feedback_fn):
4251 """Adds the new node to the cluster.
4252
4253 """
4254 new_node = self.new_node
4255 node = new_node.name
4256
4257
4258 new_node.powered = True
4259
4260
4261
4262
4263
4264 if self.op.readd:
4265 new_node.drained = new_node.offline = False
4266 self.LogInfo("Readding a node, the offline/drained flags were reset")
4267
4268 new_node.master_candidate = self.master_candidate
4269 if self.changed_primary_ip:
4270 new_node.primary_ip = self.op.primary_ip
4271
4272
4273 for attr in self._NFLAGS:
4274 setattr(new_node, attr, getattr(self.op, attr))
4275
4276
4277 if new_node.master_candidate:
4278 self.LogInfo("Node will be a master candidate")
4279
4280 if self.op.ndparams:
4281 new_node.ndparams = self.op.ndparams
4282 else:
4283 new_node.ndparams = {}
4284
4285
4286 result = self.rpc.call_version([node])[node]
4287 result.Raise("Can't get version information from node %s" % node)
4288 if constants.PROTOCOL_VERSION == result.payload:
4289 logging.info("Communication to node %s fine, sw version %s match",
4290 node, result.payload)
4291 else:
4292 raise errors.OpExecError("Version mismatch master version %s,"
4293 " node version %s" %
4294 (constants.PROTOCOL_VERSION, result.payload))
4295
4296
4297 if self.cfg.GetClusterInfo().modify_etc_hosts:
4298 master_node = self.cfg.GetMasterNode()
4299 result = self.rpc.call_etc_hosts_modify(master_node,
4300 constants.ETC_HOSTS_ADD,
4301 self.hostname.name,
4302 self.hostname.ip)
4303 result.Raise("Can't update hosts file with new host data")
4304
4305 if new_node.secondary_ip != new_node.primary_ip:
4306 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4307 False)
4308
4309 node_verify_list = [self.cfg.GetMasterNode()]
4310 node_verify_param = {
4311 constants.NV_NODELIST: [node],
4312
4313 }
4314
4315 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4316 self.cfg.GetClusterName())
4317 for verifier in node_verify_list:
4318 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4319 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4320 if nl_payload:
4321 for failed in nl_payload:
4322 feedback_fn("ssh/hostname verification failed"
4323 " (checking from %s): %s" %
4324 (verifier, nl_payload[failed]))
4325 raise errors.OpExecError("ssh/hostname verification failed")
4326
4327 if self.op.readd:
4328 _RedistributeAncillaryFiles(self)
4329 self.context.ReaddNode(new_node)
4330
4331 self.cfg.Update(new_node, feedback_fn)
4332
4333 if not new_node.master_candidate:
4334 result = self.rpc.call_node_demote_from_mc(new_node.name)
4335 msg = result.fail_msg
4336 if msg:
4337 self.LogWarning("Node failed to demote itself from master"
4338 " candidate status: %s" % msg)
4339 else:
4340 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4341 additional_vm=self.op.vm_capable)
4342 self.context.AddNode(new_node, self.proc.GetECId())
4343
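# Minimal illustrative sketch (not from the original module): the protocol
# handshake performed in Exec() above boils down to a strict equality check
# between the master's PROTOCOL_VERSION and the payload of the node's version
# RPC.  The numeric values below are stand-ins, not real protocol versions.
def _check_protocol(master_version, node_payload):
  """Mirror of the version check above: raise on any mismatch."""
  if master_version != node_payload:
    raise RuntimeError("Version mismatch master version %s, node version %s"
                       % (master_version, node_payload))

_check_protocol(40, 40)   # versions match, so the node may join the cluster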
4346 """Modifies the parameters of a node.
4347
4348 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4349 to the node role (as _ROLE_*)
4350 @cvar _R2F: a dictionary from node role to tuples of flags
4351 @cvar _FLAGS: a list of attribute names corresponding to the flags
4352
4353 """
4354 HPATH = "node-modify"
4355 HTYPE = constants.HTYPE_NODE
4356 REQ_BGL = False
4357 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4358 _F2R = {
4359 (True, False, False): _ROLE_CANDIDATE,
4360 (False, True, False): _ROLE_DRAINED,
4361 (False, False, True): _ROLE_OFFLINE,
4362 (False, False, False): _ROLE_REGULAR,
4363 }
4364 _R2F = dict((v, k) for k, v in _F2R.items())
4365 _FLAGS = ["master_candidate", "drained", "offline"]
4366
4368 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4369 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4370 self.op.master_capable, self.op.vm_capable,
4371 self.op.secondary_ip, self.op.ndparams]
4372 if all_mods.count(None) == len(all_mods):
4373 raise errors.OpPrereqError("Please pass at least one modification",
4374 errors.ECODE_INVAL)
4375 if all_mods.count(True) > 1:
4376 raise errors.OpPrereqError("Can't set the node into more than one"
4377 " state at the same time",
4378 errors.ECODE_INVAL)
4379
4380
4381 self.might_demote = (self.op.master_candidate == False or
4382 self.op.offline == True or
4383 self.op.drained == True or
4384 self.op.master_capable == False)
4385
4386 if self.op.secondary_ip:
4387 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4388 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4389 " address" % self.op.secondary_ip,
4390 errors.ECODE_INVAL)
4391
4392 self.lock_all = self.op.auto_promote and self.might_demote
4393 self.lock_instances = self.op.secondary_ip is not None
4394
4403
4423
4425 """Build hooks env.
4426
4427 This runs on the master node.
4428
4429 """
4430 env = {
4431 "OP_TARGET": self.op.node_name,
4432 "MASTER_CANDIDATE": str(self.op.master_candidate),
4433 "OFFLINE": str(self.op.offline),
4434 "DRAINED": str(self.op.drained),
4435 "MASTER_CAPABLE": str(self.op.master_capable),
4436 "VM_CAPABLE": str(self.op.vm_capable),
4437 }
4438 nl = [self.cfg.GetMasterNode(),
4439 self.op.node_name]
4440 return env, nl, nl
4441
4443 """Check prerequisites.
4444
4445 This only checks the instance list against the existing names.
4446
4447 """
4448 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4449
4450 if (self.op.master_candidate is not None or
4451 self.op.drained is not None or
4452 self.op.offline is not None):
4453
4454 if self.op.node_name == self.cfg.GetMasterNode():
4455 raise errors.OpPrereqError("The master role can be changed"
4456 " only via master-failover",
4457 errors.ECODE_INVAL)
4458
4459 if self.op.master_candidate and not node.master_capable:
4460 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4461 " it a master candidate" % node.name,
4462 errors.ECODE_STATE)
4463
4464 if self.op.vm_capable == False:
4465 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4466 if ipri or isec:
4467 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4468 " the vm_capable flag" % node.name,
4469 errors.ECODE_STATE)
4470
4471 if node.master_candidate and self.might_demote and not self.lock_all:
4472 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4473
4474
4475 (mc_remaining, mc_should, _) = \
4476 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4477 if mc_remaining < mc_should:
4478 raise errors.OpPrereqError("Not enough master candidates, please"
4479 " pass auto promote option to allow"
4480 " promotion", errors.ECODE_STATE)
4481
4482 self.old_flags = old_flags = (node.master_candidate,
4483 node.drained, node.offline)
4484 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4485 self.old_role = old_role = self._F2R[old_flags]
4486
4487
4488 for attr in self._FLAGS:
4489 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4490 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4491 setattr(self.op, attr, None)
4492
4493
4494
4495
4496
4497 if _SupportsOob(self.cfg, node):
4498 if self.op.offline is False and not (node.powered or
4499 self.op.powered == True):
4500         raise errors.OpPrereqError(("Please power on node %s before resetting"
4501                                     " its offline state") %
4502                                    self.op.node_name)
4503 elif self.op.powered is not None:
4504 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4505 " which does not support out-of-band"
4506 " handling") % self.op.node_name)
4507
4508
4509 if (self.op.drained == False or self.op.offline == False or
4510 (self.op.master_capable and not node.master_capable)):
4511 if _DecideSelfPromotion(self):
4512 self.op.master_candidate = True
4513 self.LogInfo("Auto-promoting node to master candidate")
4514
4515
4516 if self.op.master_capable == False and node.master_candidate:
4517 self.LogInfo("Demoting from master candidate")
4518 self.op.master_candidate = False
4519
4520
4521 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4522 if self.op.master_candidate:
4523 new_role = self._ROLE_CANDIDATE
4524 elif self.op.drained:
4525 new_role = self._ROLE_DRAINED
4526 elif self.op.offline:
4527 new_role = self._ROLE_OFFLINE
4528 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4529
4530
4531 new_role = self._ROLE_REGULAR
4532 else:
4533 new_role = old_role
4534
4535 self.new_role = new_role
4536
4537 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4538
4539 result = self.rpc.call_version([node.name])[node.name]
4540 if result.fail_msg:
4541 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4542 " to report its version: %s" %
4543 (node.name, result.fail_msg),
4544 errors.ECODE_STATE)
4545 else:
4546 self.LogWarning("Transitioning node from offline to online state"
4547 " without using re-add. Please make sure the node"
4548 " is healthy!")
4549
4550 if self.op.secondary_ip:
4551
4552 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4553 master_singlehomed = master.secondary_ip == master.primary_ip
4554 if master_singlehomed and self.op.secondary_ip:
4555 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4556 " homed cluster", errors.ECODE_INVAL)
4557
4558 if node.offline:
4559 if self.affected_instances:
4560 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4561 " node has instances (%s) configured"
4562 " to use it" % self.affected_instances)
4563 else:
4564
4565
4566 for instance in self.affected_instances:
4567 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4568
4569 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4570 if master.name != node.name:
4571
4572 if not netutils.TcpPing(self.op.secondary_ip,
4573 constants.DEFAULT_NODED_PORT,
4574 source=master.secondary_ip):
4575 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4576 " based ping to node daemon port",
4577 errors.ECODE_ENVIRON)
4578
4579 if self.op.ndparams:
4580 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4581 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4582 self.new_ndparams = new_ndparams
4583
4584 - def Exec(self, feedback_fn):
4585 """Modifies a node.
4586
4587 """
4588 node = self.node
4589 old_role = self.old_role
4590 new_role = self.new_role
4591
4592 result = []
4593
4594 if self.op.ndparams:
4595 node.ndparams = self.new_ndparams
4596
4597 if self.op.powered is not None:
4598 node.powered = self.op.powered
4599
4600 for attr in ["master_capable", "vm_capable"]:
4601 val = getattr(self.op, attr)
4602 if val is not None:
4603 setattr(node, attr, val)
4604 result.append((attr, str(val)))
4605
4606 if new_role != old_role:
4607
4608 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4609 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4610 if msg:
4611 self.LogWarning("Node failed to demote itself: %s", msg)
4612
4613 new_flags = self._R2F[new_role]
4614 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4615 if of != nf:
4616 result.append((desc, str(nf)))
4617 (node.master_candidate, node.drained, node.offline) = new_flags
4618
4619
4620 if self.lock_all:
4621 _AdjustCandidatePool(self, [node.name])
4622
4623 if self.op.secondary_ip:
4624 node.secondary_ip = self.op.secondary_ip
4625 result.append(("secondary_ip", self.op.secondary_ip))
4626
4627
4628 self.cfg.Update(node, feedback_fn)
4629
4630
4631
4632 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4633 self.context.ReaddNode(node)
4634
4635 return result
4636
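# Self-contained sketch (mirrors, but is not, the _F2R/_R2F class tables
# above): how a (master_candidate, drained, offline) flag tuple maps to a
# node role and back.  The names below are local stand-ins for illustration.
_CANDIDATE, _DRAINED, _OFFLINE, _REGULAR = range(4)
_flags_to_role = {
  (True, False, False): _CANDIDATE,
  (False, True, False): _DRAINED,
  (False, False, True): _OFFLINE,
  (False, False, False): _REGULAR,
}
_role_to_flags = dict((v, k) for k, v in _flags_to_role.items())

old_flags = (True, False, False)               # a master candidate
assert _flags_to_role[old_flags] == _CANDIDATE
# Choosing a new role and applying the reverse table yields the flag tuple
# that Exec() assigns back onto the node object:
assert _role_to_flags[_DRAINED] == (False, True, False)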
4639 """Powercycles a node.
4640
4641 """
4642 REQ_BGL = False
4643
4650
4652 """Locking for PowercycleNode.
4653
4654 This is a last-resort option and shouldn't block on other
4655 jobs. Therefore, we grab no locks.
4656
4657 """
4658 self.needed_locks = {}
4659
4660 - def Exec(self, feedback_fn):
4668
4671 """Query cluster configuration.
4672
4673 """
4674 REQ_BGL = False
4675
4677 self.needed_locks = {}
4678
4679 - def Exec(self, feedback_fn):
4680 """Return cluster config.
4681
4682 """
4683 cluster = self.cfg.GetClusterInfo()
4684 os_hvp = {}
4685
4686
4687 for os_name, hv_dict in cluster.os_hvp.items():
4688 os_hvp[os_name] = {}
4689 for hv_name, hv_params in hv_dict.items():
4690 if hv_name in cluster.enabled_hypervisors:
4691 os_hvp[os_name][hv_name] = hv_params
4692
4693
4694 primary_ip_version = constants.IP4_VERSION
4695 if cluster.primary_ip_family == netutils.IP6Address.family:
4696 primary_ip_version = constants.IP6_VERSION
4697
4698 result = {
4699 "software_version": constants.RELEASE_VERSION,
4700 "protocol_version": constants.PROTOCOL_VERSION,
4701 "config_version": constants.CONFIG_VERSION,
4702 "os_api_version": max(constants.OS_API_VERSIONS),
4703 "export_version": constants.EXPORT_VERSION,
4704 "architecture": (platform.architecture()[0], platform.machine()),
4705 "name": cluster.cluster_name,
4706 "master": cluster.master_node,
4707 "default_hypervisor": cluster.enabled_hypervisors[0],
4708 "enabled_hypervisors": cluster.enabled_hypervisors,
4709 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4710 for hypervisor_name in cluster.enabled_hypervisors]),
4711 "os_hvp": os_hvp,
4712 "beparams": cluster.beparams,
4713 "osparams": cluster.osparams,
4714 "nicparams": cluster.nicparams,
4715 "ndparams": cluster.ndparams,
4716 "candidate_pool_size": cluster.candidate_pool_size,
4717 "master_netdev": cluster.master_netdev,
4718 "volume_group_name": cluster.volume_group_name,
4719 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4720 "file_storage_dir": cluster.file_storage_dir,
4721 "maintain_node_health": cluster.maintain_node_health,
4722 "ctime": cluster.ctime,
4723 "mtime": cluster.mtime,
4724 "uuid": cluster.uuid,
4725 "tags": list(cluster.GetTags()),
4726 "uid_pool": cluster.uid_pool,
4727 "default_iallocator": cluster.default_iallocator,
4728 "reserved_lvs": cluster.reserved_lvs,
4729 "primary_ip_version": primary_ip_version,
4730 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4731 "hidden_os": cluster.hidden_os,
4732 "blacklisted_os": cluster.blacklisted_os,
4733 }
4734
4735 return result
4736
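# Minimal sketch (not part of the original module): a few of the keys a client
# can expect in the dictionary returned by the cluster-query Exec() above.
# The values here are illustrative stand-ins, not real cluster data.
cluster_info = {
  "software_version": "2.6.0",
  "enabled_hypervisors": ["kvm", "xen-pvm"],
  "candidate_pool_size": 10,
}
default_hv = cluster_info["enabled_hypervisors"][0]   # first entry is the default
print("running %s, default hypervisor %s"
      % (cluster_info["software_version"], default_hv))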
4775
4778 """Bring up an instance's disks.
4779
4780 """
4781 REQ_BGL = False
4782
4787
4791
4793 """Check prerequisites.
4794
4795 This checks that the instance is in the cluster.
4796
4797 """
4798 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4799 assert self.instance is not None, \
4800 "Cannot retrieve locked instance %s" % self.op.instance_name
4801 _CheckNodeOnline(self, self.instance.primary_node)
4802
4803 - def Exec(self, feedback_fn):
4804 """Activate the disks.
4805
4806 """
4807 disks_ok, disks_info = \
4808 _AssembleInstanceDisks(self, self.instance,
4809 ignore_size=self.op.ignore_size)
4810 if not disks_ok:
4811 raise errors.OpExecError("Cannot activate block devices")
4812
4813 return disks_info
4814
4818 """Prepare the block devices for an instance.
4819
4820 This sets up the block devices on all nodes.
4821
4822 @type lu: L{LogicalUnit}
4823 @param lu: the logical unit on whose behalf we execute
4824 @type instance: L{objects.Instance}
4825 @param instance: the instance for whose disks we assemble
4826 @type disks: list of L{objects.Disk} or None
4827 @param disks: which disks to assemble (or all, if None)
4828 @type ignore_secondaries: boolean
4829 @param ignore_secondaries: if true, errors on secondary nodes
4830 won't result in an error return from the function
4831 @type ignore_size: boolean
4832 @param ignore_size: if true, the current known size of the disk
4833 will not be used during the disk activation, useful for cases
4834 when the size is wrong
4835   @return: a tuple of (disks_ok, device_info); device_info is a list of
4836       (host, instance_visible_name, node_visible_name) tuples with
4837       the mapping from node devices to instance devices
4838
4839 """
4840 device_info = []
4841 disks_ok = True
4842 iname = instance.name
4843 disks = _ExpandCheckDisks(instance, disks)
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855 for idx, inst_disk in enumerate(disks):
4856 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4857 if ignore_size:
4858 node_disk = node_disk.Copy()
4859 node_disk.UnsetSize()
4860 lu.cfg.SetDiskID(node_disk, node)
4861 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4862 msg = result.fail_msg
4863 if msg:
4864 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4865 " (is_primary=False, pass=1): %s",
4866 inst_disk.iv_name, node, msg)
4867 if not ignore_secondaries:
4868 disks_ok = False
4869
4870
4871
4872
4873 for idx, inst_disk in enumerate(disks):
4874 dev_path = None
4875
4876 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4877 if node != instance.primary_node:
4878 continue
4879 if ignore_size:
4880 node_disk = node_disk.Copy()
4881 node_disk.UnsetSize()
4882 lu.cfg.SetDiskID(node_disk, node)
4883 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4884 msg = result.fail_msg
4885 if msg:
4886 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4887 " (is_primary=True, pass=2): %s",
4888 inst_disk.iv_name, node, msg)
4889 disks_ok = False
4890 else:
4891 dev_path = result.payload
4892
4893 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4894
4895
4896
4897
4898 for disk in disks:
4899 lu.cfg.SetDiskID(disk, instance.primary_node)
4900
4901 return disks_ok, device_info
4902
4905 """Start the disks of an instance.
4906
4907 """
4908 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4909 ignore_secondaries=force)
4910 if not disks_ok:
4911 _ShutdownInstanceDisks(lu, instance)
4912 if force is not None and not force:
4913 lu.proc.LogWarning("", hint="If the message above refers to a"
4914 " secondary node,"
4915 " you can retry the operation using '--force'.")
4916 raise errors.OpExecError("Disk consistency error")
4917
4920 """Shutdown an instance's disks.
4921
4922 """
4923 REQ_BGL = False
4924
4929
4933
4935 """Check prerequisites.
4936
4937 This checks that the instance is in the cluster.
4938
4939 """
4940 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4941 assert self.instance is not None, \
4942 "Cannot retrieve locked instance %s" % self.op.instance_name
4943
4944 - def Exec(self, feedback_fn):
4953
4956 """Shutdown block devices of an instance.
4957
4958 This function checks if an instance is running, before calling
4959 _ShutdownInstanceDisks.
4960
4961 """
4962 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4963 _ShutdownInstanceDisks(lu, instance, disks=disks)
4964
4967 """Return the instance disks selected by the disks list
4968
4969 @type disks: list of L{objects.Disk} or None
4970 @param disks: selected disks
4971 @rtype: list of L{objects.Disk}
4972 @return: selected instance disks to act on
4973
4974 """
4975 if disks is None:
4976 return instance.disks
4977 else:
4978 if not set(disks).issubset(instance.disks):
4979 raise errors.ProgrammerError("Can only act on disks belonging to the"
4980 " target instance")
4981 return disks
4982
4985 """Shutdown block devices of an instance.
4986
4987 This does the shutdown on all nodes of the instance.
4988
4989   Errors on the primary node make the function return failure, unless
4990   ignore_primary is true; errors on offline secondary nodes are ignored.
4991
4992 """
4993 all_result = True
4994 disks = _ExpandCheckDisks(instance, disks)
4995
4996 for disk in disks:
4997 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4998 lu.cfg.SetDiskID(top_disk, node)
4999 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5000 msg = result.fail_msg
5001 if msg:
5002 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5003 disk.iv_name, node, msg)
5004 if ((node == instance.primary_node and not ignore_primary) or
5005 (node != instance.primary_node and not result.offline)):
5006 all_result = False
5007 return all_result
5008
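# Illustrative sketch (not from the original module): when does a failed
# blockdev_shutdown flip the return value of _ShutdownInstanceDisks above?
# Primary-node errors count unless ignore_primary is set; secondary-node
# errors count only if that node's RPC result is not marked offline.
def _shutdown_error_counts(is_primary, ignore_primary, node_offline):
  """Stand-in for the condition guarding 'all_result = False' above."""
  return ((is_primary and not ignore_primary) or
          (not is_primary and not node_offline))

assert _shutdown_error_counts(True, False, False)       # primary error counts
assert not _shutdown_error_counts(True, True, False)    # ...unless ignored
assert not _shutdown_error_counts(False, False, True)   # offline secondary skipped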
5011 """Checks if a node has enough free memory.
5012
5013   This function checks if a given node has the needed amount of free
5014   memory. In case the node has less memory or we cannot get the
5015   information from the node, this function raises an OpPrereqError
5016 exception.
5017
5018 @type lu: C{LogicalUnit}
5019 @param lu: a logical unit from which we get configuration data
5020 @type node: C{str}
5021 @param node: the node to check
5022 @type reason: C{str}
5023 @param reason: string to use in the error message
5024 @type requested: C{int}
5025 @param requested: the amount of memory in MiB to check for
5026 @type hypervisor_name: C{str}
5027 @param hypervisor_name: the hypervisor to ask for memory stats
5028 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5029 we cannot check the node
5030
5031 """
5032 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5033 nodeinfo[node].Raise("Can't get data from node %s" % node,
5034 prereq=True, ecode=errors.ECODE_ENVIRON)
5035   free_mem = nodeinfo[node].payload.get("memory_free", None)
5036 if not isinstance(free_mem, int):
5037 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5038 " was '%s'" % (node, free_mem),
5039 errors.ECODE_ENVIRON)
5040 if requested > free_mem:
5041 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5042 " needed %s MiB, available %s MiB" %
5043 (node, reason, requested, free_mem),
5044 errors.ECODE_NORES)
5045
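# Standalone sketch (not from the original module) of the core comparison in
# _CheckNodeFreeMemory above: the request is rejected as soon as it exceeds
# the node's reported memory_free, or when that value cannot be read.
def _has_enough_memory(free_mem_mib, requested_mib):
  """True if a request for requested_mib MiB fits into free_mem_mib MiB."""
  return isinstance(free_mem_mib, int) and requested_mib <= free_mem_mib

assert _has_enough_memory(4096, 2048)       # fits
assert not _has_enough_memory(1024, 2048)   # would raise ECODE_NORES above
assert not _has_enough_memory(None, 128)    # unreadable node data also fails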
5048   """Checks if nodes have enough free disk space in all the VGs.
5049
5050   This function checks if all given nodes have the needed amount of
5051   free disk. In case any node has less disk or we cannot get the
5052   information from the node, this function raises an OpPrereqError
5053 exception.
5054
5055 @type lu: C{LogicalUnit}
5056 @param lu: a logical unit from which we get configuration data
5057 @type nodenames: C{list}
5058 @param nodenames: the list of node names to check
5059 @type req_sizes: C{dict}
5060 @param req_sizes: the hash of vg and corresponding amount of disk in
5061 MiB to check for
5062 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5063 or we cannot check the node
5064
5065 """
5066 for vg, req_size in req_sizes.items():
5067 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5068
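# Minimal sketch (hypothetical values): the req_sizes hash expected by
# _CheckNodesFreeDiskPerVG above simply maps volume-group names to the amount
# of disk, in MiB, that must be free in that group on every listed node.
example_req_sizes = {
  "xenvg": 10240,   # 10 GiB needed in VG "xenvg"
  "ssdvg": 2048,    # 2 GiB needed in VG "ssdvg"
}
# The helper then runs one _CheckNodesFreeDiskOnVG pass per entry:
for vg_name, mib_needed in example_req_sizes.items():
  assert mib_needed > 0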
5071 """Checks if nodes have enough free disk space in the specified VG.
5072
5073   This function checks if all given nodes have the needed amount of
5074   free disk. In case any node has less disk or we cannot get the
5075   information from the node, this function raises an OpPrereqError
5076 exception.
5077
5078 @type lu: C{LogicalUnit}
5079 @param lu: a logical unit from which we get configuration data
5080 @type nodenames: C{list}
5081 @param nodenames: the list of node names to check
5082 @type vg: C{str}
5083 @param vg: the volume group to check
5084 @type requested: C{int}
5085 @param requested: the amount of disk in MiB to check for
5086 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5087 or we cannot check the node
5088
5089 """
5090 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5091 for node in nodenames:
5092 info = nodeinfo[node]
5093 info.Raise("Cannot get current information from node %s" % node,
5094 prereq=True, ecode=errors.ECODE_ENVIRON)
5095 vg_free = info.payload.get("vg_free", None)
5096 if not isinstance(vg_free, int):
5097 raise errors.OpPrereqError("Can't compute free disk space on node"
5098 " %s for vg %s, result was '%s'" %
5099 (node, vg, vg_free), errors.ECODE_ENVIRON)
5100 if requested > vg_free:
5101 raise errors.OpPrereqError("Not enough disk space on target node %s"
5102 " vg %s: required %d MiB, available %d MiB" %
5103 (node, vg, requested, vg_free),
5104 errors.ECODE_NORES)
5105
5108 """Starts an instance.
5109
5110 """
5111 HPATH = "instance-start"
5112 HTYPE = constants.HTYPE_INSTANCE
5113 REQ_BGL = False
5114
5120
5123
5136
5138 """Check prerequisites.
5139
5140 This checks that the instance is in the cluster.
5141
5142 """
5143 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5144 assert self.instance is not None, \
5145 "Cannot retrieve locked instance %s" % self.op.instance_name
5146
5147
5148 if self.op.hvparams:
5149
5150 cluster = self.cfg.GetClusterInfo()
5151 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5152 filled_hvp = cluster.FillHV(instance)
5153 filled_hvp.update(self.op.hvparams)
5154 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5155 hv_type.CheckParameterSyntax(filled_hvp)
5156 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5157
5158 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5159
5160 if self.primary_offline and self.op.ignore_offline_nodes:
5161 self.proc.LogWarning("Ignoring offline primary node")
5162
5163 if self.op.hvparams or self.op.beparams:
5164 self.proc.LogWarning("Overridden parameters are ignored")
5165 else:
5166 _CheckNodeOnline(self, instance.primary_node)
5167
5168 bep = self.cfg.GetClusterInfo().FillBE(instance)
5169
5170
5171 _CheckInstanceBridgesExist(self, instance)
5172
5173 remote_info = self.rpc.call_instance_info(instance.primary_node,
5174 instance.name,
5175 instance.hypervisor)
5176 remote_info.Raise("Error checking node %s" % instance.primary_node,
5177 prereq=True, ecode=errors.ECODE_ENVIRON)
5178 if not remote_info.payload:
5179 _CheckNodeFreeMemory(self, instance.primary_node,
5180 "starting instance %s" % instance.name,
5181 bep[constants.BE_MEMORY], instance.hypervisor)
5182
5183 - def Exec(self, feedback_fn):
5207
5210 """Reboot an instance.
5211
5212 """
5213 HPATH = "instance-reboot"
5214 HTYPE = constants.HTYPE_INSTANCE
5215 REQ_BGL = False
5216
5219
5221 """Build hooks env.
5222
5223 This runs on master, primary and secondary nodes of the instance.
5224
5225 """
5226 env = {
5227 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5228 "REBOOT_TYPE": self.op.reboot_type,
5229 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5230 }
5231 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5232 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5233 return env, nl, nl
5234
5249
5250 - def Exec(self, feedback_fn):
5251 """Reboot the instance.
5252
5253 """
5254 instance = self.instance
5255 ignore_secondaries = self.op.ignore_secondaries
5256 reboot_type = self.op.reboot_type
5257
5258 node_current = instance.primary_node
5259
5260 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5261 constants.INSTANCE_REBOOT_HARD]:
5262 for disk in instance.disks:
5263 self.cfg.SetDiskID(disk, node_current)
5264 result = self.rpc.call_instance_reboot(node_current, instance,
5265 reboot_type,
5266 self.op.shutdown_timeout)
5267 result.Raise("Could not reboot instance")
5268 else:
5269 result = self.rpc.call_instance_shutdown(node_current, instance,
5270 self.op.shutdown_timeout)
5271 result.Raise("Could not shutdown instance for full reboot")
5272 _ShutdownInstanceDisks(self, instance)
5273 _StartInstanceDisks(self, instance, ignore_secondaries)
5274 result = self.rpc.call_instance_start(node_current, instance, None, None)
5275 msg = result.fail_msg
5276 if msg:
5277 _ShutdownInstanceDisks(self, instance)
5278 raise errors.OpExecError("Could not start instance for"
5279 " full reboot: %s" % msg)
5280
5281 self.cfg.MarkInstanceUp(instance.name)
5282
5285 """Shutdown an instance.
5286
5287 """
5288 HPATH = "instance-stop"
5289 HTYPE = constants.HTYPE_INSTANCE
5290 REQ_BGL = False
5291
5294
5305
5307 """Check prerequisites.
5308
5309 This checks that the instance is in the cluster.
5310
5311 """
5312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5313 assert self.instance is not None, \
5314 "Cannot retrieve locked instance %s" % self.op.instance_name
5315
5316 self.primary_offline = \
5317 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5318
5319 if self.primary_offline and self.op.ignore_offline_nodes:
5320 self.proc.LogWarning("Ignoring offline primary node")
5321 else:
5322 _CheckNodeOnline(self, self.instance.primary_node)
5323
5324 - def Exec(self, feedback_fn):
5345
5348 """Reinstall an instance.
5349
5350 """
5351 HPATH = "instance-reinstall"
5352 HTYPE = constants.HTYPE_INSTANCE
5353 REQ_BGL = False
5354
5357
5367
5369 """Check prerequisites.
5370
5371 This checks that the instance is in the cluster and is not running.
5372
5373 """
5374 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5375 assert instance is not None, \
5376 "Cannot retrieve locked instance %s" % self.op.instance_name
5377 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5378 " offline, cannot reinstall")
5379 for node in instance.secondary_nodes:
5380 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5381 " cannot reinstall")
5382
5383 if instance.disk_template == constants.DT_DISKLESS:
5384 raise errors.OpPrereqError("Instance '%s' has no disks" %
5385 self.op.instance_name,
5386 errors.ECODE_INVAL)
5387 _CheckInstanceDown(self, instance, "cannot reinstall")
5388
5389 if self.op.os_type is not None:
5390
5391 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5392 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5393 instance_os = self.op.os_type
5394 else:
5395 instance_os = instance.os
5396
5397 nodelist = list(instance.all_nodes)
5398
5399 if self.op.osparams:
5400 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5401 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5402 self.os_inst = i_osdict
5403 else:
5404 self.os_inst = None
5405
5406 self.instance = instance
5407
5408 - def Exec(self, feedback_fn):
5409 """Reinstall the instance.
5410
5411 """
5412 inst = self.instance
5413
5414 if self.op.os_type is not None:
5415 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5416 inst.os = self.op.os_type
5417
5418 self.cfg.Update(inst, feedback_fn)
5419
5420 _StartInstanceDisks(self, inst, None)
5421 try:
5422 feedback_fn("Running the instance OS create scripts...")
5423
5424 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5425 self.op.debug_level,
5426 osparams=self.os_inst)
5427 result.Raise("Could not install OS for instance %s on node %s" %
5428 (inst.name, inst.primary_node))
5429 finally:
5430 _ShutdownInstanceDisks(self, inst)
5431
5434 """Recreate an instance's missing disks.
5435
5436 """
5437 HPATH = "instance-recreate-disks"
5438 HTYPE = constants.HTYPE_INSTANCE
5439 REQ_BGL = False
5440
5442
5443 self.op.disks = sorted(frozenset(self.op.disks))
5444
5453
5460
5470
5472 """Check prerequisites.
5473
5474 This checks that the instance is in the cluster and is not running.
5475
5476 """
5477 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5478 assert instance is not None, \
5479 "Cannot retrieve locked instance %s" % self.op.instance_name
5480 if self.op.nodes:
5481 if len(self.op.nodes) != len(instance.all_nodes):
5482 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
5483 " %d replacement nodes were specified" %
5484 (instance.name, len(instance.all_nodes),
5485 len(self.op.nodes)),
5486 errors.ECODE_INVAL)
5487 assert instance.disk_template != constants.DT_DRBD8 or \
5488 len(self.op.nodes) == 2
5489 assert instance.disk_template != constants.DT_PLAIN or \
5490 len(self.op.nodes) == 1
5491 primary_node = self.op.nodes[0]
5492 else:
5493 primary_node = instance.primary_node
5494 _CheckNodeOnline(self, primary_node)
5495
5496 if instance.disk_template == constants.DT_DISKLESS:
5497 raise errors.OpPrereqError("Instance '%s' has no disks" %
5498 self.op.instance_name, errors.ECODE_INVAL)
5499
5500
5501 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
5502 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
5503 if not (self.op.nodes and old_pnode.offline):
5504 _CheckInstanceDown(self, instance, "cannot recreate disks")
5505
5506 if not self.op.disks:
5507 self.op.disks = range(len(instance.disks))
5508 else:
5509 for idx in self.op.disks:
5510 if idx >= len(instance.disks):
5511 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5512 errors.ECODE_INVAL)
5513 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
5514 raise errors.OpPrereqError("Can't recreate disks partially and"
5515 " change the nodes at the same time",
5516 errors.ECODE_INVAL)
5517 self.instance = instance
5518
5519 - def Exec(self, feedback_fn):
5520 """Recreate the disks.
5521
5522 """
5523 instance = self.instance
5524
5525 to_skip = []
5526 mods = []
5527
5528 for idx, disk in enumerate(instance.disks):
5529 if idx not in self.op.disks:
5530 to_skip.append(idx)
5531 continue
5532
5533 if self.op.nodes:
5534 if disk.dev_type == constants.LD_DRBD8:
5535
5536 assert len(self.op.nodes) == 2
5537 assert len(disk.logical_id) == 6
5538
5539 (_, _, old_port, _, _, old_secret) = disk.logical_id
5540 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
5541 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
5542 new_minors[0], new_minors[1], old_secret)
5543 assert len(disk.logical_id) == len(new_id)
5544 mods.append((idx, new_id))
5545
5546
5547
5548 for idx, new_id in mods:
5549 instance.disks[idx].logical_id = new_id
5550
5551
5552 if self.op.nodes:
5553 instance.primary_node = self.op.nodes[0]
5554 self.LogWarning("Changing the instance's nodes, you will have to"
5555                       " remove any disks left on the old nodes manually")
5556
5557 if self.op.nodes:
5558 self.cfg.Update(instance, feedback_fn)
5559
5560 _CreateDisks(self, instance, to_skip=to_skip)
5561
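# Illustrative sketch (stand-in values, not real configuration data): the
# shape of the DRBD8 logical_id rebuilt in Exec() above when recreating disks
# on new nodes.  The port and shared secret are kept; node names and minors
# are replaced by freshly allocated ones.
old_logical_id = ("node1", "node2", 11000, 0, 0, "secret")
new_nodes = ["node3", "node4"]
new_minors = [1, 2]                       # would come from AllocateDRBDMinor
(_, _, old_port, _, _, old_secret) = old_logical_id
new_logical_id = (new_nodes[0], new_nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
assert len(new_logical_id) == len(old_logical_id)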
5564 """Rename an instance.
5565
5566 """
5567 HPATH = "instance-rename"
5568 HTYPE = constants.HTYPE_INSTANCE
5569
5571 """Check arguments.
5572
5573 """
5574 if self.op.ip_check and not self.op.name_check:
5575
5576 raise errors.OpPrereqError("Cannot do ip check without a name check",
5577 errors.ECODE_INVAL)
5578
5589
5591 """Check prerequisites.
5592
5593 This checks that the instance is in the cluster and is not running.
5594
5595 """
5596 self.op.instance_name = _ExpandInstanceName(self.cfg,
5597 self.op.instance_name)
5598 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5599 assert instance is not None
5600 _CheckNodeOnline(self, instance.primary_node)
5601 _CheckInstanceDown(self, instance, "cannot rename")
5602 self.instance = instance
5603
5604 new_name = self.op.new_name
5605 if self.op.name_check:
5606 hostname = netutils.GetHostname(name=new_name)
5607 if hostname != new_name:
5608 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5609 hostname.name)
5610 new_name = self.op.new_name = hostname.name
5611 if (self.op.ip_check and
5612 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5613 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5614 (hostname.ip, new_name),
5615 errors.ECODE_NOTUNIQUE)
5616
5617 instance_list = self.cfg.GetInstanceList()
5618 if new_name in instance_list and new_name != instance.name:
5619 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5620 new_name, errors.ECODE_EXISTS)
5621
5622 - def Exec(self, feedback_fn):
5623 """Rename the instance.
5624
5625 """
5626 inst = self.instance
5627 old_name = inst.name
5628
5629 rename_file_storage = False
5630 if (inst.disk_template == constants.DT_FILE and
5631 self.op.new_name != inst.name):
5632 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5633 rename_file_storage = True
5634
5635 self.cfg.RenameInstance(inst.name, self.op.new_name)
5636
5637 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5638 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5639
5640
5641 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5642
5643 if rename_file_storage:
5644 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5645 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5646 old_file_storage_dir,
5647 new_file_storage_dir)
5648 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5649 " (but the instance has been renamed in Ganeti)" %
5650 (inst.primary_node, old_file_storage_dir,
5651 new_file_storage_dir))
5652
5653 _StartInstanceDisks(self, inst, None)
5654 try:
5655 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5656 old_name, self.op.debug_level)
5657 msg = result.fail_msg
5658 if msg:
5659 msg = ("Could not run OS rename script for instance %s on node %s"
5660 " (but the instance has been renamed in Ganeti): %s" %
5661 (inst.name, inst.primary_node, msg))
5662 self.proc.LogWarning(msg)
5663 finally:
5664 _ShutdownInstanceDisks(self, inst)
5665
5666 return inst.name
5667
5670 """Remove an instance.
5671
5672 """
5673 HPATH = "instance-remove"
5674 HTYPE = constants.HTYPE_INSTANCE
5675 REQ_BGL = False
5676
5681
5685
5687 """Build hooks env.
5688
5689 This runs on master, primary and secondary nodes of the instance.
5690
5691 """
5692 env = _BuildInstanceHookEnvByObject(self, self.instance)
5693 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5694 nl = [self.cfg.GetMasterNode()]
5695 nl_post = list(self.instance.all_nodes) + nl
5696 return env, nl, nl_post
5697
5699 """Check prerequisites.
5700
5701 This checks that the instance is in the cluster.
5702
5703 """
5704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5705 assert self.instance is not None, \
5706 "Cannot retrieve locked instance %s" % self.op.instance_name
5707
5708 - def Exec(self, feedback_fn):
5709 """Remove the instance.
5710
5711 """
5712 instance = self.instance
5713 logging.info("Shutting down instance %s on node %s",
5714 instance.name, instance.primary_node)
5715
5716 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5717 self.op.shutdown_timeout)
5718 msg = result.fail_msg
5719 if msg:
5720 if self.op.ignore_failures:
5721 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5722 else:
5723 raise errors.OpExecError("Could not shutdown instance %s on"
5724 " node %s: %s" %
5725 (instance.name, instance.primary_node, msg))
5726
5727 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5728
5731 """Utility function to remove an instance.
5732
5733 """
5734 logging.info("Removing block devices for instance %s", instance.name)
5735
5736 if not _RemoveDisks(lu, instance):
5737 if not ignore_failures:
5738 raise errors.OpExecError("Can't remove instance's disks")
5739 feedback_fn("Warning: can't remove instance's disks")
5740
5741 logging.info("Removing instance %s out of cluster config", instance.name)
5742
5743 lu.cfg.RemoveInstance(instance.name)
5744
5745 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5746 "Instance lock removal conflict"
5747
5748
5749 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5750
5753 """Logical unit for querying instances.
5754
5755 """
5756
5757 REQ_BGL = False
5758
5760 self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5761 self.op.use_locking)
5762
5765
5768
5769 - def Exec(self, feedback_fn):
5771
5774 """Failover an instance.
5775
5776 """
5777 HPATH = "instance-failover"
5778 HTYPE = constants.HTYPE_INSTANCE
5779 REQ_BGL = False
5780
5785
5789
5791 """Build hooks env.
5792
5793 This runs on master, primary and secondary nodes of the instance.
5794
5795 """
5796 instance = self.instance
5797 source_node = instance.primary_node
5798 target_node = instance.secondary_nodes[0]
5799 env = {
5800 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5801 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5802 "OLD_PRIMARY": source_node,
5803 "OLD_SECONDARY": target_node,
5804 "NEW_PRIMARY": target_node,
5805 "NEW_SECONDARY": source_node,
5806 }
5807 env.update(_BuildInstanceHookEnvByObject(self, instance))
5808 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5809 nl_post = list(nl)
5810 nl_post.append(source_node)
5811 return env, nl, nl_post
5812
5814 """Check prerequisites.
5815
5816 This checks that the instance is in the cluster.
5817
5818 """
5819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5820 assert self.instance is not None, \
5821 "Cannot retrieve locked instance %s" % self.op.instance_name
5822
5823 bep = self.cfg.GetClusterInfo().FillBE(instance)
5824 if instance.disk_template not in constants.DTS_NET_MIRROR:
5825 raise errors.OpPrereqError("Instance's disk layout is not"
5826 " network mirrored, cannot failover.",
5827 errors.ECODE_STATE)
5828
5829 secondary_nodes = instance.secondary_nodes
5830 if not secondary_nodes:
5831 raise errors.ProgrammerError("no secondary node but using "
5832 "a mirrored disk template")
5833
5834 target_node = secondary_nodes[0]
5835 _CheckNodeOnline(self, target_node)
5836 _CheckNodeNotDrained(self, target_node)
5837 if instance.admin_up:
5838
5839 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5840 instance.name, bep[constants.BE_MEMORY],
5841 instance.hypervisor)
5842 else:
5843 self.LogInfo("Not checking memory on the secondary node as"
5844 " instance will not be started")
5845
5846
5847 _CheckInstanceBridgesExist(self, instance, node=target_node)
5848
5849 - def Exec(self, feedback_fn):
5850 """Failover an instance.
5851
5852 The failover is done by shutting it down on its present node and
5853 starting it on the secondary.
5854
5855 """
5856 instance = self.instance
5857 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5858
5859 source_node = instance.primary_node
5860 target_node = instance.secondary_nodes[0]
5861
5862 if instance.admin_up:
5863 feedback_fn("* checking disk consistency between source and target")
5864 for dev in instance.disks:
5865
5866 if not _CheckDiskConsistency(self, dev, target_node, False):
5867 if not self.op.ignore_consistency:
5868 raise errors.OpExecError("Disk %s is degraded on target node,"
5869 " aborting failover." % dev.iv_name)
5870 else:
5871 feedback_fn("* not checking disk consistency as instance is not running")
5872
5873 feedback_fn("* shutting down instance on source node")
5874 logging.info("Shutting down instance %s on node %s",
5875 instance.name, source_node)
5876
5877 result = self.rpc.call_instance_shutdown(source_node, instance,
5878 self.op.shutdown_timeout)
5879 msg = result.fail_msg
5880 if msg:
5881 if self.op.ignore_consistency or primary_node.offline:
5882 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5883 " Proceeding anyway. Please make sure node"
5884 " %s is down. Error details: %s",
5885 instance.name, source_node, source_node, msg)
5886 else:
5887 raise errors.OpExecError("Could not shutdown instance %s on"
5888 " node %s: %s" %
5889 (instance.name, source_node, msg))
5890
5891 feedback_fn("* deactivating the instance's disks on source node")
5892 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5893 raise errors.OpExecError("Can't shut down the instance's disks.")
5894
5895 instance.primary_node = target_node
5896
5897 self.cfg.Update(instance, feedback_fn)
5898
5899
5900 if instance.admin_up:
5901 feedback_fn("* activating the instance's disks on target node")
5902 logging.info("Starting instance %s on node %s",
5903 instance.name, target_node)
5904
5905 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5906 ignore_secondaries=True)
5907 if not disks_ok:
5908 _ShutdownInstanceDisks(self, instance)
5909 raise errors.OpExecError("Can't activate the instance's disks")
5910
5911 feedback_fn("* starting the instance on the target node")
5912 result = self.rpc.call_instance_start(target_node, instance, None, None)
5913 msg = result.fail_msg
5914 if msg:
5915 _ShutdownInstanceDisks(self, instance)
5916 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5917 (instance.name, target_node, msg))
5918
5921 """Migrate an instance.
5922
5923   This is migration without shutting the instance down, as opposed to
5924   failover, which requires a shutdown.
5925
5926 """
5927 HPATH = "instance-migrate"
5928 HTYPE = constants.HTYPE_INSTANCE
5929 REQ_BGL = False
5930
5940
5944
5946 """Build hooks env.
5947
5948 This runs on master, primary and secondary nodes of the instance.
5949
5950 """
5951 instance = self._migrater.instance
5952 source_node = instance.primary_node
5953 target_node = instance.secondary_nodes[0]
5954 env = _BuildInstanceHookEnvByObject(self, instance)
5955 env["MIGRATE_LIVE"] = self._migrater.live
5956 env["MIGRATE_CLEANUP"] = self.op.cleanup
5957 env.update({
5958 "OLD_PRIMARY": source_node,
5959 "OLD_SECONDARY": target_node,
5960 "NEW_PRIMARY": target_node,
5961 "NEW_SECONDARY": source_node,
5962 })
5963 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5964 nl_post = list(nl)
5965 nl_post.append(source_node)
5966 return env, nl, nl_post
5967
5970 """Move an instance by data-copying.
5971
5972 """
5973 HPATH = "instance-move"
5974 HTYPE = constants.HTYPE_INSTANCE
5975 REQ_BGL = False
5976
5983
5987
5989 """Build hooks env.
5990
5991 This runs on master, primary and secondary nodes of the instance.
5992
5993 """
5994 env = {
5995 "TARGET_NODE": self.op.target_node,
5996 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5997 }
5998 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5999 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6000 self.op.target_node]
6001 return env, nl, nl
6002
6004 """Check prerequisites.
6005
6006 This checks that the instance is in the cluster.
6007
6008 """
6009 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6010 assert self.instance is not None, \
6011 "Cannot retrieve locked instance %s" % self.op.instance_name
6012
6013 node = self.cfg.GetNodeInfo(self.op.target_node)
6014 assert node is not None, \
6015 "Cannot retrieve locked node %s" % self.op.target_node
6016
6017 self.target_node = target_node = node.name
6018
6019 if target_node == instance.primary_node:
6020 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6021 (instance.name, target_node),
6022 errors.ECODE_STATE)
6023
6024 bep = self.cfg.GetClusterInfo().FillBE(instance)
6025
6026 for idx, dsk in enumerate(instance.disks):
6027 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6028 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6029 " cannot copy" % idx, errors.ECODE_STATE)
6030
6031 _CheckNodeOnline(self, target_node)
6032 _CheckNodeNotDrained(self, target_node)
6033 _CheckNodeVmCapable(self, target_node)
6034
6035 if instance.admin_up:
6036
6037       _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
6038 instance.name, bep[constants.BE_MEMORY],
6039 instance.hypervisor)
6040 else:
6041       self.LogInfo("Not checking memory on the target node as"
6042 " instance will not be started")
6043
6044
6045 _CheckInstanceBridgesExist(self, instance, node=target_node)
6046
6047 - def Exec(self, feedback_fn):
6048 """Move an instance.
6049
6050 The move is done by shutting it down on its present node, copying
6051 the data over (slow) and starting it on the new node.
6052
6053 """
6054 instance = self.instance
6055
6056 source_node = instance.primary_node
6057 target_node = self.target_node
6058
6059 self.LogInfo("Shutting down instance %s on source node %s",
6060 instance.name, source_node)
6061
6062 result = self.rpc.call_instance_shutdown(source_node, instance,
6063 self.op.shutdown_timeout)
6064 msg = result.fail_msg
6065 if msg:
6066 if self.op.ignore_consistency:
6067 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6068 " Proceeding anyway. Please make sure node"
6069 " %s is down. Error details: %s",
6070 instance.name, source_node, source_node, msg)
6071 else:
6072 raise errors.OpExecError("Could not shutdown instance %s on"
6073 " node %s: %s" %
6074 (instance.name, source_node, msg))
6075
6076
6077 try:
6078 _CreateDisks(self, instance, target_node=target_node)
6079 except errors.OpExecError:
6080 self.LogWarning("Device creation failed, reverting...")
6081 try:
6082 _RemoveDisks(self, instance, target_node=target_node)
6083 finally:
6084 self.cfg.ReleaseDRBDMinors(instance.name)
6085 raise
6086
6087 cluster_name = self.cfg.GetClusterInfo().cluster_name
6088
6089 errs = []
6090
6091 for idx, disk in enumerate(instance.disks):
6092 self.LogInfo("Copying data for disk %d", idx)
6093 result = self.rpc.call_blockdev_assemble(target_node, disk,
6094 instance.name, True, idx)
6095 if result.fail_msg:
6096 self.LogWarning("Can't assemble newly created disk %d: %s",
6097 idx, result.fail_msg)
6098 errs.append(result.fail_msg)
6099 break
6100 dev_path = result.payload
6101 result = self.rpc.call_blockdev_export(source_node, disk,
6102 target_node, dev_path,
6103 cluster_name)
6104 if result.fail_msg:
6105 self.LogWarning("Can't copy data over for disk %d: %s",
6106 idx, result.fail_msg)
6107 errs.append(result.fail_msg)
6108 break
6109
6110 if errs:
6111 self.LogWarning("Some disks failed to copy, aborting")
6112 try:
6113 _RemoveDisks(self, instance, target_node=target_node)
6114 finally:
6115 self.cfg.ReleaseDRBDMinors(instance.name)
6116 raise errors.OpExecError("Errors during disk copy: %s" %
6117 (",".join(errs),))
6118
6119 instance.primary_node = target_node
6120 self.cfg.Update(instance, feedback_fn)
6121
6122 self.LogInfo("Removing the disks on the original node")
6123 _RemoveDisks(self, instance, target_node=source_node)
6124
6125
6126 if instance.admin_up:
6127 self.LogInfo("Starting instance %s on node %s",
6128 instance.name, target_node)
6129
6130 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6131 ignore_secondaries=True)
6132 if not disks_ok:
6133 _ShutdownInstanceDisks(self, instance)
6134 raise errors.OpExecError("Can't activate the instance's disks")
6135
6136 result = self.rpc.call_instance_start(target_node, instance, None, None)
6137 msg = result.fail_msg
6138 if msg:
6139 _ShutdownInstanceDisks(self, instance)
6140 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6141 (instance.name, target_node, msg))
6142
6145 """Migrate all instances from a node.
6146
6147 """
6148 HPATH = "node-migrate"
6149 HTYPE = constants.HTYPE_NODE
6150 REQ_BGL = False
6151
6175
6179
6181 """Build hooks env.
6182
6183 This runs on the master, the primary and all the secondaries.
6184
6185 """
6186 env = {
6187 "NODE_NAME": self.op.node_name,
6188 }
6189
6190 nl = [self.cfg.GetMasterNode()]
6191
6192 return (env, nl, nl)
6193
6196 """Tasklet class for instance migration.
6197
6198 @type live: boolean
6199 @ivar live: whether the migration will be done live or non-live;
6200       this variable is initialized only after CheckPrereq has run
6201
6202 """
6203 - def __init__(self, lu, instance_name, cleanup):
6204 """Initializes this class.
6205
6206 """
6207 Tasklet.__init__(self, lu)
6208
6209
6210 self.instance_name = instance_name
6211 self.cleanup = cleanup
6212 self.live = False
6213
6215 """Check prerequisites.
6216
6217 This checks that the instance is in the cluster.
6218
6219 """
6220 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6221 instance = self.cfg.GetInstanceInfo(instance_name)
6222 assert instance is not None
6223
6224 if instance.disk_template != constants.DT_DRBD8:
6225 raise errors.OpPrereqError("Instance's disk layout is not"
6226 " drbd8, cannot migrate.", errors.ECODE_STATE)
6227
6228 secondary_nodes = instance.secondary_nodes
6229 if not secondary_nodes:
6230 raise errors.ConfigurationError("No secondary node but using"
6231 " drbd8 disk template")
6232
6233 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6234
6235 target_node = secondary_nodes[0]
6236
6237 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6238 instance.name, i_be[constants.BE_MEMORY],
6239 instance.hypervisor)
6240
6241
6242 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6243
6244 if not self.cleanup:
6245 _CheckNodeNotDrained(self.lu, target_node)
6246 result = self.rpc.call_instance_migratable(instance.primary_node,
6247 instance)
6248 result.Raise("Can't migrate, please use failover",
6249 prereq=True, ecode=errors.ECODE_STATE)
6250
6251 self.instance = instance
6252
6253 if self.lu.op.live is not None and self.lu.op.mode is not None:
6254 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6255 " parameters are accepted",
6256 errors.ECODE_INVAL)
6257 if self.lu.op.live is not None:
6258 if self.lu.op.live:
6259 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6260 else:
6261 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6262
6263
6264 self.lu.op.live = None
6265 elif self.lu.op.mode is None:
6266
6267 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6268 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6269
6270 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6271
6273 """Poll with custom rpc for disk sync.
6274
6275 This uses our own step-based rpc call.
6276
6277 """
6278 self.feedback_fn("* wait until resync is done")
6279 all_done = False
6280 while not all_done:
6281 all_done = True
6282 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6283 self.nodes_ip,
6284 self.instance.disks)
6285 min_percent = 100
6286 for node, nres in result.items():
6287 nres.Raise("Cannot resync disks on node %s" % node)
6288 node_done, node_percent = nres.payload
6289 all_done = all_done and node_done
6290 if node_percent is not None:
6291 min_percent = min(min_percent, node_percent)
6292 if not all_done:
6293 if min_percent < 100:
6294 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6295 time.sleep(2)
6296
6298 """Demote a node to secondary.
6299
6300 """
6301 self.feedback_fn("* switching node %s to secondary mode" % node)
6302
6303 for dev in self.instance.disks:
6304 self.cfg.SetDiskID(dev, node)
6305
6306 result = self.rpc.call_blockdev_close(node, self.instance.name,
6307 self.instance.disks)
6308 result.Raise("Cannot change disk to secondary on node %s" % node)
6309
6311 """Disconnect from the network.
6312
6313 """
6314 self.feedback_fn("* changing into standalone mode")
6315 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6316 self.instance.disks)
6317 for node, nres in result.items():
6318 nres.Raise("Cannot disconnect disks on node %s" % node)
6319
6321 """Reconnect to the network.
6322
6323 """
6324 if multimaster:
6325 msg = "dual-master"
6326 else:
6327 msg = "single-master"
6328 self.feedback_fn("* changing disks into %s mode" % msg)
6329 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6330 self.instance.disks,
6331 self.instance.name, multimaster)
6332 for node, nres in result.items():
6333 nres.Raise("Cannot change disks config on node %s" % node)
6334
6336 """Try to cleanup after a failed migration.
6337
6338 The cleanup is done by:
6339 - check that the instance is running only on one node
6340 (and update the config if needed)
6341 - change disks on its secondary node to secondary
6342 - wait until disks are fully synchronized
6343 - disconnect from the network
6344 - change disks into single-master mode
6345 - wait again until disks are fully synchronized
6346
6347 """
6348 instance = self.instance
6349 target_node = self.target_node
6350 source_node = self.source_node
6351
6352
6353 self.feedback_fn("* checking where the instance actually runs"
6354 " (if this hangs, the hypervisor might be in"
6355 " a bad state)")
6356 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6357 for node, result in ins_l.items():
6358 result.Raise("Can't contact node %s" % node)
6359
6360 runningon_source = instance.name in ins_l[source_node].payload
6361 runningon_target = instance.name in ins_l[target_node].payload
6362
6363 if runningon_source and runningon_target:
6364 raise errors.OpExecError("Instance seems to be running on two nodes,"
6365 " or the hypervisor is confused. You will have"
6366 " to ensure manually that it runs only on one"
6367 " and restart this operation.")
6368
6369 if not (runningon_source or runningon_target):
6370 raise errors.OpExecError("Instance does not seem to be running at all."
6371 " In this case, it's safer to repair by"
6372 " running 'gnt-instance stop' to ensure disk"
6373 " shutdown, and then restarting it.")
6374
6375 if runningon_target:
6376
6377 self.feedback_fn("* instance running on secondary node (%s),"
6378 " updating config" % target_node)
6379 instance.primary_node = target_node
6380 self.cfg.Update(instance, self.feedback_fn)
6381 demoted_node = source_node
6382 else:
6383 self.feedback_fn("* instance confirmed to be running on its"
6384 " primary node (%s)" % source_node)
6385 demoted_node = target_node
6386
6387 self._EnsureSecondary(demoted_node)
6388 try:
6389 self._WaitUntilSync()
6390 except errors.OpExecError:
6391
6392
6393 pass
6394 self._GoStandalone()
6395 self._GoReconnect(False)
6396 self._WaitUntilSync()
6397
6398 self.feedback_fn("* done")
6399
6401 """Try to revert the disk status after a failed migration.
6402
6403 """
6404 target_node = self.target_node
6405 try:
6406 self._EnsureSecondary(target_node)
6407 self._GoStandalone()
6408 self._GoReconnect(False)
6409 self._WaitUntilSync()
6410 except errors.OpExecError, err:
6411 self.lu.LogWarning("Migration failed and I can't reconnect the"
6412 " drives: error '%s'\n"
6413 "Please check and recover the instance status manually" %
6414 str(err))
6415
6417 """Call the hypervisor code to abort a started migration.
6418
6419 """
6420 instance = self.instance
6421 target_node = self.target_node
6422 migration_info = self.migration_info
6423
6424 abort_result = self.rpc.call_finalize_migration(target_node,
6425 instance,
6426 migration_info,
6427 False)
6428 abort_msg = abort_result.fail_msg
6429 if abort_msg:
6430 logging.error("Aborting migration failed on target node %s: %s",
6431 target_node, abort_msg)
6432
6433
6434
6436 """Migrate an instance.
6437
6438 The migrate is done by:
6439 - change the disks into dual-master mode
6440 - wait until disks are fully synchronized again
6441 - migrate the instance
6442 - change disks on the new secondary node (the old primary) to secondary
6443 - wait until disks are fully synchronized
6444 - change disks into single-master mode
6445
6446 """
6447 instance = self.instance
6448 target_node = self.target_node
6449 source_node = self.source_node
6450
6451 self.feedback_fn("* checking disk consistency between source and target")
6452 for dev in instance.disks:
6453 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6454 raise errors.OpExecError("Disk %s is degraded or not fully"
6455 " synchronized on target node,"
6456 " aborting migration." % dev.iv_name)
6457
6458
6459 result = self.rpc.call_migration_info(source_node, instance)
6460 msg = result.fail_msg
6461 if msg:
6462 log_err = ("Failed fetching source migration information from %s: %s" %
6463 (source_node, msg))
6464 logging.error(log_err)
6465 raise errors.OpExecError(log_err)
6466
6467 self.migration_info = migration_info = result.payload
6468
6469
6470 self._EnsureSecondary(target_node)
6471 self._GoStandalone()
6472 self._GoReconnect(True)
6473 self._WaitUntilSync()
6474
6475 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6476 result = self.rpc.call_accept_instance(target_node,
6477 instance,
6478 migration_info,
6479 self.nodes_ip[target_node])
6480
6481 msg = result.fail_msg
6482 if msg:
6483 logging.error("Instance pre-migration failed, trying to revert"
6484 " disk status: %s", msg)
6485 self.feedback_fn("Pre-migration failed, aborting")
6486 self._AbortMigration()
6487 self._RevertDiskStatus()
6488 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6489 (instance.name, msg))
6490
6491 self.feedback_fn("* migrating instance to %s" % target_node)
6492 time.sleep(10)
6493 result = self.rpc.call_instance_migrate(source_node, instance,
6494 self.nodes_ip[target_node],
6495 self.live)
6496 msg = result.fail_msg
6497 if msg:
6498 logging.error("Instance migration failed, trying to revert"
6499 " disk status: %s", msg)
6500 self.feedback_fn("Migration failed, aborting")
6501 self._AbortMigration()
6502 self._RevertDiskStatus()
6503 raise errors.OpExecError("Could not migrate instance %s: %s" %
6504 (instance.name, msg))
6505 time.sleep(10)
6506
6507 instance.primary_node = target_node
6508
6509 self.cfg.Update(instance, self.feedback_fn)
6510
6511 result = self.rpc.call_finalize_migration(target_node,
6512 instance,
6513 migration_info,
6514 True)
6515 msg = result.fail_msg
6516 if msg:
6517 logging.error("Instance migration succeeded, but finalization failed:"
6518 " %s", msg)
6519 raise errors.OpExecError("Could not finalize instance migration: %s" %
6520 msg)
6521
6522 self._EnsureSecondary(source_node)
6523 self._WaitUntilSync()
6524 self._GoStandalone()
6525 self._GoReconnect(False)
6526 self._WaitUntilSync()
6527
6528 self.feedback_fn("* done")
6529
6530 - def Exec(self, feedback_fn):
6531 """Perform the migration.
6532
6533 """
6534 feedback_fn("Migrating instance %s" % self.instance.name)
6535
6536 self.feedback_fn = feedback_fn
6537
6538 self.source_node = self.instance.primary_node
6539 self.target_node = self.instance.secondary_nodes[0]
6540 self.all_nodes = [self.source_node, self.target_node]
6541 self.nodes_ip = {
6542 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6543 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6544 }
6545
6546 if self.cleanup:
6547 return self._ExecCleanup()
6548 else:
6549 return self._ExecMigration()
6550
6551
6552 -def _CreateBlockDev(lu, node, instance, device, force_create,
6553 info, force_open):
6554 """Create a tree of block devices on a given node.
6555
6556 If this device type has to be created on secondaries, create it and
6557 all its children.
6558
6559 If not, just recurse to children keeping the same 'force' value.
6560
6561 @param lu: the lu on whose behalf we execute
6562 @param node: the node on which to create the device
6563 @type instance: L{objects.Instance}
6564 @param instance: the instance which owns the device
6565 @type device: L{objects.Disk}
6566 @param device: the device to create
6567 @type force_create: boolean
6568 @param force_create: whether to force creation of this device; this
6569 will be changed to True whenever we find a device for which
6570 CreateOnSecondary() returns True
6571 @param info: the extra 'metadata' we should attach to the device
6572 (this will be represented as a LVM tag)
6573 @type force_open: boolean
6574 @param force_open: this parameter will be passed to the
6575 L{backend.BlockdevCreate} function where it specifies
6576 whether we run on primary or not, and it affects both
6577 the child assembly and the device's own Open() execution
6578
6579 """
6580 if device.CreateOnSecondary():
6581 force_create = True
6582
6583 if device.children:
6584 for child in device.children:
6585 _CreateBlockDev(lu, node, instance, child, force_create,
6586 info, force_open)
6587
6588 if not force_create:
6589 return
6590
6591 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6592
6595 """Create a single block device on a given node.
6596
6597 This will not recurse over children of the device, so they must be
6598 created in advance.
6599
6600 @param lu: the lu on whose behalf we execute
6601 @param node: the node on which to create the device
6602 @type instance: L{objects.Instance}
6603 @param instance: the instance which owns the device
6604 @type device: L{objects.Disk}
6605 @param device: the device to create
6606 @param info: the extra 'metadata' we should attach to the device
6607 (this will be represented as a LVM tag)
6608 @type force_open: boolean
6609 @param force_open: this parameter will be passed to the
6610 L{backend.BlockdevCreate} function where it specifies
6611 whether we run on primary or not, and it affects both
6612 the child assembly and the device's own Open() execution
6613
6614 """
6615 lu.cfg.SetDiskID(device, node)
6616 result = lu.rpc.call_blockdev_create(node, device, device.size,
6617 instance.name, force_open, info)
6618 result.Raise("Can't create block device %s on"
6619 " node %s for instance %s" % (device, node, instance.name))
6620 if device.physical_id is None:
6621 device.physical_id = result.payload
6622
6625 """Generate suitable LV names.
6626
6627 This will generate logical volume names for the given instance.
6628
6629 """
6630 results = []
6631 for val in exts:
6632 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6633 results.append("%s%s" % (new_id, val))
6634 return results
6635
6636
6637 -def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6638 iv_name, p_minor, s_minor):
6639 """Generate a drbd8 device complete with its children.
6640
6641 """
6642 assert len(vgnames) == len(names) == 2
6643 port = lu.cfg.AllocatePort()
6644 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6645 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6646 logical_id=(vgnames[0], names[0]))
6647 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6648 logical_id=(vgnames[1], names[1]))
6649 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6650 logical_id=(primary, secondary, port,
6651 p_minor, s_minor,
6652 shared_secret),
6653 children=[dev_data, dev_meta],
6654 iv_name=iv_name)
6655 return drbd_dev
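# Rough sketch of the disk tree returned above for a single 10 GiB disk: an
# LD_DRBD8 device with the data LV and a fixed 128 MiB metadata LV as its
# children. Node/VG names, the port, the minors and the secret shown here are
# made-up illustration values; the real ones come from the cluster
# configuration.
#
#   drbd8  size=10240  iv_name="disk/0"
#     logical_id=("node1", "node2", 11000, 0, 0, "<generated secret>")
#     children:
#       lv  size=10240  logical_id=("xenvg", "<uuid>.disk0_data")
#       lv  size=128    logical_id=("xenvg", "<uuid>.disk0_meta")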
6656
6657
6658 -def _GenerateDiskTemplate(lu, template_name,
6659 instance_name, primary_node,
6660 secondary_nodes, disk_info,
6661 file_storage_dir, file_driver,
6662 base_index, feedback_fn):
6663 """Generate the entire disk layout for a given template type.
6664
6665 """
6666
6667
6668 vgname = lu.cfg.GetVGName()
6669 disk_count = len(disk_info)
6670 disks = []
6671 if template_name == constants.DT_DISKLESS:
6672 pass
6673 elif template_name == constants.DT_PLAIN:
6674 if len(secondary_nodes) != 0:
6675 raise errors.ProgrammerError("Wrong template configuration")
6676
6677 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6678 for i in range(disk_count)])
6679 for idx, disk in enumerate(disk_info):
6680 disk_index = idx + base_index
6681 vg = disk.get("vg", vgname)
6682 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6683 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6684 logical_id=(vg, names[idx]),
6685 iv_name="disk/%d" % disk_index,
6686 mode=disk["mode"])
6687 disks.append(disk_dev)
6688 elif template_name == constants.DT_DRBD8:
6689 if len(secondary_nodes) != 1:
6690 raise errors.ProgrammerError("Wrong template configuration")
6691 remote_node = secondary_nodes[0]
6692 minors = lu.cfg.AllocateDRBDMinor(
6693 [primary_node, remote_node] * len(disk_info), instance_name)
6694
6695 names = []
6696 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6697 for i in range(disk_count)]):
6698 names.append(lv_prefix + "_data")
6699 names.append(lv_prefix + "_meta")
6700 for idx, disk in enumerate(disk_info):
6701 disk_index = idx + base_index
6702 data_vg = disk.get("vg", vgname)
6703 meta_vg = disk.get("metavg", data_vg)
6704 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6705 disk["size"], [data_vg, meta_vg],
6706 names[idx*2:idx*2+2],
6707 "disk/%d" % disk_index,
6708 minors[idx*2], minors[idx*2+1])
6709 disk_dev.mode = disk["mode"]
6710 disks.append(disk_dev)
6711 elif template_name == constants.DT_FILE:
6712 if len(secondary_nodes) != 0:
6713 raise errors.ProgrammerError("Wrong template configuration")
6714
6715 opcodes.RequireFileStorage()
6716
6717 for idx, disk in enumerate(disk_info):
6718 disk_index = idx + base_index
6719 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6720 iv_name="disk/%d" % disk_index,
6721 logical_id=(file_driver,
6722 "%s/disk%d" % (file_storage_dir,
6723 disk_index)),
6724 mode=disk["mode"])
6725 disks.append(disk_dev)
6726 else:
6727 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6728 return disks
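# The disk_info argument is a list of dicts carrying at least "size" (in MiB)
# and "mode", plus optional "vg"/"metavg" overrides for the LVM-backed
# templates. A hypothetical two-disk call (mode values shown as plain strings
# for illustration):
#
#   disk_info = [{"size": 10240, "mode": "rw"},
#                {"size": 2048, "mode": "rw", "vg": "fastvg"}]
#   disks = _GenerateDiskTemplate(lu, constants.DT_DRBD8, "inst1.example.com",
#                                 "node1", ["node2"], disk_info,
#                                 None, None, 0, feedback_fn)
#
# For DT_DRBD8 this yields one DRBD8 branch per entry, with LVs named
# "<uuid>.disk0_data"/"<uuid>.disk0_meta" and so on.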
6729
6730
6731 -def _GetInstanceInfoText(instance):
6732 """Compute the text that should be added to the disk's metadata.
6733
6734 """
6735 return "originstname+%s" % instance.name
6736
6737
6738 -def _CalcEta(time_taken, written, total_size):
6739 """Calculates the ETA based on size written and total size.
6740
6741 @param time_taken: The time taken so far
6742 @param written: amount written so far
6743 @param total_size: The total size of data to be written
6744 @return: The remaining time in seconds
6745
6746 """
6747 avg_time = time_taken / float(written)
6748 return (total_size - written) * avg_time
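# Worked example of the formula above, with hypothetical numbers: after 120
# seconds we have written 1024 MiB of a 10240 MiB total, so the average rate
# implies 1080 seconds remaining (the helper assumes written > 0, otherwise
# the division fails):
#
#   _CalcEta(120.0, 1024, 10240)
#     == (10240 - 1024) * (120.0 / 1024)
#     == 9216 * 0.1171875
#     == 1080.0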
6749
6752 """Wipes instance disks.
6753
6754 @type lu: L{LogicalUnit}
6755 @param lu: the logical unit on whose behalf we execute
6756 @type instance: L{objects.Instance}
6757 @param instance: the instance whose disks we should wipe
6758 @return: the success of the wipe
6759
6760 """
6761 node = instance.primary_node
6762
6763 for device in instance.disks:
6764 lu.cfg.SetDiskID(device, node)
6765
6766 logging.info("Pause sync of instance %s disks", instance.name)
6767 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6768
6769 for idx, success in enumerate(result.payload):
6770 if not success:
6771 logging.warn("pause-sync of instance %s for disk %d failed",
6772 instance.name, idx)
6773
6774 try:
6775 for idx, device in enumerate(instance.disks):
6776
6777
6778 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6779 constants.MIN_WIPE_CHUNK_PERCENT)
6780
6781
6782 wipe_chunk_size = int(wipe_chunk_size)
6783
6784 lu.LogInfo("* Wiping disk %d", idx)
6785 logging.info("Wiping disk %d for instance %s, node %s using"
6786 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
6787
6788 offset = 0
6789 size = device.size
6790 last_output = 0
6791 start_time = time.time()
6792
6793 while offset < size:
6794 wipe_size = min(wipe_chunk_size, size - offset)
6795 logging.debug("Wiping disk %d, offset %s, chunk %s",
6796 idx, offset, wipe_size)
6797 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6798 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6799 (idx, offset, wipe_size))
6800 now = time.time()
6801 offset += wipe_size
6802 if now - last_output >= 60:
6803 eta = _CalcEta(now - start_time, offset, size)
6804 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6805 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6806 last_output = now
6807 finally:
6808 logging.info("Resume sync of instance %s disks", instance.name)
6809
6810 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6811
6812 for idx, success in enumerate(result.payload):
6813 if not success:
6814 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6815 " look at the status and troubleshoot the issue.", idx)
6816 logging.warn("resume-sync of instance %s for disk %d failed",
6817 instance.name, idx)
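# Sketch of the chunk-size arithmetic used in _WipeDisks above, with
# hypothetical values for the two constants (the real MAX_WIPE_CHUNK and
# MIN_WIPE_CHUNK_PERCENT live in constants.py): each chunk is a fixed
# percentage of the disk, capped at an absolute maximum.
MAX_WIPE_CHUNK = 1024          # hypothetical cap, in MiB
MIN_WIPE_CHUNK_PERCENT = 10    # hypothetical percentage

def _WipeChunkSize(disk_size):
  """Return the wipe chunk size (MiB) for a disk of disk_size MiB."""
  return int(min(MAX_WIPE_CHUNK, disk_size / 100.0 * MIN_WIPE_CHUNK_PERCENT))

# _WipeChunkSize(2048) == 204    (10% of a small disk)
# _WipeChunkSize(51200) == 1024  (capped for a large disk)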
6818
6819
6820 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6821 """Create all disks for an instance.
6822
6823 This abstracts away some work from AddInstance.
6824
6825 @type lu: L{LogicalUnit}
6826 @param lu: the logical unit on whose behalf we execute
6827 @type instance: L{objects.Instance}
6828 @param instance: the instance whose disks we should create
6829 @type to_skip: list
6830 @param to_skip: list of indices to skip
6831 @type target_node: string
6832 @param target_node: if passed, overrides the target node for creation
6833 @rtype: boolean
6834 @return: the success of the creation
6835
6836 """
6837 info = _GetInstanceInfoText(instance)
6838 if target_node is None:
6839 pnode = instance.primary_node
6840 all_nodes = instance.all_nodes
6841 else:
6842 pnode = target_node
6843 all_nodes = [pnode]
6844
6845 if instance.disk_template == constants.DT_FILE:
6846 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6847 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6848
6849 result.Raise("Failed to create directory '%s' on"
6850 " node %s" % (file_storage_dir, pnode))
6851
6852
6853
6854 for idx, device in enumerate(instance.disks):
6855 if to_skip and idx in to_skip:
6856 continue
6857 logging.info("Creating volume %s for instance %s",
6858 device.iv_name, instance.name)
6859
6860 for node in all_nodes:
6861 f_create = node == pnode
6862 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6863
6866 """Remove all disks for an instance.
6867
6868 This abstracts away some work from `AddInstance()` and
6869 `RemoveInstance()`. Note that in case some of the devices couldn't
6870 be removed, the removal will continue with the other ones (compare
6871 with `_CreateDisks()`).
6872
6873 @type lu: L{LogicalUnit}
6874 @param lu: the logical unit on whose behalf we execute
6875 @type instance: L{objects.Instance}
6876 @param instance: the instance whose disks we should remove
6877 @type target_node: string
6878 @param target_node: used to override the node on which to remove the disks
6879 @rtype: boolean
6880 @return: the success of the removal
6881
6882 """
6883 logging.info("Removing block devices for instance %s", instance.name)
6884
6885 all_result = True
6886 for device in instance.disks:
6887 if target_node:
6888 edata = [(target_node, device)]
6889 else:
6890 edata = device.ComputeNodeTree(instance.primary_node)
6891 for node, disk in edata:
6892 lu.cfg.SetDiskID(disk, node)
6893 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6894 if msg:
6895 lu.LogWarning("Could not remove block device %s on node %s,"
6896 " continuing anyway: %s", device.iv_name, node, msg)
6897 all_result = False
6898
6899 if instance.disk_template == constants.DT_FILE:
6900 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6901 if target_node:
6902 tgt = target_node
6903 else:
6904 tgt = instance.primary_node
6905 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6906 if result.fail_msg:
6907 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6908 file_storage_dir, tgt, result.fail_msg)
6909 all_result = False
6910
6911 return all_result
6912
6915 """Compute disk size requirements per volume group
6916
6917 """
6918 def _compute(disks, payload):
6919 """Sum disk sizes (plus the per-disk payload) for each volume group.
6920
6921 """
6922 vgs = {}
6923 for disk in disks:
6924 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6925
6926 return vgs
6927
6928
6929 req_size_dict = {
6930 constants.DT_DISKLESS: {},
6931 constants.DT_PLAIN: _compute(disks, 0),
6932
6933 constants.DT_DRBD8: _compute(disks, 128),
6934 constants.DT_FILE: {},
6935 }
6936
6937 if disk_template not in req_size_dict:
6938 raise errors.ProgrammerError("Disk template '%s' size requirement"
6939 " is unknown" % disk_template)
6940
6941 return req_size_dict[disk_template]
6942
6945 """Compute disk size requirements in the volume group
6946
6947 """
6948
6949 req_size_dict = {
6950 constants.DT_DISKLESS: None,
6951 constants.DT_PLAIN: sum(d["size"] for d in disks),
6952
6953 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6954 constants.DT_FILE: None,
6955 }
6956
6957 if disk_template not in req_size_dict:
6958 raise errors.ProgrammerError("Disk template '%s' size requirement"
6959 " is unknown" % disk_template)
6960
6961 return req_size_dict[disk_template]
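# Worked example contrasting the two size helpers above, with hypothetical
# disks (sizes in MiB, VG names illustrative); DRBD8 adds 128 MiB of metadata
# per disk, and the per-VG variant groups the sums by each disk's own "vg"
# key:
#
#   disks = [{"size": 1024, "vg": "xenvg"},
#            {"size": 2048, "vg": "xenvg"},
#            {"size": 512, "vg": "fastvg"}]
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#     == {"xenvg": (1024 + 128) + (2048 + 128), "fastvg": 512 + 128}
#     == {"xenvg": 3328, "fastvg": 640}
#
#   _ComputeDiskSize(constants.DT_DRBD8, disks)
#     == (1024 + 128) + (2048 + 128) + (512 + 128) == 3968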
6962
6965 """Filters out non-vm_capable nodes from a list.
6966
6967 @type lu: L{LogicalUnit}
6968 @param lu: the logical unit for which we check
6969 @type nodenames: list
6970 @param nodenames: the list of nodes on which we should check
6971 @rtype: list
6972 @return: the list of vm-capable nodes
6973
6974 """
6975 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
6976 return [name for name in nodenames if name not in non_vm_nodes]
6977
6980 """Hypervisor parameter validation.
6981
6982 This function abstracts the hypervisor parameter validation to be
6983 used in both instance create and instance modify.
6984
6985 @type lu: L{LogicalUnit}
6986 @param lu: the logical unit for which we check
6987 @type nodenames: list
6988 @param nodenames: the list of nodes on which we should check
6989 @type hvname: string
6990 @param hvname: the name of the hypervisor we should use
6991 @type hvparams: dict
6992 @param hvparams: the parameters which we need to check
6993 @raise errors.OpPrereqError: if the parameters are not valid
6994
6995 """
6996 nodenames = _FilterVmNodes(lu, nodenames)
6997 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6998 hvname,
6999 hvparams)
7000 for node in nodenames:
7001 info = hvinfo[node]
7002 if info.offline:
7003 continue
7004 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7005
7008 """OS parameters validation.
7009
7010 @type lu: L{LogicalUnit}
7011 @param lu: the logical unit for which we check
7012 @type required: boolean
7013 @param required: whether the validation should fail if the OS is not
7014 found
7015 @type nodenames: list
7016 @param nodenames: the list of nodes on which we should check
7017 @type osname: string
7018 @param osname: the name of the OS we should use
7019 @type osparams: dict
7020 @param osparams: the parameters which we need to check
7021 @raise errors.OpPrereqError: if the parameters are not valid
7022
7023 """
7024 nodenames = _FilterVmNodes(lu, nodenames)
7025 result = lu.rpc.call_os_validate(required, nodenames, osname,
7026 [constants.OS_VALIDATE_PARAMETERS],
7027 osparams)
7028 for node, nres in result.items():
7029
7030
7031 nres.Raise("OS Parameters validation failed on node %s" % node)
7032 if not nres.payload:
7033 lu.LogInfo("OS %s not found on node %s, validation skipped",
7034 osname, node)
7035
7038 """Create an instance.
7039
7040 """
7041 HPATH = "instance-add"
7042 HTYPE = constants.HTYPE_INSTANCE
7043 REQ_BGL = False
7044
7046 """Check arguments.
7047
7048 """
7049
7050
7051 if self.op.no_install and self.op.start:
7052 self.LogInfo("No-installation mode selected, disabling startup")
7053 self.op.start = False
7054
7055 self.op.instance_name = \
7056 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7057
7058 if self.op.ip_check and not self.op.name_check:
7059
7060 raise errors.OpPrereqError("Cannot do ip check without a name check",
7061 errors.ECODE_INVAL)
7062
7063
7064 for nic in self.op.nics:
7065 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7066
7067
7068 has_adopt = has_no_adopt = False
7069 for disk in self.op.disks:
7070 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7071 if "adopt" in disk:
7072 has_adopt = True
7073 else:
7074 has_no_adopt = True
7075 if has_adopt and has_no_adopt:
7076 raise errors.OpPrereqError("Either all disks are adopted or none is",
7077 errors.ECODE_INVAL)
7078 if has_adopt:
7079 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7080 raise errors.OpPrereqError("Disk adoption is not supported for the"
7081 " '%s' disk template" %
7082 self.op.disk_template,
7083 errors.ECODE_INVAL)
7084 if self.op.iallocator is not None:
7085 raise errors.OpPrereqError("Disk adoption not allowed with an"
7086 " iallocator script", errors.ECODE_INVAL)
7087 if self.op.mode == constants.INSTANCE_IMPORT:
7088 raise errors.OpPrereqError("Disk adoption not allowed for"
7089 " instance import", errors.ECODE_INVAL)
7090
7091 self.adopt_disks = has_adopt
7092
7093
7094 if self.op.name_check:
7095 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7096 self.op.instance_name = self.hostname1.name
7097
7098 self.check_ip = self.hostname1.ip
7099 else:
7100 self.check_ip = None
7101
7102
7103 if (self.op.file_driver and
7104 self.op.file_driver not in constants.FILE_DRIVER):
7105 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7106 self.op.file_driver, errors.ECODE_INVAL)
7107
7108 if self.op.disk_template == constants.DT_FILE:
7109 opcodes.RequireFileStorage()
7110
7111
7112 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7113
7114 if self.op.pnode is not None:
7115 if self.op.disk_template in constants.DTS_NET_MIRROR:
7116 if self.op.snode is None:
7117 raise errors.OpPrereqError("The networked disk templates need"
7118 " a mirror node", errors.ECODE_INVAL)
7119 elif self.op.snode:
7120 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7121 " template")
7122 self.op.snode = None
7123
7124 self._cds = _GetClusterDomainSecret()
7125
7126 if self.op.mode == constants.INSTANCE_IMPORT:
7127
7128
7129
7130 self.op.force_variant = True
7131
7132 if self.op.no_install:
7133 self.LogInfo("No-installation mode has no effect during import")
7134
7135 elif self.op.mode == constants.INSTANCE_CREATE:
7136 if self.op.os_type is None:
7137 raise errors.OpPrereqError("No guest OS specified",
7138 errors.ECODE_INVAL)
7139 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7140 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7141 " installation" % self.op.os_type,
7142 errors.ECODE_STATE)
7143 if self.op.disk_template is None:
7144 raise errors.OpPrereqError("No disk template specified",
7145 errors.ECODE_INVAL)
7146
7147 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7148
7149 src_handshake = self.op.source_handshake
7150 if not src_handshake:
7151 raise errors.OpPrereqError("Missing source handshake",
7152 errors.ECODE_INVAL)
7153
7154 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7155 src_handshake)
7156 if errmsg:
7157 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7158 errors.ECODE_INVAL)
7159
7160
7161 self.source_x509_ca_pem = self.op.source_x509_ca
7162 if not self.source_x509_ca_pem:
7163 raise errors.OpPrereqError("Missing source X509 CA",
7164 errors.ECODE_INVAL)
7165
7166 try:
7167 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7168 self._cds)
7169 except OpenSSL.crypto.Error, err:
7170 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7171 (err, ), errors.ECODE_INVAL)
7172
7173 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7174 if errcode is not None:
7175 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7176 errors.ECODE_INVAL)
7177
7178 self.source_x509_ca = cert
7179
7180 src_instance_name = self.op.source_instance_name
7181 if not src_instance_name:
7182 raise errors.OpPrereqError("Missing source instance name",
7183 errors.ECODE_INVAL)
7184
7185 self.source_instance_name = \
7186 netutils.GetHostname(name=src_instance_name).name
7187
7188 else:
7189 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7190 self.op.mode, errors.ECODE_INVAL)
7191
7241
7243 """Run the allocator based on input opcode.
7244
7245 """
7246 nics = [n.ToDict() for n in self.nics]
7247 ial = IAllocator(self.cfg, self.rpc,
7248 mode=constants.IALLOCATOR_MODE_ALLOC,
7249 name=self.op.instance_name,
7250 disk_template=self.op.disk_template,
7251 tags=[],
7252 os=self.op.os_type,
7253 vcpus=self.be_full[constants.BE_VCPUS],
7254 mem_size=self.be_full[constants.BE_MEMORY],
7255 disks=self.disks,
7256 nics=nics,
7257 hypervisor=self.op.hypervisor,
7258 )
7259
7260 ial.Run(self.op.iallocator)
7261
7262 if not ial.success:
7263 raise errors.OpPrereqError("Can't compute nodes using"
7264 " iallocator '%s': %s" %
7265 (self.op.iallocator, ial.info),
7266 errors.ECODE_NORES)
7267 if len(ial.result) != ial.required_nodes:
7268 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7269 " of nodes (%s), required %s" %
7270 (self.op.iallocator, len(ial.result),
7271 ial.required_nodes), errors.ECODE_FAULT)
7272 self.op.pnode = ial.result[0]
7273 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7274 self.op.instance_name, self.op.iallocator,
7275 utils.CommaJoin(ial.result))
7276 if ial.required_nodes == 2:
7277 self.op.snode = ial.result[1]
7278
7280 """Build hooks env.
7281
7282 This runs on master, primary and secondary nodes of the instance.
7283
7284 """
7285 env = {
7286 "ADD_MODE": self.op.mode,
7287 }
7288 if self.op.mode == constants.INSTANCE_IMPORT:
7289 env["SRC_NODE"] = self.op.src_node
7290 env["SRC_PATH"] = self.op.src_path
7291 env["SRC_IMAGES"] = self.src_images
7292
7293 env.update(_BuildInstanceHookEnv(
7294 name=self.op.instance_name,
7295 primary_node=self.op.pnode,
7296 secondary_nodes=self.secondaries,
7297 status=self.op.start,
7298 os_type=self.op.os_type,
7299 memory=self.be_full[constants.BE_MEMORY],
7300 vcpus=self.be_full[constants.BE_VCPUS],
7301 nics=_NICListToTuple(self, self.nics),
7302 disk_template=self.op.disk_template,
7303 disks=[(d["size"], d["mode"]) for d in self.disks],
7304 bep=self.be_full,
7305 hvp=self.hv_full,
7306 hypervisor_name=self.op.hypervisor,
7307 ))
7308
7309 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7310 self.secondaries)
7311 return env, nl, nl
7312
7359
7361 """Use export parameters as defaults.
7362
7363 If the opcode doesn't specify (i.e. override) some instance
7364 parameters, try to take them from the export information, if
7365 it declares them.
7366
7367 """
7368 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7369
7370 if self.op.disk_template is None:
7371 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7372 self.op.disk_template = einfo.get(constants.INISECT_INS,
7373 "disk_template")
7374 else:
7375 raise errors.OpPrereqError("No disk template specified and the export"
7376 " is missing the disk_template information",
7377 errors.ECODE_INVAL)
7378
7379 if not self.op.disks:
7380 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7381 disks = []
7382
7383 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7384 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7385 disks.append({"size": disk_sz})
7386 self.op.disks = disks
7387 else:
7388 raise errors.OpPrereqError("No disk info specified and the export"
7389 " is missing the disk information",
7390 errors.ECODE_INVAL)
7391
7392 if (not self.op.nics and
7393 einfo.has_option(constants.INISECT_INS, "nic_count")):
7394 nics = []
7395 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7396 ndict = {}
7397 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7398 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7399 ndict[name] = v
7400 nics.append(ndict)
7401 self.op.nics = nics
7402
7403 if (self.op.hypervisor is None and
7404 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7405 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7406 if einfo.has_section(constants.INISECT_HYP):
7407
7408
7409 for name, value in einfo.items(constants.INISECT_HYP):
7410 if name not in self.op.hvparams:
7411 self.op.hvparams[name] = value
7412
7413 if einfo.has_section(constants.INISECT_BEP):
7414
7415 for name, value in einfo.items(constants.INISECT_BEP):
7416 if name not in self.op.beparams:
7417 self.op.beparams[name] = value
7418 else:
7419
7420 for name in constants.BES_PARAMETERS:
7421 if (name not in self.op.beparams and
7422 einfo.has_option(constants.INISECT_INS, name)):
7423 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7424
7425 if einfo.has_section(constants.INISECT_OSP):
7426
7427 for name, value in einfo.items(constants.INISECT_OSP):
7428 if name not in self.op.osparams:
7429 self.op.osparams[name] = value
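# The export information read above behaves like a ConfigParser object
# (get/getint/has_option/has_section/items). A sketch of the kind of data it
# carries; the section names here are hypothetical stand-ins for the real
# constants.INISECT_* values, and the option values are invented:
#
#   [export]
#   os = debian-image
#
#   [instance]
#   disk_template = drbd
#   disk_count = 2
#   disk0_size = 10240
#   disk1_size = 2048
#   nic_count = 1
#   nic0_mac = aa:00:00:11:22:33
#   nic0_ip = None
#
#   [hypervisor]
#   kernel_path = /boot/vmlinuz
#
# Values already present in the opcode always win; the export data only fills
# in parameters the user left unspecified.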
7430
7432 """Revert the instance parameters to the default values.
7433
7434 """
7435
7436 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7437 for name in self.op.hvparams.keys():
7438 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7439 del self.op.hvparams[name]
7440
7441 be_defs = cluster.SimpleFillBE({})
7442 for name in self.op.beparams.keys():
7443 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7444 del self.op.beparams[name]
7445
7446 nic_defs = cluster.SimpleFillNIC({})
7447 for nic in self.op.nics:
7448 for name in constants.NICS_PARAMETERS:
7449 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7450 del nic[name]
7451
7452 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7453 for name in self.op.osparams.keys():
7454 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7455 del self.op.osparams[name]
7456
7458 """Calculate final instance file storage dir.
7459
7460 """
7461
7462 self.instance_file_storage_dir = None
7463 if self.op.disk_template == constants.DT_FILE:
7464
7465 joinargs = []
7466
7467 cfg_storagedir = self.cfg.GetFileStorageDir()
7468 if not cfg_storagedir:
7469 raise errors.OpPrereqError("Cluster file storage dir not defined")
7470 joinargs.append(cfg_storagedir)
7471
7472 if self.op.file_storage_dir is not None:
7473 joinargs.append(self.op.file_storage_dir)
7474
7475 joinargs.append(self.op.instance_name)
7476
7477
7478 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
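# Sketch of the resulting path for a file-based instance, with hypothetical
# values: a cluster file storage dir of "/srv/ganeti/file-storage", an
# optional per-instance subdirectory "web" and instance "inst1.example.com"
# give
#
#   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
#     == "/srv/ganeti/file-storage/web/inst1.example.com"
#
# When op.file_storage_dir is not set, the middle component is simply left
# out.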
7479
7481 """Check prerequisites.
7482
7483 """
7484 self._CalculateFileStorageDir()
7485
7486 if self.op.mode == constants.INSTANCE_IMPORT:
7487 export_info = self._ReadExportInfo()
7488 self._ReadExportParams(export_info)
7489
7490 if (not self.cfg.GetVGName() and
7491 self.op.disk_template not in constants.DTS_NOT_LVM):
7492 raise errors.OpPrereqError("Cluster does not support lvm-based"
7493 " instances", errors.ECODE_STATE)
7494
7495 if self.op.hypervisor is None:
7496 self.op.hypervisor = self.cfg.GetHypervisorType()
7497
7498 cluster = self.cfg.GetClusterInfo()
7499 enabled_hvs = cluster.enabled_hypervisors
7500 if self.op.hypervisor not in enabled_hvs:
7501 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7502 " cluster (%s)" % (self.op.hypervisor,
7503 ",".join(enabled_hvs)),
7504 errors.ECODE_STATE)
7505
7506
7507 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7508 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7509 self.op.hvparams)
7510 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7511 hv_type.CheckParameterSyntax(filled_hvp)
7512 self.hv_full = filled_hvp
7513
7514 _CheckGlobalHvParams(self.op.hvparams)
7515
7516
7517 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7518 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7519
7520
7521 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7522
7523
7524
7525 if self.op.identify_defaults:
7526 self._RevertToDefaults(cluster)
7527
7528
7529 self.nics = []
7530 for idx, nic in enumerate(self.op.nics):
7531 nic_mode_req = nic.get("mode", None)
7532 nic_mode = nic_mode_req
7533 if nic_mode is None:
7534 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7535
7536
7537 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7538 default_ip_mode = constants.VALUE_AUTO
7539 else:
7540 default_ip_mode = constants.VALUE_NONE
7541
7542
7543 ip = nic.get("ip", default_ip_mode)
7544 if ip is None or ip.lower() == constants.VALUE_NONE:
7545 nic_ip = None
7546 elif ip.lower() == constants.VALUE_AUTO:
7547 if not self.op.name_check:
7548 raise errors.OpPrereqError("IP address set to auto but name checks"
7549 " have been skipped",
7550 errors.ECODE_INVAL)
7551 nic_ip = self.hostname1.ip
7552 else:
7553 if not netutils.IPAddress.IsValid(ip):
7554 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7555 errors.ECODE_INVAL)
7556 nic_ip = ip
7557
7558
7559 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7560 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7561 errors.ECODE_INVAL)
7562
7563
7564 mac = nic.get("mac", constants.VALUE_AUTO)
7565 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7566 mac = utils.NormalizeAndValidateMac(mac)
7567
7568 try:
7569 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7570 except errors.ReservationError:
7571 raise errors.OpPrereqError("MAC address %s already in use"
7572 " in cluster" % mac,
7573 errors.ECODE_NOTUNIQUE)
7574
7575
7576 bridge = nic.get("bridge", None)
7577 link = nic.get("link", None)
7578 if bridge and link:
7579 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7580 " at the same time", errors.ECODE_INVAL)
7581 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7582 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7583 errors.ECODE_INVAL)
7584 elif bridge:
7585 link = bridge
7586
7587 nicparams = {}
7588 if nic_mode_req:
7589 nicparams[constants.NIC_MODE] = nic_mode_req
7590 if link:
7591 nicparams[constants.NIC_LINK] = link
7592
7593 check_params = cluster.SimpleFillNIC(nicparams)
7594 objects.NIC.CheckParameterSyntax(check_params)
7595 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7596
7597
7598 self.disks = []
7599 for disk in self.op.disks:
7600 mode = disk.get("mode", constants.DISK_RDWR)
7601 if mode not in constants.DISK_ACCESS_SET:
7602 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7603 mode, errors.ECODE_INVAL)
7604 size = disk.get("size", None)
7605 if size is None:
7606 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7607 try:
7608 size = int(size)
7609 except (TypeError, ValueError):
7610 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7611 errors.ECODE_INVAL)
7612 data_vg = disk.get("vg", self.cfg.GetVGName())
7613 meta_vg = disk.get("metavg", data_vg)
7614 new_disk = {"size": size, "mode": mode, "vg": data_vg, "metavg": meta_vg}
7615 if "adopt" in disk:
7616 new_disk["adopt"] = disk["adopt"]
7617 self.disks.append(new_disk)
7618
7619 if self.op.mode == constants.INSTANCE_IMPORT:
7620
7621
7622 instance_disks = len(self.disks)
7623 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7624 if instance_disks < export_disks:
7625 raise errors.OpPrereqError("Not enough disks to import."
7626 " (instance: %d, export: %d)" %
7627 (instance_disks, export_disks),
7628 errors.ECODE_INVAL)
7629
7630 disk_images = []
7631 for idx in range(export_disks):
7632 option = 'disk%d_dump' % idx
7633 if export_info.has_option(constants.INISECT_INS, option):
7634
7635 export_name = export_info.get(constants.INISECT_INS, option)
7636 image = utils.PathJoin(self.op.src_path, export_name)
7637 disk_images.append(image)
7638 else:
7639 disk_images.append(False)
7640
7641 self.src_images = disk_images
7642
7643 old_name = export_info.get(constants.INISECT_INS, 'name')
7644 try:
7645 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7646 except (TypeError, ValueError), err:
7647 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7648 " an integer: %s" % str(err),
7649 errors.ECODE_STATE)
7650 if self.op.instance_name == old_name:
7651 for idx, nic in enumerate(self.nics):
7652 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7653 nic_mac_ini = 'nic%d_mac' % idx
7654 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7655
7656
7657
7658
7659 if self.op.ip_check:
7660 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7661 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7662 (self.check_ip, self.op.instance_name),
7663 errors.ECODE_NOTUNIQUE)
7664
7665
7666
7667
7668
7669
7670
7671
7672
7673 for nic in self.nics:
7674 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7675 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7676
7677
7678
7679 if self.op.iallocator is not None:
7680 self._RunAllocator()
7681
7682
7683
7684
7685 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7686 assert self.pnode is not None, \
7687 "Cannot retrieve locked node %s" % self.op.pnode
7688 if pnode.offline:
7689 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7690 pnode.name, errors.ECODE_STATE)
7691 if pnode.drained:
7692 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7693 pnode.name, errors.ECODE_STATE)
7694 if not pnode.vm_capable:
7695 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7696 " '%s'" % pnode.name, errors.ECODE_STATE)
7697
7698 self.secondaries = []
7699
7700
7701 if self.op.disk_template in constants.DTS_NET_MIRROR:
7702 if self.op.snode == pnode.name:
7703 raise errors.OpPrereqError("The secondary node cannot be the"
7704 " primary node.", errors.ECODE_INVAL)
7705 _CheckNodeOnline(self, self.op.snode)
7706 _CheckNodeNotDrained(self, self.op.snode)
7707 _CheckNodeVmCapable(self, self.op.snode)
7708 self.secondaries.append(self.op.snode)
7709
7710 nodenames = [pnode.name] + self.secondaries
7711
7712 if not self.adopt_disks:
7713
7714 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7715 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7716
7717 else:
7718 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7719 if len(all_lvs) != len(self.disks):
7720 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7721 errors.ECODE_INVAL)
7722 for lv_name in all_lvs:
7723 try:
7724
7725
7726 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7727 except errors.ReservationError:
7728 raise errors.OpPrereqError("LV named %s used by another instance" %
7729 lv_name, errors.ECODE_NOTUNIQUE)
7730
7731 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7732 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7733
7734 node_lvs = self.rpc.call_lv_list([pnode.name],
7735 vg_names.payload.keys())[pnode.name]
7736 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7737 node_lvs = node_lvs.payload
7738
7739 delta = all_lvs.difference(node_lvs.keys())
7740 if delta:
7741 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7742 utils.CommaJoin(delta),
7743 errors.ECODE_INVAL)
7744 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7745 if online_lvs:
7746 raise errors.OpPrereqError("Online logical volumes found, cannot"
7747 " adopt: %s" % utils.CommaJoin(online_lvs),
7748 errors.ECODE_STATE)
7749
7750 for dsk in self.disks:
7751 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7752
7753 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7754
7755 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7756
7757 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7758
7759 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7760
7761
7762 if self.op.start:
7763 _CheckNodeFreeMemory(self, self.pnode.name,
7764 "creating instance %s" % self.op.instance_name,
7765 self.be_full[constants.BE_MEMORY],
7766 self.op.hypervisor)
7767
7768 self.dry_run_result = list(nodenames)
7769
7770 - def Exec(self, feedback_fn):
7771 """Create and add the instance to the cluster.
7772
7773 """
7774 instance = self.op.instance_name
7775 pnode_name = self.pnode.name
7776
7777 ht_kind = self.op.hypervisor
7778 if ht_kind in constants.HTS_REQ_PORT:
7779 network_port = self.cfg.AllocatePort()
7780 else:
7781 network_port = None
7782
7783 disks = _GenerateDiskTemplate(self,
7784 self.op.disk_template,
7785 instance, pnode_name,
7786 self.secondaries,
7787 self.disks,
7788 self.instance_file_storage_dir,
7789 self.op.file_driver,
7790 0,
7791 feedback_fn)
7792
7793 iobj = objects.Instance(name=instance, os=self.op.os_type,
7794 primary_node=pnode_name,
7795 nics=self.nics, disks=disks,
7796 disk_template=self.op.disk_template,
7797 admin_up=False,
7798 network_port=network_port,
7799 beparams=self.op.beparams,
7800 hvparams=self.op.hvparams,
7801 hypervisor=self.op.hypervisor,
7802 osparams=self.op.osparams,
7803 )
7804
7805 if self.adopt_disks:
7806
7807
7808 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7809 rename_to = []
7810 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7811 rename_to.append(t_dsk.logical_id)
7812 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7813 self.cfg.SetDiskID(t_dsk, pnode_name)
7814 result = self.rpc.call_blockdev_rename(pnode_name,
7815 zip(tmp_disks, rename_to))
7816 result.Raise("Failed to rename adopted LVs")
7817 else:
7818 feedback_fn("* creating instance disks...")
7819 try:
7820 _CreateDisks(self, iobj)
7821 except errors.OpExecError:
7822 self.LogWarning("Device creation failed, reverting...")
7823 try:
7824 _RemoveDisks(self, iobj)
7825 finally:
7826 self.cfg.ReleaseDRBDMinors(instance)
7827 raise
7828
7829 feedback_fn("adding instance %s to cluster config" % instance)
7830
7831 self.cfg.AddInstance(iobj, self.proc.GetECId())
7832
7833
7834
7835 del self.remove_locks[locking.LEVEL_INSTANCE]
7836
7837 if self.op.mode == constants.INSTANCE_IMPORT:
7838 nodes_keep = [self.op.src_node]
7839 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7840 if node != self.op.src_node]
7841 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7842 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7843 else:
7844 self.context.glm.release(locking.LEVEL_NODE)
7845 del self.acquired_locks[locking.LEVEL_NODE]
7846
7847 disk_abort = False
7848 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7849 feedback_fn("* wiping instance disks...")
7850 try:
7851 _WipeDisks(self, iobj)
7852 except errors.OpExecError, err:
7853 logging.exception("Wiping disks failed")
7854 self.LogWarning("Wiping instance disks failed (%s)", err)
7855 disk_abort = True
7856
7857 if disk_abort:
7858
7859 pass
7860 elif self.op.wait_for_sync:
7861 disk_abort = not _WaitForSync(self, iobj)
7862 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7863
7864 time.sleep(15)
7865 feedback_fn("* checking mirrors status")
7866 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7867 else:
7868 disk_abort = False
7869
7870 if disk_abort:
7871 _RemoveDisks(self, iobj)
7872 self.cfg.RemoveInstance(iobj.name)
7873
7874 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7875 raise errors.OpExecError("There are some degraded disks for"
7876 " this instance")
7877
7878 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7879 if self.op.mode == constants.INSTANCE_CREATE:
7880 if not self.op.no_install:
7881 feedback_fn("* running the instance OS create scripts...")
7882
7883 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7884 self.op.debug_level)
7885 result.Raise("Could not add os for instance %s"
7886 " on node %s" % (instance, pnode_name))
7887
7888 elif self.op.mode == constants.INSTANCE_IMPORT:
7889 feedback_fn("* running the instance OS import scripts...")
7890
7891 transfers = []
7892
7893 for idx, image in enumerate(self.src_images):
7894 if not image:
7895 continue
7896
7897
7898 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7899 constants.IEIO_FILE, (image, ),
7900 constants.IEIO_SCRIPT,
7901 (iobj.disks[idx], idx),
7902 None)
7903 transfers.append(dt)
7904
7905 import_result = \
7906 masterd.instance.TransferInstanceData(self, feedback_fn,
7907 self.op.src_node, pnode_name,
7908 self.pnode.secondary_ip,
7909 iobj, transfers)
7910 if not compat.all(import_result):
7911 self.LogWarning("Some disks for instance %s on node %s were not"
7912 " imported successfully" % (instance, pnode_name))
7913
7914 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7915 feedback_fn("* preparing remote import...")
7916
7917
7918
7919 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7920 self.op.source_shutdown_timeout)
7921 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7922
7923 assert iobj.primary_node == self.pnode.name
7924 disk_results = \
7925 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7926 self.source_x509_ca,
7927 self._cds, timeouts)
7928 if not compat.all(disk_results):
7929
7930
7931 self.LogWarning("Some disks for instance %s on node %s were not"
7932 " imported successfully" % (instance, pnode_name))
7933
7934
7935 assert iobj.name == instance
7936 feedback_fn("Running rename script for %s" % instance)
7937 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7938 self.source_instance_name,
7939 self.op.debug_level)
7940 if result.fail_msg:
7941 self.LogWarning("Failed to run rename script for %s on node"
7942 " %s: %s" % (instance, pnode_name, result.fail_msg))
7943
7944 else:
7945
7946 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7947 % self.op.mode)
7948
7949 if self.op.start:
7950 iobj.admin_up = True
7951 self.cfg.Update(iobj, feedback_fn)
7952 logging.info("Starting instance %s on node %s", instance, pnode_name)
7953 feedback_fn("* starting instance...")
7954 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7955 result.Raise("Could not start instance")
7956
7957 return list(iobj.all_nodes)
7958
7961 """Connect to an instance's console.
7962
7963 This is somewhat special in that it returns the command line that
7964 you need to run on the master node in order to connect to the
7965 console.
7966
7967 """
7968 REQ_BGL = False
7969
7972
7974 """Check prerequisites.
7975
7976 This checks that the instance is in the cluster.
7977
7978 """
7979 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7980 assert self.instance is not None, \
7981 "Cannot retrieve locked instance %s" % self.op.instance_name
7982 _CheckNodeOnline(self, self.instance.primary_node)
7983
7984 - def Exec(self, feedback_fn):
8006
8027
8030 """Replace the disks of an instance.
8031
8032 """
8033 HPATH = "mirrors-replace"
8034 HTYPE = constants.HTYPE_INSTANCE
8035 REQ_BGL = False
8036
8040
8067
8074
8076 """Build hooks env.
8077
8078 This runs on the master, the primary and all the secondaries.
8079
8080 """
8081 instance = self.replacer.instance
8082 env = {
8083 "MODE": self.op.mode,
8084 "NEW_SECONDARY": self.op.remote_node,
8085 "OLD_SECONDARY": instance.secondary_nodes[0],
8086 }
8087 env.update(_BuildInstanceHookEnvByObject(self, instance))
8088 nl = [
8089 self.cfg.GetMasterNode(),
8090 instance.primary_node,
8091 ]
8092 if self.op.remote_node is not None:
8093 nl.append(self.op.remote_node)
8094 return env, nl, nl
8095
8098 """Replaces disks for an instance.
8099
8100 Note: Locking is not within the scope of this class.
8101
8102 """
8103 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8104 disks, delay_iallocator, early_release):
8105 """Initializes this class.
8106
8107 """
8108 Tasklet.__init__(self, lu)
8109
8110
8111 self.instance_name = instance_name
8112 self.mode = mode
8113 self.iallocator_name = iallocator_name
8114 self.remote_node = remote_node
8115 self.disks = disks
8116 self.delay_iallocator = delay_iallocator
8117 self.early_release = early_release
8118
8119
8120 self.instance = None
8121 self.new_node = None
8122 self.target_node = None
8123 self.other_node = None
8124 self.remote_node_info = None
8125 self.node_secondary_ip = None
8126
8127 @staticmethod
8129 """Helper function for users of this class.
8130
8131 """
8132
8133 if mode == constants.REPLACE_DISK_CHG:
8134 if remote_node is None and iallocator is None:
8135 raise errors.OpPrereqError("When changing the secondary either an"
8136 " iallocator script must be used or the"
8137 " new node given", errors.ECODE_INVAL)
8138
8139 if remote_node is not None and iallocator is not None:
8140 raise errors.OpPrereqError("Give either the iallocator or the new"
8141 " secondary, not both", errors.ECODE_INVAL)
8142
8143 elif remote_node is not None or iallocator is not None:
8144
8145 raise errors.OpPrereqError("The iallocator and new node options can"
8146 " only be used when changing the"
8147 " secondary node", errors.ECODE_INVAL)
8148
8149 @staticmethod
8150 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8151 """Compute a new secondary node using an IAllocator.
8152
8153 """
8154 ial = IAllocator(lu.cfg, lu.rpc,
8155 mode=constants.IALLOCATOR_MODE_RELOC,
8156 name=instance_name,
8157 relocate_from=relocate_from)
8158
8159 ial.Run(iallocator_name)
8160
8161 if not ial.success:
8162 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8163 " %s" % (iallocator_name, ial.info),
8164 errors.ECODE_NORES)
8165
8166 if len(ial.result) != ial.required_nodes:
8167 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8168 " of nodes (%s), required %s" %
8169 (iallocator_name,
8170 len(ial.result), ial.required_nodes),
8171 errors.ECODE_FAULT)
8172
8173 remote_node_name = ial.result[0]
8174
8175 lu.LogInfo("Selected new secondary for instance '%s': %s",
8176 instance_name, remote_node_name)
8177
8178 return remote_node_name
8179
8183
8206
8208 """Check prerequisites, second part.
8209
8210 This function should always be part of CheckPrereq. It was separated and is
8211 now called from Exec because during node evacuation the iallocator was only
8212 called with an unmodified cluster model, not taking planned changes into
8213 account.
8214
8215 """
8216 instance = self.instance
8217 secondary_node = instance.secondary_nodes[0]
8218
8219 if self.iallocator_name is None:
8220 remote_node = self.remote_node
8221 else:
8222 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8223 instance.name, instance.secondary_nodes)
8224
8225 if remote_node is not None:
8226 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8227 assert self.remote_node_info is not None, \
8228 "Cannot retrieve locked node %s" % remote_node
8229 else:
8230 self.remote_node_info = None
8231
8232 if remote_node == self.instance.primary_node:
8233 raise errors.OpPrereqError("The specified node is the primary node of"
8234 " the instance.", errors.ECODE_INVAL)
8235
8236 if remote_node == secondary_node:
8237 raise errors.OpPrereqError("The specified node is already the"
8238 " secondary node of the instance.",
8239 errors.ECODE_INVAL)
8240
8241 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8242 constants.REPLACE_DISK_CHG):
8243 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8244 errors.ECODE_INVAL)
8245
8246 if self.mode == constants.REPLACE_DISK_AUTO:
8247 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8248 faulty_secondary = self._FindFaultyDisks(secondary_node)
8249
8250 if faulty_primary and faulty_secondary:
8251 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8252 " one node and cannot be repaired"
8253 " automatically" % self.instance_name,
8254 errors.ECODE_STATE)
8255
8256 if faulty_primary:
8257 self.disks = faulty_primary
8258 self.target_node = instance.primary_node
8259 self.other_node = secondary_node
8260 check_nodes = [self.target_node, self.other_node]
8261 elif faulty_secondary:
8262 self.disks = faulty_secondary
8263 self.target_node = secondary_node
8264 self.other_node = instance.primary_node
8265 check_nodes = [self.target_node, self.other_node]
8266 else:
8267 self.disks = []
8268 check_nodes = []
8269
8270 else:
8271
8272 if self.mode == constants.REPLACE_DISK_PRI:
8273 self.target_node = instance.primary_node
8274 self.other_node = secondary_node
8275 check_nodes = [self.target_node, self.other_node]
8276
8277 elif self.mode == constants.REPLACE_DISK_SEC:
8278 self.target_node = secondary_node
8279 self.other_node = instance.primary_node
8280 check_nodes = [self.target_node, self.other_node]
8281
8282 elif self.mode == constants.REPLACE_DISK_CHG:
8283 self.new_node = remote_node
8284 self.other_node = instance.primary_node
8285 self.target_node = secondary_node
8286 check_nodes = [self.new_node, self.other_node]
8287
8288 _CheckNodeNotDrained(self.lu, remote_node)
8289 _CheckNodeVmCapable(self.lu, remote_node)
8290
8291 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8292 assert old_node_info is not None
8293 if old_node_info.offline and not self.early_release:
8294
8295 self.early_release = True
8296 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8297 " early-release mode", secondary_node)
8298
8299 else:
8300 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8301 self.mode)
8302
8303
8304 if not self.disks:
8305 self.disks = range(len(self.instance.disks))
8306
8307 for node in check_nodes:
8308 _CheckNodeOnline(self.lu, node)
8309
8310 touched_nodes = frozenset([self.new_node, self.other_node,
8311 self.target_node])
8312
8313 if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
8314
8315 for name in self.lu.acquired_locks[locking.LEVEL_NODE]:
8316 if name not in touched_nodes:
8317 self._ReleaseNodeLock(name)
8318
8319
8320 for disk_idx in self.disks:
8321 instance.FindDisk(disk_idx)
8322
8323
8324 self.node_secondary_ip = \
8325 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8326 for node_name in touched_nodes
8327 if node_name is not None)
8328
8329 - def Exec(self, feedback_fn):
8330 """Execute disk replacement.
8331
8332 This dispatches the disk replacement to the appropriate handler.
8333
8334 """
8335 if self.delay_iallocator:
8336 self._CheckPrereq2()
8337
8338 if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8339 __debug__):
8340
8341 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8342 assert set(owned_locks) == set(self.node_secondary_ip), \
8343 "Not owning the correct locks: %s" % (owned_locks, )
8344
8345 if not self.disks:
8346 feedback_fn("No disks need replacement")
8347 return
8348
8349 feedback_fn("Replacing disk(s) %s for %s" %
8350 (utils.CommaJoin(self.disks), self.instance.name))
8351
8352 activate_disks = (not self.instance.admin_up)
8353
8354
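# If the instance is down, its disks are not assembled; activate them for the
# duration of the replacement and shut them down again in the finally block.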
8355 if activate_disks:
8356 _StartInstanceDisks(self.lu, self.instance, True)
8357
8358 try:
8359
8360 if self.new_node is not None:
8361 fn = self._ExecDrbd8Secondary
8362 else:
8363 fn = self._ExecDrbd8DiskOnly
8364
8365 result = fn(feedback_fn)
8366 finally:
8367
8368
8369 if activate_disks:
8370 _SafeShutdownInstanceDisks(self.lu, self.instance)
8371
8372 if __debug__:
8373
8374 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8375 assert ((self.early_release and not owned_locks) or
8376 (not self.early_release and
8377 set(owned_locks) == set(self.node_secondary_ip))), \
8378 ("Not owning the correct locks, early_release=%s, owned=%r" %
8379 (self.early_release, owned_locks))
8380
8381 return result
8382
8383 - def _CheckVolumeGroup(self, nodes):
8384 self.lu.LogInfo("Checking volume groups")
8385
8386 vgname = self.cfg.GetVGName()
8387
8388
8389 results = self.rpc.call_vg_list(nodes)
8390 if not results:
8391 raise errors.OpExecError("Can't list volume groups on the nodes")
8392
8393 for node in nodes:
8394 res = results[node]
8395 res.Raise("Error checking node %s" % node)
8396 if vgname not in res.payload:
8397 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8398 (vgname, node))
8399
8400 - def _CheckDisksExistence(self, nodes):
8401
8402 for idx, dev in enumerate(self.instance.disks):
8403 if idx not in self.disks:
8404 continue
8405
8406 for node in nodes:
8407 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8408 self.cfg.SetDiskID(dev, node)
8409
8410 result = self.rpc.call_blockdev_find(node, dev)
8411
8412 msg = result.fail_msg
8413 if msg or not result.payload:
8414 if not msg:
8415 msg = "disk not found"
8416 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8417 (idx, node, msg))
8418
8419 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8420 for idx, dev in enumerate(self.instance.disks):
8421 if idx not in self.disks:
8422 continue
8423
8424 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8425 (idx, node_name))
8426
8427 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8428 ldisk=ldisk):
8429 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8430 " replace disks for instance %s" %
8431 (node_name, self.instance.name))
8432
8434 """Create new storage on the primary or secondary node.
8435
8436 This is only used for same-node replaces, not for changing the
8437 secondary node, hence we don't want to modify the existing disk.
8438
8439 """
8440 iv_names = {}
8441
8442 for idx, dev in enumerate(self.instance.disks):
8443 if idx not in self.disks:
8444 continue
8445
8446 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8447
8448 self.cfg.SetDiskID(dev, node_name)
8449
8450 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8451 names = _GenerateUniqueNames(self.lu, lv_names)
8452
8453 vg_data = dev.children[0].logical_id[0]
8454 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8455 logical_id=(vg_data, names[0]))
8456 vg_meta = dev.children[1].logical_id[0]
8457 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8458 logical_id=(vg_meta, names[1]))
8459
8460 new_lvs = [lv_data, lv_meta]
8461 old_lvs = [child.Copy() for child in dev.children]
8462 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8463
8464
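# Create the new LVs on the target node; the generated names are unique, so
# they can coexist with the old LVs until they are swapped in.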
8465 for new_lv in new_lvs:
8466 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8467 _GetInstanceInfoText(self.instance), False)
8468
8469 return iv_names
8470
8471 - def _CheckDevices(self, node_name, iv_names):
8472 for name, (dev, _, _) in iv_names.iteritems():
8473 self.cfg.SetDiskID(dev, node_name)
8474
8475 result = self.rpc.call_blockdev_find(node_name, dev)
8476
8477 msg = result.fail_msg
8478 if msg or not result.payload:
8479 if not msg:
8480 msg = "disk not found"
8481 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8482 (name, msg))
8483
8484 if result.payload.is_degraded:
8485 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8486
8487 - def _RemoveOldStorage(self, node_name, iv_names):
8488 for name, (_, old_lvs, _) in iv_names.iteritems():
8489 self.lu.LogInfo("Remove logical volumes for %s" % name)
8490
8491 for lv in old_lvs:
8492 self.cfg.SetDiskID(lv, node_name)
8493
8494 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8495 if msg:
8496 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8497 hint="remove unused LVs manually")
8498
8502
8504 """Replace a disk on the primary or secondary for DRBD 8.
8505
8506 The algorithm for replace is quite complicated:
8507
8508 1. for each disk to be replaced:
8509
8510 1. create new LVs on the target node with unique names
8511 1. detach old LVs from the drbd device
8512 1. rename old LVs to name_replaced.<time_t>
8513 1. rename new LVs to old LVs
8514 1. attach the new LVs (with the old names now) to the drbd device
8515
8516 1. wait for sync across all devices
8517
8518 1. for each modified disk:
8519
8520 1. remove old LVs (which have the name name_replaced.<time_t>)
8521
8522 Failures are not very well handled.
8523
8524 """
8525 steps_total = 6
8526
8527
8528 self.lu.LogStep(1, steps_total, "Check device existence")
8529 self._CheckDisksExistence([self.other_node, self.target_node])
8530 self._CheckVolumeGroup([self.target_node, self.other_node])
8531
8532
8533 self.lu.LogStep(2, steps_total, "Check peer consistency")
8534 self._CheckDisksConsistency(self.other_node,
8535 self.other_node == self.instance.primary_node,
8536 False)
8537
8538
8539 self.lu.LogStep(3, steps_total, "Allocate new storage")
8540 iv_names = self._CreateNewStorage(self.target_node)
8541
8542
8543 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8544 for dev, old_lvs, new_lvs in iv_names.itervalues():
8545 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8546
8547 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8548 old_lvs)
8549 result.Raise("Can't detach drbd from local storage on node"
8550 " %s for device %s" % (self.target_node, dev.iv_name))
8551
8552
8553
8554
8555
8556
8557
8558
8559
8560
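# The old LVs cannot be reused directly: rename them out of the way first
# (suffix "_replaced-<timestamp>"), then rename the new LVs to the original
# names so the DRBD device can pick them up again.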
8561 temp_suffix = int(time.time())
8562 ren_fn = lambda d, suff: (d.physical_id[0],
8563 d.physical_id[1] + "_replaced-%s" % suff)
8564
8565
8566 rename_old_to_new = []
8567 for to_ren in old_lvs:
8568 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8569 if not result.fail_msg and result.payload:
8570
8571 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8572
8573 self.lu.LogInfo("Renaming the old LVs on the target node")
8574 result = self.rpc.call_blockdev_rename(self.target_node,
8575 rename_old_to_new)
8576 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8577
8578
8579 self.lu.LogInfo("Renaming the new LVs on the target node")
8580 rename_new_to_old = [(new, old.physical_id)
8581 for old, new in zip(old_lvs, new_lvs)]
8582 result = self.rpc.call_blockdev_rename(self.target_node,
8583 rename_new_to_old)
8584 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8585
8586
8587 for old, new in zip(old_lvs, new_lvs):
8588 new.logical_id = old.logical_id
8589 self.cfg.SetDiskID(new, self.target_node)
8590
8591
8592
8593
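# Record the renamed logical ids of the old LVs as well, so the configuration
# stays consistent until they are removed.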
8594 for disk in old_lvs:
8595 disk.logical_id = ren_fn(disk, temp_suffix)
8596 self.cfg.SetDiskID(disk, self.target_node)
8597
8598
8599 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8600 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8601 new_lvs)
8602 msg = result.fail_msg
8603 if msg:
8604 for new_lv in new_lvs:
8605 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8606 new_lv).fail_msg
8607 if msg2:
8608 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8609 hint=("cleanup manually the unused logical"
8610 "volumes"))
8611 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8612
8613 cstep = 5
8614 if self.early_release:
8615 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8616 cstep += 1
8617 self._RemoveOldStorage(self.target_node, iv_names)
8618
8619
8620 self._ReleaseNodeLock([self.target_node, self.other_node])
8621
8622
8623
8624
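# Wait for the DRBD devices to resync; with early release the old storage and
# the node locks were already given up above, otherwise they are kept until
# the sync has finished.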
8625 self.lu.LogStep(cstep, steps_total, "Sync devices")
8626 cstep += 1
8627 _WaitForSync(self.lu, self.instance)
8628
8629
8630 self._CheckDevices(self.instance.primary_node, iv_names)
8631
8632
8633 if not self.early_release:
8634 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8635 cstep += 1
8636 self._RemoveOldStorage(self.target_node, iv_names)
8637
8639 """Replace the secondary node for DRBD 8.
8640
8641 The algorithm for replace is quite complicated:
8642 - for all disks of the instance:
8643 - create new LVs on the new node with same names
8644 - shutdown the drbd device on the old secondary
8645 - disconnect the drbd network on the primary
8646 - create the drbd device on the new secondary
8647 - network attach the drbd on the primary, using an artifice:
8648 the drbd code for Attach() will connect to the network if it
8649 finds a device which is connected to the correct local disks but
8650 not network enabled
8651 - wait for sync across all devices
8652 - remove all disks from the old secondary
8653
8654 Failures are not very well handled.
8655
8656 """
8657 steps_total = 6
8658
8659
8660 self.lu.LogStep(1, steps_total, "Check device existence")
8661 self._CheckDisksExistence([self.instance.primary_node])
8662 self._CheckVolumeGroup([self.instance.primary_node])
8663
8664
8665 self.lu.LogStep(2, steps_total, "Check peer consistency")
8666 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8667
8668
8669 self.lu.LogStep(3, steps_total, "Allocate new storage")
8670 for idx, dev in enumerate(self.instance.disks):
8671 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8672 (self.new_node, idx))
8673
8674 for new_lv in dev.children:
8675 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8676 _GetInstanceInfoText(self.instance), False)
8677
8678
8679
8680
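# Step 4: allocate a DRBD minor on the new node for every disk and build the
# new logical ids; the devices are first created without network information
# so they come up in standalone mode.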
8681 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8682 minors = self.cfg.AllocateDRBDMinor([self.new_node
8683 for dev in self.instance.disks],
8684 self.instance.name)
8685 logging.debug("Allocated minors %r", minors)
8686
8687 iv_names = {}
8688 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8689 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8690 (self.new_node, idx))
8691
8692
8693
8694
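# Determine which of the existing minors belongs to the primary node; the new
# secondary gets the freshly allocated minor.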
8695 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8696 if self.instance.primary_node == o_node1:
8697 p_minor = o_minor1
8698 else:
8699 assert self.instance.primary_node == o_node2, "Three-node instance?"
8700 p_minor = o_minor2
8701
8702 new_alone_id = (self.instance.primary_node, self.new_node, None,
8703 p_minor, new_minor, o_secret)
8704 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8705 p_minor, new_minor, o_secret)
8706
8707 iv_names[idx] = (dev, dev.children, new_net_id)
8708 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8709 new_net_id)
8710 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8711 logical_id=new_alone_id,
8712 children=dev.children,
8713 size=dev.size)
8714 try:
8715 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8716 _GetInstanceInfoText(self.instance), False)
8717 except errors.GenericError:
8718 self.cfg.ReleaseDRBDMinors(self.instance.name)
8719 raise
8720
8721
8722 for idx, dev in enumerate(self.instance.disks):
8723 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8724 self.cfg.SetDiskID(dev, self.target_node)
8725 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8726 if msg:
8727 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8728 "node: %s" % (idx, msg),
8729 hint=("Please cleanup this device manually as"
8730 " soon as possible"))
8731
8732 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8733 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8734 self.node_secondary_ip,
8735 self.instance.disks)\
8736 [self.instance.primary_node]
8737
8738 msg = result.fail_msg
8739 if msg:
8740
8741 self.cfg.ReleaseDRBDMinors(self.instance.name)
8742 raise errors.OpExecError("Can't detach the disks from the network on"
8743 " old node: %s" % (msg,))
8744
8745
8746
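# Update the instance configuration so that every disk points to the new
# secondary before the network connection is re-established.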
8747 self.lu.LogInfo("Updating instance configuration")
8748 for dev, _, new_logical_id in iv_names.itervalues():
8749 dev.logical_id = new_logical_id
8750 self.cfg.SetDiskID(dev, self.instance.primary_node)
8751
8752 self.cfg.Update(self.instance, feedback_fn)
8753
8754
8755 self.lu.LogInfo("Attaching primary drbds to new secondary"
8756 " (standalone => connected)")
8757 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8758 self.new_node],
8759 self.node_secondary_ip,
8760 self.instance.disks,
8761 self.instance.name,
8762 False)
8763 for to_node, to_result in result.items():
8764 msg = to_result.fail_msg
8765 if msg:
8766 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8767 to_node, msg,
8768 hint=("please do a gnt-instance info to see the"
8769 " status of disks"))
8770 cstep = 5
8771 if self.early_release:
8772 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8773 cstep += 1
8774 self._RemoveOldStorage(self.target_node, iv_names)
8775
8776
8777 self._ReleaseNodeLock([self.instance.primary_node,
8778 self.target_node,
8779 self.new_node])
8780
8781
8782
8783
8784 self.lu.LogStep(cstep, steps_total, "Sync devices")
8785 cstep += 1
8786 _WaitForSync(self.lu, self.instance)
8787
8788
8789 self._CheckDevices(self.instance.primary_node, iv_names)
8790
8791
8792 if not self.early_release:
8793 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8794 self._RemoveOldStorage(self.target_node, iv_names)
8795
8798 """Repairs the volume group on a node.
8799
8800 """
8801 REQ_BGL = False
8802
8813
8818
8832
8834 """Check prerequisites.
8835
8836 """
8837
8838 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8839 if not inst.admin_up:
8840 continue
8841 check_nodes = set(inst.all_nodes)
8842 check_nodes.discard(self.op.node_name)
8843 for inst_node_name in check_nodes:
8844 self._CheckFaultyDisks(inst, inst_node_name)
8845
8846 - def Exec(self, feedback_fn):
8847 feedback_fn("Repairing storage unit '%s' on %s ..." %
8848 (self.op.name, self.op.node_name))
8849
8850 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8851 result = self.rpc.call_storage_execute(self.op.node_name,
8852 self.op.storage_type, st_args,
8853 self.op.name,
8854 constants.SO_FIX_CONSISTENCY)
8855 result.Raise("Failed to repair storage unit '%s' on %s" %
8856 (self.op.name, self.op.node_name))
8857
8860 """Computes the node evacuation strategy.
8861
8862 """
8863 REQ_BGL = False
8864
8867
8876
8877 - def Exec(self, feedback_fn):
8878 instances = []
8879 for node in self.op.nodes:
8880 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8881 if not instances:
8882 return []
8883
8884 if self.op.remote_node is not None:
8885 result = []
8886 for i in instances:
8887 if i.primary_node == self.op.remote_node:
8888 raise errors.OpPrereqError("Node %s is the primary node of"
8889 " instance %s, cannot use it as"
8890 " secondary" %
8891 (self.op.remote_node, i.name),
8892 errors.ECODE_INVAL)
8893 result.append([i.name, self.op.remote_node])
8894 else:
8895 ial = IAllocator(self.cfg, self.rpc,
8896 mode=constants.IALLOCATOR_MODE_MEVAC,
8897 evac_nodes=self.op.nodes)
8898 ial.Run(self.op.iallocator, validate=True)
8899 if not ial.success:
8900 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8901 errors.ECODE_NORES)
8902 result = ial.result
8903 return result
8904
8907 """Grow a disk of an instance.
8908
8909 """
8910 HPATH = "disk-grow"
8911 HTYPE = constants.HTYPE_INSTANCE
8912 REQ_BGL = False
8913
8918
8922
8924 """Build hooks env.
8925
8926 This runs on the master, the primary and all the secondaries.
8927
8928 """
8929 env = {
8930 "DISK": self.op.disk,
8931 "AMOUNT": self.op.amount,
8932 }
8933 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8935 return env, nl, nl
8936
8963
8964 - def Exec(self, feedback_fn):
8965 """Execute disk grow.
8966
8967 """
8968 instance = self.instance
8969 disk = self.disk
8970
8971 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8972 if not disks_ok:
8973 raise errors.OpExecError("Cannot activate block device to grow")
8974
8975 for node in instance.all_nodes:
8976 self.cfg.SetDiskID(disk, node)
8977 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8978 result.Raise("Grow request failed to node %s" % node)
8979
8980
8981
8982
8983
8984
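# Brief pause before the new size is recorded in the configuration and, if
# requested, the resync is awaited.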
8985 time.sleep(5)
8986
8987 disk.RecordGrow(self.op.amount)
8988 self.cfg.Update(instance, feedback_fn)
8989 if self.op.wait_for_sync:
8990 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8991 if disk_abort:
8992 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8993 " status.\nPlease check the instance.")
8994 if not instance.admin_up:
8995 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8996 elif not instance.admin_up:
8997 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8998 " not supposed to be running because no wait for"
8999 " sync mode was requested.")
9000
9003 """Query runtime instance data.
9004
9005 """
9006 REQ_BGL = False
9007
9009 self.needed_locks = {}
9010
9011
9012 if not (self.op.static or self.op.use_locking):
9013 self.LogWarning("Non-static data requested, locks need to be acquired")
9014 self.op.use_locking = True
9015
9016 if self.op.instances or not self.op.use_locking:
9017
9018 self.wanted_names = _GetWantedInstances(self, self.op.instances)
9019 else:
9020
9021 self.wanted_names = None
9022
9023 if self.op.use_locking:
9024 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9025
9026 if self.wanted_names is None:
9027 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9028 else:
9029 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9030
9031 self.needed_locks[locking.LEVEL_NODE] = []
9032 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9033 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9034
9038
9040 """Check prerequisites.
9041
9042 This only checks the optional instance list against the existing names.
9043
9044 """
9045 if self.wanted_names is None:
9046 assert self.op.use_locking, "Locking was not used"
9047 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9048
9049 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9050 for name in self.wanted_names]
9051
9053 """Returns the status of a block device
9054
9055 """
9056 if self.op.static or not node:
9057 return None
9058
9059 self.cfg.SetDiskID(dev, node)
9060
9061 result = self.rpc.call_blockdev_find(node, dev)
9062 if result.offline:
9063 return None
9064
9065 result.Raise("Can't compute disk status for %s" % instance_name)
9066
9067 status = result.payload
9068 if status is None:
9069 return None
9070
9071 return (status.dev_path, status.major, status.minor,
9072 status.sync_percent, status.estimated_time,
9073 status.is_degraded, status.ldisk_status)
9074
9076 """Compute block device status.
9077
9078 """
9079 if dev.dev_type in constants.LDS_DRBD:
9080
9081 if dev.logical_id[0] == instance.primary_node:
9082 snode = dev.logical_id[1]
9083 else:
9084 snode = dev.logical_id[0]
9085
9086 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9087 instance.name, dev)
9088 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9089
9090 if dev.children:
9091 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9092 for child in dev.children]
9093 else:
9094 dev_children = []
9095
9096 return {
9097 "iv_name": dev.iv_name,
9098 "dev_type": dev.dev_type,
9099 "logical_id": dev.logical_id,
9100 "physical_id": dev.physical_id,
9101 "pstatus": dev_pstatus,
9102 "sstatus": dev_sstatus,
9103 "children": dev_children,
9104 "mode": dev.mode,
9105 "size": dev.size,
9106 }
9107
9108 - def Exec(self, feedback_fn):
9109 """Gather and return data"""
9110 result = {}
9111
9112 cluster = self.cfg.GetClusterInfo()
9113
9114 for instance in self.wanted_instances:
9115 if not self.op.static:
9116 remote_info = self.rpc.call_instance_info(instance.primary_node,
9117 instance.name,
9118 instance.hypervisor)
9119 remote_info.Raise("Error checking node %s" % instance.primary_node)
9120 remote_info = remote_info.payload
9121 if remote_info and "state" in remote_info:
9122 remote_state = "up"
9123 else:
9124 remote_state = "down"
9125 else:
9126 remote_state = None
9127 if instance.admin_up:
9128 config_state = "up"
9129 else:
9130 config_state = "down"
9131
9132 disks = [self._ComputeDiskStatus(instance, None, device)
9133 for device in instance.disks]
9134
9135 result[instance.name] = {
9136 "name": instance.name,
9137 "config_state": config_state,
9138 "run_state": remote_state,
9139 "pnode": instance.primary_node,
9140 "snodes": instance.secondary_nodes,
9141 "os": instance.os,
9142
9143 "nics": _NICListToTuple(self, instance.nics),
9144 "disk_template": instance.disk_template,
9145 "disks": disks,
9146 "hypervisor": instance.hypervisor,
9147 "network_port": instance.network_port,
9148 "hv_instance": instance.hvparams,
9149 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9150 "be_instance": instance.beparams,
9151 "be_actual": cluster.FillBE(instance),
9152 "os_instance": instance.osparams,
9153 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9154 "serial_no": instance.serial_no,
9155 "mtime": instance.mtime,
9156 "ctime": instance.ctime,
9157 "uuid": instance.uuid,
9158 }
9159
9160 return result
9161
9164 """Modifies an instances's parameters.
9165
9166 """
9167 HPATH = "instance-modify"
9168 HTYPE = constants.HTYPE_INSTANCE
9169 REQ_BGL = False
9170
9172 if not (self.op.nics or self.op.disks or self.op.disk_template or
9173 self.op.hvparams or self.op.beparams or self.op.os_name):
9174 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9175
9176 if self.op.hvparams:
9177 _CheckGlobalHvParams(self.op.hvparams)
9178
9179
9180 disk_addremove = 0
9181 for disk_op, disk_dict in self.op.disks:
9182 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9183 if disk_op == constants.DDM_REMOVE:
9184 disk_addremove += 1
9185 continue
9186 elif disk_op == constants.DDM_ADD:
9187 disk_addremove += 1
9188 else:
9189 if not isinstance(disk_op, int):
9190 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9191 if not isinstance(disk_dict, dict):
9192 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9193 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9194
9195 if disk_op == constants.DDM_ADD:
9196 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9197 if mode not in constants.DISK_ACCESS_SET:
9198 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9199 errors.ECODE_INVAL)
9200 size = disk_dict.get('size', None)
9201 if size is None:
9202 raise errors.OpPrereqError("Required disk parameter size missing",
9203 errors.ECODE_INVAL)
9204 try:
9205 size = int(size)
9206 except (TypeError, ValueError), err:
9207 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9208 str(err), errors.ECODE_INVAL)
9209 disk_dict['size'] = size
9210 else:
9211
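# Modification of an existing disk: only the access mode may change here;
# size changes must go through grow-disk.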
9212 if 'size' in disk_dict:
9213 raise errors.OpPrereqError("Disk size change not possible, use"
9214 " grow-disk", errors.ECODE_INVAL)
9215
9216 if disk_addremove > 1:
9217 raise errors.OpPrereqError("Only one disk add or remove operation"
9218 " supported at a time", errors.ECODE_INVAL)
9219
9220 if self.op.disks and self.op.disk_template is not None:
9221 raise errors.OpPrereqError("Disk template conversion and other disk"
9222 " changes not supported at the same time",
9223 errors.ECODE_INVAL)
9224
9225 if (self.op.disk_template and
9226 self.op.disk_template in constants.DTS_NET_MIRROR and
9227 self.op.remote_node is None):
9228 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9229 " one requires specifying a secondary node",
9230 errors.ECODE_INVAL)
9231
9232
9233 nic_addremove = 0
9234 for nic_op, nic_dict in self.op.nics:
9235 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9236 if nic_op == constants.DDM_REMOVE:
9237 nic_addremove += 1
9238 continue
9239 elif nic_op == constants.DDM_ADD:
9240 nic_addremove += 1
9241 else:
9242 if not isinstance(nic_op, int):
9243 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9244 if not isinstance(nic_dict, dict):
9245 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9246 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9247
9248
9249 nic_ip = nic_dict.get('ip', None)
9250 if nic_ip is not None:
9251 if nic_ip.lower() == constants.VALUE_NONE:
9252 nic_dict['ip'] = None
9253 else:
9254 if not netutils.IPAddress.IsValid(nic_ip):
9255 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9256 errors.ECODE_INVAL)
9257
9258 nic_bridge = nic_dict.get('bridge', None)
9259 nic_link = nic_dict.get('link', None)
9260 if nic_bridge and nic_link:
9261 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9262 " at the same time", errors.ECODE_INVAL)
9263 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9264 nic_dict['bridge'] = None
9265 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9266 nic_dict['link'] = None
9267
9268 if nic_op == constants.DDM_ADD:
9269 nic_mac = nic_dict.get('mac', None)
9270 if nic_mac is None:
9271 nic_dict['mac'] = constants.VALUE_AUTO
9272
9273 if 'mac' in nic_dict:
9274 nic_mac = nic_dict['mac']
9275 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9276 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9277
9278 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9279 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9280 " modifying an existing nic",
9281 errors.ECODE_INVAL)
9282
9283 if nic_addremove > 1:
9284 raise errors.OpPrereqError("Only one NIC add or remove operation"
9285 " supported at a time", errors.ECODE_INVAL)
9286
9291
9298
9300 """Build hooks env.
9301
9302 This runs on the master, primary and secondaries.
9303
9304 """
9305 args = dict()
9306 if constants.BE_MEMORY in self.be_new:
9307 args['memory'] = self.be_new[constants.BE_MEMORY]
9308 if constants.BE_VCPUS in self.be_new:
9309 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9310
9311
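# Build the NIC list for the hooks environment, applying the per-NIC
# overrides requested in this opcode on top of the existing configuration.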
9312 if self.op.nics:
9313 args['nics'] = []
9314 nic_override = dict(self.op.nics)
9315 for idx, nic in enumerate(self.instance.nics):
9316 if idx in nic_override:
9317 this_nic_override = nic_override[idx]
9318 else:
9319 this_nic_override = {}
9320 if 'ip' in this_nic_override:
9321 ip = this_nic_override['ip']
9322 else:
9323 ip = nic.ip
9324 if 'mac' in this_nic_override:
9325 mac = this_nic_override['mac']
9326 else:
9327 mac = nic.mac
9328 if idx in self.nic_pnew:
9329 nicparams = self.nic_pnew[idx]
9330 else:
9331 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9332 mode = nicparams[constants.NIC_MODE]
9333 link = nicparams[constants.NIC_LINK]
9334 args['nics'].append((ip, mac, mode, link))
9335 if constants.DDM_ADD in nic_override:
9336 ip = nic_override[constants.DDM_ADD].get('ip', None)
9337 mac = nic_override[constants.DDM_ADD]['mac']
9338 nicparams = self.nic_pnew[constants.DDM_ADD]
9339 mode = nicparams[constants.NIC_MODE]
9340 link = nicparams[constants.NIC_LINK]
9341 args['nics'].append((ip, mac, mode, link))
9342 elif constants.DDM_REMOVE in nic_override:
9343 del args['nics'][-1]
9344
9345 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9346 if self.op.disk_template:
9347 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9348 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9349 return env, nl, nl
9350
9352 """Check prerequisites.
9353
9354 This only checks the instance list against the existing names.
9355
9356 """
9357
9358
9359 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9360 cluster = self.cluster = self.cfg.GetClusterInfo()
9361 assert self.instance is not None, \
9362 "Cannot retrieve locked instance %s" % self.op.instance_name
9363 pnode = instance.primary_node
9364 nodelist = list(instance.all_nodes)
9365
9366
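# OS change: unless forced, verify that the requested OS (and variant) exists
# on the primary node.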
9367 if self.op.os_name and not self.op.force:
9368 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9369 self.op.force_variant)
9370 instance_os = self.op.os_name
9371 else:
9372 instance_os = instance.os
9373
9374 if self.op.disk_template:
9375 if instance.disk_template == self.op.disk_template:
9376 raise errors.OpPrereqError("Instance already has disk template %s" %
9377 instance.disk_template, errors.ECODE_INVAL)
9378
9379 if (instance.disk_template,
9380 self.op.disk_template) not in self._DISK_CONVERSIONS:
9381 raise errors.OpPrereqError("Unsupported disk template conversion from"
9382 " %s to %s" % (instance.disk_template,
9383 self.op.disk_template),
9384 errors.ECODE_INVAL)
9385 _CheckInstanceDown(self, instance, "cannot change disk template")
9386 if self.op.disk_template in constants.DTS_NET_MIRROR:
9387 if self.op.remote_node == pnode:
9388 raise errors.OpPrereqError("Given new secondary node %s is the same"
9389 " as the primary node of the instance" %
9390 self.op.remote_node, errors.ECODE_STATE)
9391 _CheckNodeOnline(self, self.op.remote_node)
9392 _CheckNodeNotDrained(self, self.op.remote_node)
9393
9394 assert instance.disk_template == constants.DT_PLAIN
9395 disks = [{"size": d.size, "vg": d.logical_id[0]}
9396 for d in instance.disks]
9397 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9398 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9399
9400
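# Hypervisor parameters: merge the requested changes into the instance-level
# dict, then validate the fully filled result locally and on all relevant
# nodes.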
9401 if self.op.hvparams:
9402 hv_type = instance.hypervisor
9403 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9404 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9405 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9406
9407
9408 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9409 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9410 self.hv_new = hv_new
9411 self.hv_inst = i_hvdict
9412 else:
9413 self.hv_new = self.hv_inst = {}
9414
9415
9416 if self.op.beparams:
9417 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9418 use_none=True)
9419 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9420 be_new = cluster.SimpleFillBE(i_bedict)
9421 self.be_new = be_new
9422 self.be_inst = i_bedict
9423 else:
9424 self.be_new = self.be_inst = {}
9425 be_old = cluster.FillBE(instance)
9426
9427
9428 if self.op.osparams:
9429 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9430 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9431 self.os_inst = i_osdict
9432 else:
9433 self.os_inst = {}
9434
9435 self.warn = []
9436
9437 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
9438 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
9439 mem_check_list = [pnode]
9440 if be_new[constants.BE_AUTO_BALANCE]:
9441
9442 mem_check_list.extend(instance.secondary_nodes)
9443 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9444 instance.hypervisor)
9445 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9446 instance.hypervisor)
9447 pninfo = nodeinfo[pnode]
9448 msg = pninfo.fail_msg
9449 if msg:
9450
9451 self.warn.append("Can't get info from primary node %s: %s" %
9452 (pnode, msg))
9453 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9454 self.warn.append("Node data from primary node %s doesn't contain"
9455 " free memory information" % pnode)
9456 elif instance_info.fail_msg:
9457 self.warn.append("Can't get instance runtime information: %s" %
9458 instance_info.fail_msg)
9459 else:
9460 if instance_info.payload:
9461 current_mem = int(instance_info.payload['memory'])
9462 else:
9463
9464
9465
9466 current_mem = 0
9467 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9468 pninfo.payload['memory_free'])
9469 if miss_mem > 0:
9470 raise errors.OpPrereqError("This change will prevent the instance"
9471 " from starting, due to %d MB of memory"
9472 " missing on its primary node" % miss_mem,
9473 errors.ECODE_NORES)
9474
9475 if be_new[constants.BE_AUTO_BALANCE]:
9476 for node, nres in nodeinfo.items():
9477 if node not in instance.secondary_nodes:
9478 continue
9479 nres.Raise("Can't get info from secondary node %s" % node,
9480 prereq=True, ecode=errors.ECODE_STATE)
9481 if not isinstance(nres.payload.get('memory_free', None), int):
9482 raise errors.OpPrereqError("Secondary node %s didn't return free"
9483 " memory information" % node,
9484 errors.ECODE_STATE)
9485 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9486 raise errors.OpPrereqError("This change will prevent the instance"
9487 " from failover to its secondary node"
9488 " %s, due to not enough memory" % node,
9489 errors.ECODE_STATE)
9490
9491
9492 self.nic_pnew = {}
9493 self.nic_pinst = {}
9494 for nic_op, nic_dict in self.op.nics:
9495 if nic_op == constants.DDM_REMOVE:
9496 if not instance.nics:
9497 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9498 errors.ECODE_INVAL)
9499 continue
9500 if nic_op != constants.DDM_ADD:
9501
9502 if not instance.nics:
9503 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9504 " no NICs" % nic_op,
9505 errors.ECODE_INVAL)
9506 if nic_op < 0 or nic_op >= len(instance.nics):
9507 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9508 " are 0 to %d" %
9509 (nic_op, len(instance.nics) - 1),
9510 errors.ECODE_INVAL)
9511 old_nic_params = instance.nics[nic_op].nicparams
9512 old_nic_ip = instance.nics[nic_op].ip
9513 else:
9514 old_nic_params = {}
9515 old_nic_ip = None
9516
9517 update_params_dict = dict([(key, nic_dict[key])
9518 for key in constants.NICS_PARAMETERS
9519 if key in nic_dict])
9520
9521 if 'bridge' in nic_dict:
9522 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9523
9524 new_nic_params = _GetUpdatedParams(old_nic_params,
9525 update_params_dict)
9526 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9527 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9528 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9529 self.nic_pinst[nic_op] = new_nic_params
9530 self.nic_pnew[nic_op] = new_filled_nic_params
9531 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9532
9533 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9534 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9535 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9536 if msg:
9537 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9538 if self.op.force:
9539 self.warn.append(msg)
9540 else:
9541 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9542 if new_nic_mode == constants.NIC_MODE_ROUTED:
9543 if 'ip' in nic_dict:
9544 nic_ip = nic_dict['ip']
9545 else:
9546 nic_ip = old_nic_ip
9547 if nic_ip is None:
9548 raise errors.OpPrereqError('Cannot set the nic ip to None'
9549 ' on a routed nic', errors.ECODE_INVAL)
9550 if 'mac' in nic_dict:
9551 nic_mac = nic_dict['mac']
9552 if nic_mac is None:
9553 raise errors.OpPrereqError('Cannot set the nic mac to None',
9554 errors.ECODE_INVAL)
9555 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9556
9557 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9558 else:
9559
9560 try:
9561 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9562 except errors.ReservationError:
9563 raise errors.OpPrereqError("MAC address %s already in use"
9564 " in cluster" % nic_mac,
9565 errors.ECODE_NOTUNIQUE)
9566
9567
9568 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9569 raise errors.OpPrereqError("Disk operations not supported for"
9570 " diskless instances",
9571 errors.ECODE_INVAL)
9572 for disk_op, _ in self.op.disks:
9573 if disk_op == constants.DDM_REMOVE:
9574 if len(instance.disks) == 1:
9575 raise errors.OpPrereqError("Cannot remove the last disk of"
9576 " an instance", errors.ECODE_INVAL)
9577 _CheckInstanceDown(self, instance, "cannot remove disks")
9578
9579 if (disk_op == constants.DDM_ADD and
9580 len(instance.disks) >= constants.MAX_DISKS):
9581 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9582 " add more" % constants.MAX_DISKS,
9583 errors.ECODE_STATE)
9584 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9585
9586 if disk_op < 0 or disk_op >= len(instance.disks):
9587 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9588 " are 0 to %d" %
9589 (disk_op, len(instance.disks)),
9590 errors.ECODE_INVAL)
9591
9592 return
9593
9595 """Converts an instance from plain to drbd.
9596
9597 """
9598 feedback_fn("Converting template to drbd")
9599 instance = self.instance
9600 pnode = instance.primary_node
9601 snode = self.op.remote_node
9602
9603
9604 disk_info = [{"size": d.size, "mode": d.mode,
9605 "vg": d.logical_id[0]} for d in instance.disks]
9606 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9607 instance.name, pnode, [snode],
9608 disk_info, None, None, 0, feedback_fn)
9609 info = _GetInstanceInfoText(instance)
9610 feedback_fn("Creating aditional volumes...")
9611
9612 for disk in new_disks:
9613
9614 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9615 info, True)
9616 for child in disk.children:
9617 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9618
9619
9620 feedback_fn("Renaming original volumes...")
9621 rename_list = [(o, n.children[0].logical_id)
9622 for (o, n) in zip(instance.disks, new_disks)]
9623 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9624 result.Raise("Failed to rename original LVs")
9625
9626 feedback_fn("Initializing DRBD devices...")
9627
9628 for disk in new_disks:
9629 for node in [pnode, snode]:
9630 f_create = node == pnode
9631 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9632
9633
9634 instance.disk_template = constants.DT_DRBD8
9635 instance.disks = new_disks
9636 self.cfg.Update(instance, feedback_fn)
9637
9638
9639 disk_abort = not _WaitForSync(self, instance,
9640 oneshot=not self.op.wait_for_sync)
9641 if disk_abort:
9642 raise errors.OpExecError("There are some degraded disks for"
9643 " this instance, please cleanup manually")
9644
9646 """Converts an instance from drbd to plain.
9647
9648 """
9649 instance = self.instance
9650 assert len(instance.secondary_nodes) == 1
9651 pnode = instance.primary_node
9652 snode = instance.secondary_nodes[0]
9653 feedback_fn("Converting template to plain")
9654
9655 old_disks = instance.disks
9656 new_disks = [d.children[0] for d in old_disks]
9657
9658
9659 for parent, child in zip(old_disks, new_disks):
9660 child.size = parent.size
9661 child.mode = parent.mode
9662
9663
9664 instance.disks = new_disks
9665 instance.disk_template = constants.DT_PLAIN
9666 self.cfg.Update(instance, feedback_fn)
9667
9668 feedback_fn("Removing volumes on the secondary node...")
9669 for disk in old_disks:
9670 self.cfg.SetDiskID(disk, snode)
9671 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9672 if msg:
9673 self.LogWarning("Could not remove block device %s on node %s,"
9674 " continuing anyway: %s", disk.iv_name, snode, msg)
9675
9676 feedback_fn("Removing unneeded volumes on the primary node...")
9677 for idx, disk in enumerate(old_disks):
9678 meta = disk.children[1]
9679 self.cfg.SetDiskID(meta, pnode)
9680 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9681 if msg:
9682 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9683 " continuing anyway: %s", idx, pnode, msg)
9684
9685 - def Exec(self, feedback_fn):
9686 """Modifies an instance.
9687
9688 All parameters take effect only at the next restart of the instance.
9689
9690 """
9691
9692
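# Apply the changes computed during the prerequisite checks; every
# modification is recorded as a (name, value) pair and returned as the job
# result.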
9693 for warn in self.warn:
9694 feedback_fn("WARNING: %s" % warn)
9695
9696 result = []
9697 instance = self.instance
9698
9699 for disk_op, disk_dict in self.op.disks:
9700 if disk_op == constants.DDM_REMOVE:
9701
9702 device = instance.disks.pop()
9703 device_idx = len(instance.disks)
9704 for node, disk in device.ComputeNodeTree(instance.primary_node):
9705 self.cfg.SetDiskID(disk, node)
9706 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9707 if msg:
9708 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9709 " continuing anyway", device_idx, node, msg)
9710 result.append(("disk/%d" % device_idx, "remove"))
9711 elif disk_op == constants.DDM_ADD:
9712
9713 if instance.disk_template == constants.DT_FILE:
9714 file_driver, file_path = instance.disks[0].logical_id
9715 file_path = os.path.dirname(file_path)
9716 else:
9717 file_driver = file_path = None
9718 disk_idx_base = len(instance.disks)
9719 new_disk = _GenerateDiskTemplate(self,
9720 instance.disk_template,
9721 instance.name, instance.primary_node,
9722 instance.secondary_nodes,
9723 [disk_dict],
9724 file_path,
9725 file_driver,
9726 disk_idx_base, feedback_fn)[0]
9727 instance.disks.append(new_disk)
9728 info = _GetInstanceInfoText(instance)
9729
9730 logging.info("Creating volume %s for instance %s",
9731 new_disk.iv_name, instance.name)
9732
9733
9734 for node in instance.all_nodes:
9735 f_create = node == instance.primary_node
9736 try:
9737 _CreateBlockDev(self, node, instance, new_disk,
9738 f_create, info, f_create)
9739 except errors.OpExecError, err:
9740 self.LogWarning("Failed to create volume %s (%s) on"
9741 " node %s: %s",
9742 new_disk.iv_name, new_disk, node, err)
9743 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9744 (new_disk.size, new_disk.mode)))
9745 else:
9746
9747 instance.disks[disk_op].mode = disk_dict['mode']
9748 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9749
9750 if self.op.disk_template:
9751 r_shut = _ShutdownInstanceDisks(self, instance)
9752 if not r_shut:
9753 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9754 " proceed with disk template conversion")
9755 mode = (instance.disk_template, self.op.disk_template)
9756 try:
9757 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9758 except:
9759 self.cfg.ReleaseDRBDMinors(instance.name)
9760 raise
9761 result.append(("disk_template", self.op.disk_template))
9762
9763
9764 for nic_op, nic_dict in self.op.nics:
9765 if nic_op == constants.DDM_REMOVE:
9766
9767 del instance.nics[-1]
9768 result.append(("nic.%d" % len(instance.nics), "remove"))
9769 elif nic_op == constants.DDM_ADD:
9770
9771 mac = nic_dict['mac']
9772 ip = nic_dict.get('ip', None)
9773 nicparams = self.nic_pinst[constants.DDM_ADD]
9774 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9775 instance.nics.append(new_nic)
9776 result.append(("nic.%d" % (len(instance.nics) - 1),
9777 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9778 (new_nic.mac, new_nic.ip,
9779 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9780 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9781 )))
9782 else:
9783 for key in 'mac', 'ip':
9784 if key in nic_dict:
9785 setattr(instance.nics[nic_op], key, nic_dict[key])
9786 if nic_op in self.nic_pinst:
9787 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9788 for key, val in nic_dict.iteritems():
9789 result.append(("nic.%s/%d" % (key, nic_op), val))
9790
9791
9792 if self.op.hvparams:
9793 instance.hvparams = self.hv_inst
9794 for key, val in self.op.hvparams.iteritems():
9795 result.append(("hv/%s" % key, val))
9796
9797
9798 if self.op.beparams:
9799 instance.beparams = self.be_inst
9800 for key, val in self.op.beparams.iteritems():
9801 result.append(("be/%s" % key, val))
9802
9803
9804 if self.op.os_name:
9805 instance.os = self.op.os_name
9806
9807
9808 if self.op.osparams:
9809 instance.osparams = self.os_inst
9810 for key, val in self.op.osparams.iteritems():
9811 result.append(("os/%s" % key, val))
9812
9813 self.cfg.Update(instance, feedback_fn)
9814
9815 return result
9816
9817 _DISK_CONVERSIONS = {
9818 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9819 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9820 }
9821
9824 """Query the exports list
9825
9826 """
9827 REQ_BGL = False
9828
9837
9838 - def Exec(self, feedback_fn):
9839 """Compute the list of all the exported system images.
9840
9841 @rtype: dict
9842 @return: a dictionary with the structure node->(export-list)
9843 where export-list is a list of the instances exported on
9844 that node.
9845
9846 """
9847 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9848 rpcresult = self.rpc.call_export_list(self.nodes)
9849 result = {}
9850 for node in rpcresult:
9851 if rpcresult[node].fail_msg:
9852 result[node] = False
9853 else:
9854 result[node] = rpcresult[node].payload
9855
9856 return result
9857
9860 """Prepares an instance for an export and returns useful information.
9861
9862 """
9863 REQ_BGL = False
9864
9867
9880
9881 - def Exec(self, feedback_fn):
9882 """Prepares an instance for an export.
9883
9884 """
9885 instance = self.instance
9886
9887 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9888 salt = utils.GenerateSecret(8)
9889
9890 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9891 result = self.rpc.call_x509_cert_create(instance.primary_node,
9892 constants.RIE_CERT_VALIDITY)
9893 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9894
9895 (name, cert_pem) = result.payload
9896
9897 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9898 cert_pem)
9899
9900 return {
9901 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9902 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9903 salt),
9904 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9905 }
9906
9907 return None
9908
9911 """Export an instance to an image in the cluster.
9912
9913 """
9914 HPATH = "instance-export"
9915 HTYPE = constants.HTYPE_INSTANCE
9916 REQ_BGL = False
9917
9933
9948
9950 """Last minute lock declaration."""
9951
9952
9954 """Build hooks env.
9955
9956 This will run on the master, primary node and target node.
9957
9958 """
9959 env = {
9960 "EXPORT_MODE": self.op.mode,
9961 "EXPORT_NODE": self.op.target_node,
9962 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9963 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9964
9965 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9966 }
9967
9968 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9969
9970 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9971
9972 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9973 nl.append(self.op.target_node)
9974
9975 return env, nl, nl
9976
9978 """Check prerequisites.
9979
9980 This checks that the instance and node names are valid.
9981
9982 """
9983 instance_name = self.op.instance_name
9984
9985 self.instance = self.cfg.GetInstanceInfo(instance_name)
9986 assert self.instance is not None, \
9987 "Cannot retrieve locked instance %s" % self.op.instance_name
9988 _CheckNodeOnline(self, self.instance.primary_node)
9989
9990 if (self.op.remove_instance and self.instance.admin_up and
9991 not self.op.shutdown):
9992 raise errors.OpPrereqError("Cannot remove instance without shutting it"
9993 " down first")
9994
9995 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9996 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9997 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9998 assert self.dst_node is not None
9999
10000 _CheckNodeOnline(self, self.dst_node.name)
10001 _CheckNodeNotDrained(self, self.dst_node.name)
10002
10003 self._cds = None
10004 self.dest_disk_info = None
10005 self.dest_x509_ca = None
10006
10007 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10008 self.dst_node = None
10009
10010 if len(self.op.target_node) != len(self.instance.disks):
10011 raise errors.OpPrereqError(("Received destination information for %s"
10012 " disks, but instance %s has %s disks") %
10013 (len(self.op.target_node), instance_name,
10014 len(self.instance.disks)),
10015 errors.ECODE_INVAL)
10016
10017 cds = _GetClusterDomainSecret()
10018
10019
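# Verify that the X509 key name returned by the export preparation step was
# signed with the cluster domain secret before trusting it.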
10020 try:
10021 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10022 except (TypeError, ValueError), err:
10023 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10024
10025 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10026 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10027 errors.ECODE_INVAL)
10028
10029
10030 try:
10031 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10032 except OpenSSL.crypto.Error, err:
10033 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10034 (err, ), errors.ECODE_INVAL)
10035
10036 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10037 if errcode is not None:
10038 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10039 (msg, ), errors.ECODE_INVAL)
10040
10041 self.dest_x509_ca = cert
10042
10043
10044 disk_info = []
10045 for idx, disk_data in enumerate(self.op.target_node):
10046 try:
10047 (host, port, magic) = \
10048 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10049 except errors.GenericError, err:
10050 raise errors.OpPrereqError("Target info for disk %s: %s" %
10051 (idx, err), errors.ECODE_INVAL)
10052
10053 disk_info.append((host, port, magic))
10054
10055 assert len(disk_info) == len(self.op.target_node)
10056 self.dest_disk_info = disk_info
10057
10058 else:
10059 raise errors.ProgrammerError("Unhandled export mode %r" %
10060 self.op.mode)
10061
10062
10063
10064 for disk in self.instance.disks:
10065 if disk.dev_type == constants.LD_FILE:
10066 raise errors.OpPrereqError("Export not supported for instances with"
10067 " file-based disks", errors.ECODE_INVAL)
10068
10070 """Removes exports of current instance from all other nodes.
10071
10072 If an instance in a cluster with nodes A..D was exported to node C, its
10073 exports will be removed from the nodes A, B and D.
10074
10075 """
10076 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10077
10078 nodelist = self.cfg.GetNodeList()
10079 nodelist.remove(self.dst_node.name)
10080
10081
10082
10083
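# The export just created lives only on the destination node; remove any
# stale exports of this instance from all other nodes.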
10084 iname = self.instance.name
10085 if nodelist:
10086 feedback_fn("Removing old exports for instance %s" % iname)
10087 exportlist = self.rpc.call_export_list(nodelist)
10088 for node in exportlist:
10089 if exportlist[node].fail_msg:
10090 continue
10091 if iname in exportlist[node].payload:
10092 msg = self.rpc.call_export_remove(node, iname).fail_msg
10093 if msg:
10094 self.LogWarning("Could not remove older export for instance %s"
10095 " on node %s: %s", iname, node, msg)
10096
10097 - def Exec(self, feedback_fn):
10098 """Export an instance to an image in the cluster.
10099
10100 """
10101 assert self.op.mode in constants.EXPORT_MODES
10102
10103 instance = self.instance
10104 src_node = instance.primary_node
10105
10106 if self.op.shutdown:
10107
10108 feedback_fn("Shutting down instance %s" % instance.name)
10109 result = self.rpc.call_instance_shutdown(src_node, instance,
10110 self.op.shutdown_timeout)
10111
10112 result.Raise("Could not shutdown instance %s on"
10113 " node %s" % (instance.name, src_node))
10114
10115
10116
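# Set the disks' physical IDs for the source node before snapshotting and
# exporting them.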
10117 for disk in instance.disks:
10118 self.cfg.SetDiskID(disk, src_node)
10119
10120 activate_disks = (not instance.admin_up)
10121
10122 if activate_disks:
10123
10124 feedback_fn("Activating disks for %s" % instance.name)
10125 _StartInstanceDisks(self, instance, None)
10126
10127 try:
10128 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10129 instance)
10130
10131 helper.CreateSnapshots()
10132 try:
10133 if (self.op.shutdown and instance.admin_up and
10134 not self.op.remove_instance):
10135 assert not activate_disks
10136 feedback_fn("Starting instance %s" % instance.name)
10137 result = self.rpc.call_instance_start(src_node, instance, None, None)
10138 msg = result.fail_msg
10139 if msg:
10140 feedback_fn("Failed to start instance: %s" % msg)
10141 _ShutdownInstanceDisks(self, instance)
10142 raise errors.OpExecError("Could not start instance: %s" % msg)
10143
10144 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10145 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10146 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10147 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10148 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10149
10150 (key_name, _, _) = self.x509_key_name
10151
10152 dest_ca_pem = \
10153 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10154 self.dest_x509_ca)
10155
10156 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10157 key_name, dest_ca_pem,
10158 timeouts)
10159 finally:
10160 helper.Cleanup()
10161
10162
10163 assert len(dresults) == len(instance.disks)
10164 assert compat.all(isinstance(i, bool) for i in dresults), \
10165 "Not all results are boolean: %r" % dresults
10166
10167 finally:
10168 if activate_disks:
10169 feedback_fn("Deactivating disks for %s" % instance.name)
10170 _ShutdownInstanceDisks(self, instance)
10171
10172 if not (compat.all(dresults) and fin_resu):
10173 failures = []
10174 if not fin_resu:
10175 failures.append("export finalization")
10176 if not compat.all(dresults):
10177 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10178 if not dsk)
10179 failures.append("disk export: disk(s) %s" % fdsk)
10180
10181 raise errors.OpExecError("Export failed, errors in %s" %
10182 utils.CommaJoin(failures))
10183
10184
10185
10186
10187 if self.op.remove_instance:
10188 feedback_fn("Removing instance %s" % instance.name)
10189 _RemoveInstance(self, feedback_fn, instance,
10190 self.op.ignore_remove_failures)
10191
10192 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10193 self._CleanupExports(feedback_fn)
10194
10195 return fin_resu, dresults
10196
10199 """Remove exports related to the named instance.
10200
10201 """
10202 REQ_BGL = False
10203
10210
10211 - def Exec(self, feedback_fn):
10212 """Remove any export.
10213
10214 """
10215 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10216
10217
10218 fqdn_warn = False
10219 if not instance_name:
10220 fqdn_warn = True
10221 instance_name = self.op.instance_name
10222
10223 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10224 exportlist = self.rpc.call_export_list(locked_nodes)
10225 found = False
10226 for node in exportlist:
10227 msg = exportlist[node].fail_msg
10228 if msg:
10229 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10230 continue
10231 if instance_name in exportlist[node].payload:
10232 found = True
10233 result = self.rpc.call_export_remove(node, instance_name)
10234 msg = result.fail_msg
10235 if msg:
10236 logging.error("Could not remove export for instance %s"
10237 " on node %s: %s", instance_name, node, msg)
10238
10239 if fqdn_warn and not found:
10240 feedback_fn("Export not found. If trying to remove an export belonging"
10241 " to a deleted instance please use its Fully Qualified"
10242 " Domain Name.")
10243
10246 """Logical unit for creating node groups.
10247
10248 """
10249 HPATH = "group-add"
10250 HTYPE = constants.HTYPE_GROUP
10251 REQ_BGL = False
10252
10260
10280
10282 """Build hooks env.
10283
10284 """
10285 env = {
10286 "GROUP_NAME": self.op.group_name,
10287 }
10288 mn = self.cfg.GetMasterNode()
10289 return env, [mn], [mn]
10290
10291 - def Exec(self, feedback_fn):
10292 """Add the node group to the cluster.
10293
10294 """
10295 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10296 uuid=self.group_uuid,
10297 alloc_policy=self.op.alloc_policy,
10298 ndparams=self.op.ndparams)
10299
10300 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10301 del self.remove_locks[locking.LEVEL_NODEGROUP]
10302
10305 """Logical unit for assigning nodes to groups.
10306
10307 """
10308 REQ_BGL = False
10309
10322
10332
10334 """Check prerequisites.
10335
10336 """
10337 assert self.needed_locks[locking.LEVEL_NODEGROUP]
10338 assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) ==
10339 frozenset(self.op.nodes))
10340
10341 expected_locks = (set([self.group_uuid]) |
10342 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
10343 actual_locks = self.acquired_locks[locking.LEVEL_NODEGROUP]
10344 if actual_locks != expected_locks:
10345 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
10346 " current groups are '%s', used to be '%s'" %
10347 (utils.CommaJoin(expected_locks),
10348 utils.CommaJoin(actual_locks)))
10349
10350 self.node_data = self.cfg.GetAllNodesInfo()
10351 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10352 instance_data = self.cfg.GetAllInstancesInfo()
10353
10354 if self.group is None:
10355 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10356 (self.op.group_name, self.group_uuid))
10357
10358 (new_splits, previous_splits) = \
10359 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10360 for node in self.op.nodes],
10361 self.node_data, instance_data)
10362
10363 if new_splits:
10364 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10365
10366 if not self.op.force:
10367 raise errors.OpExecError("The following instances get split by this"
10368 " change and --force was not given: %s" %
10369 fmt_new_splits)
10370 else:
10371 self.LogWarning("This operation will split the following instances: %s",
10372 fmt_new_splits)
10373
10374 if previous_splits:
10375 self.LogWarning("In addition, these already-split instances continue"
10376 " to be split across groups: %s",
10377 utils.CommaJoin(utils.NiceSort(previous_splits)))
10378
10379 - def Exec(self, feedback_fn):
10380 """Assign nodes to a new group.
10381
10382 """
10383 for node in self.op.nodes:
10384 self.node_data[node].group = self.group_uuid
10385
10386
10387
10388
10389 self.cfg.Update(self.group, feedback_fn)
10390
10391 @staticmethod
10393 """Check for split instances after a node assignment.
10394
10395 This method considers a series of node assignments as an atomic operation,
10396 and returns information about split instances after applying the set of
10397 changes.
10398
10399 In particular, it returns information about newly split instances, and
10400 instances that were already split, and remain so after the change.
10401
10402 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
10403 considered.
10404
10405 @type changes: list of (node_name, new_group_uuid) pairs.
10406 @param changes: list of node assignments to consider.
10407 @param node_data: a dict with data for all nodes
10408 @param instance_data: a dict with all instances to consider
10409 @rtype: a two-tuple
10410 @return: a list of instances that were previously okay and become split
10411 as a consequence of this change, and a list of instances that were already
10412 split and that this change does not fix.
10413
10414 """
10415 changed_nodes = dict((node, group) for node, group in changes
10416 if node_data[node].group != group)
10417
10418 all_split_instances = set()
10419 previously_split_instances = set()
10420
10421 def InstanceNodes(instance):
10422 return [instance.primary_node] + list(instance.secondary_nodes)
10423
10424 for inst in instance_data.values():
10425 if inst.disk_template not in constants.DTS_NET_MIRROR:
10426 continue
10427
10428 instance_nodes = InstanceNodes(inst)
10429
10430 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10431 previously_split_instances.add(inst.name)
10432
10433 if len(set(changed_nodes.get(node, node_data[node].group)
10434 for node in instance_nodes)) > 1:
10435 all_split_instances.add(inst.name)
10436
10437 return (list(all_split_instances - previously_split_instances),
10438 list(previously_split_instances & all_split_instances))
10439
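# A standalone illustration of the split detection implemented above, using
# plain dicts in place of the configuration objects; all identifiers here are
# made up for the example.
def _FindSplitInstances(node_to_group, instance_to_nodes, changes):
  """node_to_group: node -> group; instance_to_nodes: instance -> node list;
  changes: node -> new group (only for nodes that actually move)."""
  already_split = set()
  split_after = set()
  for inst, nodes in instance_to_nodes.items():
    if len(set(node_to_group[n] for n in nodes)) > 1:
      already_split.add(inst)
    if len(set(changes.get(n, node_to_group[n]) for n in nodes)) > 1:
      split_after.add(inst)
  return (sorted(split_after - already_split),
          sorted(split_after & already_split))

# Moving node2 into group g2 splits inst1 between g1 and g2:
# _FindSplitInstances({"node1": "g1", "node2": "g1"},
#                     {"inst1": ["node1", "node2"]},
#                     {"node2": "g2"}) == (["inst1"], [])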
10442
10443 FIELDS = query.GROUP_FIELDS
10444
10446 lu.needed_locks = {}
10447
10448 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10449 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10450
10451 if not self.names:
10452 self.wanted = [name_to_uuid[name]
10453 for name in utils.NiceSort(name_to_uuid.keys())]
10454 else:
10455
10456 missing = []
10457 self.wanted = []
10458 all_uuid = frozenset(self._all_groups.keys())
10459
10460 for name in self.names:
10461 if name in all_uuid:
10462 self.wanted.append(name)
10463 elif name in name_to_uuid:
10464 self.wanted.append(name_to_uuid[name])
10465 else:
10466 missing.append(name)
10467
10468 if missing:
10469 raise errors.OpPrereqError("Some groups do not exist: %s" %
10470 utils.CommaJoin(missing),
10471 errors.ECODE_NOENT)
10472
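# A small standalone sketch of the name/UUID resolution performed above, with
# a plain dict standing in for the configuration; identifiers are illustrative.
def _ResolveGroupNames(all_groups, names):
  """all_groups: uuid -> group name; names: requested group names or UUIDs."""
  name_to_uuid = dict((name, uuid) for (uuid, name) in all_groups.items())
  wanted = []
  missing = []
  for name in names:
    if name in all_groups:
      wanted.append(name)              # already a UUID
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)

# _ResolveGroupNames({"uuid-1": "default"}, ["default", "nosuch"]) returns
# (["uuid-1"], ["nosuch"]); a non-empty second element corresponds to the
# OpPrereqError raised above.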
10475
10477 """Computes the list of node groups and their attributes.
10478
10479 """
10480 do_nodes = query.GQ_NODE in self.requested_data
10481 do_instances = query.GQ_INST in self.requested_data
10482
10483 group_to_nodes = None
10484 group_to_instances = None
10485
10486
10487
10488
10489
10490
10491 if do_nodes or do_instances:
10492 all_nodes = lu.cfg.GetAllNodesInfo()
10493 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10494 node_to_group = {}
10495
10496 for node in all_nodes.values():
10497 if node.group in group_to_nodes:
10498 group_to_nodes[node.group].append(node.name)
10499 node_to_group[node.name] = node.group
10500
10501 if do_instances:
10502 all_instances = lu.cfg.GetAllInstancesInfo()
10503 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10504
10505 for instance in all_instances.values():
10506 node = instance.primary_node
10507 if node in node_to_group:
10508 group_to_instances[node_to_group[node]].append(instance.name)
10509
10510 if not do_nodes:
10511
10512 group_to_nodes = None
10513
10514 return query.GroupQueryData([self._all_groups[uuid]
10515 for uuid in self.wanted],
10516 group_to_nodes, group_to_instances)
10517
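# A standalone sketch of the reverse mappings built above: nodes are grouped
# by their group attribute, and instances are attributed to the group of their
# primary node. Plain dicts stand in for the configuration objects.
def _GroupMembership(node_groups, instance_pnodes, wanted_groups):
  """node_groups: node -> group uuid; instance_pnodes: instance -> primary node."""
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  node_to_group = {}
  for node, group in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
      node_to_group[node] = group
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for inst, pnode in instance_pnodes.items():
    if pnode in node_to_group:
      group_to_instances[node_to_group[pnode]].append(inst)
  return (group_to_nodes, group_to_instances)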
10520 """Logical unit for querying node groups.
10521
10522 """
10523 REQ_BGL = False
10524
10526 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10527
10530
10531 - def Exec(self, feedback_fn):
10533
10536 """Modifies the parameters of a node group.
10537
10538 """
10539 HPATH = "group-modify"
10540 HTYPE = constants.HTYPE_GROUP
10541 REQ_BGL = False
10542
10544 all_changes = [
10545 self.op.ndparams,
10546 self.op.alloc_policy,
10547 ]
10548
10549 if all_changes.count(None) == len(all_changes):
10550 raise errors.OpPrereqError("Please pass at least one modification",
10551 errors.ECODE_INVAL)
10552
10560
10562 """Check prerequisites.
10563
10564 """
10565 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10566
10567 if self.group is None:
10568 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10569 (self.op.group_name, self.group_uuid))
10570
10571 if self.op.ndparams:
10572 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10573 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10574 self.new_ndparams = new_ndparams
10575
10577 """Build hooks env.
10578
10579 """
10580 env = {
10581 "GROUP_NAME": self.op.group_name,
10582 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10583 }
10584 mn = self.cfg.GetMasterNode()
10585 return env, [mn], [mn]
10586
10587 - def Exec(self, feedback_fn):
10588 """Modifies the node group.
10589
10590 """
10591 result = []
10592
10593 if self.op.ndparams:
10594 self.group.ndparams = self.new_ndparams
10595 result.append(("ndparams", str(self.group.ndparams)))
10596
10597 if self.op.alloc_policy:
10598 self.group.alloc_policy = self.op.alloc_policy
10599
10600 self.cfg.Update(self.group, feedback_fn)
10601 return result
10602
10606 HPATH = "group-remove"
10607 HTYPE = constants.HTYPE_GROUP
10608 REQ_BGL = False
10609
10616
10618 """Check prerequisites.
10619
10620 This checks that the given group name exists as a node group, that it is
10621 empty (i.e., contains no nodes), and that it is not the last group of the
10622 cluster.
10623
10624 """
10625
10626 group_nodes = [node.name
10627 for node in self.cfg.GetAllNodesInfo().values()
10628 if node.group == self.group_uuid]
10629
10630 if group_nodes:
10631 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10632 " nodes: %s" %
10633 (self.op.group_name,
10634 utils.CommaJoin(utils.NiceSort(group_nodes))),
10635 errors.ECODE_STATE)
10636
10637
10638 if len(self.cfg.GetNodeGroupList()) == 1:
10639 raise errors.OpPrereqError("Group '%s' is the only group,"
10640 " cannot be removed" %
10641 self.op.group_name,
10642 errors.ECODE_STATE)
10643
10645 """Build hooks env.
10646
10647 """
10648 env = {
10649 "GROUP_NAME": self.op.group_name,
10650 }
10651 mn = self.cfg.GetMasterNode()
10652 return env, [mn], [mn]
10653
10654 - def Exec(self, feedback_fn):
10665
10668 HPATH = "group-rename"
10669 HTYPE = constants.HTYPE_GROUP
10670 REQ_BGL = False
10671
10679
10681 """Check prerequisites.
10682
10683 This checks that the given old_name exists as a node group, and that
10684 new_name doesn't.
10685
10686 """
10687 try:
10688 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10689 except errors.OpPrereqError:
10690 pass
10691 else:
10692 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10693 " node group (UUID: %s)" %
10694 (self.op.new_name, new_name_uuid),
10695 errors.ECODE_EXISTS)
10696
10698 """Build hooks env.
10699
10700 """
10701 env = {
10702 "OLD_NAME": self.op.old_name,
10703 "NEW_NAME": self.op.new_name,
10704 }
10705
10706 mn = self.cfg.GetMasterNode()
10707 all_nodes = self.cfg.GetAllNodesInfo()
10708 run_nodes = [mn]
10709 all_nodes.pop(mn, None)
10710
10711 for node in all_nodes.values():
10712 if node.group == self.group_uuid:
10713 run_nodes.append(node.name)
10714
10715 return env, run_nodes, run_nodes
10716
10717 - def Exec(self, feedback_fn):
10718 """Rename the node group.
10719
10720 """
10721 group = self.cfg.GetNodeGroup(self.group_uuid)
10722
10723 if group is None:
10724 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10725 (self.op.old_name, self.group_uuid))
10726
10727 group.name = self.op.new_name
10728 self.cfg.Update(group, feedback_fn)
10729
10730 return self.op.new_name
10731
10734 """Generic tags LU.
10735
10736 This is an abstract class which is the parent of all the other tags LUs.
10737
10738 """
10739
10748
10749
10750
10751
10765
10784
10823
10851
10885
10888 """Sleep for a specified amount of time.
10889
10890 This LU sleeps on the master and/or nodes for a specified amount of
10891 time.
10892
10893 """
10894 REQ_BGL = False
10895
10897 """Expand names and set required locks.
10898
10899 This expands the node list, if any.
10900
10901 """
10902 self.needed_locks = {}
10903 if self.op.on_nodes:
10904
10905
10906
10907 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10908 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10909
10911 """Do the actual sleep.
10912
10913 """
10914 if self.op.on_master:
10915 if not utils.TestDelay(self.op.duration):
10916 raise errors.OpExecError("Error during master delay test")
10917 if self.op.on_nodes:
10918 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10919 for node, node_result in result.items():
10920 node_result.Raise("Failure during rpc call to node %s" % node)
10921
10922 - def Exec(self, feedback_fn):
10923 """Execute the test delay opcode, with the wanted repetitions.
10924
10925 """
10926 if self.op.repeat == 0:
10927 self._TestDelay()
10928 else:
10929 top_value = self.op.repeat - 1
10930 for i in range(self.op.repeat):
10931 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10932 self._TestDelay()
10933
10936 """Utility LU to test some aspects of the job queue.
10937
10938 """
10939 REQ_BGL = False
10940
10941
10942
10943 _CLIENT_CONNECT_TIMEOUT = 20.0
10944 _CLIENT_CONFIRM_TIMEOUT = 60.0
10945
10946 @classmethod
10948 """Opens a Unix socket and waits for another program to connect.
10949
10950 @type cb: callable
10951 @param cb: Callback to send socket name to client
10952 @type errcls: class
10953 @param errcls: Exception class to use for errors
10954
10955 """
10956
10957
10958
10959 tmpdir = tempfile.mkdtemp()
10960 try:
10961 tmpsock = utils.PathJoin(tmpdir, "sock")
10962
10963 logging.debug("Creating temporary socket at %s", tmpsock)
10964 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10965 try:
10966 sock.bind(tmpsock)
10967 sock.listen(1)
10968
10969
10970 cb(tmpsock)
10971
10972
10973 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10974 try:
10975 (conn, _) = sock.accept()
10976 except socket.error, err:
10977 raise errcls("Client didn't connect in time (%s)" % err)
10978 finally:
10979 sock.close()
10980 finally:
10981
10982 shutil.rmtree(tmpdir)
10983
10984
10985 try:
10986 try:
10987
10988
10989 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10990 conn.recv(1)
10991 except socket.error, err:
10992 raise errcls("Client failed to confirm notification (%s)" % err)
10993 finally:
10994 conn.close()
10995
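# For reference, a minimal sketch of the client side of the handshake
# implemented above: it connects to the announced socket path and sends a
# single byte, which satisfies the conn.recv(1) confirmation. The helper name
# and the timeout value are assumptions for the example; "socket" is already
# imported at module level.
def _ConfirmNotificationSketch(sockname, timeout=10.0):
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)
    sock.send("\0")
  finally:
    sock.close()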
10997 """Sends a notification to the client.
10998
10999 @type test: string
11000 @param test: Test name
11001 @param arg: Test argument (depends on test)
11002 @type sockname: string
11003 @param sockname: Socket path
11004
11005 """
11006 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11007
11008 - def _Notify(self, prereq, test, arg):
11009 """Notifies the client of a test.
11010
11011 @type prereq: bool
11012 @param prereq: Whether this is a prereq-phase test
11013 @type test: string
11014 @param test: Test name
11015 @param arg: Test argument (depends on test)
11016
11017 """
11018 if prereq:
11019 errcls = errors.OpPrereqError
11020 else:
11021 errcls = errors.OpExecError
11022
11023 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11024 test, arg),
11025 errcls)
11026
11028 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11029 self.expandnames_calls = 0
11030
11047
11048 - def Exec(self, feedback_fn):
11069
11072 """IAllocator framework.
11073
11074 An IAllocator instance has four sets of attributes:
11075 - cfg that is needed to query the cluster
11076 - input data (all members of the _KEYS class attribute are required)
11077 - four buffer attributes (in|out_data|text) that represent the
11078 input (to the external script) in text and data structure format,
11079 and the output from it, again in two formats
11080 - the result variables from the script (success, info, nodes) for
11081 easy usage
11082
11083 """
11084
11085
11086 _ALLO_KEYS = [
11087 "name", "mem_size", "disks", "disk_template",
11088 "os", "tags", "nics", "vcpus", "hypervisor",
11089 ]
11090 _RELO_KEYS = [
11091 "name", "relocate_from",
11092 ]
11093 _EVAC_KEYS = [
11094 "evac_nodes",
11095 ]
11096
11097 - def __init__(self, cfg, rpc, mode, **kwargs):
11098 self.cfg = cfg
11099 self.rpc = rpc
11100
11101 self.in_text = self.out_text = self.in_data = self.out_data = None
11102
11103 self.mode = mode
11104 self.mem_size = self.disks = self.disk_template = None
11105 self.os = self.tags = self.nics = self.vcpus = None
11106 self.hypervisor = None
11107 self.relocate_from = None
11108 self.name = None
11109 self.evac_nodes = None
11110
11111 self.required_nodes = None
11112
11113 self.success = self.info = self.result = None
11114 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11115 keyset = self._ALLO_KEYS
11116 fn = self._AddNewInstance
11117 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11118 keyset = self._RELO_KEYS
11119 fn = self._AddRelocateInstance
11120 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11121 keyset = self._EVAC_KEYS
11122 fn = self._AddEvacuateNodes
11123 else:
11124 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11125 " IAllocator" % self.mode)
11126 for key in kwargs:
11127 if key not in keyset:
11128 raise errors.ProgrammerError("Invalid input parameter '%s' to"
11129 " IAllocator" % key)
11130 setattr(self, key, kwargs[key])
11131
11132 for key in keyset:
11133 if key not in kwargs:
11134 raise errors.ProgrammerError("Missing input parameter '%s' to"
11135 " IAllocator" % key)
11136 self._BuildInputData(fn)
11137
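# A standalone sketch of the keyword validation the constructor enforces,
# decoupled from the class; the function name is illustrative only.
def _CheckKeywordSet(keyset, kwargs):
  """Raise ProgrammerError unless kwargs contains exactly the keys in keyset."""
  for key in kwargs:
    if key not in keyset:
      raise errors.ProgrammerError("Invalid input parameter '%s'" % key)
  for key in keyset:
    if key not in kwargs:
      raise errors.ProgrammerError("Missing input parameter '%s'" % key)

# For instance, relocation mode requires exactly the _RELO_KEYS:
# _CheckKeywordSet(IAllocator._RELO_KEYS,
#                  {"name": "inst1.example.com",
#                   "relocate_from": ["node2.example.com"]})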
11139 """Compute the generic allocator input data.
11140
11141 This is the data that is independent of the actual operation.
11142
11143 """
11144 cfg = self.cfg
11145 cluster_info = cfg.GetClusterInfo()
11146
11147 data = {
11148 "version": constants.IALLOCATOR_VERSION,
11149 "cluster_name": cfg.GetClusterName(),
11150 "cluster_tags": list(cluster_info.GetTags()),
11151 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11152
11153 }
11154 ninfo = cfg.GetAllNodesInfo()
11155 iinfo = cfg.GetAllInstancesInfo().values()
11156 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11157
11158
11159 node_list = [n.name for n in ninfo.values() if n.vm_capable]
11160
11161 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11162 hypervisor_name = self.hypervisor
11163 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11164 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11165 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11166 hypervisor_name = cluster_info.enabled_hypervisors[0]
11167
11168 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11169 hypervisor_name)
11170 node_iinfo = \
11171 self.rpc.call_all_instances_info(node_list,
11172 cluster_info.enabled_hypervisors)
11173
11174 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11175
11176 config_ndata = self._ComputeBasicNodeData(ninfo)
11177 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11178 i_list, config_ndata)
11179 assert len(data["nodes"]) == len(ninfo), \
11180 "Incomplete node data computed"
11181
11182 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11183
11184 self.in_data = data
11185
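# For orientation, a minimal, invented example of the input document assembled
# above; only a subset of the real per-group keys is shown, and the "nodes"
# and "instances" sections are produced by the _Compute* helpers that follow.
# The final document also carries a mode-specific "request" section, added by
# _BuildInputData.
_EXAMPLE_IALLOCATOR_INPUT = {
  "version": constants.IALLOCATOR_VERSION,
  "cluster_name": "cluster.example.com",
  "cluster_tags": [],
  "enabled_hypervisors": ["xen-pvm"],
  "nodegroups": {
    "0e6e64e7-0000-0000-0000-000000000000": {
      "name": "default",
      "alloc_policy": "preferred",
    },
  },
  "nodes": {},      # node name -> node data dict
  "instances": {},  # instance name -> instance data dict
}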
11186 @staticmethod
11188 """Compute node groups data.
11189
11190 """
11191 ng = {}
11192 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11193 ng[guuid] = {
11194 "name": gdata.name,
11195 "alloc_policy": gdata.alloc_policy,
11196 }
11197 return ng
11198
11199 @staticmethod
11201 """Compute global node data.
11202
11203 @rtype: dict
11204 @returns: a dict of name: (node dict, node config)
11205
11206 """
11207 node_results = {}
11208 for ninfo in node_cfg.values():
11209
11210 pnr = {
11211 "tags": list(ninfo.GetTags()),
11212 "primary_ip": ninfo.primary_ip,
11213 "secondary_ip": ninfo.secondary_ip,
11214 "offline": ninfo.offline,
11215 "drained": ninfo.drained,
11216 "master_candidate": ninfo.master_candidate,
11217 "group": ninfo.group,
11218 "master_capable": ninfo.master_capable,
11219 "vm_capable": ninfo.vm_capable,
11220 }
11221
11222 node_results[ninfo.name] = pnr
11223
11224 return node_results
11225
11226 @staticmethod
11229 """Compute global node data.
11230
11231 @param node_results: the basic node structures as filled from the config
11232
11233 """
11234
11235 node_results = dict(node_results)
11236 for nname, nresult in node_data.items():
11237 assert nname in node_results, "Missing basic data for node %s" % nname
11238 ninfo = node_cfg[nname]
11239
11240 if not (ninfo.offline or ninfo.drained):
11241 nresult.Raise("Can't get data for node %s" % nname)
11242 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11243 nname)
11244 remote_info = nresult.payload
11245
11246 for attr in ['memory_total', 'memory_free', 'memory_dom0',
11247 'vg_size', 'vg_free', 'cpu_total']:
11248 if attr not in remote_info:
11249 raise errors.OpExecError("Node '%s' didn't return attribute"
11250 " '%s'" % (nname, attr))
11251 if not isinstance(remote_info[attr], int):
11252 raise errors.OpExecError("Node '%s' returned invalid value"
11253 " for '%s': %s" %
11254 (nname, attr, remote_info[attr]))
11255
11256 i_p_mem = i_p_up_mem = 0
11257 for iinfo, beinfo in i_list:
11258 if iinfo.primary_node == nname:
11259 i_p_mem += beinfo[constants.BE_MEMORY]
11260 if iinfo.name not in node_iinfo[nname].payload:
11261 i_used_mem = 0
11262 else:
11263 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11264 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11265 remote_info['memory_free'] -= max(0, i_mem_diff)
11266
11267 if iinfo.admin_up:
11268 i_p_up_mem += beinfo[constants.BE_MEMORY]
11269
11270
11271 pnr_dyn = {
11272 "total_memory": remote_info['memory_total'],
11273 "reserved_memory": remote_info['memory_dom0'],
11274 "free_memory": remote_info['memory_free'],
11275 "total_disk": remote_info['vg_size'],
11276 "free_disk": remote_info['vg_free'],
11277 "total_cpus": remote_info['cpu_total'],
11278 "i_pri_memory": i_p_mem,
11279 "i_pri_up_memory": i_p_up_mem,
11280 }
11281 pnr_dyn.update(node_results[nname])
11282 node_results[nname] = pnr_dyn
11283
11284 return node_results
11285
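# A standalone sketch of the free-memory correction applied above: memory an
# instance is entitled to (its BE_MEMORY backend parameter) but does not
# currently use on its primary node is subtracted from the node's reported
# free memory, so that the allocator does not over-commit it. The helper name
# is illustrative only.
def _AdjustFreeMemory(reported_free, entitled_mem, used_mem):
  """Return the free memory left after reserving an instance's entitlement."""
  return reported_free - max(0, entitled_mem - used_mem)

# A node reports 2048 MiB free; an instance entitled to 512 MiB currently uses
# 300 MiB, so 212 MiB more are reserved: _AdjustFreeMemory(2048, 512, 300) == 1836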
11286 @staticmethod
11288 """Compute global instance data.
11289
11290 """
11291 instance_data = {}
11292 for iinfo, beinfo in i_list:
11293 nic_data = []
11294 for nic in iinfo.nics:
11295 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11296 nic_dict = {"mac": nic.mac,
11297 "ip": nic.ip,
11298 "mode": filled_params[constants.NIC_MODE],
11299 "link": filled_params[constants.NIC_LINK],
11300 }
11301 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11302 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11303 nic_data.append(nic_dict)
11304 pir = {
11305 "tags": list(iinfo.GetTags()),
11306 "admin_up": iinfo.admin_up,
11307 "vcpus": beinfo[constants.BE_VCPUS],
11308 "memory": beinfo[constants.BE_MEMORY],
11309 "os": iinfo.os,
11310 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11311 "nics": nic_data,
11312 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
11313 "disk_template": iinfo.disk_template,
11314 "hypervisor": iinfo.hypervisor,
11315 }
11316 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11317 pir["disks"])
11318 instance_data[iinfo.name] = pir
11319
11320 return instance_data
11321
11323 """Add new instance data to allocator structure.
11324
11325 This in combination with _AllocatorGetClusterData will create the
11326 correct structure needed as input for the allocator.
11327
11328 The checks for the completeness of the opcode must have already been
11329 done.
11330
11331 """
11332 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11333
11334 if self.disk_template in constants.DTS_NET_MIRROR:
11335 self.required_nodes = 2
11336 else:
11337 self.required_nodes = 1
11338 request = {
11339 "name": self.name,
11340 "disk_template": self.disk_template,
11341 "tags": self.tags,
11342 "os": self.os,
11343 "vcpus": self.vcpus,
11344 "memory": self.mem_size,
11345 "disks": self.disks,
11346 "disk_space_total": disk_space,
11347 "nics": self.nics,
11348 "required_nodes": self.required_nodes,
11349 }
11350 return request
11351
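# A simplified, standalone sketch of the allocation request built above; the
# real code computes "disk_space_total" via _ComputeDiskSize, which also adds
# template-specific overhead, whereas this sketch uses a plain sum, and the
# default mirrored-template set is only a stand-in.
def _SketchAllocRequest(name, disk_template, disks, mem_size, vcpus,
                        mirrored_templates=frozenset(["drbd"])):
  if disk_template in mirrored_templates:
    required_nodes = 2
  else:
    required_nodes = 1
  return {
    "name": name,
    "disk_template": disk_template,
    "memory": mem_size,
    "vcpus": vcpus,
    "disks": disks,
    "disk_space_total": sum(d["size"] for d in disks),
    "required_nodes": required_nodes,
  }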
11353 """Add relocate instance data to allocator structure.
11354
11355 This in combination with _IAllocatorGetClusterData will create the
11356 correct structure needed as input for the allocator.
11357
11358 The checks for the completeness of the opcode must have already been
11359 done.
11360
11361 """
11362 instance = self.cfg.GetInstanceInfo(self.name)
11363 if instance is None:
11364 raise errors.ProgrammerError("Unknown instance '%s' passed to"
11365 " IAllocator" % self.name)
11366
11367 if instance.disk_template not in constants.DTS_NET_MIRROR:
11368 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11369 errors.ECODE_INVAL)
11370
11371 if len(instance.secondary_nodes) != 1:
11372 raise errors.OpPrereqError("Instance has not exactly one secondary node",
11373 errors.ECODE_STATE)
11374
11375 self.required_nodes = 1
11376 disk_sizes = [{'size': disk.size} for disk in instance.disks]
11377 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11378
11379 request = {
11380 "name": self.name,
11381 "disk_space_total": disk_space,
11382 "required_nodes": self.required_nodes,
11383 "relocate_from": self.relocate_from,
11384 }
11385 return request
11386
11388 """Add evacuate nodes data to allocator structure.
11389
11390 """
11391 request = {
11392 "evac_nodes": self.evac_nodes
11393 }
11394 return request
11395
11407
11408 - def Run(self, name, validate=True, call_fn=None):
11409 """Run an instance allocator and return the results.
11410
11411 """
11412 if call_fn is None:
11413 call_fn = self.rpc.call_iallocator_runner
11414
11415 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11416 result.Raise("Failure while running the iallocator script")
11417
11418 self.out_text = result.payload
11419 if validate:
11420 self._ValidateResult()
11421
11423 """Process the allocator results.
11424
11425 This will process the allocator output and, if successful, save the
11426 result in self.out_data and the other instance attributes.
11427
11428 """
11429 try:
11430 rdict = serializer.Load(self.out_text)
11431 except Exception, err:
11432 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11433
11434 if not isinstance(rdict, dict):
11435 raise errors.OpExecError("Can't parse iallocator results: not a dict")
11436
11437
11438 if "nodes" in rdict and "result" not in rdict:
11439 rdict["result"] = rdict["nodes"]
11440 del rdict["nodes"]
11441
11442 for key in "success", "info", "result":
11443 if key not in rdict:
11444 raise errors.OpExecError("Can't parse iallocator results:"
11445 " missing key '%s'" % key)
11446 setattr(self, key, rdict[key])
11447
11448 if not isinstance(rdict["result"], list):
11449 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11450 " is not a list")
11451 self.out_data = rdict
11452
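# A hand-written example of the reply format accepted above; replies from
# older allocators that still use the legacy "nodes" key are also accepted
# and renamed to "result" by the code above. The node names are invented.
_EXAMPLE_IALLOCATOR_REPLY = (
  '{"success": true,'
  ' "info": "allocation successful",'
  ' "result": ["node1.example.com", "node2.example.com"]}'
)
# serializer.Load(_EXAMPLE_IALLOCATOR_REPLY) yields a dict that passes the
# "success"/"info"/"result" checks performed by _ValidateResult.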
11455 """Run allocator tests.
11456
11457 This LU runs the allocator tests
11458
11459 """
11461 """Check prerequisites.
11462
11463 This checks the opcode parameters depending on the direction and the mode of the test.
11464
11465 """
11466 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11467 for attr in ["mem_size", "disks", "disk_template",
11468 "os", "tags", "nics", "vcpus"]:
11469 if not hasattr(self.op, attr):
11470 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
11471 attr, errors.ECODE_INVAL)
11472 iname = self.cfg.ExpandInstanceName(self.op.name)
11473 if iname is not None:
11474 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
11475 iname, errors.ECODE_EXISTS)
11476 if not isinstance(self.op.nics, list):
11477 raise errors.OpPrereqError("Invalid parameter 'nics'",
11478 errors.ECODE_INVAL)
11479 if not isinstance(self.op.disks, list):
11480 raise errors.OpPrereqError("Invalid parameter 'disks'",
11481 errors.ECODE_INVAL)
11482 for row in self.op.disks:
11483 if (not isinstance(row, dict) or
11484 "size" not in row or
11485 not isinstance(row["size"], int) or
11486 "mode" not in row or
11487 row["mode"] not in ['r', 'w']):
11488 raise errors.OpPrereqError("Invalid contents of the 'disks'"
11489 " parameter", errors.ECODE_INVAL)
11490 if self.op.hypervisor is None:
11491 self.op.hypervisor = self.cfg.GetHypervisorType()
11492 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11493 fname = _ExpandInstanceName(self.cfg, self.op.name)
11494 self.op.name = fname
11495 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
11496 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11497 if not hasattr(self.op, "evac_nodes"):
11498 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
11499 " opcode input", errors.ECODE_INVAL)
11500 else:
11501 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
11502 self.op.mode, errors.ECODE_INVAL)
11503
11504 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
11505 if self.op.allocator is None:
11506 raise errors.OpPrereqError("Missing allocator name",
11507 errors.ECODE_INVAL)
11508 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
11509 raise errors.OpPrereqError("Wrong allocator test '%s'" %
11510 self.op.direction, errors.ECODE_INVAL)
11511
11512 - def Exec(self, feedback_fn):
11513 """Run the allocator test.
11514
11515 """
11516 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11517 ial = IAllocator(self.cfg, self.rpc,
11518 mode=self.op.mode,
11519 name=self.op.name,
11520 mem_size=self.op.mem_size,
11521 disks=self.op.disks,
11522 disk_template=self.op.disk_template,
11523 os=self.op.os,
11524 tags=self.op.tags,
11525 nics=self.op.nics,
11526 vcpus=self.op.vcpus,
11527 hypervisor=self.op.hypervisor,
11528 )
11529 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11530 ial = IAllocator(self.cfg, self.rpc,
11531 mode=self.op.mode,
11532 name=self.op.name,
11533 relocate_from=list(self.relocate_from),
11534 )
11535 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11536 ial = IAllocator(self.cfg, self.rpc,
11537 mode=self.op.mode,
11538 evac_nodes=self.op.evac_nodes)
11539 else:
11540 raise errors.ProgrammerError("Uncatched mode %s in"
11541 " LUTestAllocator.Exec", self.op.mode)
11542
11543 if self.op.direction == constants.IALLOCATOR_DIR_IN:
11544 result = ial.in_text
11545 else:
11546 ial.Run(self.op.allocator, validate=False)
11547 result = ial.out_text
11548 return result
11549
11550
11551
11552 _QUERY_IMPL = {
11553 constants.QR_INSTANCE: _InstanceQuery,
11554 constants.QR_NODE: _NodeQuery,
11555 constants.QR_GROUP: _GroupQuery,
11556 }
11560 """Returns the implemtnation for a query type.
11561
11562 @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11563
11564 """
11565 try:
11566 return _QUERY_IMPL[name]
11567 except KeyError:
11568 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11569 errors.ECODE_INVAL)
11570
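# An illustrative lookup against the dispatch table above; the resource names
# are the QR_* constants used as keys in _QUERY_IMPL, and unknown names are
# surfaced by the helper above as OpPrereqError (ECODE_INVAL) rather than as a
# bare KeyError. The wrapper name is an assumption for the example.
def _GetGroupQueryClassSketch():
  """Convenience sketch returning the query implementation for node groups."""
  return _QUERY_IMPL[constants.QR_GROUP]

# The returned class is instantiated the way the LUs in this module do it,
# e.g. _GroupQuery(names, output_fields, False).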