
Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable-msg=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import platform 
   36  import logging 
   37  import copy 
   38  import OpenSSL 
   39  import socket 
   40  import tempfile 
   41  import shutil 
   42   
   43  from ganeti import ssh 
   44  from ganeti import utils 
   45  from ganeti import errors 
   46  from ganeti import hypervisor 
   47  from ganeti import locking 
   48  from ganeti import constants 
   49  from ganeti import objects 
   50  from ganeti import serializer 
   51  from ganeti import ssconf 
   52  from ganeti import uidpool 
   53  from ganeti import compat 
   54  from ganeti import masterd 
   55  from ganeti import netutils 
   56   
   57  import ganeti.masterd.instance # pylint: disable-msg=W0611 
58 59 60 # Modifiable default values; need to define these here before the 61 # actual LUs 62 63 -def _EmptyList():
64 """Returns an empty list. 65 66 """ 67 return []
68
69 70 -def _EmptyDict():
71 """Returns an empty dict. 72 73 """ 74 return {}
75 76 77 #: The without-default default value 78 _NoDefault = object() 79 80 81 #: The no-type (value too complex to check in the type system) 82 _NoType = object()
83 84 85 # Some basic types 86 -def _TNotNone(val):
87 """Checks if the given value is not None. 88 89 """ 90 return val is not None
91
92 93 -def _TNone(val):
94 """Checks if the given value is None. 95 96 """ 97 return val is None
98
99 100 -def _TBool(val):
101 """Checks if the given value is a boolean. 102 103 """ 104 return isinstance(val, bool)
105
106 107 -def _TInt(val):
108 """Checks if the given value is an integer. 109 110 """ 111 return isinstance(val, int)
112
113 114 -def _TFloat(val):
115 """Checks if the given value is a float. 116 117 """ 118 return isinstance(val, float)
119
120 121 -def _TString(val):
122 """Checks if the given value is a string. 123 124 """ 125 return isinstance(val, basestring)
126
127 128 -def _TTrue(val):
129 """Checks if a given value evaluates to a boolean True value. 130 131 """ 132 return bool(val)
133
134 135 -def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list. 137 138 """ 139 return lambda val: val in target_list
140
141 142 # Container types 143 -def _TList(val):
144 """Checks if the given value is a list. 145 146 """ 147 return isinstance(val, list)
148
149 150 -def _TDict(val):
151 """Checks if the given value is a dictionary. 152 153 """ 154 return isinstance(val, dict)
155
156 157 -def _TIsLength(size):
158 """Checks if the given container is of the given size. 159 160 """ 161 return lambda container: len(container) == size
162
163 164 # Combinator types 165 -def _TAnd(*args):
166 """Combine multiple functions using an AND operation. 167 168 """ 169 def fn(val): 170 return compat.all(t(val) for t in args)
171 return fn 172
173 174 -def _TOr(*args):
175 """Combine multiple functions using an OR operation. 176 177 """ 178 def fn(val): 179 return compat.any(t(val) for t in args)
180 return fn 181
182 183 -def _TMap(fn, test):
184 """Checks that a modified version of the argument passes the given test. 185 186 """ 187 return lambda val: test(fn(val))
188 189 190 # Type aliases 191 192 #: a non-empty string 193 _TNonEmptyString = _TAnd(_TString, _TTrue) 194 195 196 #: a maybe non-empty string 197 _TMaybeString = _TOr(_TNonEmptyString, _TNone) 198 199 200 #: a maybe boolean (bool or none) 201 _TMaybeBool = _TOr(_TBool, _TNone) 202 203 204 #: a positive integer 205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0) 206 207 #: a strictly positive integer 208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
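# Illustrative usage sketch (not part of the original module): the checks
# above are plain callables returning True/False, so they can be evaluated
# or composed directly, e.g.:
#
#   _TNonEmptyString("node1.example.com")   -> True
#   _TNonEmptyString("")                    -> False
#   _TMaybeBool(None)                       -> True
#   _TPositiveInt(0)                        -> True
#   _TStrictPositiveInt(0)                  -> False
#   _TAnd(_TList, _TIsLength(2))([1, 2])    -> True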
209 210 211 -def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type. 213 214 """ 215 return _TAnd(_TList, 216 lambda lst: compat.all(my_type(v) for v in lst))
217
218 219 -def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values. 221 222 """ 223 return _TAnd(_TDict, 224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys()) 225 and compat.all(val_type(v) 226 for v in my_dict.values())))
227 228 229 # Common opcode attributes 230 231 #: output fields for a query operation 232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)) 233 234 235 #: the shutdown timeout 236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, 237 _TPositiveInt) 238 239 #: the force parameter 240 _PForce = ("force", False, _TBool) 241 242 #: a required instance name (for single-instance LUs) 243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString) 244 245 246 #: a required node name (for single-node LUs) 247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString) 248 249 #: the migration type (live/non-live) 250 _PMigrationMode = ("mode", None, _TOr(_TNone, 251 _TElemOf(constants.HT_MIGRATION_MODES))) 252 253 #: the obsolete 'live' mode (boolean) 254 _PMigrationLive = ("live", None, _TMaybeBool)
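# Illustrative examples (not part of the original module): the container
# checks compose the same way, and the _P* tuples above are simply
# (name, default, check) entries ready to be reused in _OP_PARAMS lists:
#
#   _TListOf(_TNonEmptyString)(["node1", "node2"])          -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])               -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"mem": 128}) -> True
#
#   name, default, check = _PForce    # ("force", False, _TBool)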
255 256 257 # End types 258 -class LogicalUnit(object):
259 """Logical Unit base class. 260 261 Subclasses must follow these rules: 262 - implement ExpandNames 263 - implement CheckPrereq (except when tasklets are used) 264 - implement Exec (except when tasklets are used) 265 - implement BuildHooksEnv 266 - redefine HPATH and HTYPE 267 - optionally redefine their run requirements: 268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 269 270 Note that all commands require root permissions. 271 272 @ivar dry_run_result: the value (if any) that will be returned to the caller 273 in dry-run mode (signalled by the opcode's dry_run parameter) 274 @cvar _OP_PARAMS: a list of opcode attributes, the default values 275 they should get if not already defined, and the types they must match 276 277 """ 278 HPATH = None 279 HTYPE = None 280 _OP_PARAMS = [] 281 REQ_BGL = True 282
283 - def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit. 285 286 This needs to be overridden in derived classes in order to check op 287 validity. 288 289 """ 290 self.proc = processor 291 self.op = op 292 self.cfg = context.cfg 293 self.context = context 294 self.rpc = rpc 295 # Dicts used to declare locking needs to mcpu 296 self.needed_locks = None 297 self.acquired_locks = {} 298 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 299 self.add_locks = {} 300 self.remove_locks = {} 301 # Used to force good behavior when calling helper functions 302 self.recalculate_locks = {} 303 self.__ssh = None 304 # logging 305 self.Log = processor.Log # pylint: disable-msg=C0103 306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103 307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103 308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103 309 # support for dry-run 310 self.dry_run_result = None 311 # support for generic debug attribute 312 if (not hasattr(self.op, "debug_level") or 313 not isinstance(self.op.debug_level, int)): 314 self.op.debug_level = 0 315 316 # Tasklets 317 self.tasklets = None 318 319 # The new kind-of-type-system 320 op_id = self.op.OP_ID 321 for attr_name, aval, test in self._OP_PARAMS: 322 if not hasattr(op, attr_name): 323 if aval == _NoDefault: 324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" % 325 (op_id, attr_name), errors.ECODE_INVAL) 326 else: 327 if callable(aval): 328 dval = aval() 329 else: 330 dval = aval 331 setattr(self.op, attr_name, dval) 332 attr_val = getattr(op, attr_name) 333 if test == _NoType: 334 # no tests here 335 continue 336 if not callable(test): 337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed," 338 " given type is not a proper type (%s)" % 339 (op_id, attr_name, test)) 340 if not test(attr_val): 341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s", 342 self.op.OP_ID, attr_name, type(attr_val), attr_val) 343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" % 344 (op_id, attr_name), errors.ECODE_INVAL) 345 346 self.CheckArguments()
347
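# Sketch of how the parameter loop above is typically fed (hypothetical
# LU, for illustration only).  Each _OP_PARAMS entry is a (name, default,
# check) tuple: _NoDefault marks a mandatory parameter, callable defaults
# such as _EmptyList are invoked to build a fresh value, and _NoType
# disables the type check for that attribute.
#
#   class LUExampleOperation(LogicalUnit):     # hypothetical example
#     _OP_PARAMS = [
#       _PInstanceName,                        # required, non-empty string
#       _PForce,                               # optional, defaults to False
#       ("disks", _EmptyList, _TListOf(_TPositiveInt)),
#       ("comment", None, _NoType),            # accepted as-is, no check
#       ]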
348 - def __GetSSH(self):
349 """Returns the SshRunner object 350 351 """ 352 if not self.__ssh: 353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName()) 354 return self.__ssh
355 356 ssh = property(fget=__GetSSH) 357
358 - def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments. 360 361 This method is for doing a simple syntactic check and ensuring the 362 validity of opcode parameters, without any cluster-related 363 checks. While the same can be accomplished in ExpandNames and/or 364 CheckPrereq, doing these separately is better because: 365 366 - ExpandNames is left as purely a lock-related function 367 - CheckPrereq is run after we have acquired locks (and possibly 368 waited for them) 369 370 The function is allowed to change the self.op attribute so that 371 later methods no longer need to worry about missing parameters. 372 373 """ 374 pass
375
376 - def ExpandNames(self):
377 """Expand names for this LU. 378 379 This method is called before starting to execute the opcode, and it should 380 update all the parameters of the opcode to their canonical form (e.g. a 381 short node name must be fully expanded after this method has successfully 382 completed). This way locking, hooks, logging, etc. can work correctly. 383 384 LUs which implement this method must also populate the self.needed_locks 385 member, as a dict with lock levels as keys, and a list of needed lock names 386 as values. Rules: 387 388 - use an empty dict if you don't need any lock 389 - if you don't need any lock at a particular level omit that level 390 - don't put anything for the BGL level 391 - if you want all locks at a level use locking.ALL_SET as a value 392 393 If you need to share locks (rather than acquire them exclusively) at one 394 level you can modify self.share_locks, setting a true value (usually 1) for 395 that level. By default locks are not shared. 396 397 This function can also define a list of tasklets, which will then be 398 executed in order instead of the usual LU-level CheckPrereq and Exec 399 functions, if those are not defined by the LU. 400 401 Examples:: 402 403 # Acquire all nodes and one instance 404 self.needed_locks = { 405 locking.LEVEL_NODE: locking.ALL_SET, 406 locking.LEVEL_INSTANCE: ['instance1.example.com'], 407 } 408 # Acquire just two nodes 409 self.needed_locks = { 410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 411 } 412 # Acquire no locks 413 self.needed_locks = {} # No, you can't leave it to the default value None 414 415 """ 416 # The implementation of this method is mandatory only if the new LU is 417 # concurrent, so that old LUs don't need to be changed all at the same 418 # time. 419 if self.REQ_BGL: 420 self.needed_locks = {} # Exclusive LUs don't need locks. 421 else: 422 raise NotImplementedError
423
424 - def DeclareLocks(self, level):
425 """Declare LU locking needs for a level 426 427 While most LUs can just declare their locking needs at ExpandNames time, 428 sometimes there's the need to calculate some locks after having acquired 429 the ones before. This function is called just before acquiring locks at a 430 particular level, but after acquiring the ones at lower levels, and permits 431 such calculations. It can be used to modify self.needed_locks, and by 432 default it does nothing. 433 434 This function is only called if you have something already set in 435 self.needed_locks for the level. 436 437 @param level: Locking level which is going to be locked 438 @type level: member of ganeti.locking.LEVELS 439 440 """
441
442 - def CheckPrereq(self):
443 """Check prerequisites for this LU. 444 445 This method should check that the prerequisites for the execution 446 of this LU are fulfilled. It can do internode communication, but 447 it should be idempotent - no cluster or system changes are 448 allowed. 449 450 The method should raise errors.OpPrereqError in case something is 451 not fulfilled. Its return value is ignored. 452 453 This method should also update all the parameters of the opcode to 454 their canonical form if it hasn't been done by ExpandNames before. 455 456 """ 457 if self.tasklets is not None: 458 for (idx, tl) in enumerate(self.tasklets): 459 logging.debug("Checking prerequisites for tasklet %s/%s", 460 idx + 1, len(self.tasklets)) 461 tl.CheckPrereq() 462 else: 463 pass
464
465 - def Exec(self, feedback_fn):
466 """Execute the LU. 467 468 This method should implement the actual work. It should raise 469 errors.OpExecError for failures that are somewhat dealt with in 470 code, or expected. 471 472 """ 473 if self.tasklets is not None: 474 for (idx, tl) in enumerate(self.tasklets): 475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 476 tl.Exec(feedback_fn) 477 else: 478 raise NotImplementedError
479
480 - def BuildHooksEnv(self):
481 """Build hooks environment for this LU. 482 483 This method should return a three-element tuple consisting of: a dict 484 containing the environment that will be used for running the 485 specific hook for this LU, a list of node names on which the hook 486 should run before the execution, and a list of node names on which 487 the hook should run after the execution. 488 489 The keys of the dict must not be prefixed with 'GANETI_', as this will 490 be handled in the hooks runner. Also note that additional keys will be 491 added by the hooks runner. If the LU doesn't define any 492 environment, an empty dict (and not None) should be returned. 493 494 If no nodes are wanted, an empty list (and not None) should be returned. 495 496 Note that if the HPATH for a LU class is None, this function will 497 not be called. 498 499 """ 500 raise NotImplementedError
501
502 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks. 504 505 This method is called every time a hooks phase is executed, and notifies 506 the Logical Unit about the hooks' result. The LU can then use it to alter 507 its result based on the hooks. By default the method does nothing and the 508 previous result is passed back unchanged, but any LU can override it if it 509 wants to use the local cluster hook-scripts somehow. 510 511 @param phase: one of L{constants.HOOKS_PHASE_POST} or 512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 513 @param hook_results: the results of the multi-node hooks rpc call 514 @param feedback_fn: function used to send feedback back to the caller 515 @param lu_result: the previous Exec result this LU had, or None 516 in the PRE phase 517 @return: the new Exec result, based on the previous result 518 and hook results 519 520 """ 521 # API must be kept, thus we ignore the unused-argument and 522 # could-be-a-function warnings 523 # pylint: disable-msg=W0613,R0201 524 return lu_result
525
526 - def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance. 528 529 Many LUs that work on an instance take its name in self.op.instance_name 530 and need to expand it and then declare the expanded name for locking. This 531 function does it, and then updates self.op.instance_name to the expanded 532 name. It also initializes needed_locks as a dict, if this hasn't been done 533 before. 534 535 """ 536 if self.needed_locks is None: 537 self.needed_locks = {} 538 else: 539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 540 "_ExpandAndLockInstance called with instance-level locks set" 541 self.op.instance_name = _ExpandInstanceName(self.cfg, 542 self.op.instance_name) 543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
544
545 - def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking. 547 548 This function should be called after locking one or more instances to lock 549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 550 with all primary or secondary nodes for instances already locked and 551 present in self.needed_locks[locking.LEVEL_INSTANCE]. 552 553 It should be called from DeclareLocks, and for safety only works if 554 self.recalculate_locks[locking.LEVEL_NODE] is set. 555 556 In the future it may grow parameters to just lock some instance's nodes, or 557 to just lock primaries or secondary nodes, if needed. 558 559 If should be called in DeclareLocks in a way similar to:: 560 561 if level == locking.LEVEL_NODE: 562 self._LockInstancesNodes() 563 564 @type primary_only: boolean 565 @param primary_only: only lock primary nodes of locked instances 566 567 """ 568 assert locking.LEVEL_NODE in self.recalculate_locks, \ 569 "_LockInstancesNodes helper function called with no nodes to recalculate" 570 571 # TODO: check if we're really been called with the instance locks held 572 573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 574 # future we might want to have different behaviors depending on the value 575 # of self.recalculate_locks[locking.LEVEL_NODE] 576 wanted_nodes = [] 577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 578 instance = self.context.cfg.GetInstanceInfo(instance_name) 579 wanted_nodes.append(instance.primary_node) 580 if not primary_only: 581 wanted_nodes.extend(instance.secondary_nodes) 582 583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE: 584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes 585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND: 586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes) 587 588 del self.recalculate_locks[locking.LEVEL_NODE]
589
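# Typical usage of the two helpers above (sketch only, hypothetical LU
# methods): an instance-level LU usually expands and locks the instance in
# ExpandNames, defers the node locks, and then lets DeclareLocks fill them
# in via _LockInstancesNodes:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()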
590 591 -class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks. 593 594 This LU is intended as a parent for other LogicalUnits which will 595 run no hooks, in order to reduce duplicate code. 596 597 """ 598 HPATH = None 599 HTYPE = None 600
601 - def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLU. 603 604 This just raises an error. 605 606 """ 607 assert False, "BuildHooksEnv called for NoHooksLUs"
608
609 610 -class Tasklet:
611 """Tasklet base class. 612 613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 614 they can mix legacy code with tasklets. Locking needs to be done in the LU, 615 tasklets know nothing about locks. 616 617 Subclasses must follow these rules: 618 - Implement CheckPrereq 619 - Implement Exec 620 621 """
622 - def __init__(self, lu):
623 self.lu = lu 624 625 # Shortcuts 626 self.cfg = lu.cfg 627 self.rpc = lu.rpc
628
629 - def CheckPrereq(self):
630 """Check prerequisites for this tasklet. 631 632 This method should check whether the prerequisites for the execution of 633 this tasklet are fulfilled. It can do internode communication, but it 634 should be idempotent - no cluster or system changes are allowed. 635 636 The method should raise errors.OpPrereqError in case something is not 637 fulfilled. Its return value is ignored. 638 639 This method should also update all parameters to their canonical form if it 640 hasn't been done before. 641 642 """ 643 pass
644
645 - def Exec(self, feedback_fn):
646 """Execute the tasklet. 647 648 This method should implement the actual work. It should raise 649 errors.OpExecError for failures that are somewhat dealt with in code, or 650 expected. 651 652 """ 653 raise NotImplementedError
654
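# Sketch of tasklet usage (hypothetical tasklet class, for illustration):
# an LU built from tasklets only needs to populate self.tasklets in
# ExpandNames; the base-class CheckPrereq and Exec above then drive each
# tasklet in order.
#
#   def ExpandNames(self):                 # inside a hypothetical LU
#     ...
#     self.tasklets = [SomeDiskTasklet(self, self.op.instance_name)]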
655 656 -def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names. 658 659 @type lu: L{LogicalUnit} 660 @param lu: the logical unit on whose behalf we execute 661 @type nodes: list 662 @param nodes: list of node names or None for all nodes 663 @rtype: list 664 @return: the list of nodes, sorted 665 @raise errors.ProgrammerError: if the nodes parameter is wrong type 666 667 """ 668 if not nodes: 669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a" 670 " non-empty list of nodes whose name is to be expanded.") 671 672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes] 673 return utils.NiceSort(wanted)
674
675 676 -def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names. 678 679 @type lu: L{LogicalUnit} 680 @param lu: the logical unit on whose behalf we execute 681 @type instances: list 682 @param instances: list of instance names or None for all instances 683 @rtype: list 684 @return: the list of instances, sorted 685 @raise errors.OpPrereqError: if the instances parameter is wrong type 686 @raise errors.OpPrereqError: if any of the passed instances is not found 687 688 """ 689 if instances: 690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 691 else: 692 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 693 return wanted
694
695 696 -def _GetUpdatedParams(old_params, update_dict, 697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary. 699 700 @type old_params: dict 701 @param old_params: old parameters 702 @type update_dict: dict 703 @param update_dict: dict containing new parameter values, or 704 constants.VALUE_DEFAULT to reset the parameter to its default 705 value 706 @type use_default: boolean 707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT} 708 values as 'to be deleted' values 709 @type use_none: boolean 710 @param use_none: whether to recognise C{None} values as 'to be 711 deleted' values 712 @rtype: dict 713 @return: the new parameter dictionary 714 715 """ 716 params_copy = copy.deepcopy(old_params) 717 for key, val in update_dict.iteritems(): 718 if ((use_default and val == constants.VALUE_DEFAULT) or 719 (use_none and val is None)): 720 try: 721 del params_copy[key] 722 except KeyError: 723 pass 724 else: 725 params_copy[key] = val 726 return params_copy
727
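# Behaviour of _GetUpdatedParams, following directly from the code above
# (illustrative values):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT})
#       -> {"a": 1}              # VALUE_DEFAULT removes the key
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#       -> {}                    # None removes the key when use_none=True
#   _GetUpdatedParams({"a": 1}, {"b": 2})
#       -> {"a": 1, "b": 2}      # plain values are merged in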
728 729 -def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid. 731 732 @type static: L{utils.FieldSet} 733 @param static: static fields set 734 @type dynamic: L{utils.FieldSet} 735 @param dynamic: dynamic fields set 736 737 """ 738 f = utils.FieldSet() 739 f.Extend(static) 740 f.Extend(dynamic) 741 742 delta = f.NonMatching(selected) 743 if delta: 744 raise errors.OpPrereqError("Unknown output fields selected: %s" 745 % ",".join(delta), errors.ECODE_INVAL)
746
747 748 -def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones. 750 751 This will ensure that instances don't get customised versions of 752 global params. 753 754 """ 755 used_globals = constants.HVC_GLOBALS.intersection(params) 756 if used_globals: 757 msg = ("The following hypervisor parameters are global and cannot" 758 " be customized at instance level, please modify them at" 759 " cluster level: %s" % utils.CommaJoin(used_globals)) 760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
761
762 763 -def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online. 765 766 @param lu: the LU on behalf of which we make the check 767 @param node: the node to check 768 @raise errors.OpPrereqError: if the node is offline 769 770 """ 771 if lu.cfg.GetNodeInfo(node).offline: 772 raise errors.OpPrereqError("Can't use offline node %s" % node, 773 errors.ECODE_INVAL)
774
775 776 -def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained. 778 779 @param lu: the LU on behalf of which we make the check 780 @param node: the node to check 781 @raise errors.OpPrereqError: if the node is drained 782 783 """ 784 if lu.cfg.GetNodeInfo(node).drained: 785 raise errors.OpPrereqError("Can't use drained node %s" % node, 786 errors.ECODE_INVAL)
787
788 789 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS. 791 792 @param lu: the LU on behalf of which we make the check 793 @param node: the node to check 794 @param os_name: the OS to query about 795 @param force_variant: whether to ignore variant errors 796 @raise errors.OpPrereqError: if the node is not supporting the OS 797 798 """ 799 result = lu.rpc.call_os_get(node, os_name) 800 result.Raise("OS '%s' not in supported OS list for node %s" % 801 (os_name, node), 802 prereq=True, ecode=errors.ECODE_INVAL) 803 if not force_variant: 804 _CheckOSVariant(result.payload, os_name)
805
806 807 -def _RequireFileStorage():
808 """Checks that file storage is enabled. 809 810 @raise errors.OpPrereqError: when file storage is disabled 811 812 """ 813 if not constants.ENABLE_FILE_STORAGE: 814 raise errors.OpPrereqError("File storage disabled at configure time", 815 errors.ECODE_INVAL)
816
817 818 -def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid. 820 821 """ 822 if template not in constants.DISK_TEMPLATES: 823 msg = ("Invalid disk template name '%s', valid templates are: %s" % 824 (template, utils.CommaJoin(constants.DISK_TEMPLATES))) 825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 826 if template == constants.DT_FILE: 827 _RequireFileStorage() 828 return True
829
830 831 -def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid. 833 834 """ 835 if storage_type not in constants.VALID_STORAGE_TYPES: 836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type, 837 errors.ECODE_INVAL) 838 if storage_type == constants.ST_FILE: 839 _RequireFileStorage() 840 return True
841
842 843 -def _GetClusterDomainSecret():
844 """Reads the cluster domain secret. 845 846 """ 847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 848 strict=True)
849
850 851 -def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running.""" 853 if instance.admin_up: 854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" % 855 (instance.name, reason), errors.ECODE_STATE) 856 857 pnode = instance.primary_node 858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 859 ins_l.Raise("Can't contact node %s for instance information" % pnode, 860 prereq=True, ecode=errors.ECODE_ENVIRON) 861 862 if instance.name in ins_l.payload: 863 raise errors.OpPrereqError("Instance %s is running, %s" % 864 (instance.name, reason), errors.ECODE_STATE)
865
866 867 -def _ExpandItemName(fn, name, kind):
868 """Expand an item name. 869 870 @param fn: the function to use for expansion 871 @param name: requested item name 872 @param kind: text description ('Node' or 'Instance') 873 @return: the resolved (full) name 874 @raise errors.OpPrereqError: if the item is not found 875 876 """ 877 full_name = fn(name) 878 if full_name is None: 879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 880 errors.ECODE_NOENT) 881 return full_name
882
883 884 -def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes.""" 886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
887
888 889 -def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance.""" 891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
892
893 894 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 895 memory, vcpus, nics, disk_template, disks, 896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks 898 899 This builds the hook environment from individual variables. 900 901 @type name: string 902 @param name: the name of the instance 903 @type primary_node: string 904 @param primary_node: the name of the instance's primary node 905 @type secondary_nodes: list 906 @param secondary_nodes: list of secondary nodes as strings 907 @type os_type: string 908 @param os_type: the name of the instance's OS 909 @type status: boolean 910 @param status: the should_run status of the instance 911 @type memory: string 912 @param memory: the memory size of the instance 913 @type vcpus: string 914 @param vcpus: the count of VCPUs the instance has 915 @type nics: list 916 @param nics: list of tuples (ip, mac, mode, link) representing 917 the NICs the instance has 918 @type disk_template: string 919 @param disk_template: the disk template of the instance 920 @type disks: list 921 @param disks: the list of (size, mode) pairs 922 @type bep: dict 923 @param bep: the backend parameters for the instance 924 @type hvp: dict 925 @param hvp: the hypervisor parameters for the instance 926 @type hypervisor_name: string 927 @param hypervisor_name: the hypervisor for the instance 928 @rtype: dict 929 @return: the hook environment for this instance 930 931 """ 932 if status: 933 str_status = "up" 934 else: 935 str_status = "down" 936 env = { 937 "OP_TARGET": name, 938 "INSTANCE_NAME": name, 939 "INSTANCE_PRIMARY": primary_node, 940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 941 "INSTANCE_OS_TYPE": os_type, 942 "INSTANCE_STATUS": str_status, 943 "INSTANCE_MEMORY": memory, 944 "INSTANCE_VCPUS": vcpus, 945 "INSTANCE_DISK_TEMPLATE": disk_template, 946 "INSTANCE_HYPERVISOR": hypervisor_name, 947 } 948 949 if nics: 950 nic_count = len(nics) 951 for idx, (ip, mac, mode, link) in enumerate(nics): 952 if ip is None: 953 ip = "" 954 env["INSTANCE_NIC%d_IP" % idx] = ip 955 env["INSTANCE_NIC%d_MAC" % idx] = mac 956 env["INSTANCE_NIC%d_MODE" % idx] = mode 957 env["INSTANCE_NIC%d_LINK" % idx] = link 958 if mode == constants.NIC_MODE_BRIDGED: 959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 960 else: 961 nic_count = 0 962 963 env["INSTANCE_NIC_COUNT"] = nic_count 964 965 if disks: 966 disk_count = len(disks) 967 for idx, (size, mode) in enumerate(disks): 968 env["INSTANCE_DISK%d_SIZE" % idx] = size 969 env["INSTANCE_DISK%d_MODE" % idx] = mode 970 else: 971 disk_count = 0 972 973 env["INSTANCE_DISK_COUNT"] = disk_count 974 975 for source, kind in [(bep, "BE"), (hvp, "HV")]: 976 for key, value in source.items(): 977 env["INSTANCE_%s_%s" % (kind, key)] = value 978 979 return env
980
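# Example of the resulting environment (sketch with hypothetical values):
# a call such as
#
#   _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
#                         ["node2.example.com"], "debian-image", True,
#                         512, 1, [("192.0.2.10", "aa:00:00:00:00:01",
#                                   constants.NIC_MODE_BRIDGED, "xen-br0")],
#                         "drbd", [(10240, "rw")], {}, {}, "xen-pvm")
#
# would produce, among others, INSTANCE_NAME=inst1.example.com,
# INSTANCE_PRIMARY=node1.example.com, INSTANCE_STATUS=up,
# INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_BRIDGE=xen-br0 and
# INSTANCE_DISK0_SIZE=10240.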
981 982 -def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples. 984 985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 986 value in LUQueryInstanceData. 987 988 @type lu: L{LogicalUnit} 989 @param lu: the logical unit on whose behalf we execute 990 @type nics: list of L{objects.NIC} 991 @param nics: list of nics to convert to hooks tuples 992 993 """ 994 hooks_nics = [] 995 cluster = lu.cfg.GetClusterInfo() 996 for nic in nics: 997 ip = nic.ip 998 mac = nic.mac 999 filled_params = cluster.SimpleFillNIC(nic.nicparams) 1000 mode = filled_params[constants.NIC_MODE] 1001 link = filled_params[constants.NIC_LINK] 1002 hooks_nics.append((ip, mac, mode, link)) 1003 return hooks_nics
1004
1005 1006 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object. 1008 1009 @type lu: L{LogicalUnit} 1010 @param lu: the logical unit on whose behalf we execute 1011 @type instance: L{objects.Instance} 1012 @param instance: the instance for which we should build the 1013 environment 1014 @type override: dict 1015 @param override: dictionary with key/values that will override 1016 our values 1017 @rtype: dict 1018 @return: the hook environment dictionary 1019 1020 """ 1021 cluster = lu.cfg.GetClusterInfo() 1022 bep = cluster.FillBE(instance) 1023 hvp = cluster.FillHV(instance) 1024 args = { 1025 'name': instance.name, 1026 'primary_node': instance.primary_node, 1027 'secondary_nodes': instance.secondary_nodes, 1028 'os_type': instance.os, 1029 'status': instance.admin_up, 1030 'memory': bep[constants.BE_MEMORY], 1031 'vcpus': bep[constants.BE_VCPUS], 1032 'nics': _NICListToTuple(lu, instance.nics), 1033 'disk_template': instance.disk_template, 1034 'disks': [(disk.size, disk.mode) for disk in instance.disks], 1035 'bep': bep, 1036 'hvp': hvp, 1037 'hypervisor_name': instance.hypervisor, 1038 } 1039 if override: 1040 args.update(override) 1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1042
1043 1044 -def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations. 1046 1047 """ 1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 1049 if mod_list: 1050 lu.LogInfo("Promoted nodes to master candidate role: %s", 1051 utils.CommaJoin(node.name for node in mod_list)) 1052 for name in mod_list: 1053 lu.context.ReaddNode(name) 1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1055 if mc_now > mc_max: 1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 1057 (mc_now, mc_max))
1058
1059 1060 -def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate. 1062 1063 """ 1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1066 # the new node will increase mc_max with one, so: 1067 mc_should = min(mc_should + 1, cp_size) 1068 return mc_now < mc_should
1069
1070 1071 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the bridges needed by a list of nics exist. 1073 1074 """ 1075 cluster = lu.cfg.GetClusterInfo() 1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 1077 brlist = [params[constants.NIC_LINK] for params in paramslist 1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 1079 if brlist: 1080 result = lu.rpc.call_bridges_exist(target_node, brlist) 1081 result.Raise("Error checking bridges on destination node '%s'" % 1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1083
1084 1085 -def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the bridges needed by an instance exist. 1087 1088 """ 1089 if node is None: 1090 node = instance.primary_node 1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1092
1093 1094 -def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification. 1096 1097 @type os_obj: L{objects.OS} 1098 @param os_obj: OS object to check 1099 @type name: string 1100 @param name: OS name passed by the user, to check for validity 1101 1102 """ 1103 if not os_obj.supported_variants: 1104 return 1105 variant = objects.OS.GetVariant(name) 1106 if not variant: 1107 raise errors.OpPrereqError("OS name must include a variant", 1108 errors.ECODE_INVAL) 1109 1110 if variant not in os_obj.supported_variants: 1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1112
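# Illustrative note on the "os+variant" naming checked above: an OS given
# as e.g. "debootstrap+default" has variant "default"; for an OS object
# that declares supported_variants, a bare name without "+variant" is
# rejected with "OS name must include a variant", and an unknown variant
# with "Unsupported OS variant".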
1113 1114 -def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1116
1117 1118 -def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node. 1120 1121 """ 1122 1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1124
1125 1126 -def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node. 1128 1129 """ 1130 return _GetNodeInstancesInner(cfg, 1131 lambda inst: node_name == inst.primary_node)
1132
1133 1134 -def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node. 1136 1137 """ 1138 return _GetNodeInstancesInner(cfg, 1139 lambda inst: node_name in inst.secondary_nodes)
1140
1141 1142 -def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type. 1144 1145 """ 1146 # Special case for file storage 1147 if storage_type == constants.ST_FILE: 1148 # storage.FileStorage wants a list of storage directories 1149 return [[cfg.GetFileStorageDir()]] 1150 1151 return []
1152
1153 1154 -def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1155 faulty = [] 1156 1157 for dev in instance.disks: 1158 cfg.SetDiskID(dev, node_name) 1159 1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks) 1161 result.Raise("Failed to get disk status from node %s" % node_name, 1162 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1163 1164 for idx, bdev_status in enumerate(result.payload): 1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1166 faulty.append(idx) 1167 1168 return faulty
1169
1170 1171 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the 1173 cluster-wide iallocator if appropriate. 1174 1175 Check that at most one of (iallocator, node) is specified. If none is 1176 specified, then the LU's opcode's iallocator slot is filled with the 1177 cluster-wide default iallocator. 1178 1179 @type iallocator_slot: string 1180 @param iallocator_slot: the name of the opcode iallocator slot 1181 @type node_slot: string 1182 @param node_slot: the name of the opcode target node slot 1183 1184 """ 1185 node = getattr(lu.op, node_slot, None) 1186 iallocator = getattr(lu.op, iallocator_slot, None) 1187 1188 if node is not None and iallocator is not None: 1189 raise errors.OpPrereqError("Do not specify both an iallocator and a node", 1190 errors.ECODE_INVAL) 1191 elif node is None and iallocator is None: 1192 default_iallocator = lu.cfg.GetDefaultIAllocator() 1193 if default_iallocator: 1194 setattr(lu.op, iallocator_slot, default_iallocator) 1195 else: 1196 raise errors.OpPrereqError("No iallocator or node given and no" 1197 " cluster-wide default iallocator found." 1198 " Please specify either an iallocator or a" 1199 " node, or set a cluster-wide default" 1200 " iallocator.", errors.ECODE_INVAL)
1201
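# Behaviour summary for _CheckIAllocatorOrNode (follows from the code
# above):
#   both iallocator and node set        -> OpPrereqError (mutually exclusive)
#   exactly one of the two set          -> opcode left untouched
#   neither set, cluster default exists -> iallocator slot filled with it
#   neither set, no cluster default     -> OpPrereqError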
1202 1203 -class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization. 1205 1206 """ 1207 HPATH = "cluster-init" 1208 HTYPE = constants.HTYPE_CLUSTER 1209
1210 - def BuildHooksEnv(self):
1211 """Build hooks env. 1212 1213 """ 1214 env = {"OP_TARGET": self.cfg.GetClusterName()} 1215 mn = self.cfg.GetMasterNode() 1216 return env, [], [mn]
1217
1218 - def Exec(self, feedback_fn):
1219 """Nothing to do. 1220 1221 """ 1222 return True
1223
1224 1225 -class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster. 1227 1228 """ 1229 HPATH = "cluster-destroy" 1230 HTYPE = constants.HTYPE_CLUSTER 1231
1232 - def BuildHooksEnv(self):
1233 """Build hooks env. 1234 1235 """ 1236 env = {"OP_TARGET": self.cfg.GetClusterName()} 1237 return env, [], []
1238
1239 - def CheckPrereq(self):
1240 """Check prerequisites. 1241 1242 This checks whether the cluster is empty. 1243 1244 Any errors are signaled by raising errors.OpPrereqError. 1245 1246 """ 1247 master = self.cfg.GetMasterNode() 1248 1249 nodelist = self.cfg.GetNodeList() 1250 if len(nodelist) != 1 or nodelist[0] != master: 1251 raise errors.OpPrereqError("There are still %d node(s) in" 1252 " this cluster." % (len(nodelist) - 1), 1253 errors.ECODE_INVAL) 1254 instancelist = self.cfg.GetInstanceList() 1255 if instancelist: 1256 raise errors.OpPrereqError("There are still %d instance(s) in" 1257 " this cluster." % len(instancelist), 1258 errors.ECODE_INVAL)
1259
1260 - def Exec(self, feedback_fn):
1261 """Destroys the cluster. 1262 1263 """ 1264 master = self.cfg.GetMasterNode() 1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 1266 1267 # Run post hooks on master node before it's removed 1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 1269 try: 1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master]) 1271 except: 1272 # pylint: disable-msg=W0702 1273 self.LogWarning("Errors occurred running hooks on %s" % master) 1274 1275 result = self.rpc.call_node_stop_master(master, False) 1276 result.Raise("Could not disable the master role") 1277 1278 if modify_ssh_setup: 1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) 1280 utils.CreateBackup(priv_key) 1281 utils.CreateBackup(pub_key) 1282 1283 return master
1284
1285 1286 -def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster. 1288 1289 @type filename: string 1290 @param filename: Path to PEM file 1291 1292 """ 1293 try: 1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1295 utils.ReadFile(filename)) 1296 except Exception, err: # pylint: disable-msg=W0703 1297 return (LUVerifyCluster.ETYPE_ERROR, 1298 "Failed to load X509 certificate %s: %s" % (filename, err)) 1299 1300 (errcode, msg) = \ 1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1302 constants.SSL_CERT_EXPIRATION_ERROR) 1303 1304 if msg: 1305 fnamemsg = "While verifying %s: %s" % (filename, msg) 1306 else: 1307 fnamemsg = None 1308 1309 if errcode is None: 1310 return (None, fnamemsg) 1311 elif errcode == utils.CERT_WARNING: 1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg) 1313 elif errcode == utils.CERT_ERROR: 1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg) 1315 1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1317
1318 1319 -class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status. 1321 1322 """ 1323 HPATH = "cluster-verify" 1324 HTYPE = constants.HTYPE_CLUSTER 1325 _OP_PARAMS = [ 1326 ("skip_checks", _EmptyList, 1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))), 1328 ("verbose", False, _TBool), 1329 ("error_codes", False, _TBool), 1330 ("debug_simulate_errors", False, _TBool), 1331 ] 1332 REQ_BGL = False 1333 1334 TCLUSTER = "cluster" 1335 TNODE = "node" 1336 TINSTANCE = "instance" 1337 1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG") 1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT") 1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE") 1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN") 1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT") 1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK") 1344 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK") 1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE") 1346 ENODEDRBD = (TNODE, "ENODEDRBD") 1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER") 1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK") 1349 ENODEHOOKS = (TNODE, "ENODEHOOKS") 1350 ENODEHV = (TNODE, "ENODEHV") 1351 ENODELVM = (TNODE, "ENODELVM") 1352 ENODEN1 = (TNODE, "ENODEN1") 1353 ENODENET = (TNODE, "ENODENET") 1354 ENODEOS = (TNODE, "ENODEOS") 1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE") 1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV") 1357 ENODERPC = (TNODE, "ENODERPC") 1358 ENODESSH = (TNODE, "ENODESSH") 1359 ENODEVERSION = (TNODE, "ENODEVERSION") 1360 ENODESETUP = (TNODE, "ENODESETUP") 1361 ENODETIME = (TNODE, "ENODETIME") 1362 1363 ETYPE_FIELD = "code" 1364 ETYPE_ERROR = "ERROR" 1365 ETYPE_WARNING = "WARNING" 1366
1367 - class NodeImage(object):
1368 """A class representing the logical and physical status of a node. 1369 1370 @type name: string 1371 @ivar name: the node name to which this object refers 1372 @ivar volumes: a structure as returned from 1373 L{ganeti.backend.GetVolumeList} (runtime) 1374 @ivar instances: a list of running instances (runtime) 1375 @ivar pinst: list of configured primary instances (config) 1376 @ivar sinst: list of configured secondary instances (config) 1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers 1378 of this node (config) 1379 @ivar mfree: free memory, as reported by the hypervisor (runtime) 1380 @ivar dfree: free disk, as reported by the node (runtime) 1381 @ivar offline: the offline status (config) 1382 @type rpc_fail: boolean 1383 @ivar rpc_fail: whether the RPC verify call was successful (overall, 1384 not whether the individual keys were correct) (runtime) 1385 @type lvm_fail: boolean 1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 1387 @type hyp_fail: boolean 1388 @ivar hyp_fail: whether the RPC call didn't return the instance list 1389 @type ghost: boolean 1390 @ivar ghost: whether this is a known node or not (config) 1391 @type os_fail: boolean 1392 @ivar os_fail: whether the RPC call didn't return valid OS data 1393 @type oslist: list 1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 1395 1396 """
1397 - def __init__(self, offline=False, name=None):
1398 self.name = name 1399 self.volumes = {} 1400 self.instances = [] 1401 self.pinst = [] 1402 self.sinst = [] 1403 self.sbp = {} 1404 self.mfree = 0 1405 self.dfree = 0 1406 self.offline = offline 1407 self.rpc_fail = False 1408 self.lvm_fail = False 1409 self.hyp_fail = False 1410 self.ghost = False 1411 self.os_fail = False 1412 self.oslist = {}
1413
1414 - def ExpandNames(self):
1415 self.needed_locks = { 1416 locking.LEVEL_NODE: locking.ALL_SET, 1417 locking.LEVEL_INSTANCE: locking.ALL_SET, 1418 } 1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1420
1421 - def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message. 1423 1424 Based on the opcode's error_codes parameter, either format a 1425 parseable error code, or a simpler error string. 1426 1427 This must be called only from Exec and functions called from Exec. 1428 1429 """ 1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1431 itype, etxt = ecode 1432 # first complete the msg 1433 if args: 1434 msg = msg % args 1435 # then format the whole message 1436 if self.op.error_codes: 1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1438 else: 1439 if item: 1440 item = " " + item 1441 else: 1442 item = "" 1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1444 # and finally report it via the feedback_fn 1445 self._feedback_fn(" - %s" % msg)
1446
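# Example of the two output formats produced by _Error (illustrative):
# for ecode=self.ENODENET, item="node1", msg="cannot reach the master IP",
# the line reported via feedback_fn is
#   ERROR:ENODENET:node:node1:cannot reach the master IP
# when self.op.error_codes is set, and
#   ERROR: node node1: cannot reach the master IP
# otherwise (both prefixed with " - " by the feedback call).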
1447 - def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True. 1449 1450 """ 1451 cond = bool(cond) or self.op.debug_simulate_errors 1452 if cond: 1453 self._Error(*args, **kwargs) 1454 # do not mark the operation as failed for WARN cases only 1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1456 self.bad = self.bad or cond
1457
1458 - def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node. 1460 1461 - check the result data structure is well formed and has all the 1462 mandatory fields 1463 - check ganeti version 1464 1465 @type ninfo: L{objects.Node} 1466 @param ninfo: the node to check 1467 @param nresult: the results from the node 1468 @rtype: boolean 1469 @return: whether overall this call was successful (and we can expect 1470 reasonable values in the respose) 1471 1472 """ 1473 node = ninfo.name 1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1475 1476 # main result, nresult should be a non-empty dict 1477 test = not nresult or not isinstance(nresult, dict) 1478 _ErrorIf(test, self.ENODERPC, node, 1479 "unable to verify node: no data returned") 1480 if test: 1481 return False 1482 1483 # compares ganeti version 1484 local_version = constants.PROTOCOL_VERSION 1485 remote_version = nresult.get("version", None) 1486 test = not (remote_version and 1487 isinstance(remote_version, (list, tuple)) and 1488 len(remote_version) == 2) 1489 _ErrorIf(test, self.ENODERPC, node, 1490 "connection to node returned invalid data") 1491 if test: 1492 return False 1493 1494 test = local_version != remote_version[0] 1495 _ErrorIf(test, self.ENODEVERSION, node, 1496 "incompatible protocol versions: master %s," 1497 " node %s", local_version, remote_version[0]) 1498 if test: 1499 return False 1500 1501 # node seems compatible, we can actually try to look into its results 1502 1503 # full package version 1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 1505 self.ENODEVERSION, node, 1506 "software version mismatch: master %s, node %s", 1507 constants.RELEASE_VERSION, remote_version[1], 1508 code=self.ETYPE_WARNING) 1509 1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 1511 if isinstance(hyp_result, dict): 1512 for hv_name, hv_result in hyp_result.iteritems(): 1513 test = hv_result is not None 1514 _ErrorIf(test, self.ENODEHV, node, 1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 1516 1517 1518 test = nresult.get(constants.NV_NODESETUP, 1519 ["Missing NODESETUP results"]) 1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s", 1521 "; ".join(test)) 1522 1523 return True
1524
1525 - def _VerifyNodeTime(self, ninfo, nresult, 1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time. 1528 1529 @type ninfo: L{objects.Node} 1530 @param ninfo: the node to check 1531 @param nresult: the remote results for the node 1532 @param nvinfo_starttime: the start time of the RPC call 1533 @param nvinfo_endtime: the end time of the RPC call 1534 1535 """ 1536 node = ninfo.name 1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1538 1539 ntime = nresult.get(constants.NV_TIME, None) 1540 try: 1541 ntime_merged = utils.MergeTime(ntime) 1542 except (ValueError, TypeError): 1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") 1544 return 1545 1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 1550 else: 1551 ntime_diff = None 1552 1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node, 1554 "Node time diverges by at least %s from master node time", 1555 ntime_diff)
1556
1557 - def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node LVM status (volume groups and PV names). 1559 1560 @type ninfo: L{objects.Node} 1561 @param ninfo: the node to check 1562 @param nresult: the remote results for the node 1563 @param vg_name: the configured VG name 1564 1565 """ 1566 if vg_name is None: 1567 return 1568 1569 node = ninfo.name 1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1571 1572 # checks vg existence and size > 20G 1573 vglist = nresult.get(constants.NV_VGLIST, None) 1574 test = not vglist 1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups") 1576 if not test: 1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 1578 constants.MIN_VG_SIZE) 1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus) 1580 1581 # check pv names 1582 pvlist = nresult.get(constants.NV_PVLIST, None) 1583 test = pvlist is None 1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node") 1585 if not test: 1586 # check that ':' is not present in PV names, since it's a 1587 # special character for lvcreate (denotes the range of PEs to 1588 # use on the PV) 1589 for _, pvname, owner_vg in pvlist: 1590 test = ":" in pvname 1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV" 1592 " '%s' of VG '%s'", pvname, owner_vg)
1593
1594 - def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node network connectivity. 1596 1597 @type ninfo: L{objects.Node} 1598 @param ninfo: the node to check 1599 @param nresult: the remote results for the node 1600 1601 """ 1602 node = ninfo.name 1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1604 1605 test = constants.NV_NODELIST not in nresult 1606 _ErrorIf(test, self.ENODESSH, node, 1607 "node hasn't returned node ssh connectivity data") 1608 if not test: 1609 if nresult[constants.NV_NODELIST]: 1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 1611 _ErrorIf(True, self.ENODESSH, node, 1612 "ssh communication with node '%s': %s", a_node, a_msg) 1613 1614 test = constants.NV_NODENETTEST not in nresult 1615 _ErrorIf(test, self.ENODENET, node, 1616 "node hasn't returned node tcp connectivity data") 1617 if not test: 1618 if nresult[constants.NV_NODENETTEST]: 1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 1620 for anode in nlist: 1621 _ErrorIf(True, self.ENODENET, node, 1622 "tcp communication with node '%s': %s", 1623 anode, nresult[constants.NV_NODENETTEST][anode]) 1624 1625 test = constants.NV_MASTERIP not in nresult 1626 _ErrorIf(test, self.ENODENET, node, 1627 "node hasn't returned node master IP reachability data") 1628 if not test: 1629 if not nresult[constants.NV_MASTERIP]: 1630 if node == self.master_node: 1631 msg = "the master node cannot reach the master IP (not configured?)" 1632 else: 1633 msg = "cannot reach the master IP" 1634 _ErrorIf(True, self.ENODENET, node, msg)
1635 1636
1637 - def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance. 1639 1640 This function checks to see if the required block devices are 1641 available on the instance's node. 1642 1643 """ 1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1645 node_current = instanceconfig.primary_node 1646 1647 node_vol_should = {} 1648 instanceconfig.MapLVsByNode(node_vol_should) 1649 1650 for node in node_vol_should: 1651 n_img = node_image[node] 1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1653 # ignore missing volumes on offline or broken nodes 1654 continue 1655 for volume in node_vol_should[node]: 1656 test = volume not in n_img.volumes 1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance, 1658 "volume %s missing on node %s", volume, node) 1659 1660 if instanceconfig.admin_up: 1661 pri_img = node_image[node_current] 1662 test = instance not in pri_img.instances and not pri_img.offline 1663 _ErrorIf(test, self.EINSTANCEDOWN, instance, 1664 "instance not running on its primary node %s", 1665 node_current) 1666 1667 for node, n_img in node_image.items(): 1668 if (not node == node_current): 1669 test = instance in n_img.instances 1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance, 1671 "instance should not run on node %s", node)
1672
1673 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster. 1675 1676 The .os, .swap and backup volumes are ignored. All other volumes are 1677 reported as unknown. 1678 1679 @type reserved: L{ganeti.utils.FieldSet} 1680 @param reserved: a FieldSet of reserved volume names 1681 1682 """ 1683 for node, n_img in node_image.items(): 1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1685 # skip non-healthy nodes 1686 continue 1687 for volume in n_img.volumes: 1688 test = ((node not in node_vol_should or 1689 volume not in node_vol_should[node]) and 1690 not reserved.Matches(volume)) 1691 self._ErrorIf(test, self.ENODEORPHANLV, node, 1692 "volume %s is unknown", volume)
1693
1694 - def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances. 1696 1697 This checks what instances are running but unknown to the cluster. 1698 1699 """ 1700 for node, n_img in node_image.items(): 1701 for o_inst in n_img.instances: 1702 test = o_inst not in instancelist 1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node, 1704 "instance %s on node %s should not exist", o_inst, node)
1705
1706 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience. 1708 1709 Check that if one single node dies we can still start all the 1710 instances it was primary for. 1711 1712 """ 1713 for node, n_img in node_image.items(): 1714 # This code checks that every node which is now listed as 1715 # secondary has enough memory to host all instances it is 1716 # supposed to should a single other node in the cluster fail. 1717 # FIXME: not ready for failover to an arbitrary node 1718 # FIXME: does not support file-backed instances 1719 # WARNING: we currently take into account down instances as well 1720 # as up ones, considering that even if they're down someone 1721 # might want to start them even in the event of a node failure. 1722 for prinode, instances in n_img.sbp.items(): 1723 needed_mem = 0 1724 for instance in instances: 1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance]) 1726 if bep[constants.BE_AUTO_BALANCE]: 1727 needed_mem += bep[constants.BE_MEMORY] 1728 test = n_img.mfree < needed_mem 1729 self._ErrorIf(test, self.ENODEN1, node, 1730 "not enough memory on to accommodate" 1731 " failovers should peer node %s fail", prinode)
1732
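# Editorial sketch (standalone, not part of the module): the N+1 check above
# asks, for every primary peer a node acts as secondary for (n_img.sbp),
# whether the node's free memory (n_img.mfree) covers the auto-balanced
# instances it would have to adopt if that peer died.  The helper and sample
# memory figures below are invented.
def _n_plus_one_ok(mfree, sbp, instance_mem):
  """sbp maps primary peer -> instances this node is secondary for."""
  for prinode, instances in sbp.items():
    needed = sum(instance_mem[i] for i in instances)
    if mfree < needed:
      return False, prinode, needed
  return True, None, 0

assert _n_plus_one_ok(4096, {"node2": ["inst1", "inst2"]},
                      {"inst1": 1024, "inst2": 2048}) == (True, None, 0)
assert _n_plus_one_ok(1024, {"node2": ["inst1", "inst2"]},
                      {"inst1": 1024, "inst2": 2048})[0] is False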
1733 - def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum, 1734 master_files):
1735 """Verifies and computes the node required file checksums. 1736 1737 @type ninfo: L{objects.Node} 1738 @param ninfo: the node to check 1739 @param nresult: the remote results for the node 1740 @param file_list: required list of files 1741 @param local_cksum: dictionary of local files and their checksums 1742 @param master_files: list of files that only masters should have 1743 1744 """ 1745 node = ninfo.name 1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1747 1748 remote_cksum = nresult.get(constants.NV_FILELIST, None) 1749 test = not isinstance(remote_cksum, dict) 1750 _ErrorIf(test, self.ENODEFILECHECK, node, 1751 "node hasn't returned file checksum data") 1752 if test: 1753 return 1754 1755 for file_name in file_list: 1756 node_is_mc = ninfo.master_candidate 1757 must_have = (file_name not in master_files) or node_is_mc 1758 # missing 1759 test1 = file_name not in remote_cksum 1760 # invalid checksum 1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name] 1762 # existing and good 1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name] 1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node, 1765 "file '%s' missing", file_name) 1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node, 1767 "file '%s' has wrong checksum", file_name) 1768 # not candidate and this is not a must-have file 1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node, 1770 "file '%s' should not exist on non master" 1771 " candidates (and the file is outdated)", file_name) 1772 # all good, except non-master/non-must have combination 1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node, 1774 "file '%s' should not exist" 1775 " on non master candidates", file_name)
1776
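# Editorial sketch (standalone, not part of the module): the file check above
# distinguishes, per file, "missing", "wrong checksum" and "present on a node
# that should not have it" (master-only files on non-candidates).  The
# simplified helper below is invented and folds must_have into a single flag.
def _classify_file(fname, local_cksum, remote_cksum, must_have):
  if fname not in remote_cksum:
    return "missing" if must_have else "absent (fine)"
  if remote_cksum[fname] != local_cksum[fname]:
    return "wrong checksum"
  return "ok" if must_have else "present but should not be"

assert _classify_file("/etc/hosts", {"/etc/hosts": "abc"},
                      {"/etc/hosts": "abc"}, True) == "ok"
assert _classify_file("/etc/hosts", {"/etc/hosts": "abc"}, {}, True) == \
       "missing"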
1777 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 1778 drbd_map):
1779 """Verifies and the node DRBD status. 1780 1781 @type ninfo: L{objects.Node} 1782 @param ninfo: the node to check 1783 @param nresult: the remote results for the node 1784 @param instanceinfo: the dict of instances 1785 @param drbd_helper: the configured DRBD usermode helper 1786 @param drbd_map: the DRBD map as returned by 1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 1788 1789 """ 1790 node = ninfo.name 1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1792 1793 if drbd_helper: 1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 1795 test = (helper_result == None) 1796 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1797 "no drbd usermode helper returned") 1798 if helper_result: 1799 status, payload = helper_result 1800 test = not status 1801 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1802 "drbd usermode helper check unsuccessful: %s", payload) 1803 test = status and (payload != drbd_helper) 1804 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1805 "wrong drbd usermode helper: %s", payload) 1806 1807 # compute the DRBD minors 1808 node_drbd = {} 1809 for minor, instance in drbd_map[node].items(): 1810 test = instance not in instanceinfo 1811 _ErrorIf(test, self.ECLUSTERCFG, None, 1812 "ghost instance '%s' in temporary DRBD map", instance) 1813 # ghost instance should not be running, but otherwise we 1814 # don't give double warnings (both ghost instance and 1815 # unallocated minor in use) 1816 if test: 1817 node_drbd[minor] = (instance, False) 1818 else: 1819 instance = instanceinfo[instance] 1820 node_drbd[minor] = (instance.name, instance.admin_up) 1821 1822 # and now check them 1823 used_minors = nresult.get(constants.NV_DRBDLIST, []) 1824 test = not isinstance(used_minors, (tuple, list)) 1825 _ErrorIf(test, self.ENODEDRBD, node, 1826 "cannot parse drbd status file: %s", str(used_minors)) 1827 if test: 1828 # we cannot check drbd status 1829 return 1830 1831 for minor, (iname, must_exist) in node_drbd.items(): 1832 test = minor not in used_minors and must_exist 1833 _ErrorIf(test, self.ENODEDRBD, node, 1834 "drbd minor %d of instance %s is not active", minor, iname) 1835 for minor in used_minors: 1836 test = minor not in node_drbd 1837 _ErrorIf(test, self.ENODEDRBD, node, 1838 "unallocated drbd minor %d is in use", minor)
1839
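# Editorial sketch (standalone, not part of the module): the DRBD check above
# is a two-way comparison between the minors the configuration allocated for
# this node and the minors the node reports as in use.  The helper and sample
# data below are invented.
def _drbd_minor_report(expected, used):
  """expected: {minor: (instance, must_exist)}; used: iterable of minors."""
  used = set(used)
  missing = [m for m, (_, must) in expected.items() if must and m not in used]
  stray = [m for m in used if m not in expected]
  return sorted(missing), sorted(stray)

assert _drbd_minor_report({0: ("inst1", True), 1: ("ghost", False)},
                          [0, 5]) == ([], [5])
assert _drbd_minor_report({0: ("inst1", True)}, []) == ([0], [])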
1840 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures. 1842 1843 @type ninfo: L{objects.Node} 1844 @param ninfo: the node to check 1845 @param nresult: the remote results for the node 1846 @param nimg: the node image object 1847 1848 """ 1849 node = ninfo.name 1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1851 1852 remote_os = nresult.get(constants.NV_OSLIST, None) 1853 test = (not isinstance(remote_os, list) or 1854 not compat.all(isinstance(v, list) and len(v) == 7 1855 for v in remote_os)) 1856 1857 _ErrorIf(test, self.ENODEOS, node, 1858 "node hasn't returned valid OS data") 1859 1860 nimg.os_fail = test 1861 1862 if test: 1863 return 1864 1865 os_dict = {} 1866 1867 for (name, os_path, status, diagnose, 1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 1869 1870 if name not in os_dict: 1871 os_dict[name] = [] 1872 1873 # parameters is a list of lists instead of list of tuples due to 1874 # JSON lacking a real tuple type, fix it: 1875 parameters = [tuple(v) for v in parameters] 1876 os_dict[name].append((os_path, status, diagnose, 1877 set(variants), set(parameters), set(api_ver))) 1878 1879 nimg.oslist = os_dict
1880
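# Editorial sketch (standalone, not part of the module): the OS-list payload
# above arrives as 7-element lists (JSON has no tuples); the loop regroups it
# into {os_name: [entry, ...]} and restores tuples/sets.  The sample row below
# is invented and the validity checks are omitted.
sample_oslist = [
  ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
   ["default", "minimal"], [["dhcp", "use dhcp"]], [20]],
]

os_dict = {}
for (name, path, status, diag, variants, params, api_ver) in sample_oslist:
  os_dict.setdefault(name, []).append(
    (path, status, diag, set(variants),
     set(tuple(v) for v in params), set(api_ver)))

assert os_dict["debootstrap"][0][3] == set(["default", "minimal"])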
1881 - def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list. 1883 1884 @type ninfo: L{objects.Node} 1885 @param ninfo: the node to check 1886 @param nimg: the node image object 1887 @param base: the 'template' node we match against (e.g. from the master) 1888 1889 """ 1890 node = ninfo.name 1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1892 1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 1894 1895 for os_name, os_data in nimg.oslist.items(): 1896 assert os_data, "Empty OS status for OS %s?!" % os_name 1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 1898 _ErrorIf(not f_status, self.ENODEOS, node, 1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node, 1901 "OS '%s' has multiple entries (first one shadows the rest): %s", 1902 os_name, utils.CommaJoin([v[0] for v in os_data])) 1903 # this will catched in backend too 1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api) 1905 and not f_var, self.ENODEOS, node, 1906 "OS %s with API at least %d does not declare any variant", 1907 os_name, constants.OS_API_V15) 1908 # comparisons with the 'base' image 1909 test = os_name not in base.oslist 1910 _ErrorIf(test, self.ENODEOS, node, 1911 "Extra OS %s not present on reference node (%s)", 1912 os_name, base.name) 1913 if test: 1914 continue 1915 assert base.oslist[os_name], "Base node has empty OS status?" 1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 1917 if not b_status: 1918 # base OS is invalid, skipping 1919 continue 1920 for kind, a, b in [("API version", f_api, b_api), 1921 ("variants list", f_var, b_var), 1922 ("parameters", f_param, b_param)]: 1923 _ErrorIf(a != b, self.ENODEOS, node, 1924 "OS %s %s differs from reference node %s: %s vs. %s", 1925 kind, os_name, base.name, 1926 utils.CommaJoin(a), utils.CommaJoin(b)) 1927 1928 # check any missing OSes 1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 1930 _ErrorIf(missing, self.ENODEOS, node, 1931 "OSes present on reference node %s but missing on this node: %s", 1932 base.name, utils.CommaJoin(missing))
1933
1934 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data. 1936 1937 This function will update a L{NodeImage}'s internal structures 1938 with data from the remote call. 1939 1940 @type ninfo: L{objects.Node} 1941 @param ninfo: the node to check 1942 @param nresult: the remote results for the node 1943 @param nimg: the node image object 1944 @param vg_name: the configured VG name 1945 1946 """ 1947 node = ninfo.name 1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1949 1950 nimg.lvm_fail = True 1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 1952 if vg_name is None: 1953 pass 1954 elif isinstance(lvdata, basestring): 1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s", 1956 utils.SafeEncode(lvdata)) 1957 elif not isinstance(lvdata, dict): 1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)") 1959 else: 1960 nimg.volumes = lvdata 1961 nimg.lvm_fail = False
1962
1963 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list. 1965 1966 If the listing was successful, then updates this node's instance 1967 list. Otherwise, it marks the RPC call as failed for the instance 1968 list key. 1969 1970 @type ninfo: L{objects.Node} 1971 @param ninfo: the node to check 1972 @param nresult: the remote results for the node 1973 @param nimg: the node image object 1974 1975 """ 1976 idata = nresult.get(constants.NV_INSTANCELIST, None) 1977 test = not isinstance(idata, list) 1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed" 1979 " (instancelist): %s", utils.SafeEncode(str(idata))) 1980 if test: 1981 nimg.hyp_fail = True 1982 else: 1983 nimg.instances = idata
1984
1985 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map 1987 1988 @type ninfo: L{objects.Node} 1989 @param ninfo: the node to check 1990 @param nresult: the remote results for the node 1991 @param nimg: the node image object 1992 @param vg_name: the configured VG name 1993 1994 """ 1995 node = ninfo.name 1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1997 1998 # try to read free memory (from the hypervisor) 1999 hv_info = nresult.get(constants.NV_HVINFO, None) 2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)") 2002 if not test: 2003 try: 2004 nimg.mfree = int(hv_info["memory_free"]) 2005 except (ValueError, TypeError): 2006 _ErrorIf(True, self.ENODERPC, node, 2007 "node returned invalid nodeinfo, check hypervisor") 2008 2009 # FIXME: devise a free space model for file based instances as well 2010 if vg_name is not None: 2011 test = (constants.NV_VGLIST not in nresult or 2012 vg_name not in nresult[constants.NV_VGLIST]) 2013 _ErrorIf(test, self.ENODELVM, node, 2014 "node didn't return data for the volume group '%s'" 2015 " - it is either missing or broken", vg_name) 2016 if not test: 2017 try: 2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 2019 except (ValueError, TypeError): 2020 _ErrorIf(True, self.ENODERPC, node, 2021 "node returned invalid LVM info, check LVM status")
2022
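# Editorial sketch (standalone, not part of the module): the node-info update
# above defensively converts the reported free memory and VG free space to
# integers, flagging the node instead of crashing when the RPC payload is
# malformed.  The helper and sample payloads below are invented.
def _parse_free(hv_info, vg_info, vg_name):
  """Returns (mfree, dfree), with None for fields that cannot be parsed."""
  try:
    mfree = int(hv_info["memory_free"])
  except (KeyError, ValueError, TypeError):
    mfree = None
  try:
    dfree = int(vg_info[vg_name])
  except (KeyError, ValueError, TypeError):
    dfree = None
  return mfree, dfree

assert _parse_free({"memory_free": "2048"}, {"xenvg": 10240}, "xenvg") == \
       (2048, 10240)
assert _parse_free({}, {}, "xenvg") == (None, None)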
2023 - def BuildHooksEnv(self):
2024 """Build hooks env. 2025 2026 Cluster-Verify hooks just ran in the post phase and their failure makes 2027 the output be logged in the verify output and the verification to fail. 2028 2029 """ 2030 all_nodes = self.cfg.GetNodeList() 2031 env = { 2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()) 2033 } 2034 for node in self.cfg.GetAllNodesInfo().values(): 2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags()) 2036 2037 return env, [], all_nodes
2038
2039 - def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes. 2041 2042 """ 2043 self.bad = False 2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 2045 verbose = self.op.verbose 2046 self._feedback_fn = feedback_fn 2047 feedback_fn("* Verifying global settings") 2048 for msg in self.cfg.VerifyConfig(): 2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg) 2050 2051 # Check the cluster certificates 2052 for cert_filename in constants.ALL_CERT_FILES: 2053 (errcode, msg) = _VerifyCertificate(cert_filename) 2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode) 2055 2056 vg_name = self.cfg.GetVGName() 2057 drbd_helper = self.cfg.GetDRBDHelper() 2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors 2059 cluster = self.cfg.GetClusterInfo() 2060 nodelist = utils.NiceSort(self.cfg.GetNodeList()) 2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] 2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList()) 2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname)) 2064 for iname in instancelist) 2065 i_non_redundant = [] # Non redundant instances 2066 i_non_a_balanced = [] # Non auto-balanced instances 2067 n_offline = 0 # Count of offline nodes 2068 n_drained = 0 # Count of nodes being drained 2069 node_vol_should = {} 2070 2071 # FIXME: verify OS list 2072 # do local checksums 2073 master_files = [constants.CLUSTER_CONF_FILE] 2074 master_node = self.master_node = self.cfg.GetMasterNode() 2075 master_ip = self.cfg.GetMasterIP() 2076 2077 file_names = ssconf.SimpleStore().GetFileList() 2078 file_names.extend(constants.ALL_CERT_FILES) 2079 file_names.extend(master_files) 2080 if cluster.modify_etc_hosts: 2081 file_names.append(constants.ETC_HOSTS) 2082 2083 local_checksums = utils.FingerprintFiles(file_names) 2084 2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) 2086 node_verify_param = { 2087 constants.NV_FILELIST: file_names, 2088 constants.NV_NODELIST: [node.name for node in nodeinfo 2089 if not node.offline], 2090 constants.NV_HYPERVISOR: hypervisors, 2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip, 2092 node.secondary_ip) for node in nodeinfo 2093 if not node.offline], 2094 constants.NV_INSTANCELIST: hypervisors, 2095 constants.NV_VERSION: None, 2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 2097 constants.NV_NODESETUP: None, 2098 constants.NV_TIME: None, 2099 constants.NV_MASTERIP: (master_node, master_ip), 2100 constants.NV_OSLIST: None, 2101 } 2102 2103 if vg_name is not None: 2104 node_verify_param[constants.NV_VGLIST] = None 2105 node_verify_param[constants.NV_LVLIST] = vg_name 2106 node_verify_param[constants.NV_PVLIST] = [vg_name] 2107 node_verify_param[constants.NV_DRBDLIST] = None 2108 2109 if drbd_helper: 2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 2111 2112 # Build our expected cluster state 2113 node_image = dict((node.name, self.NodeImage(offline=node.offline, 2114 name=node.name)) 2115 for node in nodeinfo) 2116 2117 for instance in instancelist: 2118 inst_config = instanceinfo[instance] 2119 2120 for nname in inst_config.all_nodes: 2121 if nname not in node_image: 2122 # ghost node 2123 gnode = self.NodeImage(name=nname) 2124 gnode.ghost = True 2125 node_image[nname] = gnode 2126 2127 inst_config.MapLVsByNode(node_vol_should) 2128 2129 pnode = inst_config.primary_node 2130 node_image[pnode].pinst.append(instance) 2131 2132 for snode in inst_config.secondary_nodes: 2133 nimg = node_image[snode] 2134 nimg.sinst.append(instance) 2135 if pnode not in nimg.sbp: 2136 
nimg.sbp[pnode] = [] 2137 nimg.sbp[pnode].append(instance) 2138 2139 # At this point, we have the in-memory data structures complete, 2140 # except for the runtime information, which we'll gather next 2141 2142 # Due to the way our RPC system works, exact response times cannot be 2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 2144 # time before and after executing the request, we can at least have a time 2145 # window. 2146 nvinfo_starttime = time.time() 2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, 2148 self.cfg.GetClusterName()) 2149 nvinfo_endtime = time.time() 2150 2151 all_drbd_map = self.cfg.ComputeDRBDMap() 2152 2153 feedback_fn("* Verifying node status") 2154 2155 refos_img = None 2156 2157 for node_i in nodeinfo: 2158 node = node_i.name 2159 nimg = node_image[node] 2160 2161 if node_i.offline: 2162 if verbose: 2163 feedback_fn("* Skipping offline node %s" % (node,)) 2164 n_offline += 1 2165 continue 2166 2167 if node == master_node: 2168 ntype = "master" 2169 elif node_i.master_candidate: 2170 ntype = "master candidate" 2171 elif node_i.drained: 2172 ntype = "drained" 2173 n_drained += 1 2174 else: 2175 ntype = "regular" 2176 if verbose: 2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 2178 2179 msg = all_nvinfo[node].fail_msg 2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg) 2181 if msg: 2182 nimg.rpc_fail = True 2183 continue 2184 2185 nresult = all_nvinfo[node].payload 2186 2187 nimg.call_ok = self._VerifyNode(node_i, nresult) 2188 self._VerifyNodeNetwork(node_i, nresult) 2189 self._VerifyNodeLVM(node_i, nresult, vg_name) 2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums, 2191 master_files) 2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper, 2193 all_drbd_map) 2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 2195 2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 2197 self._UpdateNodeInstances(node_i, nresult, nimg) 2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 2199 self._UpdateNodeOS(node_i, nresult, nimg) 2200 if not nimg.os_fail: 2201 if refos_img is None: 2202 refos_img = nimg 2203 self._VerifyNodeOS(node_i, nimg, refos_img) 2204 2205 feedback_fn("* Verifying instance status") 2206 for instance in instancelist: 2207 if verbose: 2208 feedback_fn("* Verifying instance %s" % instance) 2209 inst_config = instanceinfo[instance] 2210 self._VerifyInstance(instance, inst_config, node_image) 2211 inst_nodes_offline = [] 2212 2213 pnode = inst_config.primary_node 2214 pnode_img = node_image[pnode] 2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2216 self.ENODERPC, pnode, "instance %s, connection to" 2217 " primary node failed", instance) 2218 2219 if pnode_img.offline: 2220 inst_nodes_offline.append(pnode) 2221 2222 # If the instance is non-redundant we cannot survive losing its primary 2223 # node, so we are not N+1 compliant. On the other hand we have no disk 2224 # templates with more than one secondary so that situation is not well 2225 # supported either. 
2226 # FIXME: does not support file-backed instances 2227 if not inst_config.secondary_nodes: 2228 i_non_redundant.append(instance) 2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT, 2230 instance, "instance has multiple secondary nodes: %s", 2231 utils.CommaJoin(inst_config.secondary_nodes), 2232 code=self.ETYPE_WARNING) 2233 2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 2235 i_non_a_balanced.append(instance) 2236 2237 for snode in inst_config.secondary_nodes: 2238 s_img = node_image[snode] 2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode, 2240 "instance %s, connection to secondary node failed", instance) 2241 2242 if s_img.offline: 2243 inst_nodes_offline.append(snode) 2244 2245 # warn that the instance lives on offline nodes 2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance, 2247 "instance lives on offline node(s) %s", 2248 utils.CommaJoin(inst_nodes_offline)) 2249 # ... or ghost nodes 2250 for node in inst_config.all_nodes: 2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance, 2252 "instance lives on ghost node %s", node) 2253 2254 feedback_fn("* Verifying orphan volumes") 2255 reserved = utils.FieldSet(*cluster.reserved_lvs) 2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 2257 2258 feedback_fn("* Verifying orphan instances") 2259 self._VerifyOrphanInstances(instancelist, node_image) 2260 2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 2262 feedback_fn("* Verifying N+1 Memory redundancy") 2263 self._VerifyNPlusOneMemory(node_image, instanceinfo) 2264 2265 feedback_fn("* Other Notes") 2266 if i_non_redundant: 2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 2268 % len(i_non_redundant)) 2269 2270 if i_non_a_balanced: 2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 2272 % len(i_non_a_balanced)) 2273 2274 if n_offline: 2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 2276 2277 if n_drained: 2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 2279 2280 return not self.bad
2281
2282 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result 2284 2285 This method analyses the hook result, handles it, and sends some 2286 nicely-formatted feedback back to the user. 2287 2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or 2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 2290 @param hooks_results: the results of the multi-node hooks rpc call 2291 @param feedback_fn: function used send feedback back to the caller 2292 @param lu_result: previous Exec result 2293 @return: the new Exec result, based on the previous result 2294 and hook results 2295 2296 """ 2297 # We only really run POST phase hooks, and are only interested in 2298 # their results 2299 if phase == constants.HOOKS_PHASE_POST: 2300 # Used to change hooks' output to proper indentation 2301 indent_re = re.compile('^', re.M) 2302 feedback_fn("* Hooks Results") 2303 assert hooks_results, "invalid result from hooks" 2304 2305 for node_name in hooks_results: 2306 res = hooks_results[node_name] 2307 msg = res.fail_msg 2308 test = msg and not res.offline 2309 self._ErrorIf(test, self.ENODEHOOKS, node_name, 2310 "Communication failure in hooks execution: %s", msg) 2311 if res.offline or msg: 2312 # No need to investigate payload if node is offline or gave an error. 2313 # override manually lu_result here as _ErrorIf only 2314 # overrides self.bad 2315 lu_result = 1 2316 continue 2317 for script, hkr, output in res.payload: 2318 test = hkr == constants.HKR_FAIL 2319 self._ErrorIf(test, self.ENODEHOOKS, node_name, 2320 "Script %s failed, output:", script) 2321 if test: 2322 output = indent_re.sub(' ', output) 2323 feedback_fn("%s" % output) 2324 lu_result = 0 2325 2326 return lu_result
2327
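# Editorial sketch (standalone, not part of the module): the hook-output
# handling above re-indents multi-line script output by substituting the
# start-of-line anchor in multiline mode, so the output nests under the
# per-node feedback.  The sample string below is invented.
import re

indent_re = re.compile('^', re.M)
sample = "first line\nsecond line"
assert indent_re.sub('      ', sample) == "      first line\n      second line"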
2328 2329 -class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status. 2331 2332 """ 2333 REQ_BGL = False 2334
2335 - def ExpandNames(self):
2336 self.needed_locks = { 2337 locking.LEVEL_NODE: locking.ALL_SET, 2338 locking.LEVEL_INSTANCE: locking.ALL_SET, 2339 } 2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2341
2342 - def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks. 2344 2345 @rtype: tuple of three items 2346 @return: a tuple of (dict of node-to-node_error, list of instances 2347 which need activate-disks, dict of instance: (node, volume) for 2348 missing volumes 2349 2350 """ 2351 result = res_nodes, res_instances, res_missing = {}, [], {} 2352 2353 vg_name = self.cfg.GetVGName() 2354 nodes = utils.NiceSort(self.cfg.GetNodeList()) 2355 instances = [self.cfg.GetInstanceInfo(name) 2356 for name in self.cfg.GetInstanceList()] 2357 2358 nv_dict = {} 2359 for inst in instances: 2360 inst_lvs = {} 2361 if (not inst.admin_up or 2362 inst.disk_template not in constants.DTS_NET_MIRROR): 2363 continue 2364 inst.MapLVsByNode(inst_lvs) 2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname} 2366 for node, vol_list in inst_lvs.iteritems(): 2367 for vol in vol_list: 2368 nv_dict[(node, vol)] = inst 2369 2370 if not nv_dict: 2371 return result 2372 2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name) 2374 2375 for node in nodes: 2376 # node_volume 2377 node_res = node_lvs[node] 2378 if node_res.offline: 2379 continue 2380 msg = node_res.fail_msg 2381 if msg: 2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 2383 res_nodes[node] = msg 2384 continue 2385 2386 lvs = node_res.payload 2387 for lv_name, (_, _, lv_online) in lvs.items(): 2388 inst = nv_dict.pop((node, lv_name), None) 2389 if (not lv_online and inst is not None 2390 and inst.name not in res_instances): 2391 res_instances.append(inst.name) 2392 2393 # any leftover items in nv_dict are missing LVs, let's arrange the 2394 # data better 2395 for key, inst in nv_dict.iteritems(): 2396 if inst.name not in res_missing: 2397 res_missing[inst.name] = [] 2398 res_missing[inst.name].append(key) 2399 2400 return result
2401
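# Editorial sketch (standalone, not part of the module): the disk verification
# above keys every expected LV by (node, volume) so a single pass over the
# per-node LV listings can pop the ones that were found; whatever remains in
# the dict afterwards is missing.  The sample data below is invented.
nv_dict = {("node1", "xenvg/disk0"): "inst1",
           ("node1", "xenvg/disk1"): "inst1",
           ("node2", "xenvg/disk0"): "inst2"}
reported = {"node1": ["xenvg/disk0"], "node2": ["xenvg/disk0"]}

for node, vols in reported.items():
  for vol in vols:
    nv_dict.pop((node, vol), None)

assert nv_dict == {("node1", "xenvg/disk1"): "inst1"}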
2402 2403 -class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes. 2405 2406 """ 2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))] 2408 REQ_BGL = False 2409
2410 - def ExpandNames(self):
2411 if self.op.instances: 2412 self.wanted_names = [] 2413 for name in self.op.instances: 2414 full_name = _ExpandInstanceName(self.cfg, name) 2415 self.wanted_names.append(full_name) 2416 self.needed_locks = { 2417 locking.LEVEL_NODE: [], 2418 locking.LEVEL_INSTANCE: self.wanted_names, 2419 } 2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 2421 else: 2422 self.wanted_names = None 2423 self.needed_locks = { 2424 locking.LEVEL_NODE: locking.ALL_SET, 2425 locking.LEVEL_INSTANCE: locking.ALL_SET, 2426 } 2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2428
2429 - def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None: 2431 self._LockInstancesNodes(primary_only=True)
2432
2433 - def CheckPrereq(self):
2434 """Check prerequisites. 2435 2436 This only checks the optional instance list against the existing names. 2437 2438 """ 2439 if self.wanted_names is None: 2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 2441 2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 2443 in self.wanted_names]
2444
2445 - def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size. 2447 2448 This is valid mainly for DRBD8 and fixes an issue where the 2449 children have smaller disk size. 2450 2451 @param disk: an L{ganeti.objects.Disk} object 2452 2453 """ 2454 if disk.dev_type == constants.LD_DRBD8: 2455 assert disk.children, "Empty children for DRBD8?" 2456 fchild = disk.children[0] 2457 mismatch = fchild.size < disk.size 2458 if mismatch: 2459 self.LogInfo("Child disk has size %d, parent %d, fixing", 2460 fchild.size, disk.size) 2461 fchild.size = disk.size 2462 2463 # and we recurse on this child only, not on the metadev 2464 return self._EnsureChildSizes(fchild) or mismatch 2465 else: 2466 return False
2467
2468 - def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks. 2470 2471 """ 2472 # TODO: check child disks too 2473 # TODO: check differences in size between primary/secondary nodes 2474 per_node_disks = {} 2475 for instance in self.wanted_instances: 2476 pnode = instance.primary_node 2477 if pnode not in per_node_disks: 2478 per_node_disks[pnode] = [] 2479 for idx, disk in enumerate(instance.disks): 2480 per_node_disks[pnode].append((instance, idx, disk)) 2481 2482 changed = [] 2483 for node, dskl in per_node_disks.items(): 2484 newl = [v[2].Copy() for v in dskl] 2485 for dsk in newl: 2486 self.cfg.SetDiskID(dsk, node) 2487 result = self.rpc.call_blockdev_getsizes(node, newl) 2488 if result.fail_msg: 2489 self.LogWarning("Failure in blockdev_getsizes call to node" 2490 " %s, ignoring", node) 2491 continue 2492 if len(result.data) != len(dskl): 2493 self.LogWarning("Invalid result from node %s, ignoring node results", 2494 node) 2495 continue 2496 for ((instance, idx, disk), size) in zip(dskl, result.data): 2497 if size is None: 2498 self.LogWarning("Disk %d of instance %s did not return size" 2499 " information, ignoring", idx, instance.name) 2500 continue 2501 if not isinstance(size, (int, long)): 2502 self.LogWarning("Disk %d of instance %s did not return valid" 2503 " size information, ignoring", idx, instance.name) 2504 continue 2505 size = size >> 20 2506 if size != disk.size: 2507 self.LogInfo("Disk %d of instance %s has mismatched size," 2508 " correcting: recorded %d, actual %d", idx, 2509 instance.name, disk.size, size) 2510 disk.size = size 2511 self.cfg.Update(instance, feedback_fn) 2512 changed.append((instance.name, idx, size)) 2513 if self._EnsureChildSizes(disk): 2514 self.cfg.Update(instance, feedback_fn) 2515 changed.append((instance.name, idx, disk.size)) 2516 return changed
2517
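# Editorial sketch (standalone, not part of the module): blockdev sizes come
# back from the node in bytes, while the configuration stores disk sizes in
# mebibytes, hence the ">> 20" above (divide by 2**20).  Figures below are
# invented sample values.
recorded_mib = 10240
reported_bytes = 10737418240          # 10 GiB as reported by the node
reported_mib = reported_bytes >> 20
assert reported_mib == 10240 == recorded_mib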
2518 2519 -class LURenameCluster(LogicalUnit):
2520 """Rename the cluster. 2521 2522 """ 2523 HPATH = "cluster-rename" 2524 HTYPE = constants.HTYPE_CLUSTER 2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)] 2526
2527 - def BuildHooksEnv(self):
2528 """Build hooks env. 2529 2530 """ 2531 env = { 2532 "OP_TARGET": self.cfg.GetClusterName(), 2533 "NEW_NAME": self.op.name, 2534 } 2535 mn = self.cfg.GetMasterNode() 2536 all_nodes = self.cfg.GetNodeList() 2537 return env, [mn], all_nodes
2538
2539 - def CheckPrereq(self):
2540 """Verify that the passed name is a valid one. 2541 2542 """ 2543 hostname = netutils.GetHostInfo(self.op.name) 2544 2545 new_name = hostname.name 2546 self.ip = new_ip = hostname.ip 2547 old_name = self.cfg.GetClusterName() 2548 old_ip = self.cfg.GetMasterIP() 2549 if new_name == old_name and new_ip == old_ip: 2550 raise errors.OpPrereqError("Neither the name nor the IP address of the" 2551 " cluster has changed", 2552 errors.ECODE_INVAL) 2553 if new_ip != old_ip: 2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 2555 raise errors.OpPrereqError("The given cluster IP address (%s) is" 2556 " reachable on the network. Aborting." % 2557 new_ip, errors.ECODE_NOTUNIQUE) 2558 2559 self.op.name = new_name
2560
2561 - def Exec(self, feedback_fn):
2562 """Rename the cluster. 2563 2564 """ 2565 clustername = self.op.name 2566 ip = self.ip 2567 2568 # shutdown the master IP 2569 master = self.cfg.GetMasterNode() 2570 result = self.rpc.call_node_stop_master(master, False) 2571 result.Raise("Could not disable the master role") 2572 2573 try: 2574 cluster = self.cfg.GetClusterInfo() 2575 cluster.cluster_name = clustername 2576 cluster.master_ip = ip 2577 self.cfg.Update(cluster, feedback_fn) 2578 2579 # update the known hosts file 2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE) 2581 node_list = self.cfg.GetNodeList() 2582 try: 2583 node_list.remove(master) 2584 except ValueError: 2585 pass 2586 result = self.rpc.call_upload_file(node_list, 2587 constants.SSH_KNOWN_HOSTS_FILE) 2588 for to_node, to_result in result.iteritems(): 2589 msg = to_result.fail_msg 2590 if msg: 2591 msg = ("Copy of file %s to node %s failed: %s" % 2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg)) 2593 self.proc.LogWarning(msg) 2594 2595 finally: 2596 result = self.rpc.call_node_start_master(master, False, False) 2597 msg = result.fail_msg 2598 if msg: 2599 self.LogWarning("Could not re-enable the master role on" 2600 " the master, please restart manually: %s", msg) 2601 2602 return clustername
2603
2604 2605 -class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster. 2607 2608 """ 2609 HPATH = "cluster-modify" 2610 HTYPE = constants.HTYPE_CLUSTER 2611 _OP_PARAMS = [ 2612 ("vg_name", None, _TMaybeString), 2613 ("enabled_hypervisors", None, 2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)), 2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), 2616 ("beparams", None, _TOr(_TDict, _TNone)), 2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), 2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), 2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)), 2620 ("uid_pool", None, _NoType), 2621 ("add_uids", None, _NoType), 2622 ("remove_uids", None, _NoType), 2623 ("maintain_node_health", None, _TMaybeBool), 2624 ("nicparams", None, _TOr(_TDict, _TNone)), 2625 ("drbd_helper", None, _TOr(_TString, _TNone)), 2626 ("default_iallocator", None, _TMaybeString), 2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)), 2628 ("hidden_os", None, _TOr(_TListOf(\ 2629 _TAnd(_TList, 2630 _TIsLength(2), 2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))), 2632 _TNone)), 2633 ("blacklisted_os", None, _TOr(_TListOf(\ 2634 _TAnd(_TList, 2635 _TIsLength(2), 2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))), 2637 _TNone)), 2638 ] 2639 REQ_BGL = False 2640
2641 - def CheckArguments(self):
2642 """Check parameters 2643 2644 """ 2645 if self.op.uid_pool: 2646 uidpool.CheckUidPool(self.op.uid_pool) 2647 2648 if self.op.add_uids: 2649 uidpool.CheckUidPool(self.op.add_uids) 2650 2651 if self.op.remove_uids: 2652 uidpool.CheckUidPool(self.op.remove_uids)
2653
2654 - def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on 2656 # all nodes to be modified. 2657 self.needed_locks = { 2658 locking.LEVEL_NODE: locking.ALL_SET, 2659 } 2660 self.share_locks[locking.LEVEL_NODE] = 1
2661
2662 - def BuildHooksEnv(self):
2663 """Build hooks env. 2664 2665 """ 2666 env = { 2667 "OP_TARGET": self.cfg.GetClusterName(), 2668 "NEW_VG_NAME": self.op.vg_name, 2669 } 2670 mn = self.cfg.GetMasterNode() 2671 return env, [mn], [mn]
2672
2673 - def CheckPrereq(self):
2674 """Check prerequisites. 2675 2676 This checks whether the given params don't conflict and 2677 if the given volume group is valid. 2678 2679 """ 2680 if self.op.vg_name is not None and not self.op.vg_name: 2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 2683 " instances exist", errors.ECODE_INVAL) 2684 2685 if self.op.drbd_helper is not None and not self.op.drbd_helper: 2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 2687 raise errors.OpPrereqError("Cannot disable drbd helper while" 2688 " drbd-based instances exist", 2689 errors.ECODE_INVAL) 2690 2691 node_list = self.acquired_locks[locking.LEVEL_NODE] 2692 2693 # if vg_name not None, checks given volume group on all nodes 2694 if self.op.vg_name: 2695 vglist = self.rpc.call_vg_list(node_list) 2696 for node in node_list: 2697 msg = vglist[node].fail_msg 2698 if msg: 2699 # ignoring down node 2700 self.LogWarning("Error while gathering data on node %s" 2701 " (ignoring node): %s", node, msg) 2702 continue 2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 2704 self.op.vg_name, 2705 constants.MIN_VG_SIZE) 2706 if vgstatus: 2707 raise errors.OpPrereqError("Error on node '%s': %s" % 2708 (node, vgstatus), errors.ECODE_ENVIRON) 2709 2710 if self.op.drbd_helper: 2711 # checks given drbd helper on all nodes 2712 helpers = self.rpc.call_drbd_helper(node_list) 2713 for node in node_list: 2714 ninfo = self.cfg.GetNodeInfo(node) 2715 if ninfo.offline: 2716 self.LogInfo("Not checking drbd helper on offline node %s", node) 2717 continue 2718 msg = helpers[node].fail_msg 2719 if msg: 2720 raise errors.OpPrereqError("Error checking drbd helper on node" 2721 " '%s': %s" % (node, msg), 2722 errors.ECODE_ENVIRON) 2723 node_helper = helpers[node].payload 2724 if node_helper != self.op.drbd_helper: 2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 2726 (node, node_helper), errors.ECODE_ENVIRON) 2727 2728 self.cluster = cluster = self.cfg.GetClusterInfo() 2729 # validate params changes 2730 if self.op.beparams: 2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 2733 2734 if self.op.nicparams: 2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 2737 objects.NIC.CheckParameterSyntax(self.new_nicparams) 2738 nic_errors = [] 2739 2740 # check all instances for consistency 2741 for instance in self.cfg.GetAllInstancesInfo().values(): 2742 for nic_idx, nic in enumerate(instance.nics): 2743 params_copy = copy.deepcopy(nic.nicparams) 2744 params_filled = objects.FillDict(self.new_nicparams, params_copy) 2745 2746 # check parameter syntax 2747 try: 2748 objects.NIC.CheckParameterSyntax(params_filled) 2749 except errors.ConfigurationError, err: 2750 nic_errors.append("Instance %s, nic/%d: %s" % 2751 (instance.name, nic_idx, err)) 2752 2753 # if we're moving instances to routed, check that they have an ip 2754 target_mode = params_filled[constants.NIC_MODE] 2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 2756 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" % 2757 (instance.name, nic_idx)) 2758 if nic_errors: 2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 2760 "\n".join(nic_errors)) 2761 2762 # hypervisor list/parameters 2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 2764 if 
self.op.hvparams: 2765 for hv_name, hv_dict in self.op.hvparams.items(): 2766 if hv_name not in self.new_hvparams: 2767 self.new_hvparams[hv_name] = hv_dict 2768 else: 2769 self.new_hvparams[hv_name].update(hv_dict) 2770 2771 # os hypervisor parameters 2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 2773 if self.op.os_hvp: 2774 for os_name, hvs in self.op.os_hvp.items(): 2775 if os_name not in self.new_os_hvp: 2776 self.new_os_hvp[os_name] = hvs 2777 else: 2778 for hv_name, hv_dict in hvs.items(): 2779 if hv_name not in self.new_os_hvp[os_name]: 2780 self.new_os_hvp[os_name][hv_name] = hv_dict 2781 else: 2782 self.new_os_hvp[os_name][hv_name].update(hv_dict) 2783 2784 # os parameters 2785 self.new_osp = objects.FillDict(cluster.osparams, {}) 2786 if self.op.osparams: 2787 for os_name, osp in self.op.osparams.items(): 2788 if os_name not in self.new_osp: 2789 self.new_osp[os_name] = {} 2790 2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 2792 use_none=True) 2793 2794 if not self.new_osp[os_name]: 2795 # we removed all parameters 2796 del self.new_osp[os_name] 2797 else: 2798 # check the parameter validity (remote check) 2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()], 2800 os_name, self.new_osp[os_name]) 2801 2802 # changes to the hypervisor list 2803 if self.op.enabled_hypervisors is not None: 2804 self.hv_list = self.op.enabled_hypervisors 2805 for hv in self.hv_list: 2806 # if the hypervisor doesn't already exist in the cluster 2807 # hvparams, we initialize it to empty, and then (in both 2808 # cases) we make sure to fill the defaults, as we might not 2809 # have a complete defaults list if the hypervisor wasn't 2810 # enabled before 2811 if hv not in new_hvp: 2812 new_hvp[hv] = {} 2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 2815 else: 2816 self.hv_list = cluster.enabled_hypervisors 2817 2818 if self.op.hvparams or self.op.enabled_hypervisors is not None: 2819 # either the enabled list has changed, or the parameters have, validate 2820 for hv_name, hv_params in self.new_hvparams.items(): 2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or 2822 (self.op.enabled_hypervisors and 2823 hv_name in self.op.enabled_hypervisors)): 2824 # either this is a new hypervisor, or its parameters have changed 2825 hv_class = hypervisor.GetHypervisor(hv_name) 2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2827 hv_class.CheckParameterSyntax(hv_params) 2828 _CheckHVParams(self, node_list, hv_name, hv_params) 2829 2830 if self.op.os_hvp: 2831 # no need to check any newly-enabled hypervisors, since the 2832 # defaults have already been checked in the above code-block 2833 for os_name, os_hvp in self.new_os_hvp.items(): 2834 for hv_name, hv_params in os_hvp.items(): 2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2836 # we need to fill in the new os_hvp on top of the actual hv_p 2837 cluster_defaults = self.new_hvparams.get(hv_name, {}) 2838 new_osp = objects.FillDict(cluster_defaults, hv_params) 2839 hv_class = hypervisor.GetHypervisor(hv_name) 2840 hv_class.CheckParameterSyntax(new_osp) 2841 _CheckHVParams(self, node_list, hv_name, new_osp) 2842 2843 if self.op.default_iallocator: 2844 alloc_script = utils.FindFile(self.op.default_iallocator, 2845 constants.IALLOCATOR_SEARCH_PATH, 2846 os.path.isfile) 2847 if alloc_script is None: 2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 2849 " 
specified" % self.op.default_iallocator, 2850 errors.ECODE_INVAL)
2851
2852 - def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster. 2854 2855 """ 2856 if self.op.vg_name is not None: 2857 new_volume = self.op.vg_name 2858 if not new_volume: 2859 new_volume = None 2860 if new_volume != self.cfg.GetVGName(): 2861 self.cfg.SetVGName(new_volume) 2862 else: 2863 feedback_fn("Cluster LVM configuration already in desired" 2864 " state, not changing") 2865 if self.op.drbd_helper is not None: 2866 new_helper = self.op.drbd_helper 2867 if not new_helper: 2868 new_helper = None 2869 if new_helper != self.cfg.GetDRBDHelper(): 2870 self.cfg.SetDRBDHelper(new_helper) 2871 else: 2872 feedback_fn("Cluster DRBD helper already in desired state," 2873 " not changing") 2874 if self.op.hvparams: 2875 self.cluster.hvparams = self.new_hvparams 2876 if self.op.os_hvp: 2877 self.cluster.os_hvp = self.new_os_hvp 2878 if self.op.enabled_hypervisors is not None: 2879 self.cluster.hvparams = self.new_hvparams 2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 2881 if self.op.beparams: 2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 2883 if self.op.nicparams: 2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 2885 if self.op.osparams: 2886 self.cluster.osparams = self.new_osp 2887 2888 if self.op.candidate_pool_size is not None: 2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size 2890 # we need to update the pool size here, otherwise the save will fail 2891 _AdjustCandidatePool(self, []) 2892 2893 if self.op.maintain_node_health is not None: 2894 self.cluster.maintain_node_health = self.op.maintain_node_health 2895 2896 if self.op.add_uids is not None: 2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 2898 2899 if self.op.remove_uids is not None: 2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 2901 2902 if self.op.uid_pool is not None: 2903 self.cluster.uid_pool = self.op.uid_pool 2904 2905 if self.op.default_iallocator is not None: 2906 self.cluster.default_iallocator = self.op.default_iallocator 2907 2908 if self.op.reserved_lvs is not None: 2909 self.cluster.reserved_lvs = self.op.reserved_lvs 2910 2911 def helper_os(aname, mods, desc): 2912 desc += " OS list" 2913 lst = getattr(self.cluster, aname) 2914 for key, val in mods: 2915 if key == constants.DDM_ADD: 2916 if val in lst: 2917 feedback_fn("OS %s already in %s, ignoring", val, desc) 2918 else: 2919 lst.append(val) 2920 elif key == constants.DDM_REMOVE: 2921 if val in lst: 2922 lst.remove(val) 2923 else: 2924 feedback_fn("OS %s not found in %s, ignoring", val, desc) 2925 else: 2926 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2927 2928 if self.op.hidden_os: 2929 helper_os("hidden_os", self.op.hidden_os, "hidden") 2930 2931 if self.op.blacklisted_os: 2932 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 2933 2934 self.cfg.Update(self.cluster, feedback_fn)
2935
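# Editorial sketch (standalone, not part of the module): hidden_os and
# blacklisted_os modifications are lists of (action, os_name) pairs; the
# helper above applies them to the stored list, ignoring redundant adds and
# removes.  Here "add"/"remove" merely stand in for the real constants.DDM_*
# values, and the helper name is invented.
def _apply_mods(lst, mods):
  for action, val in mods:
    if action == "add" and val not in lst:
      lst.append(val)
    elif action == "remove" and val in lst:
      lst.remove(val)
  return lst

assert _apply_mods(["lenny"], [("add", "etch"), ("remove", "lenny"),
                               ("add", "etch")]) == ["etch"]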
2936 2937 -def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2938 """Distribute additional files which are part of the cluster configuration. 2939 2940 ConfigWriter takes care of distributing the config and ssconf files, but 2941 there are more files which should be distributed to all nodes. This function 2942 makes sure those are copied. 2943 2944 @param lu: calling logical unit 2945 @param additional_nodes: list of nodes not in the config to distribute to 2946 2947 """ 2948 # 1. Gather target nodes 2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 2950 dist_nodes = lu.cfg.GetOnlineNodeList() 2951 if additional_nodes is not None: 2952 dist_nodes.extend(additional_nodes) 2953 if myself.name in dist_nodes: 2954 dist_nodes.remove(myself.name) 2955 2956 # 2. Gather files to distribute 2957 dist_files = set([constants.ETC_HOSTS, 2958 constants.SSH_KNOWN_HOSTS_FILE, 2959 constants.RAPI_CERT_FILE, 2960 constants.RAPI_USERS_FILE, 2961 constants.CONFD_HMAC_KEY, 2962 constants.CLUSTER_DOMAIN_SECRET_FILE, 2963 ]) 2964 2965 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors 2966 for hv_name in enabled_hypervisors: 2967 hv_class = hypervisor.GetHypervisor(hv_name) 2968 dist_files.update(hv_class.GetAncillaryFiles()) 2969 2970 # 3. Perform the files upload 2971 for fname in dist_files: 2972 if os.path.exists(fname): 2973 result = lu.rpc.call_upload_file(dist_nodes, fname) 2974 for to_node, to_result in result.items(): 2975 msg = to_result.fail_msg 2976 if msg: 2977 msg = ("Copy of file %s to node %s failed: %s" % 2978 (fname, to_node, msg)) 2979 lu.proc.LogWarning(msg)
2980
2981 2982 -class LURedistributeConfig(NoHooksLU):
2983 """Force the redistribution of cluster configuration. 2984 2985 This is a very simple LU. 2986 2987 """ 2988 REQ_BGL = False 2989
2990 - def ExpandNames(self):
2991 self.needed_locks = { 2992 locking.LEVEL_NODE: locking.ALL_SET, 2993 } 2994 self.share_locks[locking.LEVEL_NODE] = 1
2995
2996 - def Exec(self, feedback_fn):
2997 """Redistribute the configuration. 2998 2999 """ 3000 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 3001 _RedistributeAncillaryFiles(self)
3002
3003 3004 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3005 """Sleep and poll for an instance's disk to sync. 3006 3007 """ 3008 if not instance.disks or disks is not None and not disks: 3009 return True 3010 3011 disks = _ExpandCheckDisks(instance, disks) 3012 3013 if not oneshot: 3014 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) 3015 3016 node = instance.primary_node 3017 3018 for dev in disks: 3019 lu.cfg.SetDiskID(dev, node) 3020 3021 # TODO: Convert to utils.Retry 3022 3023 retries = 0 3024 degr_retries = 10 # in seconds, as we sleep 1 second each time 3025 while True: 3026 max_time = 0 3027 done = True 3028 cumul_degraded = False 3029 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks) 3030 msg = rstats.fail_msg 3031 if msg: 3032 lu.LogWarning("Can't get any data from node %s: %s", node, msg) 3033 retries += 1 3034 if retries >= 10: 3035 raise errors.RemoteError("Can't contact node %s for mirror data," 3036 " aborting." % node) 3037 time.sleep(6) 3038 continue 3039 rstats = rstats.payload 3040 retries = 0 3041 for i, mstat in enumerate(rstats): 3042 if mstat is None: 3043 lu.LogWarning("Can't compute data for node %s/%s", 3044 node, disks[i].iv_name) 3045 continue 3046 3047 cumul_degraded = (cumul_degraded or 3048 (mstat.is_degraded and mstat.sync_percent is None)) 3049 if mstat.sync_percent is not None: 3050 done = False 3051 if mstat.estimated_time is not None: 3052 rem_time = ("%s remaining (estimated)" % 3053 utils.FormatSeconds(mstat.estimated_time)) 3054 max_time = mstat.estimated_time 3055 else: 3056 rem_time = "no time estimate" 3057 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % 3058 (disks[i].iv_name, mstat.sync_percent, rem_time)) 3059 3060 # if we're done but degraded, let's do a few small retries, to 3061 # make sure we see a stable and not transient situation; therefore 3062 # we force restart of the loop 3063 if (done or oneshot) and cumul_degraded and degr_retries > 0: 3064 logging.info("Degraded disks found, %d retries left", degr_retries) 3065 degr_retries -= 1 3066 time.sleep(1) 3067 continue 3068 3069 if done or oneshot: 3070 break 3071 3072 time.sleep(min(60, max_time)) 3073 3074 if done: 3075 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name) 3076 return not cumul_degraded
3077
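# Editorial sketch (standalone, not part of the module): the sync loop above
# polls the primary node, gives a finished-but-still-degraded state a few
# short retries to settle, and otherwise sleeps before polling again.  The
# helper, the fake status sequence and the sleep handling below are invented
# simplifications of that control flow.
def _wait(poll_fn, sleep_fn):
  degr_retries = 10
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(5)

states = iter([(False, True), (True, True), (True, False)])
assert _wait(lambda: next(states), lambda s: None) is True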
3078 3079 -def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3080 """Check that mirrors are not degraded. 3081 3082 The ldisk parameter, if True, will change the test from the 3083 is_degraded attribute (which represents overall non-ok status for 3084 the device(s)) to the ldisk (representing the local storage status). 3085 3086 """ 3087 lu.cfg.SetDiskID(dev, node) 3088 3089 result = True 3090 3091 if on_primary or dev.AssembleOnSecondary(): 3092 rstats = lu.rpc.call_blockdev_find(node, dev) 3093 msg = rstats.fail_msg 3094 if msg: 3095 lu.LogWarning("Can't find disk on node %s: %s", node, msg) 3096 result = False 3097 elif not rstats.payload: 3098 lu.LogWarning("Can't find disk on node %s", node) 3099 result = False 3100 else: 3101 if ldisk: 3102 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY 3103 else: 3104 result = result and not rstats.payload.is_degraded 3105 3106 if dev.children: 3107 for child in dev.children: 3108 result = result and _CheckDiskConsistency(lu, child, node, on_primary) 3109 3110 return result
3111
3112 3113 -class LUDiagnoseOS(NoHooksLU):
3114 """Logical unit for OS diagnose/query. 3115 3116 """ 3117 _OP_PARAMS = [ 3118 _POutputFields, 3119 ("names", _EmptyList, _TListOf(_TNonEmptyString)), 3120 ] 3121 REQ_BGL = False 3122 _HID = "hidden" 3123 _BLK = "blacklisted" 3124 _VLD = "valid" 3125 _FIELDS_STATIC = utils.FieldSet() 3126 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants", 3127 "parameters", "api_versions", _HID, _BLK) 3128
3129 - def CheckArguments(self):
3130 if self.op.names: 3131 raise errors.OpPrereqError("Selective OS query not supported", 3132 errors.ECODE_INVAL) 3133 3134 _CheckOutputFields(static=self._FIELDS_STATIC, 3135 dynamic=self._FIELDS_DYNAMIC, 3136 selected=self.op.output_fields)
3137
3138 - def ExpandNames(self):
3139 # Lock all nodes, in shared mode 3140 # Temporary removal of locks, should be reverted later 3141 # TODO: reintroduce locks when they are lighter-weight 3142 self.needed_locks = {}
3143 #self.share_locks[locking.LEVEL_NODE] = 1 3144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3145 3146 @staticmethod
3147 - def _DiagnoseByOS(rlist):
3148 """Remaps a per-node return list into an a per-os per-node dictionary 3149 3150 @param rlist: a map with node names as keys and OS objects as values 3151 3152 @rtype: dict 3153 @return: a dictionary with osnames as keys and as value another 3154 map, with nodes as keys and tuples of (path, status, diagnose, 3155 variants, parameters, api_versions) as values, eg:: 3156 3157 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []), 3158 (/srv/..., False, "invalid api")], 3159 "node2": [(/srv/..., True, "", [], [])]} 3160 } 3161 3162 """ 3163 all_os = {} 3164 # we build here the list of nodes that didn't fail the RPC (at RPC 3165 # level), so that nodes with a non-responding node daemon don't 3166 # make all OSes invalid 3167 good_nodes = [node_name for node_name in rlist 3168 if not rlist[node_name].fail_msg] 3169 for node_name, nr in rlist.items(): 3170 if nr.fail_msg or not nr.payload: 3171 continue 3172 for (name, path, status, diagnose, variants, 3173 params, api_versions) in nr.payload: 3174 if name not in all_os: 3175 # build a list of nodes for this os containing empty lists 3176 # for each node in node_list 3177 all_os[name] = {} 3178 for nname in good_nodes: 3179 all_os[name][nname] = [] 3180 # convert params from [name, help] to (name, help) 3181 params = [tuple(v) for v in params] 3182 all_os[name][node_name].append((path, status, diagnose, 3183 variants, params, api_versions)) 3184 return all_os
3185
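# Editorial sketch (standalone, not part of the module): _DiagnoseByOS pivots
# {node: [per-OS rows]} into {os_name: {node: [entries]}}, pre-seeding every
# node with an empty list so an OS missing from a node shows up explicitly.
# The sample data below is invented and the fail_msg handling is omitted.
rlist = {"node1": [("debian-etch", "/srv/os", True, "", [], [], [20])],
         "node2": []}
all_os = {}
for node_name, rows in rlist.items():
  for (name, path, status, diag, variants, params, api) in rows:
    if name not in all_os:
      all_os[name] = dict((n, []) for n in rlist)
    all_os[name][node_name].append((path, status, diag, variants, params, api))

assert all_os["debian-etch"]["node2"] == []
assert all_os["debian-etch"]["node1"][0][1] is True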
3186 - def Exec(self, feedback_fn):
3187 """Compute the list of OSes. 3188 3189 """ 3190 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()] 3191 node_data = self.rpc.call_os_diagnose(valid_nodes) 3192 pol = self._DiagnoseByOS(node_data) 3193 output = [] 3194 cluster = self.cfg.GetClusterInfo() 3195 3196 for os_name in utils.NiceSort(pol.keys()): 3197 os_data = pol[os_name] 3198 row = [] 3199 valid = True 3200 (variants, params, api_versions) = null_state = (set(), set(), set()) 3201 for idx, osl in enumerate(os_data.values()): 3202 valid = bool(valid and osl and osl[0][1]) 3203 if not valid: 3204 (variants, params, api_versions) = null_state 3205 break 3206 node_variants, node_params, node_api = osl[0][3:6] 3207 if idx == 0: # first entry 3208 variants = set(node_variants) 3209 params = set(node_params) 3210 api_versions = set(node_api) 3211 else: # keep consistency 3212 variants.intersection_update(node_variants) 3213 params.intersection_update(node_params) 3214 api_versions.intersection_update(node_api) 3215 3216 is_hid = os_name in cluster.hidden_os 3217 is_blk = os_name in cluster.blacklisted_os 3218 if ((self._HID not in self.op.output_fields and is_hid) or 3219 (self._BLK not in self.op.output_fields and is_blk) or 3220 (self._VLD not in self.op.output_fields and not valid)): 3221 continue 3222 3223 for field in self.op.output_fields: 3224 if field == "name": 3225 val = os_name 3226 elif field == self._VLD: 3227 val = valid 3228 elif field == "node_status": 3229 # this is just a copy of the dict 3230 val = {} 3231 for node_name, nos_list in os_data.items(): 3232 val[node_name] = nos_list 3233 elif field == "variants": 3234 val = utils.NiceSort(list(variants)) 3235 elif field == "parameters": 3236 val = list(params) 3237 elif field == "api_versions": 3238 val = list(api_versions) 3239 elif field == self._HID: 3240 val = is_hid 3241 elif field == self._BLK: 3242 val = is_blk 3243 else: 3244 raise errors.ParameterError(field) 3245 row.append(val) 3246 output.append(row) 3247 3248 return output
3249
3250 3251 -class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node. 3253 3254 """ 3255 HPATH = "node-remove" 3256 HTYPE = constants.HTYPE_NODE 3257 _OP_PARAMS = [ 3258 _PNodeName, 3259 ] 3260
3261 - def BuildHooksEnv(self):
3262 """Build hooks env. 3263 3264 This doesn't run on the target node in the pre phase as a failed 3265 node would then be impossible to remove. 3266 3267 """ 3268 env = { 3269 "OP_TARGET": self.op.node_name, 3270 "NODE_NAME": self.op.node_name, 3271 } 3272 all_nodes = self.cfg.GetNodeList() 3273 try: 3274 all_nodes.remove(self.op.node_name) 3275 except ValueError: 3276 logging.warning("Node %s which is about to be removed not found" 3277 " in the all nodes list", self.op.node_name) 3278 return env, all_nodes, all_nodes
3279
3280 - def CheckPrereq(self):
3281 """Check prerequisites. 3282 3283 This checks: 3284 - the node exists in the configuration 3285 - it does not have primary or secondary instances 3286 - it's not the master 3287 3288 Any errors are signaled by raising errors.OpPrereqError. 3289 3290 """ 3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3292 node = self.cfg.GetNodeInfo(self.op.node_name) 3293 assert node is not None 3294 3295 instance_list = self.cfg.GetInstanceList() 3296 3297 masternode = self.cfg.GetMasterNode() 3298 if node.name == masternode: 3299 raise errors.OpPrereqError("Node is the master node," 3300 " you need to failover first.", 3301 errors.ECODE_INVAL) 3302 3303 for instance_name in instance_list: 3304 instance = self.cfg.GetInstanceInfo(instance_name) 3305 if node.name in instance.all_nodes: 3306 raise errors.OpPrereqError("Instance %s is still running on the node," 3307 " please remove first." % instance_name, 3308 errors.ECODE_INVAL) 3309 self.op.node_name = node.name 3310 self.node = node
3311
3312 - def Exec(self, feedback_fn):
3313 """Removes the node from the cluster. 3314 3315 """ 3316 node = self.node 3317 logging.info("Stopping the node daemon and removing configs from node %s", 3318 node.name) 3319 3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 3321 3322 # Promote nodes to master candidate as needed 3323 _AdjustCandidatePool(self, exceptions=[node.name]) 3324 self.context.RemoveNode(node.name) 3325 3326 # Run post hooks on the node before it's removed 3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 3328 try: 3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name]) 3330 except: 3331 # pylint: disable-msg=W0702 3332 self.LogWarning("Errors occurred running hooks on %s" % node.name) 3333 3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup) 3335 msg = result.fail_msg 3336 if msg: 3337 self.LogWarning("Errors encountered on the remote node while leaving" 3338 " the cluster: %s", msg) 3339 3340 # Remove node from our /etc/hosts 3341 if self.cfg.GetClusterInfo().modify_etc_hosts: 3342 # FIXME: this should be done via an rpc call to node daemon 3343 utils.RemoveHostFromEtcHosts(node.name) 3344 _RedistributeAncillaryFiles(self)
3345
3346 3347 -class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes. 3349 3350 """ 3351 # pylint: disable-msg=W0142 3352 _OP_PARAMS = [ 3353 _POutputFields, 3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)), 3355 ("use_locking", False, _TBool), 3356 ] 3357 REQ_BGL = False 3358 3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid", 3360 "master_candidate", "offline", "drained"] 3361 3362 _FIELDS_DYNAMIC = utils.FieldSet( 3363 "dtotal", "dfree", 3364 "mtotal", "mnode", "mfree", 3365 "bootid", 3366 "ctotal", "cnodes", "csockets", 3367 ) 3368 3369 _FIELDS_STATIC = utils.FieldSet(*[ 3370 "pinst_cnt", "sinst_cnt", 3371 "pinst_list", "sinst_list", 3372 "pip", "sip", "tags", 3373 "master", 3374 "role"] + _SIMPLE_FIELDS 3375 ) 3376
3377 - def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC, 3379 dynamic=self._FIELDS_DYNAMIC, 3380 selected=self.op.output_fields)
3381
3382 - def ExpandNames(self):
3383 self.needed_locks = {} 3384 self.share_locks[locking.LEVEL_NODE] = 1 3385 3386 if self.op.names: 3387 self.wanted = _GetWantedNodes(self, self.op.names) 3388 else: 3389 self.wanted = locking.ALL_SET 3390 3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields) 3392 self.do_locking = self.do_node_query and self.op.use_locking 3393 if self.do_locking: 3394 # if we don't request only static fields, we need to lock the nodes 3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3396
3397 - def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes. 3399 3400 """ 3401 all_info = self.cfg.GetAllNodesInfo() 3402 if self.do_locking: 3403 nodenames = self.acquired_locks[locking.LEVEL_NODE] 3404 elif self.wanted != locking.ALL_SET: 3405 nodenames = self.wanted 3406 missing = set(nodenames).difference(all_info.keys()) 3407 if missing: 3408 raise errors.OpExecError( 3409 "Some nodes were removed before retrieving their data: %s" % missing) 3410 else: 3411 nodenames = all_info.keys() 3412 3413 nodenames = utils.NiceSort(nodenames) 3414 nodelist = [all_info[name] for name in nodenames] 3415 3416 # begin data gathering 3417 3418 if self.do_node_query: 3419 live_data = {} 3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(), 3421 self.cfg.GetHypervisorType()) 3422 for name in nodenames: 3423 nodeinfo = node_data[name] 3424 if not nodeinfo.fail_msg and nodeinfo.payload: 3425 nodeinfo = nodeinfo.payload 3426 fn = utils.TryConvert 3427 live_data[name] = { 3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)), 3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)), 3430 "mfree": fn(int, nodeinfo.get('memory_free', None)), 3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)), 3432 "dfree": fn(int, nodeinfo.get('vg_free', None)), 3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)), 3434 "bootid": nodeinfo.get('bootid', None), 3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)), 3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)), 3437 } 3438 else: 3439 live_data[name] = {} 3440 else: 3441 live_data = dict.fromkeys(nodenames, {}) 3442 3443 node_to_primary = dict([(name, set()) for name in nodenames]) 3444 node_to_secondary = dict([(name, set()) for name in nodenames]) 3445 3446 inst_fields = frozenset(("pinst_cnt", "pinst_list", 3447 "sinst_cnt", "sinst_list")) 3448 if inst_fields & frozenset(self.op.output_fields): 3449 inst_data = self.cfg.GetAllInstancesInfo() 3450 3451 for inst in inst_data.values(): 3452 if inst.primary_node in node_to_primary: 3453 node_to_primary[inst.primary_node].add(inst.name) 3454 for secnode in inst.secondary_nodes: 3455 if secnode in node_to_secondary: 3456 node_to_secondary[secnode].add(inst.name) 3457 3458 master_node = self.cfg.GetMasterNode() 3459 3460 # end data gathering 3461 3462 output = [] 3463 for node in nodelist: 3464 node_output = [] 3465 for field in self.op.output_fields: 3466 if field in self._SIMPLE_FIELDS: 3467 val = getattr(node, field) 3468 elif field == "pinst_list": 3469 val = list(node_to_primary[node.name]) 3470 elif field == "sinst_list": 3471 val = list(node_to_secondary[node.name]) 3472 elif field == "pinst_cnt": 3473 val = len(node_to_primary[node.name]) 3474 elif field == "sinst_cnt": 3475 val = len(node_to_secondary[node.name]) 3476 elif field == "pip": 3477 val = node.primary_ip 3478 elif field == "sip": 3479 val = node.secondary_ip 3480 elif field == "tags": 3481 val = list(node.GetTags()) 3482 elif field == "master": 3483 val = node.name == master_node 3484 elif self._FIELDS_DYNAMIC.Matches(field): 3485 val = live_data[node.name].get(field, None) 3486 elif field == "role": 3487 if node.name == master_node: 3488 val = "M" 3489 elif node.master_candidate: 3490 val = "C" 3491 elif node.drained: 3492 val = "D" 3493 elif node.offline: 3494 val = "O" 3495 else: 3496 val = "R" 3497 else: 3498 raise errors.ParameterError(field) 3499 node_output.append(val) 3500 output.append(node_output) 3501 3502 return output
3503
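# A minimal standalone sketch of the two decisions LUQueryNodes makes above,
# shown on plain data structures: node locks are only taken when live (dynamic)
# data is requested *and* the caller asked for locking, and the output rows are
# built by dispatching on each requested field.  The FakeNode class and the
# small field sets are stand-ins invented for the example, not ganeti objects.
class FakeNode(object):
  def __init__(self, name, offline):
    self.name = name
    self.offline = offline

_SIMPLE = frozenset(["name", "offline"])
_DYNAMIC = frozenset(["mfree", "dfree"])

def needs_locking(output_fields, use_locking):
  wants_live_data = bool(set(output_fields) & _DYNAMIC)
  return wants_live_data and use_locking

def project_fields(nodes, live_data, output_fields):
  output = []
  for node in nodes:
    row = []
    for field in output_fields:
      if field in _SIMPLE:
        row.append(getattr(node, field))
      elif field in _DYNAMIC:
        row.append(live_data.get(node.name, {}).get(field, None))
      else:
        raise ValueError("Unknown output field %r" % field)
    output.append(row)
  return output

# needs_locking(["name"], use_locking=True) -> False
# project_fields([FakeNode("node1", False)], {"node1": {"mfree": 512}},
#                ["name", "mfree"]) -> [['node1', 512]]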
3504 3505 -class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s). 3507 3508 """ 3509 _OP_PARAMS = [ 3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), 3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), 3512 ] 3513 REQ_BGL = False 3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") 3515 _FIELDS_STATIC = utils.FieldSet("node") 3516
3517 - def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC, 3519 dynamic=self._FIELDS_DYNAMIC, 3520 selected=self.op.output_fields)
3521
3522 - def ExpandNames(self):
3523 self.needed_locks = {} 3524 self.share_locks[locking.LEVEL_NODE] = 1 3525 if not self.op.nodes: 3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3527 else: 3528 self.needed_locks[locking.LEVEL_NODE] = \ 3529 _GetWantedNodes(self, self.op.nodes)
3530
3531 - def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes. 3533 3534 """ 3535 nodenames = self.acquired_locks[locking.LEVEL_NODE] 3536 volumes = self.rpc.call_node_volumes(nodenames) 3537 3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname 3539 in self.cfg.GetInstanceList()] 3540 3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist]) 3542 3543 output = [] 3544 for node in nodenames: 3545 nresult = volumes[node] 3546 if nresult.offline: 3547 continue 3548 msg = nresult.fail_msg 3549 if msg: 3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg) 3551 continue 3552 3553 node_vols = nresult.payload[:] 3554 node_vols.sort(key=lambda vol: vol['dev']) 3555 3556 for vol in node_vols: 3557 node_output = [] 3558 for field in self.op.output_fields: 3559 if field == "node": 3560 val = node 3561 elif field == "phys": 3562 val = vol['dev'] 3563 elif field == "vg": 3564 val = vol['vg'] 3565 elif field == "name": 3566 val = vol['name'] 3567 elif field == "size": 3568 val = int(float(vol['size'])) 3569 elif field == "instance": 3570 for inst in ilist: 3571 if node not in lv_by_node[inst]: 3572 continue 3573 if vol['name'] in lv_by_node[inst][node]: 3574 val = inst.name 3575 break 3576 else: 3577 val = '-' 3578 else: 3579 raise errors.ParameterError(field) 3580 node_output.append(str(val)) 3581 3582 output.append(node_output) 3583 3584 return output
3585
3586 3587 -class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s). 3589 3590 """ 3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) 3592 _OP_PARAMS = [ 3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), 3594 ("storage_type", _NoDefault, _CheckStorageType), 3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), 3596 ("name", None, _TMaybeString), 3597 ] 3598 REQ_BGL = False 3599
3600 - def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC, 3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), 3603 selected=self.op.output_fields)
3604
3605 - def ExpandNames(self):
3606 self.needed_locks = {} 3607 self.share_locks[locking.LEVEL_NODE] = 1 3608 3609 if self.op.nodes: 3610 self.needed_locks[locking.LEVEL_NODE] = \ 3611 _GetWantedNodes(self, self.op.nodes) 3612 else: 3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3614
3615 - def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes. 3617 3618 """ 3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 3620 3621 # Always get name to sort by 3622 if constants.SF_NAME in self.op.output_fields: 3623 fields = self.op.output_fields[:] 3624 else: 3625 fields = [constants.SF_NAME] + self.op.output_fields 3626 3627 # Never ask for node or type as it's only known to the LU 3628 for extra in [constants.SF_NODE, constants.SF_TYPE]: 3629 while extra in fields: 3630 fields.remove(extra) 3631 3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) 3633 name_idx = field_idx[constants.SF_NAME] 3634 3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 3636 data = self.rpc.call_storage_list(self.nodes, 3637 self.op.storage_type, st_args, 3638 self.op.name, fields) 3639 3640 result = [] 3641 3642 for node in utils.NiceSort(self.nodes): 3643 nresult = data[node] 3644 if nresult.offline: 3645 continue 3646 3647 msg = nresult.fail_msg 3648 if msg: 3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg) 3650 continue 3651 3652 rows = dict([(row[name_idx], row) for row in nresult.payload]) 3653 3654 for name in utils.NiceSort(rows.keys()): 3655 row = rows[name] 3656 3657 out = [] 3658 3659 for field in self.op.output_fields: 3660 if field == constants.SF_NODE: 3661 val = node 3662 elif field == constants.SF_TYPE: 3663 val = self.op.storage_type 3664 elif field in field_idx: 3665 val = row[field_idx[field]] 3666 else: 3667 raise errors.ParameterError(field) 3668 3669 out.append(val) 3670 3671 result.append(out) 3672 3673 return result
3674
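# A minimal standalone sketch of the column bookkeeping done by
# LUQueryNodeStorage.Exec above: the name column is always fetched so the rows
# can be sorted, the "node" and "type" columns are filled in locally rather
# than requested from the backend, and a field->index map is used to project
# the rows.  The helper and the fetch_fn callable are stand-ins for the storage
# RPC, not part of ganeti.cmdlib.
def query_storage(node, storage_type, output_fields, fetch_fn):
  # fields actually requested from the backend
  fields = [f for f in output_fields if f not in ("node", "type")]
  if "name" not in fields:
    fields = ["name"] + fields
  field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))

  # fetch_fn returns one list per storage unit, ordered like `fields`
  rows = fetch_fn(node, fields)

  result = []
  for row in sorted(rows, key=lambda r: r[field_idx["name"]]):
    out = []
    for field in output_fields:
      if field == "node":
        out.append(node)
      elif field == "type":
        out.append(storage_type)
      else:
        out.append(row[field_idx[field]])
    result.append(out)
  return result

# query_storage("node1", "lvm-vg", ["node", "name", "size"],
#               lambda node, fields: [["xenvg", 204800]])
# -> [['node1', 'xenvg', 204800]]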
3675 3676 -class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node. 3678 3679 """ 3680 _OP_PARAMS = [ 3681 _PNodeName, 3682 ("storage_type", _NoDefault, _CheckStorageType), 3683 ("name", _NoDefault, _TNonEmptyString), 3684 ("changes", _NoDefault, _TDict), 3685 ] 3686 REQ_BGL = False 3687
3688 - def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3690 3691 storage_type = self.op.storage_type 3692 3693 try: 3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] 3695 except KeyError: 3696 raise errors.OpPrereqError("Storage units of type '%s' can not be" 3697 " modified" % storage_type, 3698 errors.ECODE_INVAL) 3699 3700 diff = set(self.op.changes.keys()) - modifiable 3701 if diff: 3702 raise errors.OpPrereqError("The following fields can not be modified for" 3703 " storage units of type '%s': %r" % 3704 (storage_type, list(diff)), 3705 errors.ECODE_INVAL)
3706
3707 - def ExpandNames(self):
3708 self.needed_locks = { 3709 locking.LEVEL_NODE: self.op.node_name, 3710 }
3711
3712 - def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes. 3714 3715 """ 3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 3717 result = self.rpc.call_storage_modify(self.op.node_name, 3718 self.op.storage_type, st_args, 3719 self.op.name, self.op.changes) 3720 result.Raise("Failed to modify storage unit '%s' on %s" % 3721 (self.op.name, self.op.node_name))
3722
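# A minimal standalone sketch of the whitelist check performed by
# LUModifyNodeStorage.CheckArguments above.  MODIFIABLE is a stand-in for
# constants.MODIFIABLE_STORAGE_FIELDS; the storage type and field names used
# here are assumptions made for the example.
MODIFIABLE = {
  "lvm-lv": frozenset(["allocatable"]),
  }

def check_storage_changes(storage_type, changes):
  try:
    modifiable = MODIFIABLE[storage_type]
  except KeyError:
    raise ValueError("Storage units of type '%s' cannot be modified" %
                     storage_type)
  diff = set(changes.keys()) - modifiable
  if diff:
    raise ValueError("The following fields cannot be modified for storage"
                     " units of type '%s': %r" % (storage_type, sorted(diff)))

# check_storage_changes("lvm-lv", {"allocatable": False})   # passes
# check_storage_changes("lvm-lv", {"size": 10})             # raises ValueError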
3723 3724 -class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster. 3726 3727 """ 3728 HPATH = "node-add" 3729 HTYPE = constants.HTYPE_NODE 3730 _OP_PARAMS = [ 3731 _PNodeName, 3732 ("primary_ip", None, _NoType), 3733 ("secondary_ip", None, _TMaybeString), 3734 ("readd", False, _TBool), 3735 ] 3736
3737 - def CheckArguments(self):
3738 # validate/normalize the node name 3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3740
3741 - def BuildHooksEnv(self):
3742 """Build hooks env. 3743 3744 This will run on all nodes before, and on all nodes + the new node after. 3745 3746 """ 3747 env = { 3748 "OP_TARGET": self.op.node_name, 3749 "NODE_NAME": self.op.node_name, 3750 "NODE_PIP": self.op.primary_ip, 3751 "NODE_SIP": self.op.secondary_ip, 3752 } 3753 nodes_0 = self.cfg.GetNodeList() 3754 nodes_1 = nodes_0 + [self.op.node_name, ] 3755 return env, nodes_0, nodes_1
3756
3757 - def CheckPrereq(self):
3758 """Check prerequisites. 3759 3760 This checks: 3761 - the new node is not already in the config 3762 - it is resolvable 3763 - its parameters (single/dual homed) matches the cluster 3764 3765 Any errors are signaled by raising errors.OpPrereqError. 3766 3767 """ 3768 node_name = self.op.node_name 3769 cfg = self.cfg 3770 3771 dns_data = netutils.GetHostInfo(node_name) 3772 3773 node = dns_data.name 3774 primary_ip = self.op.primary_ip = dns_data.ip 3775 if self.op.secondary_ip is None: 3776 self.op.secondary_ip = primary_ip 3777 if not netutils.IsValidIP4(self.op.secondary_ip): 3778 raise errors.OpPrereqError("Invalid secondary IP given", 3779 errors.ECODE_INVAL) 3780 secondary_ip = self.op.secondary_ip 3781 3782 node_list = cfg.GetNodeList() 3783 if not self.op.readd and node in node_list: 3784 raise errors.OpPrereqError("Node %s is already in the configuration" % 3785 node, errors.ECODE_EXISTS) 3786 elif self.op.readd and node not in node_list: 3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node, 3788 errors.ECODE_NOENT) 3789 3790 self.changed_primary_ip = False 3791 3792 for existing_node_name in node_list: 3793 existing_node = cfg.GetNodeInfo(existing_node_name) 3794 3795 if self.op.readd and node == existing_node_name: 3796 if existing_node.secondary_ip != secondary_ip: 3797 raise errors.OpPrereqError("Readded node doesn't have the same IP" 3798 " address configuration as before", 3799 errors.ECODE_INVAL) 3800 if existing_node.primary_ip != primary_ip: 3801 self.changed_primary_ip = True 3802 3803 continue 3804 3805 if (existing_node.primary_ip == primary_ip or 3806 existing_node.secondary_ip == primary_ip or 3807 existing_node.primary_ip == secondary_ip or 3808 existing_node.secondary_ip == secondary_ip): 3809 raise errors.OpPrereqError("New node ip address(es) conflict with" 3810 " existing node %s" % existing_node.name, 3811 errors.ECODE_NOTUNIQUE) 3812 3813 # check that the type of the node (single versus dual homed) is the 3814 # same as for the master 3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) 3816 master_singlehomed = myself.secondary_ip == myself.primary_ip 3817 newbie_singlehomed = secondary_ip == primary_ip 3818 if master_singlehomed != newbie_singlehomed: 3819 if master_singlehomed: 3820 raise errors.OpPrereqError("The master has no private ip but the" 3821 " new node has one", 3822 errors.ECODE_INVAL) 3823 else: 3824 raise errors.OpPrereqError("The master has a private ip but the" 3825 " new node doesn't have one", 3826 errors.ECODE_INVAL) 3827 3828 # checks reachability 3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): 3830 raise errors.OpPrereqError("Node not reachable by ping", 3831 errors.ECODE_ENVIRON) 3832 3833 if not newbie_singlehomed: 3834 # check reachability from my secondary ip to newbie's secondary ip 3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, 3836 source=myself.secondary_ip): 3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 3838 " based ping to noded port", 3839 errors.ECODE_ENVIRON) 3840 3841 if self.op.readd: 3842 exceptions = [node] 3843 else: 3844 exceptions = [] 3845 3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) 3847 3848 if self.op.readd: 3849 self.new_node = self.cfg.GetNodeInfo(node) 3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node 3851 else: 3852 self.new_node = objects.Node(name=node, 3853 primary_ip=primary_ip, 3854 secondary_ip=secondary_ip, 3855 
master_candidate=self.master_candidate, 3856 offline=False, drained=False)
3857
3858 - def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster. 3860 3861 """ 3862 new_node = self.new_node 3863 node = new_node.name 3864 3865 # for re-adds, reset the offline/drained/master-candidate flags; 3866 # we need to reset here, otherwise offline would prevent RPC calls 3867 # later in the procedure; this also means that if the re-add 3868 # fails, we are left with a non-offlined, broken node 3869 if self.op.readd: 3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201 3871 self.LogInfo("Readding a node, the offline/drained flags were reset") 3872 # if we demote the node, we do cleanup later in the procedure 3873 new_node.master_candidate = self.master_candidate 3874 if self.changed_primary_ip: 3875 new_node.primary_ip = self.op.primary_ip 3876 3877 # notify the user about any possible mc promotion 3878 if new_node.master_candidate: 3879 self.LogInfo("Node will be a master candidate") 3880 3881 # check connectivity 3882 result = self.rpc.call_version([node])[node] 3883 result.Raise("Can't get version information from node %s" % node) 3884 if constants.PROTOCOL_VERSION == result.payload: 3885 logging.info("Communication to node %s fine, sw version %s match", 3886 node, result.payload) 3887 else: 3888 raise errors.OpExecError("Version mismatch master version %s," 3889 " node version %s" % 3890 (constants.PROTOCOL_VERSION, result.payload)) 3891 3892 # setup ssh on node 3893 if self.cfg.GetClusterInfo().modify_ssh_setup: 3894 logging.info("Copy ssh key to node %s", node) 3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) 3896 keyarray = [] 3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB, 3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB, 3899 priv_key, pub_key] 3900 3901 for i in keyfiles: 3902 keyarray.append(utils.ReadFile(i)) 3903 3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1], 3905 keyarray[2], keyarray[3], keyarray[4], 3906 keyarray[5]) 3907 result.Raise("Cannot transfer ssh keys to the new node") 3908 3909 # Add node to our /etc/hosts, and add key to known_hosts 3910 if self.cfg.GetClusterInfo().modify_etc_hosts: 3911 # FIXME: this should be done via an rpc call to node daemon 3912 utils.AddHostToEtcHosts(new_node.name) 3913 3914 if new_node.secondary_ip != new_node.primary_ip: 3915 result = self.rpc.call_node_has_ip_address(new_node.name, 3916 new_node.secondary_ip) 3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name, 3918 prereq=True, ecode=errors.ECODE_ENVIRON) 3919 if not result.payload: 3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip" 3921 " you gave (%s). Please fix and re-run this" 3922 " command." % new_node.secondary_ip) 3923 3924 node_verify_list = [self.cfg.GetMasterNode()] 3925 node_verify_param = { 3926 constants.NV_NODELIST: [node], 3927 # TODO: do a node-net-test as well? 
3928 } 3929 3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param, 3931 self.cfg.GetClusterName()) 3932 for verifier in node_verify_list: 3933 result[verifier].Raise("Cannot communicate with node %s" % verifier) 3934 nl_payload = result[verifier].payload[constants.NV_NODELIST] 3935 if nl_payload: 3936 for failed in nl_payload: 3937 feedback_fn("ssh/hostname verification failed" 3938 " (checking from %s): %s" % 3939 (verifier, nl_payload[failed])) 3940 raise errors.OpExecError("ssh/hostname verification failed.") 3941 3942 if self.op.readd: 3943 _RedistributeAncillaryFiles(self) 3944 self.context.ReaddNode(new_node) 3945 # make sure we redistribute the config 3946 self.cfg.Update(new_node, feedback_fn) 3947 # and make sure the new node will not have old files around 3948 if not new_node.master_candidate: 3949 result = self.rpc.call_node_demote_from_mc(new_node.name) 3950 msg = result.fail_msg 3951 if msg: 3952 self.LogWarning("Node failed to demote itself from master" 3953 " candidate status: %s" % msg) 3954 else: 3955 _RedistributeAncillaryFiles(self, additional_nodes=[node]) 3956 self.context.AddNode(new_node, self.proc.GetECId())
3957
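# A minimal standalone sketch of two of the prerequisite checks made by
# LUAddNode.CheckPrereq above: the new node's addresses must not collide with
# any existing node, and its single/dual-homed setup must match the master's.
# Nodes are represented here as plain (primary_ip, secondary_ip) tuples instead
# of objects.Node instances.
def check_new_node_ips(primary_ip, secondary_ip, existing, master):
  for (ex_pip, ex_sip) in existing:
    if primary_ip in (ex_pip, ex_sip) or secondary_ip in (ex_pip, ex_sip):
      raise ValueError("New node IP address(es) conflict with existing"
                       " node %s/%s" % (ex_pip, ex_sip))
  master_singlehomed = master[0] == master[1]
  newbie_singlehomed = primary_ip == secondary_ip
  if master_singlehomed != newbie_singlehomed:
    raise ValueError("The single/dual-homed setup of the new node does not"
                     " match the master's")

# check_new_node_ips("192.0.2.10", "192.0.2.10",
#                    existing=[("192.0.2.1", "192.0.2.1")],
#                    master=("192.0.2.1", "192.0.2.1"))   # passes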
3958 3959 -class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node. 3961 3962 """ 3963 HPATH = "node-modify" 3964 HTYPE = constants.HTYPE_NODE 3965 _OP_PARAMS = [ 3966 _PNodeName, 3967 ("master_candidate", None, _TMaybeBool), 3968 ("offline", None, _TMaybeBool), 3969 ("drained", None, _TMaybeBool), 3970 ("auto_promote", False, _TBool), 3971 _PForce, 3972 ] 3973 REQ_BGL = False 3974
3975 - def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained] 3978 if all_mods.count(None) == 3: 3979 raise errors.OpPrereqError("Please pass at least one modification", 3980 errors.ECODE_INVAL) 3981 if all_mods.count(True) > 1: 3982 raise errors.OpPrereqError("Can't set the node into more than one" 3983 " state at the same time", 3984 errors.ECODE_INVAL) 3985 3986 # Boolean value that tells us whether we're offlining or draining the node 3987 self.offline_or_drain = (self.op.offline == True or 3988 self.op.drained == True) 3989 self.deoffline_or_drain = (self.op.offline == False or 3990 self.op.drained == False) 3991 self.might_demote = (self.op.master_candidate == False or 3992 self.offline_or_drain) 3993 3994 self.lock_all = self.op.auto_promote and self.might_demote
3995 3996
3997 - def ExpandNames(self):
3998 if self.lock_all: 3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET} 4000 else: 4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4002
4003 - def BuildHooksEnv(self):
4004 """Build hooks env. 4005 4006 This runs on the master node. 4007 4008 """ 4009 env = { 4010 "OP_TARGET": self.op.node_name, 4011 "MASTER_CANDIDATE": str(self.op.master_candidate), 4012 "OFFLINE": str(self.op.offline), 4013 "DRAINED": str(self.op.drained), 4014 } 4015 nl = [self.cfg.GetMasterNode(), 4016 self.op.node_name] 4017 return env, nl, nl
4018
4019 - def CheckPrereq(self):
4020 """Check prerequisites. 4021 4022 This only checks the instance list against the existing names. 4023 4024 """ 4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name) 4026 4027 if (self.op.master_candidate is not None or 4028 self.op.drained is not None or 4029 self.op.offline is not None): 4030 # we can't change the master's node flags 4031 if self.op.node_name == self.cfg.GetMasterNode(): 4032 raise errors.OpPrereqError("The master role can be changed" 4033 " only via master-failover", 4034 errors.ECODE_INVAL) 4035 4036 4037 if node.master_candidate and self.might_demote and not self.lock_all: 4038 assert not self.op.auto_promote, "auto-promote set but lock_all not" 4039 # check if after removing the current node, we're missing master 4040 # candidates 4041 (mc_remaining, mc_should, _) = \ 4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name]) 4043 if mc_remaining < mc_should: 4044 raise errors.OpPrereqError("Not enough master candidates, please" 4045 " pass auto_promote to allow promotion", 4046 errors.ECODE_INVAL) 4047 4048 if (self.op.master_candidate == True and 4049 ((node.offline and not self.op.offline == False) or 4050 (node.drained and not self.op.drained == False))): 4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set" 4052 " to master_candidate" % node.name, 4053 errors.ECODE_INVAL) 4054 4055 # If we're being deofflined/drained, we'll MC ourself if needed 4056 if (self.deoffline_or_drain and not self.offline_or_drain and not 4057 self.op.master_candidate == True and not node.master_candidate): 4058 self.op.master_candidate = _DecideSelfPromotion(self) 4059 if self.op.master_candidate: 4060 self.LogInfo("Autopromoting node to master candidate") 4061 4062 return
4063
4064 - def Exec(self, feedback_fn):
4065 """Modifies a node. 4066 4067 """ 4068 node = self.node 4069 4070 result = [] 4071 changed_mc = False 4072 4073 if self.op.offline is not None: 4074 node.offline = self.op.offline 4075 result.append(("offline", str(self.op.offline))) 4076 if self.op.offline == True: 4077 if node.master_candidate: 4078 node.master_candidate = False 4079 changed_mc = True 4080 result.append(("master_candidate", "auto-demotion due to offline")) 4081 if node.drained: 4082 node.drained = False 4083 result.append(("drained", "clear drained status due to offline")) 4084 4085 if self.op.master_candidate is not None: 4086 node.master_candidate = self.op.master_candidate 4087 changed_mc = True 4088 result.append(("master_candidate", str(self.op.master_candidate))) 4089 if self.op.master_candidate == False: 4090 rrc = self.rpc.call_node_demote_from_mc(node.name) 4091 msg = rrc.fail_msg 4092 if msg: 4093 self.LogWarning("Node failed to demote itself: %s" % msg) 4094 4095 if self.op.drained is not None: 4096 node.drained = self.op.drained 4097 result.append(("drained", str(self.op.drained))) 4098 if self.op.drained == True: 4099 if node.master_candidate: 4100 node.master_candidate = False 4101 changed_mc = True 4102 result.append(("master_candidate", "auto-demotion due to drain")) 4103 rrc = self.rpc.call_node_demote_from_mc(node.name) 4104 msg = rrc.fail_msg 4105 if msg: 4106 self.LogWarning("Node failed to demote itself: %s" % msg) 4107 if node.offline: 4108 node.offline = False 4109 result.append(("offline", "clear offline status due to drain")) 4110 4111 # we locked all nodes, we adjust the CP before updating this node 4112 if self.lock_all: 4113 _AdjustCandidatePool(self, [node.name]) 4114 4115 # this will trigger configuration file update, if needed 4116 self.cfg.Update(node, feedback_fn) 4117 4118 # this will trigger job queue propagation or cleanup 4119 if changed_mc: 4120 self.context.ReaddNode(node) 4121 4122 return result
4123
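# A minimal standalone sketch of the flag sanity checks from
# LUSetNodeParams.CheckArguments above.  Each flag is True, False or None (None
# meaning "leave unchanged"); at least one modification must be given and at
# most one node state may be switched on at the same time.  The helper name is
# an assumption for this example.
def check_node_flags(offline, drained, master_candidate):
  all_mods = [offline, drained, master_candidate]
  if all_mods.count(None) == len(all_mods):
    raise ValueError("Please pass at least one modification")
  if all_mods.count(True) > 1:
    raise ValueError("Can't set the node into more than one state at the"
                     " same time")
  # demotion from master candidate may be needed in these cases
  might_demote = (master_candidate is False or offline is True or
                  drained is True)
  return might_demote

# check_node_flags(offline=True, drained=None, master_candidate=None) -> True
# check_node_flags(True, True, None)                        # raises ValueError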
4124 4125 -class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node. 4127 4128 """ 4129 _OP_PARAMS = [ 4130 _PNodeName, 4131 _PForce, 4132 ] 4133 REQ_BGL = False 4134
4135 - def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force: 4138 raise errors.OpPrereqError("The node is the master and the force" 4139 " parameter was not set", 4140 errors.ECODE_INVAL)
4141
4142 - def ExpandNames(self):
4143 """Locking for PowercycleNode. 4144 4145 This is a last-resort option and shouldn't block on other 4146 jobs. Therefore, we grab no locks. 4147 4148 """ 4149 self.needed_locks = {}
4150
4151 - def Exec(self, feedback_fn):
4152 """Reboots a node. 4153 4154 """ 4155 result = self.rpc.call_node_powercycle(self.op.node_name, 4156 self.cfg.GetHypervisorType()) 4157 result.Raise("Failed to schedule the reboot") 4158 return result.payload
4159
4160 4161 -class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration. 4163 4164 """ 4165 REQ_BGL = False 4166
4167 - def ExpandNames(self):
4168 self.needed_locks = {}
4169
4170 - def Exec(self, feedback_fn):
4171 """Return cluster config. 4172 4173 """ 4174 cluster = self.cfg.GetClusterInfo() 4175 os_hvp = {} 4176 4177 # Filter just for enabled hypervisors 4178 for os_name, hv_dict in cluster.os_hvp.items(): 4179 os_hvp[os_name] = {} 4180 for hv_name, hv_params in hv_dict.items(): 4181 if hv_name in cluster.enabled_hypervisors: 4182 os_hvp[os_name][hv_name] = hv_params 4183 4184 result = { 4185 "software_version": constants.RELEASE_VERSION, 4186 "protocol_version": constants.PROTOCOL_VERSION, 4187 "config_version": constants.CONFIG_VERSION, 4188 "os_api_version": max(constants.OS_API_VERSIONS), 4189 "export_version": constants.EXPORT_VERSION, 4190 "architecture": (platform.architecture()[0], platform.machine()), 4191 "name": cluster.cluster_name, 4192 "master": cluster.master_node, 4193 "default_hypervisor": cluster.enabled_hypervisors[0], 4194 "enabled_hypervisors": cluster.enabled_hypervisors, 4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 4196 for hypervisor_name in cluster.enabled_hypervisors]), 4197 "os_hvp": os_hvp, 4198 "beparams": cluster.beparams, 4199 "osparams": cluster.osparams, 4200 "nicparams": cluster.nicparams, 4201 "candidate_pool_size": cluster.candidate_pool_size, 4202 "master_netdev": cluster.master_netdev, 4203 "volume_group_name": cluster.volume_group_name, 4204 "drbd_usermode_helper": cluster.drbd_usermode_helper, 4205 "file_storage_dir": cluster.file_storage_dir, 4206 "maintain_node_health": cluster.maintain_node_health, 4207 "ctime": cluster.ctime, 4208 "mtime": cluster.mtime, 4209 "uuid": cluster.uuid, 4210 "tags": list(cluster.GetTags()), 4211 "uid_pool": cluster.uid_pool, 4212 "default_iallocator": cluster.default_iallocator, 4213 "reserved_lvs": cluster.reserved_lvs, 4214 } 4215 4216 return result
4217
4218 4219 -class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values. 4221 4222 """ 4223 _OP_PARAMS = [_POutputFields] 4224 REQ_BGL = False 4225 _FIELDS_DYNAMIC = utils.FieldSet() 4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag", 4227 "watcher_pause", "volume_group_name") 4228
4229 - def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC, 4231 dynamic=self._FIELDS_DYNAMIC, 4232 selected=self.op.output_fields)
4233
4234 - def ExpandNames(self):
4235 self.needed_locks = {}
4236
4237 - def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output. 4239 4240 """ 4241 values = [] 4242 for field in self.op.output_fields: 4243 if field == "cluster_name": 4244 entry = self.cfg.GetClusterName() 4245 elif field == "master_node": 4246 entry = self.cfg.GetMasterNode() 4247 elif field == "drain_flag": 4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE) 4249 elif field == "watcher_pause": 4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE) 4251 elif field == "volume_group_name": 4252 entry = self.cfg.GetVGName() 4253 else: 4254 raise errors.ParameterError(field) 4255 values.append(entry) 4256 return values
4257
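# A standalone sketch of the field lookup done by LUQueryConfigValues.Exec
# above, written as a dispatch table of callables instead of an if/elif chain.
# The getters below return canned values; in the LU they come from self.cfg,
# os.path and utils.
_GETTERS = {
  "cluster_name": lambda: "cluster.example.com",
  "master_node": lambda: "node1.example.com",
  "volume_group_name": lambda: "xenvg",
  }

def query_config_values(output_fields, getters=_GETTERS):
  values = []
  for field in output_fields:
    fn = getters.get(field)
    if fn is None:
      raise ValueError("Unknown configuration field %r" % field)
    values.append(fn())
  return values

# query_config_values(["master_node", "volume_group_name"])
# -> ['node1.example.com', 'xenvg']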
4258 4259 -class LUActivateInstanceDisks(NoHooksLU):
4260 """Bring up an instance's disks. 4261 4262 """ 4263 _OP_PARAMS = [ 4264 _PInstanceName, 4265 ("ignore_size", False, _TBool), 4266 ] 4267 REQ_BGL = False 4268
4269 - def ExpandNames(self):
4270 self._ExpandAndLockInstance() 4271 self.needed_locks[locking.LEVEL_NODE] = [] 4272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4273
4274 - def DeclareLocks(self, level):
4275 if level == locking.LEVEL_NODE: 4276 self._LockInstancesNodes()
4277
4278 - def CheckPrereq(self):
4279 """Check prerequisites. 4280 4281 This checks that the instance is in the cluster. 4282 4283 """ 4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4285 assert self.instance is not None, \ 4286 "Cannot retrieve locked instance %s" % self.op.instance_name 4287 _CheckNodeOnline(self, self.instance.primary_node)
4288
4289 - def Exec(self, feedback_fn):
4290 """Activate the disks. 4291 4292 """ 4293 disks_ok, disks_info = \ 4294 _AssembleInstanceDisks(self, self.instance, 4295 ignore_size=self.op.ignore_size) 4296 if not disks_ok: 4297 raise errors.OpExecError("Cannot activate block devices") 4298 4299 return disks_info
4300
4301 4302 -def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, 4303 ignore_size=False):
4304 """Prepare the block devices for an instance. 4305 4306 This sets up the block devices on all nodes. 4307 4308 @type lu: L{LogicalUnit} 4309 @param lu: the logical unit on whose behalf we execute 4310 @type instance: L{objects.Instance} 4311 @param instance: the instance for whose disks we assemble 4312 @type disks: list of L{objects.Disk} or None 4313 @param disks: which disks to assemble (or all, if None) 4314 @type ignore_secondaries: boolean 4315 @param ignore_secondaries: if true, errors on secondary nodes 4316 won't result in an error return from the function 4317 @type ignore_size: boolean 4318 @param ignore_size: if true, the current known size of the disk 4319 will not be used during the disk activation, useful for cases 4320 when the size is wrong 4321 @return: False if the operation failed, otherwise a list of 4322 (host, instance_visible_name, node_visible_name) 4323 with the mapping from node devices to instance devices 4324 4325 """ 4326 device_info = [] 4327 disks_ok = True 4328 iname = instance.name 4329 disks = _ExpandCheckDisks(instance, disks) 4330 4331 # With the two passes mechanism we try to reduce the window of 4332 # opportunity for the race condition of switching DRBD to primary 4333 # before handshaking occured, but we do not eliminate it 4334 4335 # The proper fix would be to wait (with some limits) until the 4336 # connection has been made and drbd transitions from WFConnection 4337 # into any other network-connected state (Connected, SyncTarget, 4338 # SyncSource, etc.) 4339 4340 # 1st pass, assemble on all nodes in secondary mode 4341 for inst_disk in disks: 4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4343 if ignore_size: 4344 node_disk = node_disk.Copy() 4345 node_disk.UnsetSize() 4346 lu.cfg.SetDiskID(node_disk, node) 4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False) 4348 msg = result.fail_msg 4349 if msg: 4350 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4351 " (is_primary=False, pass=1): %s", 4352 inst_disk.iv_name, node, msg) 4353 if not ignore_secondaries: 4354 disks_ok = False 4355 4356 # FIXME: race condition on drbd migration to primary 4357 4358 # 2nd pass, do only the primary node 4359 for inst_disk in disks: 4360 dev_path = None 4361 4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4363 if node != instance.primary_node: 4364 continue 4365 if ignore_size: 4366 node_disk = node_disk.Copy() 4367 node_disk.UnsetSize() 4368 lu.cfg.SetDiskID(node_disk, node) 4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True) 4370 msg = result.fail_msg 4371 if msg: 4372 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4373 " (is_primary=True, pass=2): %s", 4374 inst_disk.iv_name, node, msg) 4375 disks_ok = False 4376 else: 4377 dev_path = result.payload 4378 4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path)) 4380 4381 # leave the disks configured for the primary node 4382 # this is a workaround that would be fixed better by 4383 # improving the logical/physical id handling 4384 for disk in disks: 4385 lu.cfg.SetDiskID(disk, instance.primary_node) 4386 4387 return disks_ok, device_info
4388
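# A minimal standalone sketch of the two-pass activation order implemented by
# _AssembleInstanceDisks above, with the per-node disk tree and the RPC layer
# collapsed into a plain list of (node, disk_name) pairs and an assemble_fn
# stub.  The point of the ordering is to assemble every disk in secondary mode
# on all nodes first and only then promote it on the primary node, which
# narrows (but does not close) the DRBD WFConnection race described in the
# comments above.
import logging

def assemble_disks(disks, primary_node, assemble_fn):
  """disks: list of (node, disk_name); assemble_fn(node, disk, as_primary)
  returns an error message or None."""
  disks_ok = True
  # 1st pass: secondary mode everywhere
  for (node, disk) in disks:
    err = assemble_fn(node, disk, False)
    if err:
      logging.warning("Could not prepare %s on %s (pass 1): %s",
                      disk, node, err)
      disks_ok = False
  # 2nd pass: primary mode, on the primary node only
  for (node, disk) in disks:
    if node != primary_node:
      continue
    err = assemble_fn(node, disk, True)
    if err:
      logging.warning("Could not prepare %s on %s (pass 2): %s",
                      disk, node, err)
      disks_ok = False
  return disks_ok

# assemble_disks([("node1", "disk/0"), ("node2", "disk/0")], "node1",
#                lambda node, disk, as_primary: None)   # -> True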
4389 4390 -def _StartInstanceDisks(lu, instance, force):
4391 """Start the disks of an instance. 4392 4393 """ 4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance, 4395 ignore_secondaries=force) 4396 if not disks_ok: 4397 _ShutdownInstanceDisks(lu, instance) 4398 if force is not None and not force: 4399 lu.proc.LogWarning("", hint="If the message above refers to a" 4400 " secondary node," 4401 " you can retry the operation using '--force'.") 4402 raise errors.OpExecError("Disk consistency error")
4403
4404 4405 -class LUDeactivateInstanceDisks(NoHooksLU):
4406 """Shutdown an instance's disks. 4407 4408 """ 4409 _OP_PARAMS = [ 4410 _PInstanceName, 4411 ] 4412 REQ_BGL = False 4413
4414 - def ExpandNames(self):
4415 self._ExpandAndLockInstance() 4416 self.needed_locks[locking.LEVEL_NODE] = [] 4417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4418
4419 - def DeclareLocks(self, level):
4420 if level == locking.LEVEL_NODE: 4421 self._LockInstancesNodes()
4422
4423 - def CheckPrereq(self):
4424 """Check prerequisites. 4425 4426 This checks that the instance is in the cluster. 4427 4428 """ 4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4430 assert self.instance is not None, \ 4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4432
4433 - def Exec(self, feedback_fn):
4434 """Deactivate the disks 4435 4436 """ 4437 instance = self.instance 4438 _SafeShutdownInstanceDisks(self, instance)
4439
4440 4441 -def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442 """Shutdown block devices of an instance. 4443 4444 This function checks if an instance is running, before calling 4445 _ShutdownInstanceDisks. 4446 4447 """ 4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks") 4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4450
4451 4452 -def _ExpandCheckDisks(instance, disks):
4453 """Return the instance disks selected by the disks list 4454 4455 @type disks: list of L{objects.Disk} or None 4456 @param disks: selected disks 4457 @rtype: list of L{objects.Disk} 4458 @return: selected instance disks to act on 4459 4460 """ 4461 if disks is None: 4462 return instance.disks 4463 else: 4464 if not set(disks).issubset(instance.disks): 4465 raise errors.ProgrammerError("Can only act on disks belonging to the" 4466 " target instance") 4467 return disks
4468
4469 4470 -def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471 """Shutdown block devices of an instance. 4472 4473 This does the shutdown on all nodes of the instance. 4474 4475 If the ignore_primary is false, errors on the primary node are 4476 ignored. 4477 4478 """ 4479 all_result = True 4480 disks = _ExpandCheckDisks(instance, disks) 4481 4482 for disk in disks: 4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node): 4484 lu.cfg.SetDiskID(top_disk, node) 4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk) 4486 msg = result.fail_msg 4487 if msg: 4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s", 4489 disk.iv_name, node, msg) 4490 if not ignore_primary or node != instance.primary_node: 4491 all_result = False 4492 return all_result
4493
4494 4495 -def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496 """Checks if a node has enough free memory. 4497 4498 This function check if a given node has the needed amount of free 4499 memory. In case the node has less memory or we cannot get the 4500 information from the node, this function raise an OpPrereqError 4501 exception. 4502 4503 @type lu: C{LogicalUnit} 4504 @param lu: a logical unit from which we get configuration data 4505 @type node: C{str} 4506 @param node: the node to check 4507 @type reason: C{str} 4508 @param reason: string to use in the error message 4509 @type requested: C{int} 4510 @param requested: the amount of memory in MiB to check for 4511 @type hypervisor_name: C{str} 4512 @param hypervisor_name: the hypervisor to ask for memory stats 4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or 4514 we cannot check the node 4515 4516 """ 4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name) 4518 nodeinfo[node].Raise("Can't get data from node %s" % node, 4519 prereq=True, ecode=errors.ECODE_ENVIRON) 4520 free_mem = nodeinfo[node].payload.get('memory_free', None) 4521 if not isinstance(free_mem, int): 4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result" 4523 " was '%s'" % (node, free_mem), 4524 errors.ECODE_ENVIRON) 4525 if requested > free_mem: 4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:" 4527 " needed %s MiB, available %s MiB" % 4528 (node, reason, requested, free_mem), 4529 errors.ECODE_NORES)
4530
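# A minimal standalone sketch of the shape of the check done by
# _CheckNodeFreeMemory above, with the RPC result replaced by a plain payload
# dict.  The "memory_free" key matches the one read by the function; the helper
# name and the use of ValueError (instead of errors.OpPrereqError) are
# simplifications for the example.
def check_free_memory(node, payload, requested_mib, reason):
  free_mem = payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise ValueError("Can't compute free memory on node %s, result was %r" %
                     (node, free_mem))
  if requested_mib > free_mem:
    raise ValueError("Not enough memory on node %s for %s: needed %s MiB,"
                     " available %s MiB" %
                     (node, reason, requested_mib, free_mem))

# check_free_memory("node2", {"memory_free": 2048}, 512,
#                   "starting instance inst1")   # passes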
4531 4532 -def _CheckNodesFreeDisk(lu, nodenames, requested):
4533 """Checks if nodes have enough free disk space in the default VG. 4534 4535 This function check if all given nodes have the needed amount of 4536 free disk. In case any node has less disk or we cannot get the 4537 information from the node, this function raise an OpPrereqError 4538 exception. 4539 4540 @type lu: C{LogicalUnit} 4541 @param lu: a logical unit from which we get configuration data 4542 @type nodenames: C{list} 4543 @param nodenames: the list of node names to check 4544 @type requested: C{int} 4545 @param requested: the amount of disk in MiB to check for 4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or 4547 we cannot check the node 4548 4549 """ 4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(), 4551 lu.cfg.GetHypervisorType()) 4552 for node in nodenames: 4553 info = nodeinfo[node] 4554 info.Raise("Cannot get current information from node %s" % node, 4555 prereq=True, ecode=errors.ECODE_ENVIRON) 4556 vg_free = info.payload.get("vg_free", None) 4557 if not isinstance(vg_free, int): 4558 raise errors.OpPrereqError("Can't compute free disk space on node %s," 4559 " result was '%s'" % (node, vg_free), 4560 errors.ECODE_ENVIRON) 4561 if requested > vg_free: 4562 raise errors.OpPrereqError("Not enough disk space on target node %s:" 4563 " required %d MiB, available %d MiB" % 4564 (node, requested, vg_free), 4565 errors.ECODE_NORES)
4566
4567 4568 -class LUStartupInstance(LogicalUnit):
4569 """Starts an instance. 4570 4571 """ 4572 HPATH = "instance-start" 4573 HTYPE = constants.HTYPE_INSTANCE 4574 _OP_PARAMS = [ 4575 _PInstanceName, 4576 _PForce, 4577 ("hvparams", _EmptyDict, _TDict), 4578 ("beparams", _EmptyDict, _TDict), 4579 ] 4580 REQ_BGL = False 4581
4582 - def CheckArguments(self):
4583 # extra beparams 4584 if self.op.beparams: 4585 # fill the beparams dict 4586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4587
4588 - def ExpandNames(self):
4589 self._ExpandAndLockInstance() 4590
4591 - def BuildHooksEnv(self):
4592 """Build hooks env. 4593 4594 This runs on master, primary and secondary nodes of the instance. 4595 4596 """ 4597 env = { 4598 "FORCE": self.op.force, 4599 } 4600 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 4601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4602 return env, nl, nl
4603
4604 - def CheckPrereq(self):
4605 """Check prerequisites. 4606 4607 This checks that the instance is in the cluster. 4608 4609 """ 4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4611 assert self.instance is not None, \ 4612 "Cannot retrieve locked instance %s" % self.op.instance_name 4613 4614 # extra hvparams 4615 if self.op.hvparams: 4616 # check hypervisor parameter syntax (locally) 4617 cluster = self.cfg.GetClusterInfo() 4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 4619 filled_hvp = cluster.FillHV(instance) 4620 filled_hvp.update(self.op.hvparams) 4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor) 4622 hv_type.CheckParameterSyntax(filled_hvp) 4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) 4624 4625 _CheckNodeOnline(self, instance.primary_node) 4626 4627 bep = self.cfg.GetClusterInfo().FillBE(instance) 4628 # check bridges existence 4629 _CheckInstanceBridgesExist(self, instance) 4630 4631 remote_info = self.rpc.call_instance_info(instance.primary_node, 4632 instance.name, 4633 instance.hypervisor) 4634 remote_info.Raise("Error checking node %s" % instance.primary_node, 4635 prereq=True, ecode=errors.ECODE_ENVIRON) 4636 if not remote_info.payload: # not running already 4637 _CheckNodeFreeMemory(self, instance.primary_node, 4638 "starting instance %s" % instance.name, 4639 bep[constants.BE_MEMORY], instance.hypervisor)
4640
4641 - def Exec(self, feedback_fn):
4642 """Start the instance. 4643 4644 """ 4645 instance = self.instance 4646 force = self.op.force 4647 4648 self.cfg.MarkInstanceUp(instance.name) 4649 4650 node_current = instance.primary_node 4651 4652 _StartInstanceDisks(self, instance, force) 4653 4654 result = self.rpc.call_instance_start(node_current, instance, 4655 self.op.hvparams, self.op.beparams) 4656 msg = result.fail_msg 4657 if msg: 4658 _ShutdownInstanceDisks(self, instance) 4659 raise errors.OpExecError("Could not start instance: %s" % msg)
4660
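# A standalone sketch of how the per-start hvparams override is merged and
# type-checked in LUStartupInstance.CheckPrereq above, reduced to plain dicts.
# The parameter names and the HV_TYPES table are invented for the example; the
# LU uses cluster.FillHV, utils.ForceDictType and the hypervisor's own
# CheckParameterSyntax instead.
HV_TYPES = {"boot_order": str, "acpi": bool}

def fill_and_check_hvparams(cluster_hvparams, override):
  filled = dict(cluster_hvparams)
  filled.update(override)                  # op-level values win
  for (key, value) in filled.items():
    expected = HV_TYPES.get(key)
    if expected is None:
      raise ValueError("Unknown hypervisor parameter %r" % key)
    if not isinstance(value, expected):
      raise ValueError("Parameter %r should be of type %s" %
                       (key, expected.__name__))
  return filled

# fill_and_check_hvparams({"boot_order": "cd", "acpi": True},
#                         {"boot_order": "n"})
# -> {'boot_order': 'n', 'acpi': True}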
4661 4662 -class LURebootInstance(LogicalUnit):
4663 """Reboot an instance. 4664 4665 """ 4666 HPATH = "instance-reboot" 4667 HTYPE = constants.HTYPE_INSTANCE 4668 _OP_PARAMS = [ 4669 _PInstanceName, 4670 ("ignore_secondaries", False, _TBool), 4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)), 4672 _PShutdownTimeout, 4673 ] 4674 REQ_BGL = False 4675
4676 - def ExpandNames(self):
4677 self._ExpandAndLockInstance() 4678
4679 - def BuildHooksEnv(self):
4680 """Build hooks env. 4681 4682 This runs on master, primary and secondary nodes of the instance. 4683 4684 """ 4685 env = { 4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries, 4687 "REBOOT_TYPE": self.op.reboot_type, 4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 4689 } 4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4692 return env, nl, nl
4693
4694 - def CheckPrereq(self):
4695 """Check prerequisites. 4696 4697 This checks that the instance is in the cluster. 4698 4699 """ 4700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4701 assert self.instance is not None, \ 4702 "Cannot retrieve locked instance %s" % self.op.instance_name 4703 4704 _CheckNodeOnline(self, instance.primary_node) 4705 4706 # check bridges existence 4707 _CheckInstanceBridgesExist(self, instance)
4708
4709 - def Exec(self, feedback_fn):
4710 """Reboot the instance. 4711 4712 """ 4713 instance = self.instance 4714 ignore_secondaries = self.op.ignore_secondaries 4715 reboot_type = self.op.reboot_type 4716 4717 node_current = instance.primary_node 4718 4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT, 4720 constants.INSTANCE_REBOOT_HARD]: 4721 for disk in instance.disks: 4722 self.cfg.SetDiskID(disk, node_current) 4723 result = self.rpc.call_instance_reboot(node_current, instance, 4724 reboot_type, 4725 self.op.shutdown_timeout) 4726 result.Raise("Could not reboot instance") 4727 else: 4728 result = self.rpc.call_instance_shutdown(node_current, instance, 4729 self.op.shutdown_timeout) 4730 result.Raise("Could not shutdown instance for full reboot") 4731 _ShutdownInstanceDisks(self, instance) 4732 _StartInstanceDisks(self, instance, ignore_secondaries) 4733 result = self.rpc.call_instance_start(node_current, instance, None, None) 4734 msg = result.fail_msg 4735 if msg: 4736 _ShutdownInstanceDisks(self, instance) 4737 raise errors.OpExecError("Could not start instance for" 4738 " full reboot: %s" % msg) 4739 4740 self.cfg.MarkInstanceUp(instance.name)
4741
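# A standalone sketch of the branch taken by LURebootInstance.Exec above: soft
# and hard reboots are delegated to the hypervisor in a single call, while a
# "full" reboot is composed of shutdown, disk deactivation, disk activation and
# a fresh start.  The callables are stubs standing in for the RPC helpers used
# by the LU.
SOFT, HARD, FULL = "soft", "hard", "full"

def reboot_instance(reboot_type, reboot_fn, shutdown_fn,
                    stop_disks_fn, start_disks_fn, start_fn):
  if reboot_type in (SOFT, HARD):
    reboot_fn(reboot_type)
  else:
    shutdown_fn()
    stop_disks_fn()
    start_disks_fn()
    start_fn()

# noop = lambda *args: None
# reboot_instance(FULL, noop, noop, noop, noop, noop)   # runs the long path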
4742 4743 -class LUShutdownInstance(LogicalUnit):
4744 """Shutdown an instance. 4745 4746 """ 4747 HPATH = "instance-stop" 4748 HTYPE = constants.HTYPE_INSTANCE 4749 _OP_PARAMS = [ 4750 _PInstanceName, 4751 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt), 4752 ] 4753 REQ_BGL = False 4754
4755 - def ExpandNames(self):
4756 self._ExpandAndLockInstance() 4757
4758 - def BuildHooksEnv(self):
4759 """Build hooks env. 4760 4761 This runs on master, primary and secondary nodes of the instance. 4762 4763 """ 4764 env = _BuildInstanceHookEnvByObject(self, self.instance) 4765 env["TIMEOUT"] = self.op.timeout 4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4767 return env, nl, nl
4768
4769 - def CheckPrereq(self):
4770 """Check prerequisites. 4771 4772 This checks that the instance is in the cluster. 4773 4774 """ 4775 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4776 assert self.instance is not None, \ 4777 "Cannot retrieve locked instance %s" % self.op.instance_name 4778 _CheckNodeOnline(self, self.instance.primary_node)
4779
4780 - def Exec(self, feedback_fn):
4781 """Shutdown the instance. 4782 4783 """ 4784 instance = self.instance 4785 node_current = instance.primary_node 4786 timeout = self.op.timeout 4787 self.cfg.MarkInstanceDown(instance.name) 4788 result = self.rpc.call_instance_shutdown(node_current, instance, timeout) 4789 msg = result.fail_msg 4790 if msg: 4791 self.proc.LogWarning("Could not shutdown instance: %s" % msg) 4792 4793 _ShutdownInstanceDisks(self, instance)
4794
4795 4796 -class LUReinstallInstance(LogicalUnit):
4797 """Reinstall an instance. 4798 4799 """ 4800 HPATH = "instance-reinstall" 4801 HTYPE = constants.HTYPE_INSTANCE 4802 _OP_PARAMS = [ 4803 _PInstanceName, 4804 ("os_type", None, _TMaybeString), 4805 ("force_variant", False, _TBool), 4806 ] 4807 REQ_BGL = False 4808
4809 - def ExpandNames(self):
4810 self._ExpandAndLockInstance() 4811
4812 - def BuildHooksEnv(self):
4813 """Build hooks env. 4814 4815 This runs on master, primary and secondary nodes of the instance. 4816 4817 """ 4818 env = _BuildInstanceHookEnvByObject(self, self.instance) 4819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4820 return env, nl, nl
4821
4822 - def CheckPrereq(self):
4823 """Check prerequisites. 4824 4825 This checks that the instance is in the cluster and is not running. 4826 4827 """ 4828 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4829 assert instance is not None, \ 4830 "Cannot retrieve locked instance %s" % self.op.instance_name 4831 _CheckNodeOnline(self, instance.primary_node) 4832 4833 if instance.disk_template == constants.DT_DISKLESS: 4834 raise errors.OpPrereqError("Instance '%s' has no disks" % 4835 self.op.instance_name, 4836 errors.ECODE_INVAL) 4837 _CheckInstanceDown(self, instance, "cannot reinstall") 4838 4839 if self.op.os_type is not None: 4840 # OS verification 4841 pnode = _ExpandNodeName(self.cfg, instance.primary_node) 4842 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) 4843 4844 self.instance = instance
4845
4846 - def Exec(self, feedback_fn):
4847 """Reinstall the instance. 4848 4849 """ 4850 inst = self.instance 4851 4852 if self.op.os_type is not None: 4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type) 4854 inst.os = self.op.os_type 4855 self.cfg.Update(inst, feedback_fn) 4856 4857 _StartInstanceDisks(self, inst, None) 4858 try: 4859 feedback_fn("Running the instance OS create scripts...") 4860 # FIXME: pass debug option from opcode to backend 4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 4862 self.op.debug_level) 4863 result.Raise("Could not install OS for instance %s on node %s" % 4864 (inst.name, inst.primary_node)) 4865 finally: 4866 _ShutdownInstanceDisks(self, inst)
4867
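# A minimal standalone sketch of the disk lifecycle used by
# LUReinstallInstance.Exec above (and, further down, by LURenameInstance): the
# instance's disks are activated only for the duration of the OS script run and
# are shut down again even if the script fails.  The three callables are
# stand-ins for _StartInstanceDisks, _ShutdownInstanceDisks and the OS-script
# RPC.
def run_with_disks(start_disks_fn, stop_disks_fn, run_fn):
  start_disks_fn()
  try:
    return run_fn()
  finally:
    stop_disks_fn()

# run_with_disks(lambda: None, lambda: None, lambda: "os-create done")
# -> 'os-create done'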
4868 4869 -class LURecreateInstanceDisks(LogicalUnit):
4870 """Recreate an instance's missing disks. 4871 4872 """ 4873 HPATH = "instance-recreate-disks" 4874 HTYPE = constants.HTYPE_INSTANCE 4875 _OP_PARAMS = [ 4876 _PInstanceName, 4877 ("disks", _EmptyList, _TListOf(_TPositiveInt)), 4878 ] 4879 REQ_BGL = False 4880
4881 - def ExpandNames(self):
4882 self._ExpandAndLockInstance() 4883
4884 - def BuildHooksEnv(self):
4885 """Build hooks env. 4886 4887 This runs on master, primary and secondary nodes of the instance. 4888 4889 """ 4890 env = _BuildInstanceHookEnvByObject(self, self.instance) 4891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4892 return env, nl, nl
4893
4894 - def CheckPrereq(self):
4895 """Check prerequisites. 4896 4897 This checks that the instance is in the cluster and is not running. 4898 4899 """ 4900 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4901 assert instance is not None, \ 4902 "Cannot retrieve locked instance %s" % self.op.instance_name 4903 _CheckNodeOnline(self, instance.primary_node) 4904 4905 if instance.disk_template == constants.DT_DISKLESS: 4906 raise errors.OpPrereqError("Instance '%s' has no disks" % 4907 self.op.instance_name, errors.ECODE_INVAL) 4908 _CheckInstanceDown(self, instance, "cannot recreate disks") 4909 4910 if not self.op.disks: 4911 self.op.disks = range(len(instance.disks)) 4912 else: 4913 for idx in self.op.disks: 4914 if idx >= len(instance.disks): 4915 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx, 4916 errors.ECODE_INVAL) 4917 4918 self.instance = instance
4919
4920 - def Exec(self, feedback_fn):
4921 """Recreate the disks. 4922 4923 """ 4924 to_skip = [] 4925 for idx, _ in enumerate(self.instance.disks): 4926 if idx not in self.op.disks: # disk idx has not been passed in 4927 to_skip.append(idx) 4928 continue 4929 4930 _CreateDisks(self, self.instance, to_skip=to_skip)
4931
4932 4933 -class LURenameInstance(LogicalUnit):
4934 """Rename an instance. 4935 4936 """ 4937 HPATH = "instance-rename" 4938 HTYPE = constants.HTYPE_INSTANCE 4939 _OP_PARAMS = [ 4940 _PInstanceName, 4941 ("new_name", _NoDefault, _TNonEmptyString), 4942 ("ip_check", False, _TBool), 4943 ("name_check", True, _TBool), 4944 ] 4945
4946 - def CheckArguments(self):
4947 """Check arguments. 4948 4949 """ 4950 if self.op.ip_check and not self.op.name_check: 4951 # TODO: make the ip check more flexible and not depend on the name check 4952 raise errors.OpPrereqError("Cannot do ip check without a name check", 4953 errors.ECODE_INVAL)
4954
4955 - def BuildHooksEnv(self):
4956 """Build hooks env. 4957 4958 This runs on master, primary and secondary nodes of the instance. 4959 4960 """ 4961 env = _BuildInstanceHookEnvByObject(self, self.instance) 4962 env["INSTANCE_NEW_NAME"] = self.op.new_name 4963 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4964 return env, nl, nl
4965
4966 - def CheckPrereq(self):
4967 """Check prerequisites. 4968 4969 This checks that the instance is in the cluster and is not running. 4970 4971 """ 4972 self.op.instance_name = _ExpandInstanceName(self.cfg, 4973 self.op.instance_name) 4974 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4975 assert instance is not None 4976 _CheckNodeOnline(self, instance.primary_node) 4977 _CheckInstanceDown(self, instance, "cannot rename") 4978 self.instance = instance 4979 4980 new_name = self.op.new_name 4981 if self.op.name_check: 4982 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name)) 4983 new_name = self.op.new_name = hostinfo.name 4984 if (self.op.ip_check and 4985 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)): 4986 raise errors.OpPrereqError("IP %s of instance %s already in use" % 4987 (hostinfo.ip, new_name), 4988 errors.ECODE_NOTUNIQUE) 4989 4990 instance_list = self.cfg.GetInstanceList() 4991 if new_name in instance_list: 4992 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 4993 new_name, errors.ECODE_EXISTS)
4994 4995
4996 - def Exec(self, feedback_fn):
4997 """Reinstall the instance. 4998 4999 """ 5000 inst = self.instance 5001 old_name = inst.name 5002 5003 if inst.disk_template == constants.DT_FILE: 5004 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5005 5006 self.cfg.RenameInstance(inst.name, self.op.new_name) 5007 # Change the instance lock. This is definitely safe while we hold the BGL 5008 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name) 5009 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) 5010 5011 # re-read the instance from the configuration after rename 5012 inst = self.cfg.GetInstanceInfo(self.op.new_name) 5013 5014 if inst.disk_template == constants.DT_FILE: 5015 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5016 result = self.rpc.call_file_storage_dir_rename(inst.primary_node, 5017 old_file_storage_dir, 5018 new_file_storage_dir) 5019 result.Raise("Could not rename on node %s directory '%s' to '%s'" 5020 " (but the instance has been renamed in Ganeti)" % 5021 (inst.primary_node, old_file_storage_dir, 5022 new_file_storage_dir)) 5023 5024 _StartInstanceDisks(self, inst, None) 5025 try: 5026 result = self.rpc.call_instance_run_rename(inst.primary_node, inst, 5027 old_name, self.op.debug_level) 5028 msg = result.fail_msg 5029 if msg: 5030 msg = ("Could not run OS rename script for instance %s on node %s" 5031 " (but the instance has been renamed in Ganeti): %s" % 5032 (inst.name, inst.primary_node, msg)) 5033 self.proc.LogWarning(msg) 5034 finally: 5035 _ShutdownInstanceDisks(self, inst) 5036 5037 return inst.name
5038
5039 5040 -class LURemoveInstance(LogicalUnit):
5041 """Remove an instance. 5042 5043 """ 5044 HPATH = "instance-remove" 5045 HTYPE = constants.HTYPE_INSTANCE 5046 _OP_PARAMS = [ 5047 _PInstanceName, 5048 ("ignore_failures", False, _TBool), 5049 _PShutdownTimeout, 5050 ] 5051 REQ_BGL = False 5052
5053 - def ExpandNames(self):
5054 self._ExpandAndLockInstance() 5055 self.needed_locks[locking.LEVEL_NODE] = [] 5056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5057
5058 - def DeclareLocks(self, level):
5059 if level == locking.LEVEL_NODE: 5060 self._LockInstancesNodes()
5061
5062 - def BuildHooksEnv(self):
5063 """Build hooks env. 5064 5065 This runs on master, primary and secondary nodes of the instance. 5066 5067 """ 5068 env = _BuildInstanceHookEnvByObject(self, self.instance) 5069 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout 5070 nl = [self.cfg.GetMasterNode()] 5071 nl_post = list(self.instance.all_nodes) + nl 5072 return env, nl, nl_post
5073
5074 - def CheckPrereq(self):
5075 """Check prerequisites. 5076 5077 This checks that the instance is in the cluster. 5078 5079 """ 5080 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5081 assert self.instance is not None, \ 5082 "Cannot retrieve locked instance %s" % self.op.instance_name
5083
5084 - def Exec(self, feedback_fn):
5085 """Remove the instance. 5086 5087 """ 5088 instance = self.instance 5089 logging.info("Shutting down instance %s on node %s", 5090 instance.name, instance.primary_node) 5091 5092 result = self.rpc.call_instance_shutdown(instance.primary_node, instance, 5093 self.op.shutdown_timeout) 5094 msg = result.fail_msg 5095 if msg: 5096 if self.op.ignore_failures: 5097 feedback_fn("Warning: can't shutdown instance: %s" % msg) 5098 else: 5099 raise errors.OpExecError("Could not shutdown instance %s on" 5100 " node %s: %s" % 5101 (instance.name, instance.primary_node, msg)) 5102 5103 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5104
5105 5106 -def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5107 """Utility function to remove an instance. 5108 5109 """ 5110 logging.info("Removing block devices for instance %s", instance.name) 5111 5112 if not _RemoveDisks(lu, instance): 5113 if not ignore_failures: 5114 raise errors.OpExecError("Can't remove instance's disks") 5115 feedback_fn("Warning: can't remove instance's disks") 5116 5117 logging.info("Removing instance %s out of cluster config", instance.name) 5118 5119 lu.cfg.RemoveInstance(instance.name) 5120 5121 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ 5122 "Instance lock removal conflict" 5123 5124 # Remove lock for the instance 5125 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5126
5127 5128 -class LUQueryInstances(NoHooksLU):
5129 """Logical unit for querying instances. 5130 5131 """ 5132 # pylint: disable-msg=W0142 5133 _OP_PARAMS = [ 5134 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), 5135 ("names", _EmptyList, _TListOf(_TNonEmptyString)), 5136 ("use_locking", False, _TBool), 5137 ] 5138 REQ_BGL = False 5139 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor", 5140 "serial_no", "ctime", "mtime", "uuid"] 5141 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes", 5142 "admin_state", 5143 "disk_template", "ip", "mac", "bridge", 5144 "nic_mode", "nic_link", 5145 "sda_size", "sdb_size", "vcpus", "tags", 5146 "network_port", "beparams", 5147 r"(disk)\.(size)/([0-9]+)", 5148 r"(disk)\.(sizes)", "disk_usage", 5149 r"(nic)\.(mac|ip|mode|link)/([0-9]+)", 5150 r"(nic)\.(bridge)/([0-9]+)", 5151 r"(nic)\.(macs|ips|modes|links|bridges)", 5152 r"(disk|nic)\.(count)", 5153 "hvparams", 5154 ] + _SIMPLE_FIELDS + 5155 ["hv/%s" % name 5156 for name in constants.HVS_PARAMETERS 5157 if name not in constants.HVC_GLOBALS] + 5158 ["be/%s" % name 5159 for name in constants.BES_PARAMETERS]) 5160 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", 5161 "oper_ram", 5162 "oper_vcpus", 5163 "status") 5164 5165
5166 - def CheckArguments(self):
5167 _CheckOutputFields(static=self._FIELDS_STATIC, 5168 dynamic=self._FIELDS_DYNAMIC, 5169 selected=self.op.output_fields)
5170
5171 - def ExpandNames(self):
5172 self.needed_locks = {} 5173 self.share_locks[locking.LEVEL_INSTANCE] = 1 5174 self.share_locks[locking.LEVEL_NODE] = 1 5175 5176 if self.op.names: 5177 self.wanted = _GetWantedInstances(self, self.op.names) 5178 else: 5179 self.wanted = locking.ALL_SET 5180 5181 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields) 5182 self.do_locking = self.do_node_query and self.op.use_locking 5183 if self.do_locking: 5184 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted 5185 self.needed_locks[locking.LEVEL_NODE] = [] 5186 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
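The node-query and locking decisions just above hinge on whether any requested field falls outside _FIELDS_STATIC. As a hedged illustration (the sample field lists are invented; only the FieldSet behaviour already relied on above is assumed), _FIELDS_STATIC.NonMatching() returns the requested fields that the static set does not cover, and a non-empty result is what switches do_node_query (and, together with use_locking, do_locking) on:

  # _FIELDS_STATIC.NonMatching(["name", "disk.size/0"])  ->  []  (all static)
  # _FIELDS_STATIC.NonMatching(["name", "oper_ram"])     ->  ["oper_ram"],
  #   so live data from the nodes is needed for this query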
5187
5188 - def DeclareLocks(self, level):
5189 if level == locking.LEVEL_NODE and self.do_locking: 5190 self._LockInstancesNodes()
5191
5192 - def Exec(self, feedback_fn):
5193 """Computes the list of nodes and their attributes. 5194 5195 """ 5196 # pylint: disable-msg=R0912 5197 # way too many branches here 5198 all_info = self.cfg.GetAllInstancesInfo() 5199 if self.wanted == locking.ALL_SET: 5200 # caller didn't specify instance names, so ordering is not important 5201 if self.do_locking: 5202 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE] 5203 else: 5204 instance_names = all_info.keys() 5205 instance_names = utils.NiceSort(instance_names) 5206 else: 5207 # caller did specify names, so we must keep the ordering 5208 if self.do_locking: 5209 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE] 5210 else: 5211 tgt_set = all_info.keys() 5212 missing = set(self.wanted).difference(tgt_set) 5213 if missing: 5214 raise errors.OpExecError("Some instances were removed before" 5215 " retrieving their data: %s" % missing) 5216 instance_names = self.wanted 5217 5218 instance_list = [all_info[iname] for iname in instance_names] 5219 5220 # begin data gathering 5221 5222 nodes = frozenset([inst.primary_node for inst in instance_list]) 5223 hv_list = list(set([inst.hypervisor for inst in instance_list])) 5224 5225 bad_nodes = [] 5226 off_nodes = [] 5227 if self.do_node_query: 5228 live_data = {} 5229 node_data = self.rpc.call_all_instances_info(nodes, hv_list) 5230 for name in nodes: 5231 result = node_data[name] 5232 if result.offline: 5233 # offline nodes will be in both lists 5234 off_nodes.append(name) 5235 if result.fail_msg: 5236 bad_nodes.append(name) 5237 else: 5238 if result.payload: 5239 live_data.update(result.payload) 5240 # else no instance is alive 5241 else: 5242 live_data = dict([(name, {}) for name in instance_names]) 5243 5244 # end data gathering 5245 5246 HVPREFIX = "hv/" 5247 BEPREFIX = "be/" 5248 output = [] 5249 cluster = self.cfg.GetClusterInfo() 5250 for instance in instance_list: 5251 iout = [] 5252 i_hv = cluster.FillHV(instance, skip_globals=True) 5253 i_be = cluster.FillBE(instance) 5254 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics] 5255 for field in self.op.output_fields: 5256 st_match = self._FIELDS_STATIC.Matches(field) 5257 if field in self._SIMPLE_FIELDS: 5258 val = getattr(instance, field) 5259 elif field == "pnode": 5260 val = instance.primary_node 5261 elif field == "snodes": 5262 val = list(instance.secondary_nodes) 5263 elif field == "admin_state": 5264 val = instance.admin_up 5265 elif field == "oper_state": 5266 if instance.primary_node in bad_nodes: 5267 val = None 5268 else: 5269 val = bool(live_data.get(instance.name)) 5270 elif field == "status": 5271 if instance.primary_node in off_nodes: 5272 val = "ERROR_nodeoffline" 5273 elif instance.primary_node in bad_nodes: 5274 val = "ERROR_nodedown" 5275 else: 5276 running = bool(live_data.get(instance.name)) 5277 if running: 5278 if instance.admin_up: 5279 val = "running" 5280 else: 5281 val = "ERROR_up" 5282 else: 5283 if instance.admin_up: 5284 val = "ERROR_down" 5285 else: 5286 val = "ADMIN_down" 5287 elif field == "oper_ram": 5288 if instance.primary_node in bad_nodes: 5289 val = None 5290 elif instance.name in live_data: 5291 val = live_data[instance.name].get("memory", "?") 5292 else: 5293 val = "-" 5294 elif field == "oper_vcpus": 5295 if instance.primary_node in bad_nodes: 5296 val = None 5297 elif instance.name in live_data: 5298 val = live_data[instance.name].get("vcpus", "?") 5299 else: 5300 val = "-" 5301 elif field == "vcpus": 5302 val = i_be[constants.BE_VCPUS] 5303 elif field == "disk_template": 5304 val = 
instance.disk_template 5305 elif field == "ip": 5306 if instance.nics: 5307 val = instance.nics[0].ip 5308 else: 5309 val = None 5310 elif field == "nic_mode": 5311 if instance.nics: 5312 val = i_nicp[0][constants.NIC_MODE] 5313 else: 5314 val = None 5315 elif field == "nic_link": 5316 if instance.nics: 5317 val = i_nicp[0][constants.NIC_LINK] 5318 else: 5319 val = None 5320 elif field == "bridge": 5321 if (instance.nics and 5322 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED): 5323 val = i_nicp[0][constants.NIC_LINK] 5324 else: 5325 val = None 5326 elif field == "mac": 5327 if instance.nics: 5328 val = instance.nics[0].mac 5329 else: 5330 val = None 5331 elif field == "sda_size" or field == "sdb_size": 5332 idx = ord(field[2]) - ord('a') 5333 try: 5334 val = instance.FindDisk(idx).size 5335 except errors.OpPrereqError: 5336 val = None 5337 elif field == "disk_usage": # total disk usage per node 5338 disk_sizes = [{'size': disk.size} for disk in instance.disks] 5339 val = _ComputeDiskSize(instance.disk_template, disk_sizes) 5340 elif field == "tags": 5341 val = list(instance.GetTags()) 5342 elif field == "hvparams": 5343 val = i_hv 5344 elif (field.startswith(HVPREFIX) and 5345 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and 5346 field[len(HVPREFIX):] not in constants.HVC_GLOBALS): 5347 val = i_hv.get(field[len(HVPREFIX):], None) 5348 elif field == "beparams": 5349 val = i_be 5350 elif (field.startswith(BEPREFIX) and 5351 field[len(BEPREFIX):] in constants.BES_PARAMETERS): 5352 val = i_be.get(field[len(BEPREFIX):], None) 5353 elif st_match and st_match.groups(): 5354 # matches a variable list 5355 st_groups = st_match.groups() 5356 if st_groups and st_groups[0] == "disk": 5357 if st_groups[1] == "count": 5358 val = len(instance.disks) 5359 elif st_groups[1] == "sizes": 5360 val = [disk.size for disk in instance.disks] 5361 elif st_groups[1] == "size": 5362 try: 5363 val = instance.FindDisk(st_groups[2]).size 5364 except errors.OpPrereqError: 5365 val = None 5366 else: 5367 assert False, "Unhandled disk parameter" 5368 elif st_groups[0] == "nic": 5369 if st_groups[1] == "count": 5370 val = len(instance.nics) 5371 elif st_groups[1] == "macs": 5372 val = [nic.mac for nic in instance.nics] 5373 elif st_groups[1] == "ips": 5374 val = [nic.ip for nic in instance.nics] 5375 elif st_groups[1] == "modes": 5376 val = [nicp[constants.NIC_MODE] for nicp in i_nicp] 5377 elif st_groups[1] == "links": 5378 val = [nicp[constants.NIC_LINK] for nicp in i_nicp] 5379 elif st_groups[1] == "bridges": 5380 val = [] 5381 for nicp in i_nicp: 5382 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 5383 val.append(nicp[constants.NIC_LINK]) 5384 else: 5385 val.append(None) 5386 else: 5387 # index-based item 5388 nic_idx = int(st_groups[2]) 5389 if nic_idx >= len(instance.nics): 5390 val = None 5391 else: 5392 if st_groups[1] == "mac": 5393 val = instance.nics[nic_idx].mac 5394 elif st_groups[1] == "ip": 5395 val = instance.nics[nic_idx].ip 5396 elif st_groups[1] == "mode": 5397 val = i_nicp[nic_idx][constants.NIC_MODE] 5398 elif st_groups[1] == "link": 5399 val = i_nicp[nic_idx][constants.NIC_LINK] 5400 elif st_groups[1] == "bridge": 5401 nic_mode = i_nicp[nic_idx][constants.NIC_MODE] 5402 if nic_mode == constants.NIC_MODE_BRIDGED: 5403 val = i_nicp[nic_idx][constants.NIC_LINK] 5404 else: 5405 val = None 5406 else: 5407 assert False, "Unhandled NIC parameter" 5408 else: 5409 assert False, ("Declared but unhandled variable parameter '%s'" % 5410 field) 5411 else: 5412 assert False, "Declared 
but unhandled parameter '%s'" % field 5413 iout.append(val) 5414 output.append(iout) 5415 5416 return output
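To illustrate the shape of the result built above, a hypothetical query (all instance data invented) yields one list per instance, ordered like output_fields:

  # output_fields = ["name", "status", "oper_ram", "disk.count", "nic.macs"]
  # -> [["inst1.example.com", "running",    512, 1, ["aa:00:00:11:22:33"]],
  #     ["inst2.example.com", "ADMIN_down", "-", 2, ["aa:00:00:44:55:66"]]]
  # The dynamic fields degrade as coded above: an unreachable primary node
  # turns "status" into "ERROR_nodedown" and "oper_ram" into None.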
5417
5418 5419 -class LUFailoverInstance(LogicalUnit):
5420 """Failover an instance. 5421 5422 """ 5423 HPATH = "instance-failover" 5424 HTYPE = constants.HTYPE_INSTANCE 5425 _OP_PARAMS = [ 5426 _PInstanceName, 5427 ("ignore_consistency", False, _TBool), 5428 _PShutdownTimeout, 5429 ] 5430 REQ_BGL = False 5431
5432 - def ExpandNames(self):
5433 self._ExpandAndLockInstance() 5434 self.needed_locks[locking.LEVEL_NODE] = [] 5435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5436
5437 - def DeclareLocks(self, level):
5438 if level == locking.LEVEL_NODE: 5439 self._LockInstancesNodes()
5440
5441 - def BuildHooksEnv(self):
5442 """Build hooks env. 5443 5444 This runs on master, primary and secondary nodes of the instance. 5445 5446 """ 5447 instance = self.instance 5448 source_node = instance.primary_node 5449 target_node = instance.secondary_nodes[0] 5450 env = { 5451 "IGNORE_CONSISTENCY": self.op.ignore_consistency, 5452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5453 "OLD_PRIMARY": source_node, 5454 "OLD_SECONDARY": target_node, 5455 "NEW_PRIMARY": target_node, 5456 "NEW_SECONDARY": source_node, 5457 } 5458 env.update(_BuildInstanceHookEnvByObject(self, instance)) 5459 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5460 nl_post = list(nl) 5461 nl_post.append(source_node) 5462 return env, nl, nl_post
5463
5464 - def CheckPrereq(self):
5465 """Check prerequisites. 5466 5467 This checks that the instance is in the cluster. 5468 5469 """ 5470 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5471 assert self.instance is not None, \ 5472 "Cannot retrieve locked instance %s" % self.op.instance_name 5473 5474 bep = self.cfg.GetClusterInfo().FillBE(instance) 5475 if instance.disk_template not in constants.DTS_NET_MIRROR: 5476 raise errors.OpPrereqError("Instance's disk layout is not" 5477 " network mirrored, cannot failover.", 5478 errors.ECODE_STATE) 5479 5480 secondary_nodes = instance.secondary_nodes 5481 if not secondary_nodes: 5482 raise errors.ProgrammerError("no secondary node but using " 5483 "a mirrored disk template") 5484 5485 target_node = secondary_nodes[0] 5486 _CheckNodeOnline(self, target_node) 5487 _CheckNodeNotDrained(self, target_node) 5488 if instance.admin_up: 5489 # check memory requirements on the secondary node 5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 5491 instance.name, bep[constants.BE_MEMORY], 5492 instance.hypervisor) 5493 else: 5494 self.LogInfo("Not checking memory on the secondary node as" 5495 " instance will not be started") 5496 5497 # check bridge existance 5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5499
5500 - def Exec(self, feedback_fn):
5501 """Failover an instance. 5502 5503 The failover is done by shutting it down on its present node and 5504 starting it on the secondary. 5505 5506 """ 5507 instance = self.instance 5508 5509 source_node = instance.primary_node 5510 target_node = instance.secondary_nodes[0] 5511 5512 if instance.admin_up: 5513 feedback_fn("* checking disk consistency between source and target") 5514 for dev in instance.disks: 5515 # for drbd, these are drbd over lvm 5516 if not _CheckDiskConsistency(self, dev, target_node, False): 5517 if not self.op.ignore_consistency: 5518 raise errors.OpExecError("Disk %s is degraded on target node," 5519 " aborting failover." % dev.iv_name) 5520 else: 5521 feedback_fn("* not checking disk consistency as instance is not running") 5522 5523 feedback_fn("* shutting down instance on source node") 5524 logging.info("Shutting down instance %s on node %s", 5525 instance.name, source_node) 5526 5527 result = self.rpc.call_instance_shutdown(source_node, instance, 5528 self.op.shutdown_timeout) 5529 msg = result.fail_msg 5530 if msg: 5531 if self.op.ignore_consistency: 5532 self.proc.LogWarning("Could not shutdown instance %s on node %s." 5533 " Proceeding anyway. Please make sure node" 5534 " %s is down. Error details: %s", 5535 instance.name, source_node, source_node, msg) 5536 else: 5537 raise errors.OpExecError("Could not shutdown instance %s on" 5538 " node %s: %s" % 5539 (instance.name, source_node, msg)) 5540 5541 feedback_fn("* deactivating the instance's disks on source node") 5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True): 5543 raise errors.OpExecError("Can't shut down the instance's disks.") 5544 5545 instance.primary_node = target_node 5546 # distribute new instance config to the other nodes 5547 self.cfg.Update(instance, feedback_fn) 5548 5549 # Only start the instance if it's marked as up 5550 if instance.admin_up: 5551 feedback_fn("* activating the instance's disks on target node") 5552 logging.info("Starting instance %s on node %s", 5553 instance.name, target_node) 5554 5555 disks_ok, _ = _AssembleInstanceDisks(self, instance, 5556 ignore_secondaries=True) 5557 if not disks_ok: 5558 _ShutdownInstanceDisks(self, instance) 5559 raise errors.OpExecError("Can't activate the instance's disks") 5560 5561 feedback_fn("* starting the instance on the target node") 5562 result = self.rpc.call_instance_start(target_node, instance, None, None) 5563 msg = result.fail_msg 5564 if msg: 5565 _ShutdownInstanceDisks(self, instance) 5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 5567 (instance.name, target_node, msg))
5568
5569 5570 -class LUMigrateInstance(LogicalUnit):
5571 """Migrate an instance. 5572 5573 This is migration without shutting down, compared to the failover, 5574 which is done with shutdown. 5575 5576 """ 5577 HPATH = "instance-migrate" 5578 HTYPE = constants.HTYPE_INSTANCE 5579 _OP_PARAMS = [ 5580 _PInstanceName, 5581 _PMigrationMode, 5582 _PMigrationLive, 5583 ("cleanup", False, _TBool), 5584 ] 5585 5586 REQ_BGL = False 5587
5588 - def ExpandNames(self):
5589 self._ExpandAndLockInstance() 5590 5591 self.needed_locks[locking.LEVEL_NODE] = [] 5592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 5593 5594 self._migrater = TLMigrateInstance(self, self.op.instance_name, 5595 self.op.cleanup) 5596 self.tasklets = [self._migrater]
5597
5598 - def DeclareLocks(self, level):
5599 if level == locking.LEVEL_NODE: 5600 self._LockInstancesNodes()
5601
5602 - def BuildHooksEnv(self):
5603 """Build hooks env. 5604 5605 This runs on master, primary and secondary nodes of the instance. 5606 5607 """ 5608 instance = self._migrater.instance 5609 source_node = instance.primary_node 5610 target_node = instance.secondary_nodes[0] 5611 env = _BuildInstanceHookEnvByObject(self, instance) 5612 env["MIGRATE_LIVE"] = self._migrater.live 5613 env["MIGRATE_CLEANUP"] = self.op.cleanup 5614 env.update({ 5615 "OLD_PRIMARY": source_node, 5616 "OLD_SECONDARY": target_node, 5617 "NEW_PRIMARY": target_node, 5618 "NEW_SECONDARY": source_node, 5619 }) 5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5621 nl_post = list(nl) 5622 nl_post.append(source_node) 5623 return env, nl, nl_post
5624
5625 5626 -class LUMoveInstance(LogicalUnit):
5627 """Move an instance by data-copying. 5628 5629 """ 5630 HPATH = "instance-move" 5631 HTYPE = constants.HTYPE_INSTANCE 5632 _OP_PARAMS = [ 5633 _PInstanceName, 5634 ("target_node", _NoDefault, _TNonEmptyString), 5635 _PShutdownTimeout, 5636 ] 5637 REQ_BGL = False 5638
5639 - def ExpandNames(self):
5640 self._ExpandAndLockInstance() 5641 target_node = _ExpandNodeName(self.cfg, self.op.target_node) 5642 self.op.target_node = target_node 5643 self.needed_locks[locking.LEVEL_NODE] = [target_node] 5644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5645
5646 - def DeclareLocks(self, level):
5647 if level == locking.LEVEL_NODE: 5648 self._LockInstancesNodes(primary_only=True)
5649
5650 - def BuildHooksEnv(self):
5651 """Build hooks env. 5652 5653 This runs on the master, the primary node and the target node. 5654 5655 """ 5656 env = { 5657 "TARGET_NODE": self.op.target_node, 5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5659 } 5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node, 5662 self.op.target_node] 5663 return env, nl, nl
5664
5665 - def CheckPrereq(self):
5666 """Check prerequisites. 5667 5668 This checks that the instance is in the cluster. 5669 5670 """ 5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5672 assert self.instance is not None, \ 5673 "Cannot retrieve locked instance %s" % self.op.instance_name 5674 5675 node = self.cfg.GetNodeInfo(self.op.target_node) 5676 assert node is not None, \ 5677 "Cannot retrieve locked node %s" % self.op.target_node 5678 5679 self.target_node = target_node = node.name 5680 5681 if target_node == instance.primary_node: 5682 raise errors.OpPrereqError("Instance %s is already on the node %s" % 5683 (instance.name, target_node), 5684 errors.ECODE_STATE) 5685 5686 bep = self.cfg.GetClusterInfo().FillBE(instance) 5687 5688 for idx, dsk in enumerate(instance.disks): 5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE): 5690 raise errors.OpPrereqError("Instance disk %d has a complex layout," 5691 " cannot copy" % idx, errors.ECODE_STATE) 5692 5693 _CheckNodeOnline(self, target_node) 5694 _CheckNodeNotDrained(self, target_node) 5695 5696 if instance.admin_up: 5697 # check memory requirements on the target node 5698 _CheckNodeFreeMemory(self, target_node, "moving instance %s" % 5699 instance.name, bep[constants.BE_MEMORY], 5700 instance.hypervisor) 5701 else: 5702 self.LogInfo("Not checking memory on the target node as" 5703 " instance will not be started") 5704 5705 # check bridge existence 5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5707
5708 - def Exec(self, feedback_fn):
5709 """Move an instance. 5710 5711 The move is done by shutting it down on its present node, copying 5712 the data over (slow) and starting it on the new node. 5713 5714 """ 5715 instance = self.instance 5716 5717 source_node = instance.primary_node 5718 target_node = self.target_node 5719 5720 self.LogInfo("Shutting down instance %s on source node %s", 5721 instance.name, source_node) 5722 5723 result = self.rpc.call_instance_shutdown(source_node, instance, 5724 self.op.shutdown_timeout) 5725 msg = result.fail_msg 5726 if msg: 5727 if self.op.ignore_consistency: 5728 self.proc.LogWarning("Could not shutdown instance %s on node %s." 5729 " Proceeding anyway. Please make sure node" 5730 " %s is down. Error details: %s", 5731 instance.name, source_node, source_node, msg) 5732 else: 5733 raise errors.OpExecError("Could not shutdown instance %s on" 5734 " node %s: %s" % 5735 (instance.name, source_node, msg)) 5736 5737 # create the target disks 5738 try: 5739 _CreateDisks(self, instance, target_node=target_node) 5740 except errors.OpExecError: 5741 self.LogWarning("Device creation failed, reverting...") 5742 try: 5743 _RemoveDisks(self, instance, target_node=target_node) 5744 finally: 5745 self.cfg.ReleaseDRBDMinors(instance.name) 5746 raise 5747 5748 cluster_name = self.cfg.GetClusterInfo().cluster_name 5749 5750 errs = [] 5751 # activate, get path, copy the data over 5752 for idx, disk in enumerate(instance.disks): 5753 self.LogInfo("Copying data for disk %d", idx) 5754 result = self.rpc.call_blockdev_assemble(target_node, disk, 5755 instance.name, True) 5756 if result.fail_msg: 5757 self.LogWarning("Can't assemble newly created disk %d: %s", 5758 idx, result.fail_msg) 5759 errs.append(result.fail_msg) 5760 break 5761 dev_path = result.payload 5762 result = self.rpc.call_blockdev_export(source_node, disk, 5763 target_node, dev_path, 5764 cluster_name) 5765 if result.fail_msg: 5766 self.LogWarning("Can't copy data over for disk %d: %s", 5767 idx, result.fail_msg) 5768 errs.append(result.fail_msg) 5769 break 5770 5771 if errs: 5772 self.LogWarning("Some disks failed to copy, aborting") 5773 try: 5774 _RemoveDisks(self, instance, target_node=target_node) 5775 finally: 5776 self.cfg.ReleaseDRBDMinors(instance.name) 5777 raise errors.OpExecError("Errors during disk copy: %s" % 5778 (",".join(errs),)) 5779 5780 instance.primary_node = target_node 5781 self.cfg.Update(instance, feedback_fn) 5782 5783 self.LogInfo("Removing the disks on the original node") 5784 _RemoveDisks(self, instance, target_node=source_node) 5785 5786 # Only start the instance if it's marked as up 5787 if instance.admin_up: 5788 self.LogInfo("Starting instance %s on node %s", 5789 instance.name, target_node) 5790 5791 disks_ok, _ = _AssembleInstanceDisks(self, instance, 5792 ignore_secondaries=True) 5793 if not disks_ok: 5794 _ShutdownInstanceDisks(self, instance) 5795 raise errors.OpExecError("Can't activate the instance's disks") 5796 5797 result = self.rpc.call_instance_start(target_node, instance, None, None) 5798 msg = result.fail_msg 5799 if msg: 5800 _ShutdownInstanceDisks(self, instance) 5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 5802 (instance.name, target_node, msg))
5803
5804 5805 -class LUMigrateNode(LogicalUnit):
5806 """Migrate all instances from a node. 5807 5808 """ 5809 HPATH = "node-migrate" 5810 HTYPE = constants.HTYPE_NODE 5811 _OP_PARAMS = [ 5812 _PNodeName, 5813 _PMigrationMode, 5814 _PMigrationLive, 5815 ] 5816 REQ_BGL = False 5817
5818 - def ExpandNames(self):
5819 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5820 5821 self.needed_locks = { 5822 locking.LEVEL_NODE: [self.op.node_name], 5823 } 5824 5825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 5826 5827 # Create tasklets for migrating instances for all instances on this node 5828 names = [] 5829 tasklets = [] 5830 5831 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name): 5832 logging.debug("Migrating instance %s", inst.name) 5833 names.append(inst.name) 5834 5835 tasklets.append(TLMigrateInstance(self, inst.name, False)) 5836 5837 self.tasklets = tasklets 5838 5839 # Declare instance locks 5840 self.needed_locks[locking.LEVEL_INSTANCE] = names
5841
5842 - def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE: 5844 self._LockInstancesNodes()
5845
5846 - def BuildHooksEnv(self):
5847 """Build hooks env. 5848 5849 This runs on the master, the primary and all the secondaries. 5850 5851 """ 5852 env = { 5853 "NODE_NAME": self.op.node_name, 5854 } 5855 5856 nl = [self.cfg.GetMasterNode()] 5857 5858 return (env, nl, nl)
5859
5860 5861 -class TLMigrateInstance(Tasklet):
5862 """Tasklet class for instance migration. 5863 5864 @type live: boolean 5865 @ivar live: whether the migration will be done live or non-live; 5866 this variable is initialized only after CheckPrereq has run 5867 5868 """
5869 - def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class. 5871 5872 """ 5873 Tasklet.__init__(self, lu) 5874 5875 # Parameters 5876 self.instance_name = instance_name 5877 self.cleanup = cleanup 5878 self.live = False # will be overridden later
5879
5880 - def CheckPrereq(self):
5881 """Check prerequisites. 5882 5883 This checks that the instance is in the cluster. 5884 5885 """ 5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name) 5887 instance = self.cfg.GetInstanceInfo(instance_name) 5888 assert instance is not None 5889 5890 if instance.disk_template != constants.DT_DRBD8: 5891 raise errors.OpPrereqError("Instance's disk layout is not" 5892 " drbd8, cannot migrate.", errors.ECODE_STATE) 5893 5894 secondary_nodes = instance.secondary_nodes 5895 if not secondary_nodes: 5896 raise errors.ConfigurationError("No secondary node but using" 5897 " drbd8 disk template") 5898 5899 i_be = self.cfg.GetClusterInfo().FillBE(instance) 5900 5901 target_node = secondary_nodes[0] 5902 # check memory requirements on the secondary node 5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" % 5904 instance.name, i_be[constants.BE_MEMORY], 5905 instance.hypervisor) 5906 5907 # check bridge existance 5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node) 5909 5910 if not self.cleanup: 5911 _CheckNodeNotDrained(self.lu, target_node) 5912 result = self.rpc.call_instance_migratable(instance.primary_node, 5913 instance) 5914 result.Raise("Can't migrate, please use failover", 5915 prereq=True, ecode=errors.ECODE_STATE) 5916 5917 self.instance = instance 5918 5919 if self.lu.op.live is not None and self.lu.op.mode is not None: 5920 raise errors.OpPrereqError("Only one of the 'live' and 'mode'" 5921 " parameters are accepted", 5922 errors.ECODE_INVAL) 5923 if self.lu.op.live is not None: 5924 if self.lu.op.live: 5925 self.lu.op.mode = constants.HT_MIGRATION_LIVE 5926 else: 5927 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE 5928 # reset the 'live' parameter to None so that repeated 5929 # invocations of CheckPrereq do not raise an exception 5930 self.lu.op.live = None 5931 elif self.lu.op.mode is None: 5932 # read the default value from the hypervisor 5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False) 5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] 5935 5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
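The live/mode reconciliation above is the subtle part of this CheckPrereq; isolated into a hypothetical standalone helper (not part of this module, but using only names defined in it), the same rules read:

  def _ResolveMigrationMode(live, mode, hvparams):
    """Sketch of the migration-mode rules applied in CheckPrereq above.

    'live' and 'mode' are mutually exclusive; a boolean 'live' is mapped
    to the corresponding mode, and a missing mode falls back to the
    hypervisor's HV_MIGRATION_MODE default.

    """
    if live is not None and mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if live is not None:
      if live:
        return constants.HT_MIGRATION_LIVE
      return constants.HT_MIGRATION_NONLIVE
    if mode is None:
      return hvparams[constants.HV_MIGRATION_MODE]
    return mode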
5937
5938 - def _WaitUntilSync(self):
5939 """Poll with custom rpc for disk sync. 5940 5941 This uses our own step-based rpc call. 5942 5943 """ 5944 self.feedback_fn("* wait until resync is done") 5945 all_done = False 5946 while not all_done: 5947 all_done = True 5948 result = self.rpc.call_drbd_wait_sync(self.all_nodes, 5949 self.nodes_ip, 5950 self.instance.disks) 5951 min_percent = 100 5952 for node, nres in result.items(): 5953 nres.Raise("Cannot resync disks on node %s" % node) 5954 node_done, node_percent = nres.payload 5955 all_done = all_done and node_done 5956 if node_percent is not None: 5957 min_percent = min(min_percent, node_percent) 5958 if not all_done: 5959 if min_percent < 100: 5960 self.feedback_fn(" - progress: %.1f%%" % min_percent) 5961 time.sleep(2)
5962
5963 - def _EnsureSecondary(self, node):
5964 """Demote a node to secondary. 5965 5966 """ 5967 self.feedback_fn("* switching node %s to secondary mode" % node) 5968 5969 for dev in self.instance.disks: 5970 self.cfg.SetDiskID(dev, node) 5971 5972 result = self.rpc.call_blockdev_close(node, self.instance.name, 5973 self.instance.disks) 5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5975
5976 - def _GoStandalone(self):
5977 """Disconnect from the network. 5978 5979 """ 5980 self.feedback_fn("* changing into standalone mode") 5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, 5982 self.instance.disks) 5983 for node, nres in result.items(): 5984 nres.Raise("Cannot disconnect disks node %s" % node)
5985
5986 - def _GoReconnect(self, multimaster):
5987 """Reconnect to the network. 5988 5989 """ 5990 if multimaster: 5991 msg = "dual-master" 5992 else: 5993 msg = "single-master" 5994 self.feedback_fn("* changing disks into %s mode" % msg) 5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, 5996 self.instance.disks, 5997 self.instance.name, multimaster) 5998 for node, nres in result.items(): 5999 nres.Raise("Cannot change disks config on node %s" % node)
6000
6001 - def _ExecCleanup(self):
6002 """Try to cleanup after a failed migration. 6003 6004 The cleanup is done by: 6005 - check that the instance is running only on one node 6006 (and update the config if needed) 6007 - change disks on its secondary node to secondary 6008 - wait until disks are fully synchronized 6009 - disconnect from the network 6010 - change disks into single-master mode 6011 - wait again until disks are fully synchronized 6012 6013 """ 6014 instance = self.instance 6015 target_node = self.target_node 6016 source_node = self.source_node 6017 6018 # check running on only one node 6019 self.feedback_fn("* checking where the instance actually runs" 6020 " (if this hangs, the hypervisor might be in" 6021 " a bad state)") 6022 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor]) 6023 for node, result in ins_l.items(): 6024 result.Raise("Can't contact node %s" % node) 6025 6026 runningon_source = instance.name in ins_l[source_node].payload 6027 runningon_target = instance.name in ins_l[target_node].payload 6028 6029 if runningon_source and runningon_target: 6030 raise errors.OpExecError("Instance seems to be running on two nodes," 6031 " or the hypervisor is confused. You will have" 6032 " to ensure manually that it runs only on one" 6033 " and restart this operation.") 6034 6035 if not (runningon_source or runningon_target): 6036 raise errors.OpExecError("Instance does not seem to be running at all." 6037 " In this case, it's safer to repair by" 6038 " running 'gnt-instance stop' to ensure disk" 6039 " shutdown, and then restarting it.") 6040 6041 if runningon_target: 6042 # the migration has actually succeeded, we need to update the config 6043 self.feedback_fn("* instance running on secondary node (%s)," 6044 " updating config" % target_node) 6045 instance.primary_node = target_node 6046 self.cfg.Update(instance, self.feedback_fn) 6047 demoted_node = source_node 6048 else: 6049 self.feedback_fn("* instance confirmed to be running on its" 6050 " primary node (%s)" % source_node) 6051 demoted_node = target_node 6052 6053 self._EnsureSecondary(demoted_node) 6054 try: 6055 self._WaitUntilSync() 6056 except errors.OpExecError: 6057 # we ignore here errors, since if the device is standalone, it 6058 # won't be able to sync 6059 pass 6060 self._GoStandalone() 6061 self._GoReconnect(False) 6062 self._WaitUntilSync() 6063 6064 self.feedback_fn("* done")
6065
6066 - def _RevertDiskStatus(self):
6067 """Try to revert the disk status after a failed migration. 6068 6069 """ 6070 target_node = self.target_node 6071 try: 6072 self._EnsureSecondary(target_node) 6073 self._GoStandalone() 6074 self._GoReconnect(False) 6075 self._WaitUntilSync() 6076 except errors.OpExecError, err: 6077 self.lu.LogWarning("Migration failed and I can't reconnect the" 6078 " drives: error '%s'\n" 6079 "Please check and manually recover the instance status" % 6080 str(err))
6081
6082 - def _AbortMigration(self):
6083 """Call the hypervisor code to abort a started migration. 6084 6085 """ 6086 instance = self.instance 6087 target_node = self.target_node 6088 migration_info = self.migration_info 6089 6090 abort_result = self.rpc.call_finalize_migration(target_node, 6091 instance, 6092 migration_info, 6093 False) 6094 abort_msg = abort_result.fail_msg 6095 if abort_msg: 6096 logging.error("Aborting migration failed on target node %s: %s", 6097 target_node, abort_msg)
6098 # Don't raise an exception here, as we still have to try to revert the 6099 # disk status, even if this step failed. 6100
6101 - def _ExecMigration(self):
6102 """Migrate an instance. 6103 6104 The migrate is done by: 6105 - change the disks into dual-master mode 6106 - wait until disks are fully synchronized again 6107 - migrate the instance 6108 - change disks on the new secondary node (the old primary) to secondary 6109 - wait until disks are fully synchronized 6110 - change disks into single-master mode 6111 6112 """ 6113 instance = self.instance 6114 target_node = self.target_node 6115 source_node = self.source_node 6116 6117 self.feedback_fn("* checking disk consistency between source and target") 6118 for dev in instance.disks: 6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False): 6120 raise errors.OpExecError("Disk %s is degraded or not fully" 6121 " synchronized on target node," 6122 " aborting migrate." % dev.iv_name) 6123 6124 # First get the migration information from the remote node 6125 result = self.rpc.call_migration_info(source_node, instance) 6126 msg = result.fail_msg 6127 if msg: 6128 log_err = ("Failed fetching source migration information from %s: %s" % 6129 (source_node, msg)) 6130 logging.error(log_err) 6131 raise errors.OpExecError(log_err) 6132 6133 self.migration_info = migration_info = result.payload 6134 6135 # Then switch the disks to master/master mode 6136 self._EnsureSecondary(target_node) 6137 self._GoStandalone() 6138 self._GoReconnect(True) 6139 self._WaitUntilSync() 6140 6141 self.feedback_fn("* preparing %s to accept the instance" % target_node) 6142 result = self.rpc.call_accept_instance(target_node, 6143 instance, 6144 migration_info, 6145 self.nodes_ip[target_node]) 6146 6147 msg = result.fail_msg 6148 if msg: 6149 logging.error("Instance pre-migration failed, trying to revert" 6150 " disk status: %s", msg) 6151 self.feedback_fn("Pre-migration failed, aborting") 6152 self._AbortMigration() 6153 self._RevertDiskStatus() 6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" % 6155 (instance.name, msg)) 6156 6157 self.feedback_fn("* migrating instance to %s" % target_node) 6158 time.sleep(10) 6159 result = self.rpc.call_instance_migrate(source_node, instance, 6160 self.nodes_ip[target_node], 6161 self.live) 6162 msg = result.fail_msg 6163 if msg: 6164 logging.error("Instance migration failed, trying to revert" 6165 " disk status: %s", msg) 6166 self.feedback_fn("Migration failed, aborting") 6167 self._AbortMigration() 6168 self._RevertDiskStatus() 6169 raise errors.OpExecError("Could not migrate instance %s: %s" % 6170 (instance.name, msg)) 6171 time.sleep(10) 6172 6173 instance.primary_node = target_node 6174 # distribute new instance config to the other nodes 6175 self.cfg.Update(instance, self.feedback_fn) 6176 6177 result = self.rpc.call_finalize_migration(target_node, 6178 instance, 6179 migration_info, 6180 True) 6181 msg = result.fail_msg 6182 if msg: 6183 logging.error("Instance migration succeeded, but finalization failed:" 6184 " %s", msg) 6185 raise errors.OpExecError("Could not finalize instance migration: %s" % 6186 msg) 6187 6188 self._EnsureSecondary(source_node) 6189 self._WaitUntilSync() 6190 self._GoStandalone() 6191 self._GoReconnect(False) 6192 self._WaitUntilSync() 6193 6194 self.feedback_fn("* done")
6195
6196 - def Exec(self, feedback_fn):
6197 """Perform the migration. 6198 6199 """ 6200 feedback_fn("Migrating instance %s" % self.instance.name) 6201 6202 self.feedback_fn = feedback_fn 6203 6204 self.source_node = self.instance.primary_node 6205 self.target_node = self.instance.secondary_nodes[0] 6206 self.all_nodes = [self.source_node, self.target_node] 6207 self.nodes_ip = { 6208 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip, 6209 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip, 6210 } 6211 6212 if self.cleanup: 6213 return self._ExecCleanup() 6214 else: 6215 return self._ExecMigration()
6216
6217 6218 -def _CreateBlockDev(lu, node, instance, device, force_create, 6219 info, force_open):
6220 """Create a tree of block devices on a given node. 6221 6222 If this device type has to be created on secondaries, create it and 6223 all its children. 6224 6225 If not, just recurse to children keeping the same 'force' value. 6226 6227 @param lu: the lu on whose behalf we execute 6228 @param node: the node on which to create the device 6229 @type instance: L{objects.Instance} 6230 @param instance: the instance which owns the device 6231 @type device: L{objects.Disk} 6232 @param device: the device to create 6233 @type force_create: boolean 6234 @param force_create: whether to force creation of this device; this 6235 will be changed to True whenever we find a device which has 6236 the CreateOnSecondary() attribute 6237 @param info: the extra 'metadata' we should attach to the device 6238 (this will be represented as an LVM tag) 6239 @type force_open: boolean 6240 @param force_open: this parameter will be passed to the 6241 L{backend.BlockdevCreate} function where it specifies 6242 whether we run on primary or not, and it affects both 6243 the child assembly and the device's own Open() execution 6244 6245 """ 6246 if device.CreateOnSecondary(): 6247 force_create = True 6248 6249 if device.children: 6250 for child in device.children: 6251 _CreateBlockDev(lu, node, instance, child, force_create, 6252 info, force_open) 6253 6254 if not force_create: 6255 return 6256 6257 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6258
6259 6260 -def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261 """Create a single block device on a given node. 6262 6263 This will not recurse over children of the device, so they must be 6264 created in advance. 6265 6266 @param lu: the lu on whose behalf we execute 6267 @param node: the node on which to create the device 6268 @type instance: L{objects.Instance} 6269 @param instance: the instance which owns the device 6270 @type device: L{objects.Disk} 6271 @param device: the device to create 6272 @param info: the extra 'metadata' we should attach to the device 6273 (this will be represented as an LVM tag) 6274 @type force_open: boolean 6275 @param force_open: this parameter will be passed to the 6276 L{backend.BlockdevCreate} function where it specifies 6277 whether we run on primary or not, and it affects both 6278 the child assembly and the device's own Open() execution 6279 6280 """ 6281 lu.cfg.SetDiskID(device, node) 6282 result = lu.rpc.call_blockdev_create(node, device, device.size, 6283 instance.name, force_open, info) 6284 result.Raise("Can't create block device %s on" 6285 " node %s for instance %s" % (device, node, instance.name)) 6286 if device.physical_id is None: 6287 device.physical_id = result.payload
6288
6289 6290 -def _GenerateUniqueNames(lu, exts):
6291 """Generate a suitable LV name. 6292 6293 This will generate a logical volume name for the given instance. 6294 6295 """ 6296 results = [] 6297 for val in exts: 6298 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) 6299 results.append("%s%s" % (new_id, val)) 6300 return results
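The returned names are simply the generated unique ID with the requested suffix appended; for example (the ID shown is invented):

  # _GenerateUniqueNames(lu, [".disk0"]) returns something like
  #   ["8f2ab3e4-58f5-4b9c-a9c7-4f1e2d3c5b6a.disk0"]
  # and the DRBD8 code below appends "_data"/"_meta" to such a prefix to
  # name the backing and metadata LVs of each disk.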
6301
6302 6303 -def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name, 6304 p_minor, s_minor):
6305 """Generate a drbd8 device complete with its children. 6306 6307 """ 6308 port = lu.cfg.AllocatePort() 6309 vgname = lu.cfg.GetVGName() 6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) 6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, 6312 logical_id=(vgname, names[0])) 6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 6314 logical_id=(vgname, names[1])) 6315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, 6316 logical_id=(primary, secondary, port, 6317 p_minor, s_minor, 6318 shared_secret), 6319 children=[dev_data, dev_meta], 6320 iv_name=iv_name) 6321 return drbd_dev
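The resulting device tree for one disk, sketched for a hypothetical 1024 MB "disk/0" between node1 and node2 (port, minors and names invented; the 128 MB metadata volume is fixed by the code above):

  # LD_DRBD8, size=1024, iv_name="disk/0",
  #   logical_id=("node1", "node2", 11000, 0, 1, <generated shared secret>)
  #   children:
  #     LD_LV, size=1024, logical_id=(<cluster VG>, "<uuid>.disk0_data")
  #     LD_LV, size=128,  logical_id=(<cluster VG>, "<uuid>.disk0_meta")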
6322
6323 6324 -def _GenerateDiskTemplate(lu, template_name, 6325 instance_name, primary_node, 6326 secondary_nodes, disk_info, 6327 file_storage_dir, file_driver, 6328 base_index):
6329 """Generate the entire disk layout for a given template type. 6330 6331 """ 6332 #TODO: compute space requirements 6333 6334 vgname = lu.cfg.GetVGName() 6335 disk_count = len(disk_info) 6336 disks = [] 6337 if template_name == constants.DT_DISKLESS: 6338 pass 6339 elif template_name == constants.DT_PLAIN: 6340 if len(secondary_nodes) != 0: 6341 raise errors.ProgrammerError("Wrong template configuration") 6342 6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6344 for i in range(disk_count)]) 6345 for idx, disk in enumerate(disk_info): 6346 disk_index = idx + base_index 6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"], 6348 logical_id=(vgname, names[idx]), 6349 iv_name="disk/%d" % disk_index, 6350 mode=disk["mode"]) 6351 disks.append(disk_dev) 6352 elif template_name == constants.DT_DRBD8: 6353 if len(secondary_nodes) != 1: 6354 raise errors.ProgrammerError("Wrong template configuration") 6355 remote_node = secondary_nodes[0] 6356 minors = lu.cfg.AllocateDRBDMinor( 6357 [primary_node, remote_node] * len(disk_info), instance_name) 6358 6359 names = [] 6360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6361 for i in range(disk_count)]): 6362 names.append(lv_prefix + "_data") 6363 names.append(lv_prefix + "_meta") 6364 for idx, disk in enumerate(disk_info): 6365 disk_index = idx + base_index 6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, 6367 disk["size"], names[idx*2:idx*2+2], 6368 "disk/%d" % disk_index, 6369 minors[idx*2], minors[idx*2+1]) 6370 disk_dev.mode = disk["mode"] 6371 disks.append(disk_dev) 6372 elif template_name == constants.DT_FILE: 6373 if len(secondary_nodes) != 0: 6374 raise errors.ProgrammerError("Wrong template configuration") 6375 6376 _RequireFileStorage() 6377 6378 for idx, disk in enumerate(disk_info): 6379 disk_index = idx + base_index 6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"], 6381 iv_name="disk/%d" % disk_index, 6382 logical_id=(file_driver, 6383 "%s/disk%d" % (file_storage_dir, 6384 disk_index)), 6385 mode=disk["mode"]) 6386 disks.append(disk_dev) 6387 else: 6388 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name) 6389 return disks
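For orientation, a hypothetical call for the plain template (names, sizes and modes invented) and its result:

  # _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
  #                       "node1", [], [{"size": 1024, "mode": "rw"}],
  #                       None, None, 0)
  # returns a single LD_LV objects.Disk of 1024 MB with iv_name "disk/0" and
  # logical_id (<cluster VG>, "<uuid>.disk0"); the DT_DRBD8 branch instead
  # delegates each disk to _GenerateDRBD8Branch above, and DT_FILE builds
  # LD_FILE disks under file_storage_dir.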
6390
6391 6392 -def _GetInstanceInfoText(instance):
6393 """Compute the text that should be added to the disk's metadata. 6394 6395 """ 6396 return "originstname+%s" % instance.name
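For a hypothetical instance named "inst1.example.com" the disks would therefore be tagged with:

  # "originstname+inst1.example.com"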
6397
6398 6399 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance. 6401 6402 This abstracts away some work from AddInstance. 6403 6404 @type lu: L{LogicalUnit} 6405 @param lu: the logical unit on whose behalf we execute 6406 @type instance: L{objects.Instance} 6407 @param instance: the instance whose disks we should create 6408 @type to_skip: list 6409 @param to_skip: list of indices to skip 6410 @type target_node: string 6411 @param target_node: if passed, overrides the target node for creation 6412 @rtype: boolean 6413 @return: the success of the creation 6414 6415 """ 6416 info = _GetInstanceInfoText(instance) 6417 if target_node is None: 6418 pnode = instance.primary_node 6419 all_nodes = instance.all_nodes 6420 else: 6421 pnode = target_node 6422 all_nodes = [pnode] 6423 6424 if instance.disk_template == constants.DT_FILE: 6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir) 6427 6428 result.Raise("Failed to create directory '%s' on" 6429 " node %s" % (file_storage_dir, pnode)) 6430 6431 # Note: this needs to be kept in sync with adding of disks in 6432 # LUSetInstanceParams 6433 for idx, device in enumerate(instance.disks): 6434 if to_skip and idx in to_skip: 6435 continue 6436 logging.info("Creating volume %s for instance %s", 6437 device.iv_name, instance.name) 6438 #HARDCODE 6439 for node in all_nodes: 6440 f_create = node == pnode 6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6442
6443 6444 -def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance. 6446 6447 This abstracts away some work from `AddInstance()` and 6448 `RemoveInstance()`. Note that in case some of the devices couldn't 6449 be removed, the removal will continue with the other ones (compare 6450 with `_CreateDisks()`). 6451 6452 @type lu: L{LogicalUnit} 6453 @param lu: the logical unit on whose behalf we execute 6454 @type instance: L{objects.Instance} 6455 @param instance: the instance whose disks we should remove 6456 @type target_node: string 6457 @param target_node: used to override the node on which to remove the disks 6458 @rtype: boolean 6459 @return: the success of the removal 6460 6461 """ 6462 logging.info("Removing block devices for instance %s", instance.name) 6463 6464 all_result = True 6465 for device in instance.disks: 6466 if target_node: 6467 edata = [(target_node, device)] 6468 else: 6469 edata = device.ComputeNodeTree(instance.primary_node) 6470 for node, disk in edata: 6471 lu.cfg.SetDiskID(disk, node) 6472 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg 6473 if msg: 6474 lu.LogWarning("Could not remove block device %s on node %s," 6475 " continuing anyway: %s", device.iv_name, node, msg) 6476 all_result = False 6477 6478 if instance.disk_template == constants.DT_FILE: 6479 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 6480 if target_node: 6481 tgt = target_node 6482 else: 6483 tgt = instance.primary_node 6484 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir) 6485 if result.fail_msg: 6486 lu.LogWarning("Could not remove directory '%s' on node %s: %s", 6487 file_storage_dir, instance.primary_node, result.fail_msg) 6488 all_result = False 6489 6490 return all_result
6491
6492 6493 -def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group 6495 6496 """ 6497 # Required free disk space as a function of disk and swap space 6498 req_size_dict = { 6499 constants.DT_DISKLESS: None, 6500 constants.DT_PLAIN: sum(d["size"] for d in disks), 6501 # 128 MB are added for drbd metadata for each disk 6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks), 6503 constants.DT_FILE: None, 6504 } 6505 6506 if disk_template not in req_size_dict: 6507 raise errors.ProgrammerError("Disk template '%s' size requirement" 6508 " is unknown" % disk_template) 6509 6510 return req_size_dict[disk_template]
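A worked example for two requested disks of 1024 MB and 2048 MB (sizes invented):

  # DT_PLAIN                -> 1024 + 2048 = 3072 MB of volume group space
  # DT_DRBD8                -> (1024 + 128) + (2048 + 128) = 3328 MB
  #                            (128 MB of DRBD metadata per disk)
  # DT_FILE / DT_DISKLESS   -> None (no volume group space is needed)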
6511
6512 6513 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation. 6515 6516 This function abstracts the hypervisor parameter validation to be 6517 used in both instance create and instance modify. 6518 6519 @type lu: L{LogicalUnit} 6520 @param lu: the logical unit for which we check 6521 @type nodenames: list 6522 @param nodenames: the list of nodes on which we should check 6523 @type hvname: string 6524 @param hvname: the name of the hypervisor we should use 6525 @type hvparams: dict 6526 @param hvparams: the parameters which we need to check 6527 @raise errors.OpPrereqError: if the parameters are not valid 6528 6529 """ 6530 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, 6531 hvname, 6532 hvparams) 6533 for node in nodenames: 6534 info = hvinfo[node] 6535 if info.offline: 6536 continue 6537 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6538
6539 6540 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation. 6542 6543 @type lu: L{LogicalUnit} 6544 @param lu: the logical unit for which we check 6545 @type required: boolean 6546 @param required: whether the validation should fail if the OS is not 6547 found 6548 @type nodenames: list 6549 @param nodenames: the list of nodes on which we should check 6550 @type osname: string 6551 @param osname: the name of the OS we should check 6552 @type osparams: dict 6553 @param osparams: the parameters which we need to check 6554 @raise errors.OpPrereqError: if the parameters are not valid 6555 6556 """ 6557 result = lu.rpc.call_os_validate(required, nodenames, osname, 6558 [constants.OS_VALIDATE_PARAMETERS], 6559 osparams) 6560 for node, nres in result.items(): 6561 # we don't check for offline cases since this should be run only 6562 # against the master node and/or an instance's nodes 6563 nres.Raise("OS Parameters validation failed on node %s" % node) 6564 if not nres.payload: 6565 lu.LogInfo("OS %s not found on node %s, validation skipped", 6566 osname, node)
6567
6568 6569 -class LUCreateInstance(LogicalUnit):
6570 """Create an instance. 6571 6572 """ 6573 HPATH = "instance-add" 6574 HTYPE = constants.HTYPE_INSTANCE 6575 _OP_PARAMS = [ 6576 _PInstanceName, 6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)), 6578 ("start", True, _TBool), 6579 ("wait_for_sync", True, _TBool), 6580 ("ip_check", True, _TBool), 6581 ("name_check", True, _TBool), 6582 ("disks", _NoDefault, _TListOf(_TDict)), 6583 ("nics", _NoDefault, _TListOf(_TDict)), 6584 ("hvparams", _EmptyDict, _TDict), 6585 ("beparams", _EmptyDict, _TDict), 6586 ("osparams", _EmptyDict, _TDict), 6587 ("no_install", None, _TMaybeBool), 6588 ("os_type", None, _TMaybeString), 6589 ("force_variant", False, _TBool), 6590 ("source_handshake", None, _TOr(_TList, _TNone)), 6591 ("source_x509_ca", None, _TMaybeString), 6592 ("source_instance_name", None, _TMaybeString), 6593 ("src_node", None, _TMaybeString), 6594 ("src_path", None, _TMaybeString), 6595 ("pnode", None, _TMaybeString), 6596 ("snode", None, _TMaybeString), 6597 ("iallocator", None, _TMaybeString), 6598 ("hypervisor", None, _TMaybeString), 6599 ("disk_template", _NoDefault, _CheckDiskTemplate), 6600 ("identify_defaults", False, _TBool), 6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))), 6602 ("file_storage_dir", None, _TMaybeString), 6603 ] 6604 REQ_BGL = False 6605
6606 - def CheckArguments(self):
6607 """Check arguments. 6608 6609 """ 6610 # do not require name_check to ease forward/backward compatibility 6611 # for tools 6612 if self.op.no_install and self.op.start: 6613 self.LogInfo("No-installation mode selected, disabling startup") 6614 self.op.start = False 6615 # validate/normalize the instance name 6616 self.op.instance_name = \ 6617 netutils.HostInfo.NormalizeName(self.op.instance_name) 6618 6619 if self.op.ip_check and not self.op.name_check: 6620 # TODO: make the ip check more flexible and not depend on the name check 6621 raise errors.OpPrereqError("Cannot do ip check without a name check", 6622 errors.ECODE_INVAL) 6623 6624 # check nics' parameter names 6625 for nic in self.op.nics: 6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) 6627 6628 # check disks. parameter names and consistent adopt/no-adopt strategy 6629 has_adopt = has_no_adopt = False 6630 for disk in self.op.disks: 6631 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) 6632 if "adopt" in disk: 6633 has_adopt = True 6634 else: 6635 has_no_adopt = True 6636 if has_adopt and has_no_adopt: 6637 raise errors.OpPrereqError("Either all disks are adopted or none is", 6638 errors.ECODE_INVAL) 6639 if has_adopt: 6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT: 6641 raise errors.OpPrereqError("Disk adoption is not supported for the" 6642 " '%s' disk template" % 6643 self.op.disk_template, 6644 errors.ECODE_INVAL) 6645 if self.op.iallocator is not None: 6646 raise errors.OpPrereqError("Disk adoption not allowed with an" 6647 " iallocator script", errors.ECODE_INVAL) 6648 if self.op.mode == constants.INSTANCE_IMPORT: 6649 raise errors.OpPrereqError("Disk adoption not allowed for" 6650 " instance import", errors.ECODE_INVAL) 6651 6652 self.adopt_disks = has_adopt 6653 6654 # instance name verification 6655 if self.op.name_check: 6656 self.hostname1 = netutils.GetHostInfo(self.op.instance_name) 6657 self.op.instance_name = self.hostname1.name 6658 # used in CheckPrereq for ip ping check 6659 self.check_ip = self.hostname1.ip 6660 else: 6661 self.check_ip = None 6662 6663 # file storage checks 6664 if (self.op.file_driver and 6665 not self.op.file_driver in constants.FILE_DRIVER): 6666 raise errors.OpPrereqError("Invalid file driver name '%s'" % 6667 self.op.file_driver, errors.ECODE_INVAL) 6668 6669 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): 6670 raise errors.OpPrereqError("File storage directory path not absolute", 6671 errors.ECODE_INVAL) 6672 6673 ### Node/iallocator related checks 6674 _CheckIAllocatorOrNode(self, "iallocator", "pnode") 6675 6676 if self.op.pnode is not None: 6677 if self.op.disk_template in constants.DTS_NET_MIRROR: 6678 if self.op.snode is None: 6679 raise errors.OpPrereqError("The networked disk templates need" 6680 " a mirror node", errors.ECODE_INVAL) 6681 elif self.op.snode: 6682 self.LogWarning("Secondary node will be ignored on non-mirrored disk" 6683 " template") 6684 self.op.snode = None 6685 6686 self._cds = _GetClusterDomainSecret() 6687 6688 if self.op.mode == constants.INSTANCE_IMPORT: 6689 # On import force_variant must be True, because if we forced it at 6690 # initial install, our only chance when importing it back is that it 6691 # works again! 
6692 self.op.force_variant = True 6693 6694 if self.op.no_install: 6695 self.LogInfo("No-installation mode has no effect during import") 6696 6697 elif self.op.mode == constants.INSTANCE_CREATE: 6698 if self.op.os_type is None: 6699 raise errors.OpPrereqError("No guest OS specified", 6700 errors.ECODE_INVAL) 6701 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: 6702 raise errors.OpPrereqError("Guest OS '%s' is not allowed for" 6703 " installation" % self.op.os_type, 6704 errors.ECODE_STATE) 6705 if self.op.disk_template is None: 6706 raise errors.OpPrereqError("No disk template specified", 6707 errors.ECODE_INVAL) 6708 6709 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 6710 # Check handshake to ensure both clusters have the same domain secret 6711 src_handshake = self.op.source_handshake 6712 if not src_handshake: 6713 raise errors.OpPrereqError("Missing source handshake", 6714 errors.ECODE_INVAL) 6715 6716 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, 6717 src_handshake) 6718 if errmsg: 6719 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, 6720 errors.ECODE_INVAL) 6721 6722 # Load and check source CA 6723 self.source_x509_ca_pem = self.op.source_x509_ca 6724 if not self.source_x509_ca_pem: 6725 raise errors.OpPrereqError("Missing source X509 CA", 6726 errors.ECODE_INVAL) 6727 6728 try: 6729 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, 6730 self._cds) 6731 except OpenSSL.crypto.Error, err: 6732 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % 6733 (err, ), errors.ECODE_INVAL) 6734 6735 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 6736 if errcode is not None: 6737 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), 6738 errors.ECODE_INVAL) 6739 6740 self.source_x509_ca = cert 6741 6742 src_instance_name = self.op.source_instance_name 6743 if not src_instance_name: 6744 raise errors.OpPrereqError("Missing source instance name", 6745 errors.ECODE_INVAL) 6746 6747 norm_name = netutils.HostInfo.NormalizeName(src_instance_name) 6748 self.source_instance_name = netutils.GetHostInfo(norm_name).name 6749 6750 else: 6751 raise errors.OpPrereqError("Invalid instance creation mode %r" % 6752 self.op.mode, errors.ECODE_INVAL)
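As a rough illustration of the disk checks above (all values invented), either every entry is a plain specification or every entry adopts an existing volume, never a mix; adoption is additionally refused for templates outside DTS_MAY_ADOPT, with an iallocator, or on import:

  # [{"size": 1024, "mode": "rw"}, {"size": 2048}]            # normal creation
  # [{"adopt": "existing-lv-0"}, {"adopt": "existing-lv-1"}]  # all-adopt
  # [{"size": 1024}, {"adopt": "existing-lv-0"}]              # rejected: mixed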
6753
6754 - def ExpandNames(self):
6755 """ExpandNames for CreateInstance. 6756 6757 Figure out the right locks for instance creation. 6758 6759 """ 6760 self.needed_locks = {} 6761 6762 instance_name = self.op.instance_name 6763 # this is just a preventive check, but someone might still add this 6764 # instance in the meantime, and creation will fail at lock-add time 6765 if instance_name in self.cfg.GetInstanceList(): 6766 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 6767 instance_name, errors.ECODE_EXISTS) 6768 6769 self.add_locks[locking.LEVEL_INSTANCE] = instance_name 6770 6771 if self.op.iallocator: 6772 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 6773 else: 6774 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode) 6775 nodelist = [self.op.pnode] 6776 if self.op.snode is not None: 6777 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode) 6778 nodelist.append(self.op.snode) 6779 self.needed_locks[locking.LEVEL_NODE] = nodelist 6780 6781 # in case of import lock the source node too 6782 if self.op.mode == constants.INSTANCE_IMPORT: 6783 src_node = self.op.src_node 6784 src_path = self.op.src_path 6785 6786 if src_path is None: 6787 self.op.src_path = src_path = self.op.instance_name 6788 6789 if src_node is None: 6790 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 6791 self.op.src_node = None 6792 if os.path.isabs(src_path): 6793 raise errors.OpPrereqError("Importing an instance from an absolute" 6794 " path requires a source node option.", 6795 errors.ECODE_INVAL) 6796 else: 6797 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node) 6798 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 6799 self.needed_locks[locking.LEVEL_NODE].append(src_node) 6800 if not os.path.isabs(src_path): 6801 self.op.src_path = src_path = \ 6802 utils.PathJoin(constants.EXPORT_DIR, src_path)
6803
6804 - def _RunAllocator(self):
6805 """Run the allocator based on input opcode. 6806 6807 """ 6808 nics = [n.ToDict() for n in self.nics] 6809 ial = IAllocator(self.cfg, self.rpc, 6810 mode=constants.IALLOCATOR_MODE_ALLOC, 6811 name=self.op.instance_name, 6812 disk_template=self.op.disk_template, 6813 tags=[], 6814 os=self.op.os_type, 6815 vcpus=self.be_full[constants.BE_VCPUS], 6816 mem_size=self.be_full[constants.BE_MEMORY], 6817 disks=self.disks, 6818 nics=nics, 6819 hypervisor=self.op.hypervisor, 6820 ) 6821 6822 ial.Run(self.op.iallocator) 6823 6824 if not ial.success: 6825 raise errors.OpPrereqError("Can't compute nodes using" 6826 " iallocator '%s': %s" % 6827 (self.op.iallocator, ial.info), 6828 errors.ECODE_NORES) 6829 if len(ial.result) != ial.required_nodes: 6830 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 6831 " of nodes (%s), required %s" % 6832 (self.op.iallocator, len(ial.result), 6833 ial.required_nodes), errors.ECODE_FAULT) 6834 self.op.pnode = ial.result[0] 6835 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 6836 self.op.instance_name, self.op.iallocator, 6837 utils.CommaJoin(ial.result)) 6838 if ial.required_nodes == 2: 6839 self.op.snode = ial.result[1]
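For reference, the sanity checks applied to ial.result above boil down to the following standalone helper (the names are illustrative, not part of the IAllocator API):

def check_alloc_result(result, required_nodes, allocator="hail"):
  # The allocator must hand back exactly as many nodes as were requested;
  # anything else is treated as a fault rather than a resource shortage.
  if len(result) != required_nodes:
    raise ValueError("allocator '%s' returned %d node(s), required %d" %
                     (allocator, len(result), required_nodes))
  return list(result)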
6840
6841 - def BuildHooksEnv(self):
6842 """Build hooks env. 6843 6844 This runs on master, primary and secondary nodes of the instance. 6845 6846 """ 6847 env = { 6848 "ADD_MODE": self.op.mode, 6849 } 6850 if self.op.mode == constants.INSTANCE_IMPORT: 6851 env["SRC_NODE"] = self.op.src_node 6852 env["SRC_PATH"] = self.op.src_path 6853 env["SRC_IMAGES"] = self.src_images 6854 6855 env.update(_BuildInstanceHookEnv( 6856 name=self.op.instance_name, 6857 primary_node=self.op.pnode, 6858 secondary_nodes=self.secondaries, 6859 status=self.op.start, 6860 os_type=self.op.os_type, 6861 memory=self.be_full[constants.BE_MEMORY], 6862 vcpus=self.be_full[constants.BE_VCPUS], 6863 nics=_NICListToTuple(self, self.nics), 6864 disk_template=self.op.disk_template, 6865 disks=[(d["size"], d["mode"]) for d in self.disks], 6866 bep=self.be_full, 6867 hvp=self.hv_full, 6868 hypervisor_name=self.op.hypervisor, 6869 )) 6870 6871 nl = ([self.cfg.GetMasterNode(), self.op.pnode] + 6872 self.secondaries) 6873 return env, nl, nl
6874
6875 - def _ReadExportInfo(self):
6876 """Reads the export information from disk. 6877 6878 It will override the opcode source node and path with the actual 6879 information, if these two were not specified before. 6880 6881 @return: the export information 6882 6883 """ 6884 assert self.op.mode == constants.INSTANCE_IMPORT 6885 6886 src_node = self.op.src_node 6887 src_path = self.op.src_path 6888 6889 if src_node is None: 6890 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 6891 exp_list = self.rpc.call_export_list(locked_nodes) 6892 found = False 6893 for node in exp_list: 6894 if exp_list[node].fail_msg: 6895 continue 6896 if src_path in exp_list[node].payload: 6897 found = True 6898 self.op.src_node = src_node = node 6899 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR, 6900 src_path) 6901 break 6902 if not found: 6903 raise errors.OpPrereqError("No export found for relative path %s" % 6904 src_path, errors.ECODE_INVAL) 6905 6906 _CheckNodeOnline(self, src_node) 6907 result = self.rpc.call_export_info(src_node, src_path) 6908 result.Raise("No export or invalid export found in dir %s" % src_path) 6909 6910 export_info = objects.SerializableConfigParser.Loads(str(result.payload)) 6911 if not export_info.has_section(constants.INISECT_EXP): 6912 raise errors.ProgrammerError("Corrupted export config", 6913 errors.ECODE_ENVIRON) 6914 6915 ei_version = export_info.get(constants.INISECT_EXP, "version") 6916 if (int(ei_version) != constants.EXPORT_VERSION): 6917 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % 6918 (ei_version, constants.EXPORT_VERSION), 6919 errors.ECODE_ENVIRON) 6920 return export_info
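The export information itself is a plain INI-style file. A hypothetical, minimal example parsed with the standard library; the real section and option names come from ganeti.constants (INISECT_EXP, INISECT_INS and friends), so the literals below are only placeholders:

import ConfigParser
import StringIO

SAMPLE_EXPORT_INFO = """\
[export]
version = 0

[instance]
name = instance1.example.com
disk_count = 1
disk0_size = 1024
"""

parser = ConfigParser.SafeConfigParser()
parser.readfp(StringIO.StringIO(SAMPLE_EXPORT_INFO))
assert parser.has_section("export")
assert parser.getint("instance", "disk_count") == 1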
6921
6922 - def _ReadExportParams(self, einfo):
6923 """Use export parameters as defaults. 6924 6925 In case the opcode doesn't specify (as in override) some instance 6926 parameters, then try to use them from the export information, if 6927 that declares them. 6928 6929 """ 6930 self.op.os_type = einfo.get(constants.INISECT_EXP, "os") 6931 6932 if self.op.disk_template is None: 6933 if einfo.has_option(constants.INISECT_INS, "disk_template"): 6934 self.op.disk_template = einfo.get(constants.INISECT_INS, 6935 "disk_template") 6936 else: 6937 raise errors.OpPrereqError("No disk template specified and the export" 6938 " is missing the disk_template information", 6939 errors.ECODE_INVAL) 6940 6941 if not self.op.disks: 6942 if einfo.has_option(constants.INISECT_INS, "disk_count"): 6943 disks = [] 6944 # TODO: import the disk iv_name too 6945 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")): 6946 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) 6947 disks.append({"size": disk_sz}) 6948 self.op.disks = disks 6949 else: 6950 raise errors.OpPrereqError("No disk info specified and the export" 6951 " is missing the disk information", 6952 errors.ECODE_INVAL) 6953 6954 if (not self.op.nics and 6955 einfo.has_option(constants.INISECT_INS, "nic_count")): 6956 nics = [] 6957 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")): 6958 ndict = {} 6959 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: 6960 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name)) 6961 ndict[name] = v 6962 nics.append(ndict) 6963 self.op.nics = nics 6964 6965 if (self.op.hypervisor is None and 6966 einfo.has_option(constants.INISECT_INS, "hypervisor")): 6967 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") 6968 if einfo.has_section(constants.INISECT_HYP): 6969 # use the export parameters but do not override the ones 6970 # specified by the user 6971 for name, value in einfo.items(constants.INISECT_HYP): 6972 if name not in self.op.hvparams: 6973 self.op.hvparams[name] = value 6974 6975 if einfo.has_section(constants.INISECT_BEP): 6976 # use the parameters, without overriding 6977 for name, value in einfo.items(constants.INISECT_BEP): 6978 if name not in self.op.beparams: 6979 self.op.beparams[name] = value 6980 else: 6981 # try to read the parameters old style, from the main section 6982 for name in constants.BES_PARAMETERS: 6983 if (name not in self.op.beparams and 6984 einfo.has_option(constants.INISECT_INS, name)): 6985 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) 6986 6987 if einfo.has_section(constants.INISECT_OSP): 6988 # use the parameters, without overriding 6989 for name, value in einfo.items(constants.INISECT_OSP): 6990 if name not in self.op.osparams: 6991 self.op.osparams[name] = value
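The merging rule used above for hvparams, beparams and osparams is always the same: export values only fill in keys the opcode left unspecified. A standalone sketch of that rule:

def fill_from_export(op_params, export_params):
  # Copy the user-supplied parameters and let the export provide defaults
  # for anything not explicitly given in the opcode.
  merged = dict(op_params)
  for name, value in export_params.items():
    if name not in merged:
      merged[name] = value
  return merged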
6992
6993 - def _RevertToDefaults(self, cluster):
6994 """Revert the instance parameters to the default values. 6995 6996 """ 6997 # hvparams 6998 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) 6999 for name in self.op.hvparams.keys(): 7000 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: 7001 del self.op.hvparams[name] 7002 # beparams 7003 be_defs = cluster.SimpleFillBE({}) 7004 for name in self.op.beparams.keys(): 7005 if name in be_defs and be_defs[name] == self.op.beparams[name]: 7006 del self.op.beparams[name] 7007 # nic params 7008 nic_defs = cluster.SimpleFillNIC({}) 7009 for nic in self.op.nics: 7010 for name in constants.NICS_PARAMETERS: 7011 if name in nic and name in nic_defs and nic[name] == nic_defs[name]: 7012 del nic[name] 7013 # osparams 7014 os_defs = cluster.SimpleFillOS(self.op.os_type, {}) 7015 for name in self.op.osparams.keys(): 7016 if name in os_defs and os_defs[name] == self.op.osparams[name]: 7017 del self.op.osparams[name]
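All four loops above follow one pattern: drop any explicitly-given value that is identical to the cluster default, so the instance keeps tracking that default. A generic sketch of the pattern:

def strip_matching_defaults(params, defaults):
  # Modify params in place, removing keys whose values equal the defaults.
  for name in params.keys():
    if name in defaults and defaults[name] == params[name]:
      del params[name]
  return params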
7018
7019 - def CheckPrereq(self):
7020 """Check prerequisites. 7021 7022 """ 7023 if self.op.mode == constants.INSTANCE_IMPORT: 7024 export_info = self._ReadExportInfo() 7025 self._ReadExportParams(export_info) 7026 7027 _CheckDiskTemplate(self.op.disk_template) 7028 7029 if (not self.cfg.GetVGName() and 7030 self.op.disk_template not in constants.DTS_NOT_LVM): 7031 raise errors.OpPrereqError("Cluster does not support lvm-based" 7032 " instances", errors.ECODE_STATE) 7033 7034 if self.op.hypervisor is None: 7035 self.op.hypervisor = self.cfg.GetHypervisorType() 7036 7037 cluster = self.cfg.GetClusterInfo() 7038 enabled_hvs = cluster.enabled_hypervisors 7039 if self.op.hypervisor not in enabled_hvs: 7040 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" 7041 " cluster (%s)" % (self.op.hypervisor, 7042 ",".join(enabled_hvs)), 7043 errors.ECODE_STATE) 7044 7045 # check hypervisor parameter syntax (locally) 7046 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 7047 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, 7048 self.op.hvparams) 7049 hv_type = hypervisor.GetHypervisor(self.op.hypervisor) 7050 hv_type.CheckParameterSyntax(filled_hvp) 7051 self.hv_full = filled_hvp 7052 # check that we don't specify global parameters on an instance 7053 _CheckGlobalHvParams(self.op.hvparams) 7054 7055 # fill and remember the beparams dict 7056 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 7057 self.be_full = cluster.SimpleFillBE(self.op.beparams) 7058 7059 # build os parameters 7060 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) 7061 7062 # now that hvp/bep are in final format, let's reset to defaults, 7063 # if told to do so 7064 if self.op.identify_defaults: 7065 self._RevertToDefaults(cluster) 7066 7067 # NIC buildup 7068 self.nics = [] 7069 for idx, nic in enumerate(self.op.nics): 7070 nic_mode_req = nic.get("mode", None) 7071 nic_mode = nic_mode_req 7072 if nic_mode is None: 7073 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] 7074 7075 # in routed mode, for the first nic, the default ip is 'auto' 7076 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0: 7077 default_ip_mode = constants.VALUE_AUTO 7078 else: 7079 default_ip_mode = constants.VALUE_NONE 7080 7081 # ip validity checks 7082 ip = nic.get("ip", default_ip_mode) 7083 if ip is None or ip.lower() == constants.VALUE_NONE: 7084 nic_ip = None 7085 elif ip.lower() == constants.VALUE_AUTO: 7086 if not self.op.name_check: 7087 raise errors.OpPrereqError("IP address set to auto but name checks" 7088 " have been skipped. 
Aborting.", 7089 errors.ECODE_INVAL) 7090 nic_ip = self.hostname1.ip 7091 else: 7092 if not netutils.IsValidIP4(ip): 7093 raise errors.OpPrereqError("Given IP address '%s' doesn't look" 7094 " like a valid IP" % ip, 7095 errors.ECODE_INVAL) 7096 nic_ip = ip 7097 7098 # TODO: check the ip address for uniqueness 7099 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: 7100 raise errors.OpPrereqError("Routed nic mode requires an ip address", 7101 errors.ECODE_INVAL) 7102 7103 # MAC address verification 7104 mac = nic.get("mac", constants.VALUE_AUTO) 7105 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7106 mac = utils.NormalizeAndValidateMac(mac) 7107 7108 try: 7109 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 7110 except errors.ReservationError: 7111 raise errors.OpPrereqError("MAC address %s already in use" 7112 " in cluster" % mac, 7113 errors.ECODE_NOTUNIQUE) 7114 7115 # bridge verification 7116 bridge = nic.get("bridge", None) 7117 link = nic.get("link", None) 7118 if bridge and link: 7119 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 7120 " at the same time", errors.ECODE_INVAL) 7121 elif bridge and nic_mode == constants.NIC_MODE_ROUTED: 7122 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic", 7123 errors.ECODE_INVAL) 7124 elif bridge: 7125 link = bridge 7126 7127 nicparams = {} 7128 if nic_mode_req: 7129 nicparams[constants.NIC_MODE] = nic_mode_req 7130 if link: 7131 nicparams[constants.NIC_LINK] = link 7132 7133 check_params = cluster.SimpleFillNIC(nicparams) 7134 objects.NIC.CheckParameterSyntax(check_params) 7135 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams)) 7136 7137 # disk checks/pre-build 7138 self.disks = [] 7139 for disk in self.op.disks: 7140 mode = disk.get("mode", constants.DISK_RDWR) 7141 if mode not in constants.DISK_ACCESS_SET: 7142 raise errors.OpPrereqError("Invalid disk access mode '%s'" % 7143 mode, errors.ECODE_INVAL) 7144 size = disk.get("size", None) 7145 if size is None: 7146 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) 7147 try: 7148 size = int(size) 7149 except (TypeError, ValueError): 7150 raise errors.OpPrereqError("Invalid disk size '%s'" % size, 7151 errors.ECODE_INVAL) 7152 new_disk = {"size": size, "mode": mode} 7153 if "adopt" in disk: 7154 new_disk["adopt"] = disk["adopt"] 7155 self.disks.append(new_disk) 7156 7157 if self.op.mode == constants.INSTANCE_IMPORT: 7158 7159 # Check that the new instance doesn't have less disks than the export 7160 instance_disks = len(self.disks) 7161 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count') 7162 if instance_disks < export_disks: 7163 raise errors.OpPrereqError("Not enough disks to import." 7164 " (instance: %d, export: %d)" % 7165 (instance_disks, export_disks), 7166 errors.ECODE_INVAL) 7167 7168 disk_images = [] 7169 for idx in range(export_disks): 7170 option = 'disk%d_dump' % idx 7171 if export_info.has_option(constants.INISECT_INS, option): 7172 # FIXME: are the old os-es, disk sizes, etc. useful? 
7173 export_name = export_info.get(constants.INISECT_INS, option) 7174 image = utils.PathJoin(self.op.src_path, export_name) 7175 disk_images.append(image) 7176 else: 7177 disk_images.append(False) 7178 7179 self.src_images = disk_images 7180 7181 old_name = export_info.get(constants.INISECT_INS, 'name') 7182 try: 7183 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count') 7184 except (TypeError, ValueError), err: 7185 raise errors.OpPrereqError("Invalid export file, nic_count is not" 7186 " an integer: %s" % str(err), 7187 errors.ECODE_STATE) 7188 if self.op.instance_name == old_name: 7189 for idx, nic in enumerate(self.nics): 7190 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx: 7191 nic_mac_ini = 'nic%d_mac' % idx 7192 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) 7193 7194 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT 7195 7196 # ip ping checks (we use the same ip that was resolved in ExpandNames) 7197 if self.op.ip_check: 7198 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): 7199 raise errors.OpPrereqError("IP %s of instance %s already in use" % 7200 (self.check_ip, self.op.instance_name), 7201 errors.ECODE_NOTUNIQUE) 7202 7203 #### mac address generation 7204 # By generating here the mac address both the allocator and the hooks get 7205 # the real final mac address rather than the 'auto' or 'generate' value. 7206 # There is a race condition between the generation and the instance object 7207 # creation, which means that we know the mac is valid now, but we're not 7208 # sure it will be when we actually add the instance. If things go bad 7209 # adding the instance will abort because of a duplicate mac, and the 7210 # creation job will fail. 7211 for nic in self.nics: 7212 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7213 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId()) 7214 7215 #### allocator run 7216 7217 if self.op.iallocator is not None: 7218 self._RunAllocator() 7219 7220 #### node related checks 7221 7222 # check primary node 7223 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) 7224 assert self.pnode is not None, \ 7225 "Cannot retrieve locked node %s" % self.op.pnode 7226 if pnode.offline: 7227 raise errors.OpPrereqError("Cannot use offline primary node '%s'" % 7228 pnode.name, errors.ECODE_STATE) 7229 if pnode.drained: 7230 raise errors.OpPrereqError("Cannot use drained primary node '%s'" % 7231 pnode.name, errors.ECODE_STATE) 7232 7233 self.secondaries = [] 7234 7235 # mirror node verification 7236 if self.op.disk_template in constants.DTS_NET_MIRROR: 7237 if self.op.snode == pnode.name: 7238 raise errors.OpPrereqError("The secondary node cannot be the" 7239 " primary node.", errors.ECODE_INVAL) 7240 _CheckNodeOnline(self, self.op.snode) 7241 _CheckNodeNotDrained(self, self.op.snode) 7242 self.secondaries.append(self.op.snode) 7243 7244 nodenames = [pnode.name] + self.secondaries 7245 7246 req_size = _ComputeDiskSize(self.op.disk_template, 7247 self.disks) 7248 7249 # Check lv size requirements, if not adopting 7250 if req_size is not None and not self.adopt_disks: 7251 _CheckNodesFreeDisk(self, nodenames, req_size) 7252 7253 if self.adopt_disks: # instead, we must check the adoption data 7254 all_lvs = set([i["adopt"] for i in self.disks]) 7255 if len(all_lvs) != len(self.disks): 7256 raise errors.OpPrereqError("Duplicate volume names given for adoption", 7257 errors.ECODE_INVAL) 7258 for lv_name in all_lvs: 7259 try: 7260 self.cfg.ReserveLV(lv_name, self.proc.GetECId()) 7261 
except errors.ReservationError: 7262 raise errors.OpPrereqError("LV named %s used by another instance" % 7263 lv_name, errors.ECODE_NOTUNIQUE) 7264 7265 node_lvs = self.rpc.call_lv_list([pnode.name], 7266 self.cfg.GetVGName())[pnode.name] 7267 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) 7268 node_lvs = node_lvs.payload 7269 delta = all_lvs.difference(node_lvs.keys()) 7270 if delta: 7271 raise errors.OpPrereqError("Missing logical volume(s): %s" % 7272 utils.CommaJoin(delta), 7273 errors.ECODE_INVAL) 7274 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] 7275 if online_lvs: 7276 raise errors.OpPrereqError("Online logical volumes found, cannot" 7277 " adopt: %s" % utils.CommaJoin(online_lvs), 7278 errors.ECODE_STATE) 7279 # update the size of disk based on what is found 7280 for dsk in self.disks: 7281 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0])) 7282 7283 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) 7284 7285 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) 7286 # check OS parameters (remotely) 7287 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full) 7288 7289 _CheckNicsBridgesExist(self, self.nics, self.pnode.name) 7290 7291 # memory check on primary node 7292 if self.op.start: 7293 _CheckNodeFreeMemory(self, self.pnode.name, 7294 "creating instance %s" % self.op.instance_name, 7295 self.be_full[constants.BE_MEMORY], 7296 self.op.hypervisor) 7297 7298 self.dry_run_result = list(nodenames)
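For reference, the per-disk normalisation done in the disk loop of CheckPrereq reduces to the following standalone check (the literal mode names stand in for constants.DISK_ACCESS_SET):

def parse_disk_spec(disk, valid_modes=("rw", "ro")):
  # Normalise one {"size": ..., "mode": ..., "adopt": ...} disk dictionary.
  mode = disk.get("mode", "rw")
  if mode not in valid_modes:
    raise ValueError("Invalid disk access mode '%s'" % mode)
  try:
    size = int(disk["size"])
  except (KeyError, TypeError, ValueError):
    raise ValueError("Missing or invalid disk size: %r" % disk.get("size"))
  new_disk = {"size": size, "mode": mode}
  if "adopt" in disk:
    new_disk["adopt"] = disk["adopt"]
  return new_disk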
7299
7300 - def Exec(self, feedback_fn):
7301 """Create and add the instance to the cluster. 7302 7303 """ 7304 instance = self.op.instance_name 7305 pnode_name = self.pnode.name 7306 7307 ht_kind = self.op.hypervisor 7308 if ht_kind in constants.HTS_REQ_PORT: 7309 network_port = self.cfg.AllocatePort() 7310 else: 7311 network_port = None 7312 7313 if constants.ENABLE_FILE_STORAGE: 7314 # this is needed because os.path.join does not accept None arguments 7315 if self.op.file_storage_dir is None: 7316 string_file_storage_dir = "" 7317 else: 7318 string_file_storage_dir = self.op.file_storage_dir 7319 7320 # build the full file storage dir path 7321 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(), 7322 string_file_storage_dir, instance) 7323 else: 7324 file_storage_dir = "" 7325 7326 disks = _GenerateDiskTemplate(self, 7327 self.op.disk_template, 7328 instance, pnode_name, 7329 self.secondaries, 7330 self.disks, 7331 file_storage_dir, 7332 self.op.file_driver, 7333 0) 7334 7335 iobj = objects.Instance(name=instance, os=self.op.os_type, 7336 primary_node=pnode_name, 7337 nics=self.nics, disks=disks, 7338 disk_template=self.op.disk_template, 7339 admin_up=False, 7340 network_port=network_port, 7341 beparams=self.op.beparams, 7342 hvparams=self.op.hvparams, 7343 hypervisor=self.op.hypervisor, 7344 osparams=self.op.osparams, 7345 ) 7346 7347 if self.adopt_disks: 7348 # rename LVs to the newly-generated names; we need to construct 7349 # 'fake' LV disks with the old data, plus the new unique_id 7350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] 7351 rename_to = [] 7352 for t_dsk, a_dsk in zip (tmp_disks, self.disks): 7353 rename_to.append(t_dsk.logical_id) 7354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"]) 7355 self.cfg.SetDiskID(t_dsk, pnode_name) 7356 result = self.rpc.call_blockdev_rename(pnode_name, 7357 zip(tmp_disks, rename_to)) 7358 result.Raise("Failed to rename adoped LVs") 7359 else: 7360 feedback_fn("* creating instance disks...") 7361 try: 7362 _CreateDisks(self, iobj) 7363 except errors.OpExecError: 7364 self.LogWarning("Device creation failed, reverting...") 7365 try: 7366 _RemoveDisks(self, iobj) 7367 finally: 7368 self.cfg.ReleaseDRBDMinors(instance) 7369 raise 7370 7371 feedback_fn("adding instance %s to cluster config" % instance) 7372 7373 self.cfg.AddInstance(iobj, self.proc.GetECId()) 7374 7375 # Declare that we don't want to remove the instance lock anymore, as we've 7376 # added the instance to the config 7377 del self.remove_locks[locking.LEVEL_INSTANCE] 7378 # Unlock all the nodes 7379 if self.op.mode == constants.INSTANCE_IMPORT: 7380 nodes_keep = [self.op.src_node] 7381 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE] 7382 if node != self.op.src_node] 7383 self.context.glm.release(locking.LEVEL_NODE, nodes_release) 7384 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep 7385 else: 7386 self.context.glm.release(locking.LEVEL_NODE) 7387 del self.acquired_locks[locking.LEVEL_NODE] 7388 7389 if self.op.wait_for_sync: 7390 disk_abort = not _WaitForSync(self, iobj) 7391 elif iobj.disk_template in constants.DTS_NET_MIRROR: 7392 # make sure the disks are not degraded (still sync-ing is ok) 7393 time.sleep(15) 7394 feedback_fn("* checking mirrors status") 7395 disk_abort = not _WaitForSync(self, iobj, oneshot=True) 7396 else: 7397 disk_abort = False 7398 7399 if disk_abort: 7400 _RemoveDisks(self, iobj) 7401 self.cfg.RemoveInstance(iobj.name) 7402 # Make sure the instance lock gets removed 7403 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name 
7404 raise errors.OpExecError("There are some degraded disks for" 7405 " this instance") 7406 7407 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: 7408 if self.op.mode == constants.INSTANCE_CREATE: 7409 if not self.op.no_install: 7410 feedback_fn("* running the instance OS create scripts...") 7411 # FIXME: pass debug option from opcode to backend 7412 result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 7413 self.op.debug_level) 7414 result.Raise("Could not add os for instance %s" 7415 " on node %s" % (instance, pnode_name)) 7416 7417 elif self.op.mode == constants.INSTANCE_IMPORT: 7418 feedback_fn("* running the instance OS import scripts...") 7419 7420 transfers = [] 7421 7422 for idx, image in enumerate(self.src_images): 7423 if not image: 7424 continue 7425 7426 # FIXME: pass debug option from opcode to backend 7427 dt = masterd.instance.DiskTransfer("disk/%s" % idx, 7428 constants.IEIO_FILE, (image, ), 7429 constants.IEIO_SCRIPT, 7430 (iobj.disks[idx], idx), 7431 None) 7432 transfers.append(dt) 7433 7434 import_result = \ 7435 masterd.instance.TransferInstanceData(self, feedback_fn, 7436 self.op.src_node, pnode_name, 7437 self.pnode.secondary_ip, 7438 iobj, transfers) 7439 if not compat.all(import_result): 7440 self.LogWarning("Some disks for instance %s on node %s were not" 7441 " imported successfully" % (instance, pnode_name)) 7442 7443 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 7444 feedback_fn("* preparing remote import...") 7445 connect_timeout = constants.RIE_CONNECT_TIMEOUT 7446 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 7447 7448 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj, 7449 self.source_x509_ca, 7450 self._cds, timeouts) 7451 if not compat.all(disk_results): 7452 # TODO: Should the instance still be started, even if some disks 7453 # failed to import (valid for local imports, too)? 7454 self.LogWarning("Some disks for instance %s on node %s were not" 7455 " imported successfully" % (instance, pnode_name)) 7456 7457 # Run rename script on newly imported instance 7458 assert iobj.name == instance 7459 feedback_fn("Running rename script for %s" % instance) 7460 result = self.rpc.call_instance_run_rename(pnode_name, iobj, 7461 self.source_instance_name, 7462 self.op.debug_level) 7463 if result.fail_msg: 7464 self.LogWarning("Failed to run rename script for %s on node" 7465 " %s: %s" % (instance, pnode_name, result.fail_msg)) 7466 7467 else: 7468 # also checked in the prereq part 7469 raise errors.ProgrammerError("Unknown OS initialization mode '%s'" 7470 % self.op.mode) 7471 7472 if self.op.start: 7473 iobj.admin_up = True 7474 self.cfg.Update(iobj, feedback_fn) 7475 logging.info("Starting instance %s on node %s", instance, pnode_name) 7476 feedback_fn("* starting instance...") 7477 result = self.rpc.call_instance_start(pnode_name, iobj, None, None) 7478 result.Raise("Could not start instance") 7479 7480 return list(iobj.all_nodes)
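The disk-creation branch of Exec uses a create/rollback/re-raise pattern that is worth spelling out on its own. A minimal sketch (the function parameters are placeholders, not Ganeti APIs):

def create_with_rollback(create_fn, remove_fn, release_fn):
  # Try to create the devices; on failure undo what was created, always
  # release reserved resources (e.g. DRBD minors), then re-raise.
  try:
    create_fn()
  except Exception:
    try:
      remove_fn()
    finally:
      release_fn()
    raise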
7481
7482 7483 -class LUConnectConsole(NoHooksLU):
7484 """Connect to an instance's console. 7485 7486 This is somewhat special in that it returns the command line that 7487 you need to run on the master node in order to connect to the 7488 console. 7489 7490 """ 7491 _OP_PARAMS = [ 7492 _PInstanceName 7493 ] 7494 REQ_BGL = False 7495
7496 - def ExpandNames(self):
7496 - def ExpandNames(self):
7497 self._ExpandAndLockInstance()
7498
7499 - def CheckPrereq(self):
7500 """Check prerequisites. 7501 7502 This checks that the instance is in the cluster. 7503 7504 """ 7505 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7506 assert self.instance is not None, \ 7507 "Cannot retrieve locked instance %s" % self.op.instance_name 7508 _CheckNodeOnline(self, self.instance.primary_node)
7509
7510 - def Exec(self, feedback_fn):
7511 """Connect to the console of an instance 7512 7513 """ 7514 instance = self.instance 7515 node = instance.primary_node 7516 7517 node_insts = self.rpc.call_instance_list([node], 7518 [instance.hypervisor])[node] 7519 node_insts.Raise("Can't get node information from %s" % node) 7520 7521 if instance.name not in node_insts.payload: 7522 raise errors.OpExecError("Instance %s is not running." % instance.name) 7523 7524 logging.debug("Connecting to console of %s on %s", instance.name, node) 7525 7526 hyper = hypervisor.GetHypervisor(instance.hypervisor) 7527 cluster = self.cfg.GetClusterInfo() 7528 # beparams and hvparams are passed separately, to avoid editing the 7529 # instance and then saving the defaults in the instance itself. 7530 hvparams = cluster.FillHV(instance) 7531 beparams = cluster.FillBE(instance) 7532 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams) 7533 7534 # build ssh cmdline 7535 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
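As the class docstring says, this LU only returns the command line; the client is expected to run it on the master node. Assuming the returned value is an argv list (as built by ssh.BuildCmd), a rough usage sketch of that final step:

import subprocess

def attach_console(console_argv):
  # Hand control to the ssh-based console command and return its exit code.
  return subprocess.call(console_argv)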
7536
7537 7538 -class LUReplaceDisks(LogicalUnit):
7539 """Replace the disks of an instance. 7540 7541 """ 7542 HPATH = "mirrors-replace" 7543 HTYPE = constants.HTYPE_INSTANCE 7544 _OP_PARAMS = [ 7545 _PInstanceName, 7546 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)), 7547 ("disks", _EmptyList, _TListOf(_TPositiveInt)), 7548 ("remote_node", None, _TMaybeString), 7549 ("iallocator", None, _TMaybeString), 7550 ("early_release", False, _TBool), 7551 ] 7552 REQ_BGL = False 7553
7554 - def CheckArguments(self):
7555 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node, 7556 self.op.iallocator)
7557
7558 - def ExpandNames(self):
7559 self._ExpandAndLockInstance() 7560 7561 if self.op.iallocator is not None: 7562 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7563 7564 elif self.op.remote_node is not None: 7565 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 7566 self.op.remote_node = remote_node 7567 7568 # Warning: do not remove the locking of the new secondary here 7569 # unless DRBD8.AddChildren is changed to work in parallel; 7570 # currently it doesn't since parallel invocations of 7571 # FindUnusedMinor will conflict 7572 self.needed_locks[locking.LEVEL_NODE] = [remote_node] 7573 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 7574 7575 else: 7576 self.needed_locks[locking.LEVEL_NODE] = [] 7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 7578 7579 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, 7580 self.op.iallocator, self.op.remote_node, 7581 self.op.disks, False, self.op.early_release) 7582 7583 self.tasklets = [self.replacer]
7584
7585 - def DeclareLocks(self, level):
7586 # If we're not already locking all nodes in the set we have to declare the 7587 # instance's primary/secondary nodes. 7588 if (level == locking.LEVEL_NODE and 7589 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET): 7590 self._LockInstancesNodes()
7591
7592 - def BuildHooksEnv(self):
7593 """Build hooks env. 7594 7595 This runs on the master, the primary and all the secondaries. 7596 7597 """ 7598 instance = self.replacer.instance 7599 env = { 7600 "MODE": self.op.mode, 7601 "NEW_SECONDARY": self.op.remote_node, 7602 "OLD_SECONDARY": instance.secondary_nodes[0], 7603 } 7604 env.update(_BuildInstanceHookEnvByObject(self, instance)) 7605 nl = [ 7606 self.cfg.GetMasterNode(), 7607 instance.primary_node, 7608 ] 7609 if self.op.remote_node is not None: 7610 nl.append(self.op.remote_node) 7611 return env, nl, nl
7612
7613 7614 -class TLReplaceDisks(Tasklet):
7615 """Replaces disks for an instance. 7616 7617 Note: Locking is not within the scope of this class. 7618 7619 """
7620 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, 7621 disks, delay_iallocator, early_release):
7622 """Initializes this class. 7623 7624 """ 7625 Tasklet.__init__(self, lu) 7626 7627 # Parameters 7628 self.instance_name = instance_name 7629 self.mode = mode 7630 self.iallocator_name = iallocator_name 7631 self.remote_node = remote_node 7632 self.disks = disks 7633 self.delay_iallocator = delay_iallocator 7634 self.early_release = early_release 7635 7636 # Runtime data 7637 self.instance = None 7638 self.new_node = None 7639 self.target_node = None 7640 self.other_node = None 7641 self.remote_node_info = None 7642 self.node_secondary_ip = None
7643 7644 @staticmethod
7645 - def CheckArguments(mode, remote_node, iallocator):
7646 """Helper function for users of this class. 7647 7648 """ 7649 # check for valid parameter combination 7650 if mode == constants.REPLACE_DISK_CHG: 7651 if remote_node is None and iallocator is None: 7652 raise errors.OpPrereqError("When changing the secondary either an" 7653 " iallocator script must be used or the" 7654 " new node given", errors.ECODE_INVAL) 7655 7656 if remote_node is not None and iallocator is not None: 7657 raise errors.OpPrereqError("Give either the iallocator or the new" 7658 " secondary, not both", errors.ECODE_INVAL) 7659 7660 elif remote_node is not None or iallocator is not None: 7661 # Not replacing the secondary 7662 raise errors.OpPrereqError("The iallocator and new node options can" 7663 " only be used when changing the" 7664 " secondary node", errors.ECODE_INVAL)
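A standalone restatement of the rules enforced above (changing the secondary needs exactly one of a new node or an iallocator; the other modes accept neither); the chg_mode default is a placeholder for constants.REPLACE_DISK_CHG:

def check_replace_args(mode, remote_node, iallocator,
                       chg_mode="replace_new_secondary"):
  if mode == chg_mode:
    if remote_node is None and iallocator is None:
      raise ValueError("changing the secondary needs a new node or an"
                       " iallocator script")
    if remote_node is not None and iallocator is not None:
      raise ValueError("give either the iallocator or the new node, not both")
  elif remote_node is not None or iallocator is not None:
    raise ValueError("a new node or iallocator only makes sense when"
                     " changing the secondary")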
7665 7666 @staticmethod
7667 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7668 """Compute a new secondary node using an IAllocator. 7669 7670 """ 7671 ial = IAllocator(lu.cfg, lu.rpc, 7672 mode=constants.IALLOCATOR_MODE_RELOC, 7673 name=instance_name, 7674 relocate_from=relocate_from) 7675 7676 ial.Run(iallocator_name) 7677 7678 if not ial.success: 7679 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 7680 " %s" % (iallocator_name, ial.info), 7681 errors.ECODE_NORES) 7682 7683 if len(ial.result) != ial.required_nodes: 7684 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 7685 " of nodes (%s), required %s" % 7686 (iallocator_name, 7687 len(ial.result), ial.required_nodes), 7688 errors.ECODE_FAULT) 7689 7690 remote_node_name = ial.result[0] 7691 7692 lu.LogInfo("Selected new secondary for instance '%s': %s", 7693 instance_name, remote_node_name) 7694 7695 return remote_node_name
7696
7697 - def _FindFaultyDisks(self, node_name):
7698 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, 7699 node_name, True)
7700
7701 - def CheckPrereq(self):
7702 """Check prerequisites. 7703 7704 This checks that the instance is in the cluster. 7705 7706 """ 7707 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name) 7708 assert instance is not None, \ 7709 "Cannot retrieve locked instance %s" % self.instance_name 7710 7711 if instance.disk_template != constants.DT_DRBD8: 7712 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" 7713 " instances", errors.ECODE_INVAL) 7714 7715 if len(instance.secondary_nodes) != 1: 7716 raise errors.OpPrereqError("The instance has a strange layout," 7717 " expected one secondary but found %d" % 7718 len(instance.secondary_nodes), 7719 errors.ECODE_FAULT) 7720 7721 if not self.delay_iallocator: 7722 self._CheckPrereq2()
7723
7724 - def _CheckPrereq2(self):
7725 """Check prerequisites, second part. 7726 7727 This function should always be part of CheckPrereq. It was separated and is 7728 now called from Exec because during node evacuation iallocator was only 7729 called with an unmodified cluster model, not taking planned changes into 7730 account. 7731 7732 """ 7733 instance = self.instance 7734 secondary_node = instance.secondary_nodes[0] 7735 7736 if self.iallocator_name is None: 7737 remote_node = self.remote_node 7738 else: 7739 remote_node = self._RunAllocator(self.lu, self.iallocator_name, 7740 instance.name, instance.secondary_nodes) 7741 7742 if remote_node is not None: 7743 self.remote_node_info = self.cfg.GetNodeInfo(remote_node) 7744 assert self.remote_node_info is not None, \ 7745 "Cannot retrieve locked node %s" % remote_node 7746 else: 7747 self.remote_node_info = None 7748 7749 if remote_node == self.instance.primary_node: 7750 raise errors.OpPrereqError("The specified node is the primary node of" 7751 " the instance.", errors.ECODE_INVAL) 7752 7753 if remote_node == secondary_node: 7754 raise errors.OpPrereqError("The specified node is already the" 7755 " secondary node of the instance.", 7756 errors.ECODE_INVAL) 7757 7758 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, 7759 constants.REPLACE_DISK_CHG): 7760 raise errors.OpPrereqError("Cannot specify disks to be replaced", 7761 errors.ECODE_INVAL) 7762 7763 if self.mode == constants.REPLACE_DISK_AUTO: 7764 faulty_primary = self._FindFaultyDisks(instance.primary_node) 7765 faulty_secondary = self._FindFaultyDisks(secondary_node) 7766 7767 if faulty_primary and faulty_secondary: 7768 raise errors.OpPrereqError("Instance %s has faulty disks on more than" 7769 " one node and can not be repaired" 7770 " automatically" % self.instance_name, 7771 errors.ECODE_STATE) 7772 7773 if faulty_primary: 7774 self.disks = faulty_primary 7775 self.target_node = instance.primary_node 7776 self.other_node = secondary_node 7777 check_nodes = [self.target_node, self.other_node] 7778 elif faulty_secondary: 7779 self.disks = faulty_secondary 7780 self.target_node = secondary_node 7781 self.other_node = instance.primary_node 7782 check_nodes = [self.target_node, self.other_node] 7783 else: 7784 self.disks = [] 7785 check_nodes = [] 7786 7787 else: 7788 # Non-automatic modes 7789 if self.mode == constants.REPLACE_DISK_PRI: 7790 self.target_node = instance.primary_node 7791 self.other_node = secondary_node 7792 check_nodes = [self.target_node, self.other_node] 7793 7794 elif self.mode == constants.REPLACE_DISK_SEC: 7795 self.target_node = secondary_node 7796 self.other_node = instance.primary_node 7797 check_nodes = [self.target_node, self.other_node] 7798 7799 elif self.mode == constants.REPLACE_DISK_CHG: 7800 self.new_node = remote_node 7801 self.other_node = instance.primary_node 7802 self.target_node = secondary_node 7803 check_nodes = [self.new_node, self.other_node] 7804 7805 _CheckNodeNotDrained(self.lu, remote_node) 7806 7807 old_node_info = self.cfg.GetNodeInfo(secondary_node) 7808 assert old_node_info is not None 7809 if old_node_info.offline and not self.early_release: 7810 # doesn't make sense to delay the release 7811 self.early_release = True 7812 self.lu.LogInfo("Old secondary %s is offline, automatically enabling" 7813 " early-release mode", secondary_node) 7814 7815 else: 7816 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % 7817 self.mode) 7818 7819 # If not specified all disks should be replaced 7820 if not self.disks: 7821 self.disks = 
range(len(self.instance.disks)) 7822 7823 for node in check_nodes: 7824 _CheckNodeOnline(self.lu, node) 7825 7826 # Check whether disks are valid 7827 for disk_idx in self.disks: 7828 instance.FindDisk(disk_idx) 7829 7830 # Get secondary node IP addresses 7831 node_2nd_ip = {} 7832 7833 for node_name in [self.target_node, self.other_node, self.new_node]: 7834 if node_name is not None: 7835 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip 7836 7837 self.node_secondary_ip = node_2nd_ip
7838
7839 - def Exec(self, feedback_fn):
7840 """Execute disk replacement. 7841 7842 This dispatches the disk replacement to the appropriate handler. 7843 7844 """ 7845 if self.delay_iallocator: 7846 self._CheckPrereq2() 7847 7848 if not self.disks: 7849 feedback_fn("No disks need replacement") 7850 return 7851 7852 feedback_fn("Replacing disk(s) %s for %s" % 7853 (utils.CommaJoin(self.disks), self.instance.name)) 7854 7855 activate_disks = (not self.instance.admin_up) 7856 7857 # Activate the instance disks if we're replacing them on a down instance 7858 if activate_disks: 7859 _StartInstanceDisks(self.lu, self.instance, True) 7860 7861 try: 7862 # Should we replace the secondary node? 7863 if self.new_node is not None: 7864 fn = self._ExecDrbd8Secondary 7865 else: 7866 fn = self._ExecDrbd8DiskOnly 7867 7868 return fn(feedback_fn) 7869 7870 finally: 7871 # Deactivate the instance disks if we're replacing them on a 7872 # down instance 7873 if activate_disks: 7874 _SafeShutdownInstanceDisks(self.lu, self.instance)
7875
7876 - def _CheckVolumeGroup(self, nodes):
7877 self.lu.LogInfo("Checking volume groups") 7878 7879 vgname = self.cfg.GetVGName() 7880 7881 # Make sure volume group exists on all involved nodes 7882 results = self.rpc.call_vg_list(nodes) 7883 if not results: 7884 raise errors.OpExecError("Can't list volume groups on the nodes") 7885 7886 for node in nodes: 7887 res = results[node] 7888 res.Raise("Error checking node %s" % node) 7889 if vgname not in res.payload: 7890 raise errors.OpExecError("Volume group '%s' not found on node %s" % 7891 (vgname, node))
7892
7893 - def _CheckDisksExistence(self, nodes):
7894 # Check disk existence 7895 for idx, dev in enumerate(self.instance.disks): 7896 if idx not in self.disks: 7897 continue 7898 7899 for node in nodes: 7900 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node)) 7901 self.cfg.SetDiskID(dev, node) 7902 7903 result = self.rpc.call_blockdev_find(node, dev) 7904 7905 msg = result.fail_msg 7906 if msg or not result.payload: 7907 if not msg: 7908 msg = "disk not found" 7909 raise errors.OpExecError("Can't find disk/%d on node %s: %s" % 7910 (idx, node, msg))
7911
7912 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7913 for idx, dev in enumerate(self.instance.disks): 7914 if idx not in self.disks: 7915 continue 7916 7917 self.lu.LogInfo("Checking disk/%d consistency on node %s" % 7918 (idx, node_name)) 7919 7920 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary, 7921 ldisk=ldisk): 7922 raise errors.OpExecError("Node %s has degraded storage, unsafe to" 7923 " replace disks for instance %s" % 7924 (node_name, self.instance.name))
7925
7926 - def _CreateNewStorage(self, node_name):
7927 vgname = self.cfg.GetVGName() 7928 iv_names = {} 7929 7930 for idx, dev in enumerate(self.instance.disks): 7931 if idx not in self.disks: 7932 continue 7933 7934 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx)) 7935 7936 self.cfg.SetDiskID(dev, node_name) 7937 7938 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] 7939 names = _GenerateUniqueNames(self.lu, lv_names) 7940 7941 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size, 7942 logical_id=(vgname, names[0])) 7943 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 7944 logical_id=(vgname, names[1])) 7945 7946 new_lvs = [lv_data, lv_meta] 7947 old_lvs = dev.children 7948 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) 7949 7950 # we pass force_create=True to force the LVM creation 7951 for new_lv in new_lvs: 7952 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True, 7953 _GetInstanceInfoText(self.instance), False) 7954 7955 return iv_names
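Each replaced disk gets a fresh data/meta LV pair whose names end in ".disk<N>_data" and ".disk<N>_meta". A hypothetical stand-in for _GenerateUniqueNames that only shows the naming shape:

import uuid

def new_lv_names(disk_index):
  # One data and one meta volume name per disk, prefixed with a unique id.
  unique_id = str(uuid.uuid4())
  return ["%s.disk%d_%s" % (unique_id, disk_index, suffix)
          for suffix in ("data", "meta")]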
7956
7957 - def _CheckDevices(self, node_name, iv_names):
7958 for name, (dev, _, _) in iv_names.iteritems(): 7959 self.cfg.SetDiskID(dev, node_name) 7960 7961 result = self.rpc.call_blockdev_find(node_name, dev) 7962 7963 msg = result.fail_msg 7964 if msg or not result.payload: 7965 if not msg: 7966 msg = "disk not found" 7967 raise errors.OpExecError("Can't find DRBD device %s: %s" % 7968 (name, msg)) 7969 7970 if result.payload.is_degraded: 7971 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7972
7973 - def _RemoveOldStorage(self, node_name, iv_names):
7974 for name, (_, old_lvs, _) in iv_names.iteritems(): 7975 self.lu.LogInfo("Remove logical volumes for %s" % name) 7976 7977 for lv in old_lvs: 7978 self.cfg.SetDiskID(lv, node_name) 7979 7980 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg 7981 if msg: 7982 self.lu.LogWarning("Can't remove old LV: %s" % msg, 7983 hint="remove unused LVs manually")
7984
7985 - def _ReleaseNodeLock(self, node_name):
7986 """Releases the lock for a given node.""" 7987 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7988
7989 - def _ExecDrbd8DiskOnly(self, feedback_fn):
7990 """Replace a disk on the primary or secondary for DRBD 8. 7991 7992 The algorithm for replace is quite complicated: 7993 7994 1. for each disk to be replaced: 7995 7996 1. create new LVs on the target node with unique names 7997 1. detach old LVs from the drbd device 7998 1. rename old LVs to name_replaced.<time_t> 7999 1. rename new LVs to old LVs 8000 1. attach the new LVs (with the old names now) to the drbd device 8001 8002 1. wait for sync across all devices 8003 8004 1. for each modified disk: 8005 8006 1. remove old LVs (which have the name name_replaces.<time_t>) 8007 8008 Failures are not very well handled. 8009 8010 """ 8011 steps_total = 6 8012 8013 # Step: check device activation 8014 self.lu.LogStep(1, steps_total, "Check device existence") 8015 self._CheckDisksExistence([self.other_node, self.target_node]) 8016 self._CheckVolumeGroup([self.target_node, self.other_node]) 8017 8018 # Step: check other node consistency 8019 self.lu.LogStep(2, steps_total, "Check peer consistency") 8020 self._CheckDisksConsistency(self.other_node, 8021 self.other_node == self.instance.primary_node, 8022 False) 8023 8024 # Step: create new storage 8025 self.lu.LogStep(3, steps_total, "Allocate new storage") 8026 iv_names = self._CreateNewStorage(self.target_node) 8027 8028 # Step: for each lv, detach+rename*2+attach 8029 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8030 for dev, old_lvs, new_lvs in iv_names.itervalues(): 8031 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name) 8032 8033 result = self.rpc.call_blockdev_removechildren(self.target_node, dev, 8034 old_lvs) 8035 result.Raise("Can't detach drbd from local storage on node" 8036 " %s for device %s" % (self.target_node, dev.iv_name)) 8037 #dev.children = [] 8038 #cfg.Update(instance) 8039 8040 # ok, we created the new LVs, so now we know we have the needed 8041 # storage; as such, we proceed on the target node to rename 8042 # old_lv to _old, and new_lv to old_lv; note that we rename LVs 8043 # using the assumption that logical_id == physical_id (which in 8044 # turn is the unique_id on that node) 8045 8046 # FIXME(iustin): use a better name for the replaced LVs 8047 temp_suffix = int(time.time()) 8048 ren_fn = lambda d, suff: (d.physical_id[0], 8049 d.physical_id[1] + "_replaced-%s" % suff) 8050 8051 # Build the rename list based on what LVs exist on the node 8052 rename_old_to_new = [] 8053 for to_ren in old_lvs: 8054 result = self.rpc.call_blockdev_find(self.target_node, to_ren) 8055 if not result.fail_msg and result.payload: 8056 # device exists 8057 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) 8058 8059 self.lu.LogInfo("Renaming the old LVs on the target node") 8060 result = self.rpc.call_blockdev_rename(self.target_node, 8061 rename_old_to_new) 8062 result.Raise("Can't rename old LVs on node %s" % self.target_node) 8063 8064 # Now we rename the new LVs to the old LVs 8065 self.lu.LogInfo("Renaming the new LVs on the target node") 8066 rename_new_to_old = [(new, old.physical_id) 8067 for old, new in zip(old_lvs, new_lvs)] 8068 result = self.rpc.call_blockdev_rename(self.target_node, 8069 rename_new_to_old) 8070 result.Raise("Can't rename new LVs on node %s" % self.target_node) 8071 8072 for old, new in zip(old_lvs, new_lvs): 8073 new.logical_id = old.logical_id 8074 self.cfg.SetDiskID(new, self.target_node) 8075 8076 for disk in old_lvs: 8077 disk.logical_id = ren_fn(disk, temp_suffix) 8078 self.cfg.SetDiskID(disk, self.target_node) 8079 8080 # Now that the new lvs have the 
old name, we can add them to the device 8081 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node) 8082 result = self.rpc.call_blockdev_addchildren(self.target_node, dev, 8083 new_lvs) 8084 msg = result.fail_msg 8085 if msg: 8086 for new_lv in new_lvs: 8087 msg2 = self.rpc.call_blockdev_remove(self.target_node, 8088 new_lv).fail_msg 8089 if msg2: 8090 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, 8091 hint=("cleanup manually the unused logical" 8092 "volumes")) 8093 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) 8094 8095 dev.children = new_lvs 8096 8097 self.cfg.Update(self.instance, feedback_fn) 8098 8099 cstep = 5 8100 if self.early_release: 8101 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8102 cstep += 1 8103 self._RemoveOldStorage(self.target_node, iv_names) 8104 # WARNING: we release both node locks here, do not do other RPCs 8105 # than WaitForSync to the primary node 8106 self._ReleaseNodeLock([self.target_node, self.other_node]) 8107 8108 # Wait for sync 8109 # This can fail as the old devices are degraded and _WaitForSync 8110 # does a combined result over all disks, so we don't check its return value 8111 self.lu.LogStep(cstep, steps_total, "Sync devices") 8112 cstep += 1 8113 _WaitForSync(self.lu, self.instance) 8114 8115 # Check all devices manually 8116 self._CheckDevices(self.instance.primary_node, iv_names) 8117 8118 # Step: remove old storage 8119 if not self.early_release: 8120 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8121 cstep += 1 8122 self._RemoveOldStorage(self.target_node, iv_names)
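Step 4 above hinges on two rename passes: the old LVs move aside under a timestamped suffix, then the new LVs take over the old names. The pairing, reduced to plain strings as a sketch:

import time

def build_rename_lists(old_names, new_names):
  # First pass: old -> "<old>_replaced-<timestamp>"; second pass: new -> old.
  suffix = "_replaced-%d" % int(time.time())
  rename_old_to_new = [(name, name + suffix) for name in old_names]
  rename_new_to_old = zip(new_names, old_names)
  return rename_old_to_new, rename_new_to_old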
8123
8124 - def _ExecDrbd8Secondary(self, feedback_fn):
8125 """Replace the secondary node for DRBD 8. 8126 8127 The algorithm for replace is quite complicated: 8128 - for all disks of the instance: 8129 - create new LVs on the new node with same names 8130 - shutdown the drbd device on the old secondary 8131 - disconnect the drbd network on the primary 8132 - create the drbd device on the new secondary 8133 - network attach the drbd on the primary, using an artifice: 8134 the drbd code for Attach() will connect to the network if it 8135 finds a device which is connected to the good local disks but 8136 not network enabled 8137 - wait for sync across all devices 8138 - remove all disks from the old secondary 8139 8140 Failures are not very well handled. 8141 8142 """ 8143 steps_total = 6 8144 8145 # Step: check device activation 8146 self.lu.LogStep(1, steps_total, "Check device existence") 8147 self._CheckDisksExistence([self.instance.primary_node]) 8148 self._CheckVolumeGroup([self.instance.primary_node]) 8149 8150 # Step: check other node consistency 8151 self.lu.LogStep(2, steps_total, "Check peer consistency") 8152 self._CheckDisksConsistency(self.instance.primary_node, True, True) 8153 8154 # Step: create new storage 8155 self.lu.LogStep(3, steps_total, "Allocate new storage") 8156 for idx, dev in enumerate(self.instance.disks): 8157 self.lu.LogInfo("Adding new local storage on %s for disk/%d" % 8158 (self.new_node, idx)) 8159 # we pass force_create=True to force LVM creation 8160 for new_lv in dev.children: 8161 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True, 8162 _GetInstanceInfoText(self.instance), False) 8163 8164 # Step 4: dbrd minors and drbd setups changes 8165 # after this, we must manually remove the drbd minors on both the 8166 # error and the success paths 8167 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8168 minors = self.cfg.AllocateDRBDMinor([self.new_node 8169 for dev in self.instance.disks], 8170 self.instance.name) 8171 logging.debug("Allocated minors %r", minors) 8172 8173 iv_names = {} 8174 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): 8175 self.lu.LogInfo("activating a new drbd on %s for disk/%d" % 8176 (self.new_node, idx)) 8177 # create new devices on new_node; note that we create two IDs: 8178 # one without port, so the drbd will be activated without 8179 # networking information on the new node at this stage, and one 8180 # with network, for the latter activation in step 4 8181 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id 8182 if self.instance.primary_node == o_node1: 8183 p_minor = o_minor1 8184 else: 8185 assert self.instance.primary_node == o_node2, "Three-node instance?" 
8186 p_minor = o_minor2 8187 8188 new_alone_id = (self.instance.primary_node, self.new_node, None, 8189 p_minor, new_minor, o_secret) 8190 new_net_id = (self.instance.primary_node, self.new_node, o_port, 8191 p_minor, new_minor, o_secret) 8192 8193 iv_names[idx] = (dev, dev.children, new_net_id) 8194 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, 8195 new_net_id) 8196 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, 8197 logical_id=new_alone_id, 8198 children=dev.children, 8199 size=dev.size) 8200 try: 8201 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd, 8202 _GetInstanceInfoText(self.instance), False) 8203 except errors.GenericError: 8204 self.cfg.ReleaseDRBDMinors(self.instance.name) 8205 raise 8206 8207 # We have new devices, shutdown the drbd on the old secondary 8208 for idx, dev in enumerate(self.instance.disks): 8209 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx) 8210 self.cfg.SetDiskID(dev, self.target_node) 8211 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg 8212 if msg: 8213 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" 8214 "node: %s" % (idx, msg), 8215 hint=("Please cleanup this device manually as" 8216 " soon as possible")) 8217 8218 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") 8219 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], 8220 self.node_secondary_ip, 8221 self.instance.disks)\ 8222 [self.instance.primary_node] 8223 8224 msg = result.fail_msg 8225 if msg: 8226 # detaches didn't succeed (unlikely) 8227 self.cfg.ReleaseDRBDMinors(self.instance.name) 8228 raise errors.OpExecError("Can't detach the disks from the network on" 8229 " old node: %s" % (msg,)) 8230 8231 # if we managed to detach at least one, we update all the disks of 8232 # the instance to point to the new secondary 8233 self.lu.LogInfo("Updating instance configuration") 8234 for dev, _, new_logical_id in iv_names.itervalues(): 8235 dev.logical_id = new_logical_id 8236 self.cfg.SetDiskID(dev, self.instance.primary_node) 8237 8238 self.cfg.Update(self.instance, feedback_fn) 8239 8240 # and now perform the drbd attach 8241 self.lu.LogInfo("Attaching primary drbds to new secondary" 8242 " (standalone => connected)") 8243 result = self.rpc.call_drbd_attach_net([self.instance.primary_node, 8244 self.new_node], 8245 self.node_secondary_ip, 8246 self.instance.disks, 8247 self.instance.name, 8248 False) 8249 for to_node, to_result in result.items(): 8250 msg = to_result.fail_msg 8251 if msg: 8252 self.lu.LogWarning("Can't attach drbd disks on node %s: %s", 8253 to_node, msg, 8254 hint=("please do a gnt-instance info to see the" 8255 " status of disks")) 8256 cstep = 5 8257 if self.early_release: 8258 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8259 cstep += 1 8260 self._RemoveOldStorage(self.target_node, iv_names) 8261 # WARNING: we release all node locks here, do not do other RPCs 8262 # than WaitForSync to the primary node 8263 self._ReleaseNodeLock([self.instance.primary_node, 8264 self.target_node, 8265 self.new_node]) 8266 8267 # Wait for sync 8268 # This can fail as the old devices are degraded and _WaitForSync 8269 # does a combined result over all disks, so we don't check its return value 8270 self.lu.LogStep(cstep, steps_total, "Sync devices") 8271 cstep += 1 8272 _WaitForSync(self.lu, self.instance) 8273 8274 # Check all devices manually 8275 self._CheckDevices(self.instance.primary_node, iv_names) 8276 8277 # Step: remove 
old storage 8278 if not self.early_release: 8279 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8280 self._RemoveOldStorage(self.target_node, iv_names)
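The core of the secondary replacement is rewriting each disk's DRBD logical_id so it points at the new node. A standalone sketch of that tuple surgery, mirroring the code above but with plain values instead of config objects:

def make_new_drbd_ids(old_logical_id, primary_node, new_node, new_minor):
  # logical_id layout: (node_a, node_b, port, minor_a, minor_b, secret)
  (node_a, node_b, port, minor_a, minor_b, secret) = old_logical_id
  if primary_node == node_a:
    p_minor = minor_a
  else:
    p_minor = minor_b
  # One id without the port (activated standalone), one with it (networked).
  new_alone_id = (primary_node, new_node, None, p_minor, new_minor, secret)
  new_net_id = (primary_node, new_node, port, p_minor, new_minor, secret)
  return new_alone_id, new_net_id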
8281
8282 8283 -class LURepairNodeStorage(NoHooksLU):
8284 """Repairs the volume group on a node. 8285 8286 """ 8287 _OP_PARAMS = [ 8288 _PNodeName, 8289 ("storage_type", _NoDefault, _CheckStorageType), 8290 ("name", _NoDefault, _TNonEmptyString), 8291 ("ignore_consistency", False, _TBool), 8292 ] 8293 REQ_BGL = False 8294
8295 - def CheckArguments(self):
8296 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 8297 8298 storage_type = self.op.storage_type 8299 8300 if (constants.SO_FIX_CONSISTENCY not in 8301 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): 8302 raise errors.OpPrereqError("Storage units of type '%s' can not be" 8303 " repaired" % storage_type, 8304 errors.ECODE_INVAL)
8305
8306 - def ExpandNames(self):
8307 self.needed_locks = { 8308 locking.LEVEL_NODE: [self.op.node_name], 8309 }
8310
8311 - def _CheckFaultyDisks(self, instance, node_name):
8312 """Ensure faulty disks abort the opcode or at least warn.""" 8313 try: 8314 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, 8315 node_name, True): 8316 raise errors.OpPrereqError("Instance '%s' has faulty disks on" 8317 " node '%s'" % (instance.name, node_name), 8318 errors.ECODE_STATE) 8319 except errors.OpPrereqError, err: 8320 if self.op.ignore_consistency: 8321 self.proc.LogWarning(str(err.args[0])) 8322 else: 8323 raise
8324
8325 - def CheckPrereq(self):
8326 """Check prerequisites. 8327 8328 """ 8329 # Check whether any instance on this node has faulty disks 8330 for inst in _GetNodeInstances(self.cfg, self.op.node_name): 8331 if not inst.admin_up: 8332 continue 8333 check_nodes = set(inst.all_nodes) 8334 check_nodes.discard(self.op.node_name) 8335 for inst_node_name in check_nodes: 8336 self._CheckFaultyDisks(inst, inst_node_name)
8337
8338 - def Exec(self, feedback_fn):
8339 feedback_fn("Repairing storage unit '%s' on %s ..." % 8340 (self.op.name, self.op.node_name)) 8341 8342 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 8343 result = self.rpc.call_storage_execute(self.op.node_name, 8344 self.op.storage_type, st_args, 8345 self.op.name, 8346 constants.SO_FIX_CONSISTENCY) 8347 result.Raise("Failed to repair storage unit '%s' on %s" % 8348 (self.op.name, self.op.node_name))
8349
8350 8351 -class LUNodeEvacuationStrategy(NoHooksLU):
8352 """Computes the node evacuation strategy. 8353 8354 """ 8355 _OP_PARAMS = [ 8356 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)), 8357 ("remote_node", None, _TMaybeString), 8358 ("iallocator", None, _TMaybeString), 8359 ] 8360 REQ_BGL = False 8361
8362 - def CheckArguments(self):
8363 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8364
8365 - def ExpandNames(self):
8366 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 8367 self.needed_locks = locks = {} 8368 if self.op.remote_node is None: 8369 locks[locking.LEVEL_NODE] = locking.ALL_SET 8370 else: 8371 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 8372 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8373
8374 - def Exec(self, feedback_fn):
8375 if self.op.remote_node is not None: 8376 instances = [] 8377 for node in self.op.nodes: 8378 instances.extend(_GetNodeSecondaryInstances(self.cfg, node)) 8379 result = [] 8380 for i in instances: 8381 if i.primary_node == self.op.remote_node: 8382 raise errors.OpPrereqError("Node %s is the primary node of" 8383 " instance %s, cannot use it as" 8384 " secondary" % 8385 (self.op.remote_node, i.name), 8386 errors.ECODE_INVAL) 8387 result.append([i.name, self.op.remote_node]) 8388 else: 8389 ial = IAllocator(self.cfg, self.rpc, 8390 mode=constants.IALLOCATOR_MODE_MEVAC, 8391 evac_nodes=self.op.nodes) 8392 ial.Run(self.op.iallocator, validate=True) 8393 if not ial.success: 8394 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info, 8395 errors.ECODE_NORES) 8396 result = ial.result 8397 return result
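The non-iallocator branch above produces a simple list of [instance, new_secondary] pairs; a standalone sketch of that result shape:

def evac_pairs(instances, remote_node):
  # instances: iterable of (name, primary_node) tuples for the instances
  # whose secondary currently lives on the evacuated nodes.
  result = []
  for name, primary_node in instances:
    if primary_node == remote_node:
      raise ValueError("%s is the primary node of %s, cannot use it as"
                       " secondary" % (remote_node, name))
    result.append([name, remote_node])
  return result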
8398
8399 8400 -class LUGrowDisk(LogicalUnit):
8401 """Grow a disk of an instance. 8402 8403 """ 8404 HPATH = "disk-grow" 8405 HTYPE = constants.HTYPE_INSTANCE 8406 _OP_PARAMS = [ 8407 _PInstanceName, 8408 ("disk", _NoDefault, _TInt), 8409 ("amount", _NoDefault, _TInt), 8410 ("wait_for_sync", True, _TBool), 8411 ] 8412 REQ_BGL = False 8413
8414 - def ExpandNames(self):
8415 self._ExpandAndLockInstance() 8416 self.needed_locks[locking.LEVEL_NODE] = [] 8417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8418
8419 - def DeclareLocks(self, level):
8420 if level == locking.LEVEL_NODE: 8421 self._LockInstancesNodes()
8422
8423 - def BuildHooksEnv(self):
8424 """Build hooks env. 8425 8426 This runs on the master, the primary and all the secondaries. 8427 8428 """ 8429 env = { 8430 "DISK": self.op.disk, 8431 "AMOUNT": self.op.amount, 8432 } 8433 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 8434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 8435 return env, nl, nl
8436
8437 - def CheckPrereq(self):
8438 """Check prerequisites. 8439 8440 This checks that the instance is in the cluster. 8441 8442 """ 8443 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8444 assert instance is not None, \ 8445 "Cannot retrieve locked instance %s" % self.op.instance_name 8446 nodenames = list(instance.all_nodes) 8447 for node in nodenames: 8448 _CheckNodeOnline(self, node) 8449 8450 self.instance = instance 8451 8452 if instance.disk_template not in constants.DTS_GROWABLE: 8453 raise errors.OpPrereqError("Instance's disk layout does not support" 8454 " growing.", errors.ECODE_INVAL) 8455 8456 self.disk = instance.FindDisk(self.op.disk) 8457 8458 if instance.disk_template != constants.DT_FILE: 8459 # TODO: check the free disk space for file, when that feature will be 8460 # supported 8461 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8462
8463 - def Exec(self, feedback_fn):
8464 """Execute disk grow. 8465 8466 """ 8467 instance = self.instance 8468 disk = self.disk 8469 8470 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) 8471 if not disks_ok: 8472 raise errors.OpExecError("Cannot activate block device to grow") 8473 8474 for node in instance.all_nodes: 8475 self.cfg.SetDiskID(disk, node) 8476 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount) 8477 result.Raise("Grow request failed to node %s" % node) 8478 8479 # TODO: Rewrite code to work properly 8480 # DRBD goes into sync mode for a short amount of time after executing the 8481 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby 8482 # calling "resize" in sync mode fails. Sleeping for a short amount of 8483 # time is a work-around. 8484 time.sleep(5) 8485 8486 disk.RecordGrow(self.op.amount) 8487 self.cfg.Update(instance, feedback_fn) 8488 if self.op.wait_for_sync: 8489 disk_abort = not _WaitForSync(self, instance, disks=[disk]) 8490 if disk_abort: 8491 self.proc.LogWarning("Warning: disk sync-ing has not returned a good" 8492 " status.\nPlease check the instance.") 8493 if not instance.admin_up: 8494 _SafeShutdownInstanceDisks(self, instance, disks=[disk]) 8495 elif not instance.admin_up: 8496 self.proc.LogWarning("Not shutting down the disk even if the instance is" 8497 " not supposed to be running because no wait for" 8498 " sync mode was requested.")
8499
8500 8501 -class LUQueryInstanceData(NoHooksLU):
8502 """Query runtime instance data. 8503 8504 """ 8505 _OP_PARAMS = [ 8506 ("instances", _EmptyList, _TListOf(_TNonEmptyString)), 8507 ("static", False, _TBool), 8508 ] 8509 REQ_BGL = False 8510
8511 - def ExpandNames(self):
8512 self.needed_locks = {} 8513 self.share_locks = dict.fromkeys(locking.LEVELS, 1) 8514 8515 if self.op.instances: 8516 self.wanted_names = [] 8517 for name in self.op.instances: 8518 full_name = _ExpandInstanceName(self.cfg, name) 8519 self.wanted_names.append(full_name) 8520 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names 8521 else: 8522 self.wanted_names = None 8523 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 8524 8525 self.needed_locks[locking.LEVEL_NODE] = [] 8526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8527
8528 - def DeclareLocks(self, level):
8529 if level == locking.LEVEL_NODE: 8530 self._LockInstancesNodes()
8531
8532 - def CheckPrereq(self):
8533 """Check prerequisites. 8534 8535 This only checks the optional instance list against the existing names. 8536 8537 """ 8538 if self.wanted_names is None: 8539 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 8540 8541 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 8542 in self.wanted_names]
8543
8544 - def _ComputeBlockdevStatus(self, node, instance_name, dev):
8545 """Returns the status of a block device 8546 8547 """ 8548 if self.op.static or not node: 8549 return None 8550 8551 self.cfg.SetDiskID(dev, node) 8552 8553 result = self.rpc.call_blockdev_find(node, dev) 8554 if result.offline: 8555 return None 8556 8557 result.Raise("Can't compute disk status for %s" % instance_name) 8558 8559 status = result.payload 8560 if status is None: 8561 return None 8562 8563 return (status.dev_path, status.major, status.minor, 8564 status.sync_percent, status.estimated_time, 8565 status.is_degraded, status.ldisk_status)
8566
8567 - def _ComputeDiskStatus(self, instance, snode, dev):
8568 """Compute block device status. 8569 8570 """ 8571 if dev.dev_type in constants.LDS_DRBD: 8572 # we change the snode then (otherwise we use the one passed in) 8573 if dev.logical_id[0] == instance.primary_node: 8574 snode = dev.logical_id[1] 8575 else: 8576 snode = dev.logical_id[0] 8577 8578 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, 8579 instance.name, dev) 8580 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev) 8581 8582 if dev.children: 8583 dev_children = [self._ComputeDiskStatus(instance, snode, child) 8584 for child in dev.children] 8585 else: 8586 dev_children = [] 8587 8588 data = { 8589 "iv_name": dev.iv_name, 8590 "dev_type": dev.dev_type, 8591 "logical_id": dev.logical_id, 8592 "physical_id": dev.physical_id, 8593 "pstatus": dev_pstatus, 8594 "sstatus": dev_sstatus, 8595 "children": dev_children, 8596 "mode": dev.mode, 8597 "size": dev.size, 8598 } 8599 8600 return data
8601
8602 - def Exec(self, feedback_fn):
8603 """Gather and return data""" 8604 result = {} 8605 8606 cluster = self.cfg.GetClusterInfo() 8607 8608 for instance in self.wanted_instances: 8609 if not self.op.static: 8610 remote_info = self.rpc.call_instance_info(instance.primary_node, 8611 instance.name, 8612 instance.hypervisor) 8613 remote_info.Raise("Error checking node %s" % instance.primary_node) 8614 remote_info = remote_info.payload 8615 if remote_info and "state" in remote_info: 8616 remote_state = "up" 8617 else: 8618 remote_state = "down" 8619 else: 8620 remote_state = None 8621 if instance.admin_up: 8622 config_state = "up" 8623 else: 8624 config_state = "down" 8625 8626 disks = [self._ComputeDiskStatus(instance, None, device) 8627 for device in instance.disks] 8628 8629 idict = { 8630 "name": instance.name, 8631 "config_state": config_state, 8632 "run_state": remote_state, 8633 "pnode": instance.primary_node, 8634 "snodes": instance.secondary_nodes, 8635 "os": instance.os, 8636 # this happens to be the same format used for hooks 8637 "nics": _NICListToTuple(self, instance.nics), 8638 "disk_template": instance.disk_template, 8639 "disks": disks, 8640 "hypervisor": instance.hypervisor, 8641 "network_port": instance.network_port, 8642 "hv_instance": instance.hvparams, 8643 "hv_actual": cluster.FillHV(instance, skip_globals=True), 8644 "be_instance": instance.beparams, 8645 "be_actual": cluster.FillBE(instance), 8646 "os_instance": instance.osparams, 8647 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), 8648 "serial_no": instance.serial_no, 8649 "mtime": instance.mtime, 8650 "ctime": instance.ctime, 8651 "uuid": instance.uuid, 8652 } 8653 8654 result[instance.name] = idict 8655 8656 return result
8657
8658 8659 -class LUSetInstanceParams(LogicalUnit):
8660 """Modifies an instances's parameters. 8661 8662 """ 8663 HPATH = "instance-modify" 8664 HTYPE = constants.HTYPE_INSTANCE 8665 _OP_PARAMS = [ 8666 _PInstanceName, 8667 ("nics", _EmptyList, _TList), 8668 ("disks", _EmptyList, _TList), 8669 ("beparams", _EmptyDict, _TDict), 8670 ("hvparams", _EmptyDict, _TDict), 8671 ("disk_template", None, _TMaybeString), 8672 ("remote_node", None, _TMaybeString), 8673 ("os_name", None, _TMaybeString), 8674 ("force_variant", False, _TBool), 8675 ("osparams", None, _TOr(_TDict, _TNone)), 8676 _PForce, 8677 ] 8678 REQ_BGL = False 8679
8680 - def CheckArguments(self):
8681 if not (self.op.nics or self.op.disks or self.op.disk_template or 8682 self.op.hvparams or self.op.beparams or self.op.os_name): 8683 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) 8684 8685 if self.op.hvparams: 8686 _CheckGlobalHvParams(self.op.hvparams) 8687 8688 # Disk validation 8689 disk_addremove = 0 8690 for disk_op, disk_dict in self.op.disks: 8691 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES) 8692 if disk_op == constants.DDM_REMOVE: 8693 disk_addremove += 1 8694 continue 8695 elif disk_op == constants.DDM_ADD: 8696 disk_addremove += 1 8697 else: 8698 if not isinstance(disk_op, int): 8699 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL) 8700 if not isinstance(disk_dict, dict): 8701 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict 8702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 8703 8704 if disk_op == constants.DDM_ADD: 8705 mode = disk_dict.setdefault('mode', constants.DISK_RDWR) 8706 if mode not in constants.DISK_ACCESS_SET: 8707 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, 8708 errors.ECODE_INVAL) 8709 size = disk_dict.get('size', None) 8710 if size is None: 8711 raise errors.OpPrereqError("Required disk parameter size missing", 8712 errors.ECODE_INVAL) 8713 try: 8714 size = int(size) 8715 except (TypeError, ValueError), err: 8716 raise errors.OpPrereqError("Invalid disk size parameter: %s" % 8717 str(err), errors.ECODE_INVAL) 8718 disk_dict['size'] = size 8719 else: 8720 # modification of disk 8721 if 'size' in disk_dict: 8722 raise errors.OpPrereqError("Disk size change not possible, use" 8723 " grow-disk", errors.ECODE_INVAL) 8724 8725 if disk_addremove > 1: 8726 raise errors.OpPrereqError("Only one disk add or remove operation" 8727 " supported at a time", errors.ECODE_INVAL) 8728 8729 if self.op.disks and self.op.disk_template is not None: 8730 raise errors.OpPrereqError("Disk template conversion and other disk" 8731 " changes not supported at the same time", 8732 errors.ECODE_INVAL) 8733 8734 if self.op.disk_template: 8735 _CheckDiskTemplate(self.op.disk_template) 8736 if (self.op.disk_template in constants.DTS_NET_MIRROR and 8737 self.op.remote_node is None): 8738 raise errors.OpPrereqError("Changing the disk template to a mirrored" 8739 " one requires specifying a secondary node", 8740 errors.ECODE_INVAL) 8741 8742 # NIC validation 8743 nic_addremove = 0 8744 for nic_op, nic_dict in self.op.nics: 8745 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES) 8746 if nic_op == constants.DDM_REMOVE: 8747 nic_addremove += 1 8748 continue 8749 elif nic_op == constants.DDM_ADD: 8750 nic_addremove += 1 8751 else: 8752 if not isinstance(nic_op, int): 8753 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL) 8754 if not isinstance(nic_dict, dict): 8755 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict 8756 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 8757 8758 # nic_dict should be a dict 8759 nic_ip = nic_dict.get('ip', None) 8760 if nic_ip is not None: 8761 if nic_ip.lower() == constants.VALUE_NONE: 8762 nic_dict['ip'] = None 8763 else: 8764 if not netutils.IsValidIP4(nic_ip): 8765 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip, 8766 errors.ECODE_INVAL) 8767 8768 nic_bridge = nic_dict.get('bridge', None) 8769 nic_link = nic_dict.get('link', None) 8770 if nic_bridge and nic_link: 8771 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 8772 " at the same time", errors.ECODE_INVAL) 8773 elif nic_bridge and 
nic_bridge.lower() == constants.VALUE_NONE: 8774 nic_dict['bridge'] = None 8775 elif nic_link and nic_link.lower() == constants.VALUE_NONE: 8776 nic_dict['link'] = None 8777 8778 if nic_op == constants.DDM_ADD: 8779 nic_mac = nic_dict.get('mac', None) 8780 if nic_mac is None: 8781 nic_dict['mac'] = constants.VALUE_AUTO 8782 8783 if 'mac' in nic_dict: 8784 nic_mac = nic_dict['mac'] 8785 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 8786 nic_mac = utils.NormalizeAndValidateMac(nic_mac) 8787 8788 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO: 8789 raise errors.OpPrereqError("'auto' is not a valid MAC address when" 8790 " modifying an existing nic", 8791 errors.ECODE_INVAL) 8792 8793 if nic_addremove > 1: 8794 raise errors.OpPrereqError("Only one NIC add or remove operation" 8795 " supported at a time", errors.ECODE_INVAL)
8796
8797 - def ExpandNames(self):
8798 self._ExpandAndLockInstance() 8799 self.needed_locks[locking.LEVEL_NODE] = [] 8800 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8801
8802 - def DeclareLocks(self, level):
8803 if level == locking.LEVEL_NODE: 8804 self._LockInstancesNodes() 8805 if self.op.disk_template and self.op.remote_node: 8806 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 8807 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8808
8809 - def BuildHooksEnv(self):
8810 """Build hooks env. 8811 8812 This runs on the master, primary and secondaries. 8813 8814 """ 8815 args = dict() 8816 if constants.BE_MEMORY in self.be_new: 8817 args['memory'] = self.be_new[constants.BE_MEMORY] 8818 if constants.BE_VCPUS in self.be_new: 8819 args['vcpus'] = self.be_new[constants.BE_VCPUS] 8820 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk 8821 # information at all. 8822 if self.op.nics: 8823 args['nics'] = [] 8824 nic_override = dict(self.op.nics) 8825 for idx, nic in enumerate(self.instance.nics): 8826 if idx in nic_override: 8827 this_nic_override = nic_override[idx] 8828 else: 8829 this_nic_override = {} 8830 if 'ip' in this_nic_override: 8831 ip = this_nic_override['ip'] 8832 else: 8833 ip = nic.ip 8834 if 'mac' in this_nic_override: 8835 mac = this_nic_override['mac'] 8836 else: 8837 mac = nic.mac 8838 if idx in self.nic_pnew: 8839 nicparams = self.nic_pnew[idx] 8840 else: 8841 nicparams = self.cluster.SimpleFillNIC(nic.nicparams) 8842 mode = nicparams[constants.NIC_MODE] 8843 link = nicparams[constants.NIC_LINK] 8844 args['nics'].append((ip, mac, mode, link)) 8845 if constants.DDM_ADD in nic_override: 8846 ip = nic_override[constants.DDM_ADD].get('ip', None) 8847 mac = nic_override[constants.DDM_ADD]['mac'] 8848 nicparams = self.nic_pnew[constants.DDM_ADD] 8849 mode = nicparams[constants.NIC_MODE] 8850 link = nicparams[constants.NIC_LINK] 8851 args['nics'].append((ip, mac, mode, link)) 8852 elif constants.DDM_REMOVE in nic_override: 8853 del args['nics'][-1] 8854 8855 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) 8856 if self.op.disk_template: 8857 env["NEW_DISK_TEMPLATE"] = self.op.disk_template 8858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 8859 return env, nl, nl
8860
8861 - def CheckPrereq(self):
8862 """Check prerequisites. 8863 8864 This only checks the instance list against the existing names. 8865 8866 """ 8867 # checking the new params on the primary/secondary nodes 8868 8869 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8870 cluster = self.cluster = self.cfg.GetClusterInfo() 8871 assert self.instance is not None, \ 8872 "Cannot retrieve locked instance %s" % self.op.instance_name 8873 pnode = instance.primary_node 8874 nodelist = list(instance.all_nodes) 8875 8876 # OS change 8877 if self.op.os_name and not self.op.force: 8878 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name, 8879 self.op.force_variant) 8880 instance_os = self.op.os_name 8881 else: 8882 instance_os = instance.os 8883 8884 if self.op.disk_template: 8885 if instance.disk_template == self.op.disk_template: 8886 raise errors.OpPrereqError("Instance already has disk template %s" % 8887 instance.disk_template, errors.ECODE_INVAL) 8888 8889 if (instance.disk_template, 8890 self.op.disk_template) not in self._DISK_CONVERSIONS: 8891 raise errors.OpPrereqError("Unsupported disk template conversion from" 8892 " %s to %s" % (instance.disk_template, 8893 self.op.disk_template), 8894 errors.ECODE_INVAL) 8895 _CheckInstanceDown(self, instance, "cannot change disk template") 8896 if self.op.disk_template in constants.DTS_NET_MIRROR: 8897 if self.op.remote_node == pnode: 8898 raise errors.OpPrereqError("Given new secondary node %s is the same" 8899 " as the primary node of the instance" % 8900 self.op.remote_node, errors.ECODE_STATE) 8901 _CheckNodeOnline(self, self.op.remote_node) 8902 _CheckNodeNotDrained(self, self.op.remote_node) 8903 disks = [{"size": d.size} for d in instance.disks] 8904 required = _ComputeDiskSize(self.op.disk_template, disks) 8905 _CheckNodesFreeDisk(self, [self.op.remote_node], required) 8906 8907 # hvparams processing 8908 if self.op.hvparams: 8909 hv_type = instance.hypervisor 8910 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams) 8911 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) 8912 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict) 8913 8914 # local check 8915 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new) 8916 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new) 8917 self.hv_new = hv_new # the new actual values 8918 self.hv_inst = i_hvdict # the new dict (without defaults) 8919 else: 8920 self.hv_new = self.hv_inst = {} 8921 8922 # beparams processing 8923 if self.op.beparams: 8924 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams, 8925 use_none=True) 8926 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) 8927 be_new = cluster.SimpleFillBE(i_bedict) 8928 self.be_new = be_new # the new actual values 8929 self.be_inst = i_bedict # the new dict (without defaults) 8930 else: 8931 self.be_new = self.be_inst = {} 8932 8933 # osparams processing 8934 if self.op.osparams: 8935 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 8936 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 8937 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict) 8938 self.os_inst = i_osdict # the new dict (without defaults) 8939 else: 8940 self.os_new = self.os_inst = {} 8941 8942 self.warn = [] 8943 8944 if constants.BE_MEMORY in self.op.beparams and not self.op.force: 8945 mem_check_list = [pnode] 8946 if be_new[constants.BE_AUTO_BALANCE]: 8947 # either we changed auto_balance to yes or it was from before 8948 mem_check_list.extend(instance.secondary_nodes) 8949 
instance_info = self.rpc.call_instance_info(pnode, instance.name, 8950 instance.hypervisor) 8951 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(), 8952 instance.hypervisor) 8953 pninfo = nodeinfo[pnode] 8954 msg = pninfo.fail_msg 8955 if msg: 8956 # Assume the primary node is unreachable and go ahead 8957 self.warn.append("Can't get info from primary node %s: %s" % 8958 (pnode, msg)) 8959 elif not isinstance(pninfo.payload.get('memory_free', None), int): 8960 self.warn.append("Node data from primary node %s doesn't contain" 8961 " free memory information" % pnode) 8962 elif instance_info.fail_msg: 8963 self.warn.append("Can't get instance runtime information: %s" % 8964 instance_info.fail_msg) 8965 else: 8966 if instance_info.payload: 8967 current_mem = int(instance_info.payload['memory']) 8968 else: 8969 # Assume instance not running 8970 # (there is a slight race condition here, but it's not very probable, 8971 # and we have no other way to check) 8972 current_mem = 0 8973 miss_mem = (be_new[constants.BE_MEMORY] - current_mem - 8974 pninfo.payload['memory_free']) 8975 if miss_mem > 0: 8976 raise errors.OpPrereqError("This change will prevent the instance" 8977 " from starting, due to %d MB of memory" 8978 " missing on its primary node" % miss_mem, 8979 errors.ECODE_NORES) 8980 8981 if be_new[constants.BE_AUTO_BALANCE]: 8982 for node, nres in nodeinfo.items(): 8983 if node not in instance.secondary_nodes: 8984 continue 8985 msg = nres.fail_msg 8986 if msg: 8987 self.warn.append("Can't get info from secondary node %s: %s" % 8988 (node, msg)) 8989 elif not isinstance(nres.payload.get('memory_free', None), int): 8990 self.warn.append("Secondary node %s didn't return free" 8991 " memory information" % node) 8992 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']: 8993 self.warn.append("Not enough memory to failover instance to" 8994 " secondary node %s" % node) 8995 8996 # NIC processing 8997 self.nic_pnew = {} 8998 self.nic_pinst = {} 8999 for nic_op, nic_dict in self.op.nics: 9000 if nic_op == constants.DDM_REMOVE: 9001 if not instance.nics: 9002 raise errors.OpPrereqError("Instance has no NICs, cannot remove", 9003 errors.ECODE_INVAL) 9004 continue 9005 if nic_op != constants.DDM_ADD: 9006 # an existing nic 9007 if not instance.nics: 9008 raise errors.OpPrereqError("Invalid NIC index %s, instance has" 9009 " no NICs" % nic_op, 9010 errors.ECODE_INVAL) 9011 if nic_op < 0 or nic_op >= len(instance.nics): 9012 raise errors.OpPrereqError("Invalid NIC index %s, valid values" 9013 " are 0 to %d" % 9014 (nic_op, len(instance.nics) - 1), 9015 errors.ECODE_INVAL) 9016 old_nic_params = instance.nics[nic_op].nicparams 9017 old_nic_ip = instance.nics[nic_op].ip 9018 else: 9019 old_nic_params = {} 9020 old_nic_ip = None 9021 9022 update_params_dict = dict([(key, nic_dict[key]) 9023 for key in constants.NICS_PARAMETERS 9024 if key in nic_dict]) 9025 9026 if 'bridge' in nic_dict: 9027 update_params_dict[constants.NIC_LINK] = nic_dict['bridge'] 9028 9029 new_nic_params = _GetUpdatedParams(old_nic_params, 9030 update_params_dict) 9031 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES) 9032 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params) 9033 objects.NIC.CheckParameterSyntax(new_filled_nic_params) 9034 self.nic_pinst[nic_op] = new_nic_params 9035 self.nic_pnew[nic_op] = new_filled_nic_params 9036 new_nic_mode = new_filled_nic_params[constants.NIC_MODE] 9037 9038 if new_nic_mode == constants.NIC_MODE_BRIDGED: 9039 nic_bridge = 
new_filled_nic_params[constants.NIC_LINK] 9040 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg 9041 if msg: 9042 msg = "Error checking bridges on node %s: %s" % (pnode, msg) 9043 if self.op.force: 9044 self.warn.append(msg) 9045 else: 9046 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 9047 if new_nic_mode == constants.NIC_MODE_ROUTED: 9048 if 'ip' in nic_dict: 9049 nic_ip = nic_dict['ip'] 9050 else: 9051 nic_ip = old_nic_ip 9052 if nic_ip is None: 9053 raise errors.OpPrereqError('Cannot set the nic ip to None' 9054 ' on a routed nic', errors.ECODE_INVAL) 9055 if 'mac' in nic_dict: 9056 nic_mac = nic_dict['mac'] 9057 if nic_mac is None: 9058 raise errors.OpPrereqError('Cannot set the nic mac to None', 9059 errors.ECODE_INVAL) 9060 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9061 # otherwise generate the mac 9062 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId()) 9063 else: 9064 # or validate/reserve the current one 9065 try: 9066 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId()) 9067 except errors.ReservationError: 9068 raise errors.OpPrereqError("MAC address %s already in use" 9069 " in cluster" % nic_mac, 9070 errors.ECODE_NOTUNIQUE) 9071 9072 # DISK processing 9073 if self.op.disks and instance.disk_template == constants.DT_DISKLESS: 9074 raise errors.OpPrereqError("Disk operations not supported for" 9075 " diskless instances", 9076 errors.ECODE_INVAL) 9077 for disk_op, _ in self.op.disks: 9078 if disk_op == constants.DDM_REMOVE: 9079 if len(instance.disks) == 1: 9080 raise errors.OpPrereqError("Cannot remove the last disk of" 9081 " an instance", errors.ECODE_INVAL) 9082 _CheckInstanceDown(self, instance, "cannot remove disks") 9083 9084 if (disk_op == constants.DDM_ADD and 9085 len(instance.nics) >= constants.MAX_DISKS): 9086 raise errors.OpPrereqError("Instance has too many disks (%d), cannot" 9087 " add more" % constants.MAX_DISKS, 9088 errors.ECODE_STATE) 9089 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE): 9090 # an existing disk 9091 if disk_op < 0 or disk_op >= len(instance.disks): 9092 raise errors.OpPrereqError("Invalid disk index %s, valid values" 9093 " are 0 to %d" % 9094 (disk_op, len(instance.disks)), 9095 errors.ECODE_INVAL) 9096 9097 return
9098
9099 - def _ConvertPlainToDrbd(self, feedback_fn):
9100 """Converts an instance from plain to drbd. 9101 9102 """ 9103 feedback_fn("Converting template to drbd") 9104 instance = self.instance 9105 pnode = instance.primary_node 9106 snode = self.op.remote_node 9107 9108 # create a fake disk info for _GenerateDiskTemplate 9109 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks] 9110 new_disks = _GenerateDiskTemplate(self, self.op.disk_template, 9111 instance.name, pnode, [snode], 9112 disk_info, None, None, 0) 9113 info = _GetInstanceInfoText(instance) 9114 feedback_fn("Creating aditional volumes...") 9115 # first, create the missing data and meta devices 9116 for disk in new_disks: 9117 # unfortunately this is... not too nice 9118 _CreateSingleBlockDev(self, pnode, instance, disk.children[1], 9119 info, True) 9120 for child in disk.children: 9121 _CreateSingleBlockDev(self, snode, instance, child, info, True) 9122 # at this stage, all new LVs have been created, we can rename the 9123 # old ones 9124 feedback_fn("Renaming original volumes...") 9125 rename_list = [(o, n.children[0].logical_id) 9126 for (o, n) in zip(instance.disks, new_disks)] 9127 result = self.rpc.call_blockdev_rename(pnode, rename_list) 9128 result.Raise("Failed to rename original LVs") 9129 9130 feedback_fn("Initializing DRBD devices...") 9131 # all child devices are in place, we can now create the DRBD devices 9132 for disk in new_disks: 9133 for node in [pnode, snode]: 9134 f_create = node == pnode 9135 _CreateSingleBlockDev(self, node, instance, disk, info, f_create) 9136 9137 # at this point, the instance has been modified 9138 instance.disk_template = constants.DT_DRBD8 9139 instance.disks = new_disks 9140 self.cfg.Update(instance, feedback_fn) 9141 9142 # disks are created, waiting for sync 9143 disk_abort = not _WaitForSync(self, instance) 9144 if disk_abort: 9145 raise errors.OpExecError("There are some degraded disks for" 9146 " this instance, please cleanup manually")
9147
9148 - def _ConvertDrbdToPlain(self, feedback_fn):
9149 """Converts an instance from drbd to plain. 9150 9151 """ 9152 instance = self.instance 9153 assert len(instance.secondary_nodes) == 1 9154 pnode = instance.primary_node 9155 snode = instance.secondary_nodes[0] 9156 feedback_fn("Converting template to plain") 9157 9158 old_disks = instance.disks 9159 new_disks = [d.children[0] for d in old_disks] 9160 9161 # copy over size and mode 9162 for parent, child in zip(old_disks, new_disks): 9163 child.size = parent.size 9164 child.mode = parent.mode 9165 9166 # update instance structure 9167 instance.disks = new_disks 9168 instance.disk_template = constants.DT_PLAIN 9169 self.cfg.Update(instance, feedback_fn) 9170 9171 feedback_fn("Removing volumes on the secondary node...") 9172 for disk in old_disks: 9173 self.cfg.SetDiskID(disk, snode) 9174 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg 9175 if msg: 9176 self.LogWarning("Could not remove block device %s on node %s," 9177 " continuing anyway: %s", disk.iv_name, snode, msg) 9178 9179 feedback_fn("Removing unneeded volumes on the primary node...") 9180 for idx, disk in enumerate(old_disks): 9181 meta = disk.children[1] 9182 self.cfg.SetDiskID(meta, pnode) 9183 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg 9184 if msg: 9185 self.LogWarning("Could not remove metadata for disk %d on node %s," 9186 " continuing anyway: %s", idx, pnode, msg)
9187 9188
9189 - def Exec(self, feedback_fn):
9190 """Modifies an instance. 9191 9192 All parameters take effect only at the next restart of the instance. 9193 9194 """ 9195 # Process here the warnings from CheckPrereq, as we don't have a 9196 # feedback_fn there. 9197 for warn in self.warn: 9198 feedback_fn("WARNING: %s" % warn) 9199 9200 result = [] 9201 instance = self.instance 9202 # disk changes 9203 for disk_op, disk_dict in self.op.disks: 9204 if disk_op == constants.DDM_REMOVE: 9205 # remove the last disk 9206 device = instance.disks.pop() 9207 device_idx = len(instance.disks) 9208 for node, disk in device.ComputeNodeTree(instance.primary_node): 9209 self.cfg.SetDiskID(disk, node) 9210 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg 9211 if msg: 9212 self.LogWarning("Could not remove disk/%d on node %s: %s," 9213 " continuing anyway", device_idx, node, msg) 9214 result.append(("disk/%d" % device_idx, "remove")) 9215 elif disk_op == constants.DDM_ADD: 9216 # add a new disk 9217 if instance.disk_template == constants.DT_FILE: 9218 file_driver, file_path = instance.disks[0].logical_id 9219 file_path = os.path.dirname(file_path) 9220 else: 9221 file_driver = file_path = None 9222 disk_idx_base = len(instance.disks) 9223 new_disk = _GenerateDiskTemplate(self, 9224 instance.disk_template, 9225 instance.name, instance.primary_node, 9226 instance.secondary_nodes, 9227 [disk_dict], 9228 file_path, 9229 file_driver, 9230 disk_idx_base)[0] 9231 instance.disks.append(new_disk) 9232 info = _GetInstanceInfoText(instance) 9233 9234 logging.info("Creating volume %s for instance %s", 9235 new_disk.iv_name, instance.name) 9236 # Note: this needs to be kept in sync with _CreateDisks 9237 #HARDCODE 9238 for node in instance.all_nodes: 9239 f_create = node == instance.primary_node 9240 try: 9241 _CreateBlockDev(self, node, instance, new_disk, 9242 f_create, info, f_create) 9243 except errors.OpExecError, err: 9244 self.LogWarning("Failed to create volume %s (%s) on" 9245 " node %s: %s", 9246 new_disk.iv_name, new_disk, node, err) 9247 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" % 9248 (new_disk.size, new_disk.mode))) 9249 else: 9250 # change a given disk 9251 instance.disks[disk_op].mode = disk_dict['mode'] 9252 result.append(("disk.mode/%d" % disk_op, disk_dict['mode'])) 9253 9254 if self.op.disk_template: 9255 r_shut = _ShutdownInstanceDisks(self, instance) 9256 if not r_shut: 9257 raise errors.OpExecError("Cannot shutdow instance disks, unable to" 9258 " proceed with disk template conversion") 9259 mode = (instance.disk_template, self.op.disk_template) 9260 try: 9261 self._DISK_CONVERSIONS[mode](self, feedback_fn) 9262 except: 9263 self.cfg.ReleaseDRBDMinors(instance.name) 9264 raise 9265 result.append(("disk_template", self.op.disk_template)) 9266 9267 # NIC changes 9268 for nic_op, nic_dict in self.op.nics: 9269 if nic_op == constants.DDM_REMOVE: 9270 # remove the last nic 9271 del instance.nics[-1] 9272 result.append(("nic.%d" % len(instance.nics), "remove")) 9273 elif nic_op == constants.DDM_ADD: 9274 # mac and bridge should be set, by now 9275 mac = nic_dict['mac'] 9276 ip = nic_dict.get('ip', None) 9277 nicparams = self.nic_pinst[constants.DDM_ADD] 9278 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams) 9279 instance.nics.append(new_nic) 9280 result.append(("nic.%d" % (len(instance.nics) - 1), 9281 "add:mac=%s,ip=%s,mode=%s,link=%s" % 9282 (new_nic.mac, new_nic.ip, 9283 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE], 9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK] 9285 ))) 9286 else: 
9287 for key in 'mac', 'ip': 9288 if key in nic_dict: 9289 setattr(instance.nics[nic_op], key, nic_dict[key]) 9290 if nic_op in self.nic_pinst: 9291 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op] 9292 for key, val in nic_dict.iteritems(): 9293 result.append(("nic.%s/%d" % (key, nic_op), val)) 9294 9295 # hvparams changes 9296 if self.op.hvparams: 9297 instance.hvparams = self.hv_inst 9298 for key, val in self.op.hvparams.iteritems(): 9299 result.append(("hv/%s" % key, val)) 9300 9301 # beparams changes 9302 if self.op.beparams: 9303 instance.beparams = self.be_inst 9304 for key, val in self.op.beparams.iteritems(): 9305 result.append(("be/%s" % key, val)) 9306 9307 # OS change 9308 if self.op.os_name: 9309 instance.os = self.op.os_name 9310 9311 # osparams changes 9312 if self.op.osparams: 9313 instance.osparams = self.os_inst 9314 for key, val in self.op.osparams.iteritems(): 9315 result.append(("os/%s" % key, val)) 9316 9317 self.cfg.Update(instance, feedback_fn) 9318 9319 return result
9320 9321 _DISK_CONVERSIONS = { 9322 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, 9323 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, 9324 }
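_DISK_CONVERSIONS keys the conversion handlers on the (old template, new template) pair, which is also how CheckPrereq rejects unsupported conversions. A stripped-down sketch of the same tuple-keyed dispatch pattern follows, with made-up template names and handlers:

def _PlainToDrbd(feedback_fn):
  feedback_fn("converting plain -> drbd8")


def _DrbdToPlain(feedback_fn):
  feedback_fn("converting drbd8 -> plain")


_CONVERSIONS = {
  ("plain", "drbd8"): _PlainToDrbd,
  ("drbd8", "plain"): _DrbdToPlain,
  }


def Convert(old_template, new_template, feedback_fn):
  """Dispatches on the (old, new) template pair, as the LU above does."""
  try:
    handler = _CONVERSIONS[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported disk template conversion from %s to %s" %
                     (old_template, new_template))
  handler(feedback_fn)


Convert("plain", "drbd8", lambda msg: None)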
9325
9326 9327 -class LUQueryExports(NoHooksLU):
9328 """Query the exports list 9329 9330 """ 9331 _OP_PARAMS = [ 9332 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), 9333 ("use_locking", False, _TBool), 9334 ] 9335 REQ_BGL = False 9336
9337 - def ExpandNames(self):
9338 self.needed_locks = {} 9339 self.share_locks[locking.LEVEL_NODE] = 1 9340 if not self.op.nodes: 9341 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 9342 else: 9343 self.needed_locks[locking.LEVEL_NODE] = \ 9344 _GetWantedNodes(self, self.op.nodes)
9345
9346 - def Exec(self, feedback_fn):
9347 """Compute the list of all the exported system images. 9348 9349 @rtype: dict 9350 @return: a dictionary with the structure node->(export-list) 9351 where export-list is a list of the instances exported on 9352 that node. 9353 9354 """ 9355 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 9356 rpcresult = self.rpc.call_export_list(self.nodes) 9357 result = {} 9358 for node in rpcresult: 9359 if rpcresult[node].fail_msg: 9360 result[node] = False 9361 else: 9362 result[node] = rpcresult[node].payload 9363 9364 return result
9365
9366 9367 -class LUPrepareExport(NoHooksLU):
9368 """Prepares an instance for an export and returns useful information. 9369 9370 """ 9371 _OP_PARAMS = [ 9372 _PInstanceName, 9373 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)), 9374 ] 9375 REQ_BGL = False 9376
9377 - def ExpandNames(self):
9378 self._ExpandAndLockInstance() 9379
9380 - def CheckPrereq(self):
9381 """Check prerequisites. 9382 9383 """ 9384 instance_name = self.op.instance_name 9385 9386 self.instance = self.cfg.GetInstanceInfo(instance_name) 9387 assert self.instance is not None, \ 9388 "Cannot retrieve locked instance %s" % self.op.instance_name 9389 _CheckNodeOnline(self, self.instance.primary_node) 9390 9391 self._cds = _GetClusterDomainSecret()
9392
9393 - def Exec(self, feedback_fn):
9394 """Prepares an instance for an export. 9395 9396 """ 9397 instance = self.instance 9398 9399 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9400 salt = utils.GenerateSecret(8) 9401 9402 feedback_fn("Generating X509 certificate on %s" % instance.primary_node) 9403 result = self.rpc.call_x509_cert_create(instance.primary_node, 9404 constants.RIE_CERT_VALIDITY) 9405 result.Raise("Can't create X509 key and certificate on %s" % result.node) 9406 9407 (name, cert_pem) = result.payload 9408 9409 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 9410 cert_pem) 9411 9412 return { 9413 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), 9414 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), 9415 salt), 9416 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), 9417 } 9418 9419 return None
9420
9421 9422 -class LUExportInstance(LogicalUnit):
9423 """Export an instance to an image in the cluster. 9424 9425 """ 9426 HPATH = "instance-export" 9427 HTYPE = constants.HTYPE_INSTANCE 9428 _OP_PARAMS = [ 9429 _PInstanceName, 9430 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)), 9431 ("shutdown", True, _TBool), 9432 _PShutdownTimeout, 9433 ("remove_instance", False, _TBool), 9434 ("ignore_remove_failures", False, _TBool), 9435 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)), 9436 ("x509_key_name", None, _TOr(_TList, _TNone)), 9437 ("destination_x509_ca", None, _TMaybeString), 9438 ] 9439 REQ_BGL = False 9440
9441 - def CheckArguments(self):
9442 """Check the arguments. 9443 9444 """ 9445 self.x509_key_name = self.op.x509_key_name 9446 self.dest_x509_ca_pem = self.op.destination_x509_ca 9447 9448 if self.op.remove_instance and not self.op.shutdown: 9449 raise errors.OpPrereqError("Can not remove instance without shutting it" 9450 " down before") 9451 9452 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9453 if not self.x509_key_name: 9454 raise errors.OpPrereqError("Missing X509 key name for encryption", 9455 errors.ECODE_INVAL) 9456 9457 if not self.dest_x509_ca_pem: 9458 raise errors.OpPrereqError("Missing destination X509 CA", 9459 errors.ECODE_INVAL)
9460
9461 - def ExpandNames(self):
9462 self._ExpandAndLockInstance() 9463 9464 # Lock all nodes for local exports 9465 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9466 # FIXME: lock only instance primary and destination node 9467 # 9468 # Sad but true, for now we have do lock all nodes, as we don't know where 9469 # the previous export might be, and in this LU we search for it and 9470 # remove it from its current node. In the future we could fix this by: 9471 # - making a tasklet to search (share-lock all), then create the 9472 # new one, then one to remove, after 9473 # - removing the removal operation altogether 9474 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9475
9476 - def DeclareLocks(self, level):
9477 """Last minute lock declaration."""
9478 # All nodes are locked anyway, so nothing to do here. 9479
9480 - def BuildHooksEnv(self):
9481 """Build hooks env. 9482 9483 This will run on the master, primary node and target node. 9484 9485 """ 9486 env = { 9487 "EXPORT_MODE": self.op.mode, 9488 "EXPORT_NODE": self.op.target_node, 9489 "EXPORT_DO_SHUTDOWN": self.op.shutdown, 9490 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 9491 # TODO: Generic function for boolean env variables 9492 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), 9493 } 9494 9495 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 9496 9497 nl = [self.cfg.GetMasterNode(), self.instance.primary_node] 9498 9499 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9500 nl.append(self.op.target_node) 9501 9502 return env, nl, nl
9503
9504 - def CheckPrereq(self):
9505 """Check prerequisites. 9506 9507 This checks that the instance and node names are valid. 9508 9509 """ 9510 instance_name = self.op.instance_name 9511 9512 self.instance = self.cfg.GetInstanceInfo(instance_name) 9513 assert self.instance is not None, \ 9514 "Cannot retrieve locked instance %s" % self.op.instance_name 9515 _CheckNodeOnline(self, self.instance.primary_node) 9516 9517 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 9519 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) 9520 assert self.dst_node is not None 9521 9522 _CheckNodeOnline(self, self.dst_node.name) 9523 _CheckNodeNotDrained(self, self.dst_node.name) 9524 9525 self._cds = None 9526 self.dest_disk_info = None 9527 self.dest_x509_ca = None 9528 9529 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 9530 self.dst_node = None 9531 9532 if len(self.op.target_node) != len(self.instance.disks): 9533 raise errors.OpPrereqError(("Received destination information for %s" 9534 " disks, but instance %s has %s disks") % 9535 (len(self.op.target_node), instance_name, 9536 len(self.instance.disks)), 9537 errors.ECODE_INVAL) 9538 9539 cds = _GetClusterDomainSecret() 9540 9541 # Check X509 key name 9542 try: 9543 (key_name, hmac_digest, hmac_salt) = self.x509_key_name 9544 except (TypeError, ValueError), err: 9545 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err) 9546 9547 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): 9548 raise errors.OpPrereqError("HMAC for X509 key name is wrong", 9549 errors.ECODE_INVAL) 9550 9551 # Load and verify CA 9552 try: 9553 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) 9554 except OpenSSL.crypto.Error, err: 9555 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % 9556 (err, ), errors.ECODE_INVAL) 9557 9558 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 9559 if errcode is not None: 9560 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % 9561 (msg, ), errors.ECODE_INVAL) 9562 9563 self.dest_x509_ca = cert 9564 9565 # Verify target information 9566 disk_info = [] 9567 for idx, disk_data in enumerate(self.op.target_node): 9568 try: 9569 (host, port, magic) = \ 9570 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) 9571 except errors.GenericError, err: 9572 raise errors.OpPrereqError("Target info for disk %s: %s" % 9573 (idx, err), errors.ECODE_INVAL) 9574 9575 disk_info.append((host, port, magic)) 9576 9577 assert len(disk_info) == len(self.op.target_node) 9578 self.dest_disk_info = disk_info 9579 9580 else: 9581 raise errors.ProgrammerError("Unhandled export mode %r" % 9582 self.op.mode) 9583 9584 # instance disk type verification 9585 # TODO: Implement export support for file-based disks 9586 for disk in self.instance.disks: 9587 if disk.dev_type == constants.LD_FILE: 9588 raise errors.OpPrereqError("Export not supported for instances with" 9589 " file-based disks", errors.ECODE_INVAL)
9590
9591 - def _CleanupExports(self, feedback_fn):
9592 """Removes exports of current instance from all other nodes. 9593 9594 If an instance in a cluster with nodes A..D was exported to node C, its 9595 exports will be removed from the nodes A, B and D. 9596 9597 """ 9598 assert self.op.mode != constants.EXPORT_MODE_REMOTE 9599 9600 nodelist = self.cfg.GetNodeList() 9601 nodelist.remove(self.dst_node.name) 9602 9603 # on one-node clusters nodelist will be empty after the removal 9604 # if we proceed the backup would be removed because OpQueryExports 9605 # substitutes an empty list with the full cluster node list. 9606 iname = self.instance.name 9607 if nodelist: 9608 feedback_fn("Removing old exports for instance %s" % iname) 9609 exportlist = self.rpc.call_export_list(nodelist) 9610 for node in exportlist: 9611 if exportlist[node].fail_msg: 9612 continue 9613 if iname in exportlist[node].payload: 9614 msg = self.rpc.call_export_remove(node, iname).fail_msg 9615 if msg: 9616 self.LogWarning("Could not remove older export for instance %s" 9617 " on node %s: %s", iname, node, msg)
9618
9619 - def Exec(self, feedback_fn):
9620 """Export an instance to an image in the cluster. 9621 9622 """ 9623 assert self.op.mode in constants.EXPORT_MODES 9624 9625 instance = self.instance 9626 src_node = instance.primary_node 9627 9628 if self.op.shutdown: 9629 # shutdown the instance, but not the disks 9630 feedback_fn("Shutting down instance %s" % instance.name) 9631 result = self.rpc.call_instance_shutdown(src_node, instance, 9632 self.op.shutdown_timeout) 9633 # TODO: Maybe ignore failures if ignore_remove_failures is set 9634 result.Raise("Could not shutdown instance %s on" 9635 " node %s" % (instance.name, src_node)) 9636 9637 # set the disks ID correctly since call_instance_start needs the 9638 # correct drbd minor to create the symlinks 9639 for disk in instance.disks: 9640 self.cfg.SetDiskID(disk, src_node) 9641 9642 activate_disks = (not instance.admin_up) 9643 9644 if activate_disks: 9645 # Activate the instance disks if we'exporting a stopped instance 9646 feedback_fn("Activating disks for %s" % instance.name) 9647 _StartInstanceDisks(self, instance, None) 9648 9649 try: 9650 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, 9651 instance) 9652 9653 helper.CreateSnapshots() 9654 try: 9655 if (self.op.shutdown and instance.admin_up and 9656 not self.op.remove_instance): 9657 assert not activate_disks 9658 feedback_fn("Starting instance %s" % instance.name) 9659 result = self.rpc.call_instance_start(src_node, instance, None, None) 9660 msg = result.fail_msg 9661 if msg: 9662 feedback_fn("Failed to start instance: %s" % msg) 9663 _ShutdownInstanceDisks(self, instance) 9664 raise errors.OpExecError("Could not start instance: %s" % msg) 9665 9666 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9667 (fin_resu, dresults) = helper.LocalExport(self.dst_node) 9668 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 9669 connect_timeout = constants.RIE_CONNECT_TIMEOUT 9670 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 9671 9672 (key_name, _, _) = self.x509_key_name 9673 9674 dest_ca_pem = \ 9675 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, 9676 self.dest_x509_ca) 9677 9678 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info, 9679 key_name, dest_ca_pem, 9680 timeouts) 9681 finally: 9682 helper.Cleanup() 9683 9684 # Check for backwards compatibility 9685 assert len(dresults) == len(instance.disks) 9686 assert compat.all(isinstance(i, bool) for i in dresults), \ 9687 "Not all results are boolean: %r" % dresults 9688 9689 finally: 9690 if activate_disks: 9691 feedback_fn("Deactivating disks for %s" % instance.name) 9692 _ShutdownInstanceDisks(self, instance) 9693 9694 if not (compat.all(dresults) and fin_resu): 9695 failures = [] 9696 if not fin_resu: 9697 failures.append("export finalization") 9698 if not compat.all(dresults): 9699 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) 9700 if not dsk) 9701 failures.append("disk export: disk(s) %s" % fdsk) 9702 9703 raise errors.OpExecError("Export failed, errors in %s" % 9704 utils.CommaJoin(failures)) 9705 9706 # At this point, the export was successful, we can cleanup/finish 9707 9708 # Remove instance if requested 9709 if self.op.remove_instance: 9710 feedback_fn("Removing instance %s" % instance.name) 9711 _RemoveInstance(self, feedback_fn, instance, 9712 self.op.ignore_remove_failures) 9713 9714 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9715 self._CleanupExports(feedback_fn) 9716 9717 return fin_resu, dresults
9718
9719 9720 -class LURemoveExport(NoHooksLU):
9721 """Remove exports related to the named instance. 9722 9723 """ 9724 _OP_PARAMS = [ 9725 _PInstanceName, 9726 ] 9727 REQ_BGL = False 9728
9729 - def ExpandNames(self):
9730 self.needed_locks = {} 9731 # We need all nodes to be locked in order for RemoveExport to work, but we 9732 # don't need to lock the instance itself, as nothing will happen to it (and 9733 # we can remove exports also for a removed instance) 9734 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9735
9736 - def Exec(self, feedback_fn):
9737 """Remove any export. 9738 9739 """ 9740 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) 9741 # If the instance was not found we'll try with the name that was passed in. 9742 # This will only work if it was an FQDN, though. 9743 fqdn_warn = False 9744 if not instance_name: 9745 fqdn_warn = True 9746 instance_name = self.op.instance_name 9747 9748 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 9749 exportlist = self.rpc.call_export_list(locked_nodes) 9750 found = False 9751 for node in exportlist: 9752 msg = exportlist[node].fail_msg 9753 if msg: 9754 self.LogWarning("Failed to query node %s (continuing): %s", node, msg) 9755 continue 9756 if instance_name in exportlist[node].payload: 9757 found = True 9758 result = self.rpc.call_export_remove(node, instance_name) 9759 msg = result.fail_msg 9760 if msg: 9761 logging.error("Could not remove export for instance %s" 9762 " on node %s: %s", instance_name, node, msg) 9763 9764 if fqdn_warn and not found: 9765 feedback_fn("Export not found. If trying to remove an export belonging" 9766 " to a deleted instance please use its Fully Qualified" 9767 " Domain Name.")
9768
9769 9770 -class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9771 """Generic tags LU. 9772 9773 This is an abstract class which is the parent of all the other tags LUs. 9774 9775 """ 9776
9777 - def ExpandNames(self):
9778 self.needed_locks = {} 9779 if self.op.kind == constants.TAG_NODE: 9780 self.op.name = _ExpandNodeName(self.cfg, self.op.name) 9781 self.needed_locks[locking.LEVEL_NODE] = self.op.name 9782 elif self.op.kind == constants.TAG_INSTANCE: 9783 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) 9784 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9785 9786 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's 9787 # not possible to acquire the BGL based on opcode parameters) 9788
9789 - def CheckPrereq(self):
9790 """Check prerequisites. 9791 9792 """ 9793 if self.op.kind == constants.TAG_CLUSTER: 9794 self.target = self.cfg.GetClusterInfo() 9795 elif self.op.kind == constants.TAG_NODE: 9796 self.target = self.cfg.GetNodeInfo(self.op.name) 9797 elif self.op.kind == constants.TAG_INSTANCE: 9798 self.target = self.cfg.GetInstanceInfo(self.op.name) 9799 else: 9800 raise errors.OpPrereqError("Wrong tag type requested (%s)" % 9801 str(self.op.kind), errors.ECODE_INVAL)
9802
9803 9804 -class LUGetTags(TagsLU):
9805 """Returns the tags of a given object. 9806 9807 """ 9808 _OP_PARAMS = [ 9809 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), 9810 # Name is only meaningful for nodes and instances 9811 ("name", _NoDefault, _TMaybeString), 9812 ] 9813 REQ_BGL = False 9814
9815 - def ExpandNames(self):
9816 TagsLU.ExpandNames(self) 9817 9818 # Share locks as this is only a read operation 9819 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9820
9821 - def Exec(self, feedback_fn):
9822 """Returns the tag list. 9823 9824 """ 9825 return list(self.target.GetTags())
9826
9827 9828 -class LUSearchTags(NoHooksLU):
9829 """Searches the tags for a given pattern. 9830 9831 """ 9832 _OP_PARAMS = [ 9833 ("pattern", _NoDefault, _TNonEmptyString), 9834 ] 9835 REQ_BGL = False 9836
9837 - def ExpandNames(self):
9838 self.needed_locks = {}
9839
9840 - def CheckPrereq(self):
9841 """Check prerequisites. 9842 9843 This checks the pattern passed for validity by compiling it. 9844 9845 """ 9846 try: 9847 self.re = re.compile(self.op.pattern) 9848 except re.error, err: 9849 raise errors.OpPrereqError("Invalid search pattern '%s': %s" % 9850 (self.op.pattern, err), errors.ECODE_INVAL)
9851
9852 - def Exec(self, feedback_fn):
9853 """Returns the tag list. 9854 9855 """ 9856 cfg = self.cfg 9857 tgts = [("/cluster", cfg.GetClusterInfo())] 9858 ilist = cfg.GetAllInstancesInfo().values() 9859 tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) 9860 nlist = cfg.GetAllNodesInfo().values() 9861 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) 9862 results = [] 9863 for path, target in tgts: 9864 for tag in target.GetTags(): 9865 if self.re.search(tag): 9866 results.append((path, tag)) 9867 return results
9868
9869 9870 -class LUAddTags(TagsLU):
9871 """Sets a tag on a given object. 9872 9873 """ 9874 _OP_PARAMS = [ 9875 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), 9876 # Name is only meaningful for nodes and instances 9877 ("name", _NoDefault, _TMaybeString), 9878 ("tags", _NoDefault, _TListOf(_TNonEmptyString)), 9879 ] 9880 REQ_BGL = False 9881
9882 - def CheckPrereq(self):
9883 """Check prerequisites. 9884 9885 This checks the type and length of the tag name and value. 9886 9887 """ 9888 TagsLU.CheckPrereq(self) 9889 for tag in self.op.tags: 9890 objects.TaggableObject.ValidateTag(tag)
9891
9892 - def Exec(self, feedback_fn):
9893 """Sets the tag. 9894 9895 """ 9896 try: 9897 for tag in self.op.tags: 9898 self.target.AddTag(tag) 9899 except errors.TagError, err: 9900 raise errors.OpExecError("Error while setting tag: %s" % str(err)) 9901 self.cfg.Update(self.target, feedback_fn)
9902
9903 9904 -class LUDelTags(TagsLU):
9905 """Delete a list of tags from a given object. 9906 9907 """ 9908 _OP_PARAMS = [ 9909 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), 9910 # Name is only meaningful for nodes and instances 9911 ("name", _NoDefault, _TMaybeString), 9912 ("tags", _NoDefault, _TListOf(_TNonEmptyString)), 9913 ] 9914 REQ_BGL = False 9915
9916 - def CheckPrereq(self):
9917 """Check prerequisites. 9918 9919 This checks that we have the given tag. 9920 9921 """ 9922 TagsLU.CheckPrereq(self) 9923 for tag in self.op.tags: 9924 objects.TaggableObject.ValidateTag(tag) 9925 del_tags = frozenset(self.op.tags) 9926 cur_tags = self.target.GetTags() 9927 9928 diff_tags = del_tags - cur_tags 9929 if diff_tags: 9930 diff_names = ("'%s'" % i for i in sorted(diff_tags)) 9931 raise errors.OpPrereqError("Tag(s) %s not found" % 9932 (utils.CommaJoin(diff_names), ), 9933 errors.ECODE_NOENT)
9934
9935 - def Exec(self, feedback_fn):
9936 """Remove the tag from the object. 9937 9938 """ 9939 for tag in self.op.tags: 9940 self.target.RemoveTag(tag) 9941 self.cfg.Update(self.target, feedback_fn)
9942
9943 9944 -class LUTestDelay(NoHooksLU):
9945 """Sleep for a specified amount of time. 9946 9947 This LU sleeps on the master and/or nodes for a specified amount of 9948 time. 9949 9950 """ 9951 _OP_PARAMS = [ 9952 ("duration", _NoDefault, _TFloat), 9953 ("on_master", True, _TBool), 9954 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)), 9955 ("repeat", 0, _TPositiveInt) 9956 ] 9957 REQ_BGL = False 9958
9959 - def ExpandNames(self):
9960 """Expand names and set required locks. 9961 9962 This expands the node list, if any. 9963 9964 """ 9965 self.needed_locks = {} 9966 if self.op.on_nodes: 9967 # _GetWantedNodes can be used here, but is not always appropriate to use 9968 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for 9969 # more information. 9970 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes) 9971 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9972
9973 - def _TestDelay(self):
9974 """Do the actual sleep. 9975 9976 """ 9977 if self.op.on_master: 9978 if not utils.TestDelay(self.op.duration): 9979 raise errors.OpExecError("Error during master delay test") 9980 if self.op.on_nodes: 9981 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration) 9982 for node, node_result in result.items(): 9983 node_result.Raise("Failure during rpc call to node %s" % node)
9984
9985 - def Exec(self, feedback_fn):
9986 """Execute the test delay opcode, with the wanted repetitions. 9987 9988 """ 9989 if self.op.repeat == 0: 9990 self._TestDelay() 9991 else: 9992 top_value = self.op.repeat - 1 9993 for i in range(self.op.repeat): 9994 self.LogInfo("Test delay iteration %d/%d" % (i, top_value)) 9995 self._TestDelay()
9996
9997 9998 -class LUTestJobqueue(NoHooksLU):
9999 """Utility LU to test some aspects of the job queue. 10000 10001 """ 10002 _OP_PARAMS = [ 10003 ("notify_waitlock", False, _TBool), 10004 ("notify_exec", False, _TBool), 10005 ("log_messages", _EmptyList, _TListOf(_TString)), 10006 ("fail", False, _TBool), 10007 ] 10008 REQ_BGL = False 10009 10010 # Must be lower than default timeout for WaitForJobChange to see whether it 10011 # notices changed jobs 10012 _CLIENT_CONNECT_TIMEOUT = 20.0 10013 _CLIENT_CONFIRM_TIMEOUT = 60.0 10014 10015 @classmethod
10016 - def _NotifyUsingSocket(cls, cb, errcls):
10017 """Opens a Unix socket and waits for another program to connect. 10018 10019 @type cb: callable 10020 @param cb: Callback to send socket name to client 10021 @type errcls: class 10022 @param errcls: Exception class to use for errors 10023 10024 """ 10025 # Using a temporary directory as there's no easy way to create temporary 10026 # sockets without writing a custom loop around tempfile.mktemp and 10027 # socket.bind 10028 tmpdir = tempfile.mkdtemp() 10029 try: 10030 tmpsock = utils.PathJoin(tmpdir, "sock") 10031 10032 logging.debug("Creating temporary socket at %s", tmpsock) 10033 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 10034 try: 10035 sock.bind(tmpsock) 10036 sock.listen(1) 10037 10038 # Send details to client 10039 cb(tmpsock) 10040 10041 # Wait for client to connect before continuing 10042 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) 10043 try: 10044 (conn, _) = sock.accept() 10045 except socket.error, err: 10046 raise errcls("Client didn't connect in time (%s)" % err) 10047 finally: 10048 sock.close() 10049 finally: 10050 # Remove as soon as client is connected 10051 shutil.rmtree(tmpdir) 10052 10053 # Wait for client to close 10054 try: 10055 try: 10056 # pylint: disable-msg=E1101 10057 # Instance of '_socketobject' has no ... member 10058 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) 10059 conn.recv(1) 10060 except socket.error, err: 10061 raise errcls("Client failed to confirm notification (%s)" % err) 10062 finally: 10063 conn.close()
10064
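# _NotifyUsingSocket above implements a small handshake: bind a throwaway
# AF_UNIX socket, hand its path to the peer via the callback, wait for a
# connection and then for a confirmation byte (or close).  A minimal
# client-side counterpart could look like the sketch below (hypothetical; the
# real test client lives outside this module):
import socket

def _ConfirmJobqueueNotification(sockname):
  """Connect to the notification socket and confirm receipt."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)  # unblocks the server's accept()
    sock.send("x")          # unblocks the server's conn.recv(1)
  finally:
    sock.close()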
10065 - def _SendNotification(self, test, arg, sockname):
10066 """Sends a notification to the client. 10067 10068 @type test: string 10069 @param test: Test name 10070 @param arg: Test argument (depends on test) 10071 @type sockname: string 10072 @param sockname: Socket path 10073 10074 """ 10075 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10076
10077 - def _Notify(self, prereq, test, arg):
10078 """Notifies the client of a test. 10079 10080 @type prereq: bool 10081 @param prereq: Whether this is a prereq-phase test 10082 @type test: string 10083 @param test: Test name 10084 @param arg: Test argument (depends on test) 10085 10086 """ 10087 if prereq: 10088 errcls = errors.OpPrereqError 10089 else: 10090 errcls = errors.OpExecError 10091 10092 return self._NotifyUsingSocket(compat.partial(self._SendNotification, 10093 test, arg), 10094 errcls)
10095
10096 - def CheckArguments(self):
10097 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 10098 self.expandnames_calls = 0
10099
10100 - def ExpandNames(self):
10101 checkargs_calls = getattr(self, "checkargs_calls", 0) 10102 if checkargs_calls < 1: 10103 raise errors.ProgrammerError("CheckArguments was not called") 10104 10105 self.expandnames_calls += 1 10106 10107 if self.op.notify_waitlock: 10108 self._Notify(True, constants.JQT_EXPANDNAMES, None) 10109 10110 self.LogInfo("Expanding names") 10111 10112 # Get lock on master node (just to get a lock, not for a particular reason) 10113 self.needed_locks = { 10114 locking.LEVEL_NODE: self.cfg.GetMasterNode(), 10115 }
10116
10117 - def Exec(self, feedback_fn):
10118 if self.expandnames_calls < 1: 10119 raise errors.ProgrammerError("ExpandNames was not called") 10120 10121 if self.op.notify_exec: 10122 self._Notify(False, constants.JQT_EXEC, None) 10123 10124 self.LogInfo("Executing") 10125 10126 if self.op.log_messages: 10127 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) 10128 for idx, msg in enumerate(self.op.log_messages): 10129 self.LogInfo("Sending log message %s", idx + 1) 10130 feedback_fn(constants.JQT_MSGPREFIX + msg) 10131 # Report how many test messages have been sent 10132 self._Notify(False, constants.JQT_LOGMSG, idx + 1) 10133 10134 if self.op.fail: 10135 raise errors.OpExecError("Opcode failure was requested") 10136 10137 return True
10138
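# Exec above forwards each test log message through feedback_fn with
# constants.JQT_MSGPREFIX prepended.  A consumer watching the job's feedback
# strings can recover the raw messages with a small filter like this
# (hypothetical sketch, operating on plain feedback strings):
def _ExtractJobqueueTestMessages(feedback_strings):
  """Return the raw test messages hidden in prefixed feedback strings."""
  prefix = constants.JQT_MSGPREFIX
  return [msg[len(prefix):]
          for msg in feedback_strings
          if isinstance(msg, basestring) and msg.startswith(prefix)]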
10139 10140 -class IAllocator(object):
10141 """IAllocator framework. 10142 10143 An IAllocator instance has three sets of attributes: 10144 - cfg that is needed to query the cluster 10145 - input data (all members of the _KEYS class attribute are required) 10146 - four buffer attributes (in|out_data|text), that represent the 10147 input (to the external script) in text and data structure format, 10148 and the output from it, again in two formats 10149 - the result variables from the script (success, info, nodes) for 10150 easy usage 10151 10152 """ 10153 # pylint: disable-msg=R0902 10154 # lots of instance attributes 10155 _ALLO_KEYS = [ 10156 "name", "mem_size", "disks", "disk_template", 10157 "os", "tags", "nics", "vcpus", "hypervisor", 10158 ] 10159 _RELO_KEYS = [ 10160 "name", "relocate_from", 10161 ] 10162 _EVAC_KEYS = [ 10163 "evac_nodes", 10164 ] 10165
10166 - def __init__(self, cfg, rpc, mode, **kwargs):
10167 self.cfg = cfg 10168 self.rpc = rpc 10169 # init buffer variables 10170 self.in_text = self.out_text = self.in_data = self.out_data = None 10171 # init all input fields so that pylint is happy 10172 self.mode = mode 10173 self.mem_size = self.disks = self.disk_template = None 10174 self.os = self.tags = self.nics = self.vcpus = None 10175 self.hypervisor = None 10176 self.relocate_from = None 10177 self.name = None 10178 self.evac_nodes = None 10179 # computed fields 10180 self.required_nodes = None 10181 # init result fields 10182 self.success = self.info = self.result = None 10183 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 10184 keyset = self._ALLO_KEYS 10185 fn = self._AddNewInstance 10186 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 10187 keyset = self._RELO_KEYS 10188 fn = self._AddRelocateInstance 10189 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 10190 keyset = self._EVAC_KEYS 10191 fn = self._AddEvacuateNodes 10192 else: 10193 raise errors.ProgrammerError("Unknown mode '%s' passed to the" 10194 " IAllocator" % self.mode) 10195 for key in kwargs: 10196 if key not in keyset: 10197 raise errors.ProgrammerError("Invalid input parameter '%s' to" 10198 " IAllocator" % key) 10199 setattr(self, key, kwargs[key]) 10200 10201 for key in keyset: 10202 if key not in kwargs: 10203 raise errors.ProgrammerError("Missing input parameter '%s' to" 10204 " IAllocator" % key) 10205 self._BuildInputData(fn)
10206
10207 - def _ComputeClusterData(self):
10208 """Compute the generic allocator input data. 10209 10210 This is the data that is independent of the actual operation. 10211 10212 """ 10213 cfg = self.cfg 10214 cluster_info = cfg.GetClusterInfo() 10215 # cluster data 10216 data = { 10217 "version": constants.IALLOCATOR_VERSION, 10218 "cluster_name": cfg.GetClusterName(), 10219 "cluster_tags": list(cluster_info.GetTags()), 10220 "enabled_hypervisors": list(cluster_info.enabled_hypervisors), 10221 # we don't have job IDs 10222 } 10223 iinfo = cfg.GetAllInstancesInfo().values() 10224 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] 10225 10226 # node data 10227 node_results = {} 10228 node_list = cfg.GetNodeList() 10229 10230 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 10231 hypervisor_name = self.hypervisor 10232 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 10233 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor 10234 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 10235 hypervisor_name = cluster_info.enabled_hypervisors[0] 10236 10237 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(), 10238 hypervisor_name) 10239 node_iinfo = \ 10240 self.rpc.call_all_instances_info(node_list, 10241 cluster_info.enabled_hypervisors) 10242 for nname, nresult in node_data.items(): 10243 # first fill in static (config-based) values 10244 ninfo = cfg.GetNodeInfo(nname) 10245 pnr = { 10246 "tags": list(ninfo.GetTags()), 10247 "primary_ip": ninfo.primary_ip, 10248 "secondary_ip": ninfo.secondary_ip, 10249 "offline": ninfo.offline, 10250 "drained": ninfo.drained, 10251 "master_candidate": ninfo.master_candidate, 10252 } 10253 10254 if not (ninfo.offline or ninfo.drained): 10255 nresult.Raise("Can't get data for node %s" % nname) 10256 node_iinfo[nname].Raise("Can't get node instance info from node %s" % 10257 nname) 10258 remote_info = nresult.payload 10259 10260 for attr in ['memory_total', 'memory_free', 'memory_dom0', 10261 'vg_size', 'vg_free', 'cpu_total']: 10262 if attr not in remote_info: 10263 raise errors.OpExecError("Node '%s' didn't return attribute" 10264 " '%s'" % (nname, attr)) 10265 if not isinstance(remote_info[attr], int): 10266 raise errors.OpExecError("Node '%s' returned invalid value" 10267 " for '%s': %s" % 10268 (nname, attr, remote_info[attr])) 10269 # compute memory used by primary instances 10270 i_p_mem = i_p_up_mem = 0 10271 for iinfo, beinfo in i_list: 10272 if iinfo.primary_node == nname: 10273 i_p_mem += beinfo[constants.BE_MEMORY] 10274 if iinfo.name not in node_iinfo[nname].payload: 10275 i_used_mem = 0 10276 else: 10277 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory']) 10278 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem 10279 remote_info['memory_free'] -= max(0, i_mem_diff) 10280 10281 if iinfo.admin_up: 10282 i_p_up_mem += beinfo[constants.BE_MEMORY] 10283 10284 # compute memory used by instances 10285 pnr_dyn = { 10286 "total_memory": remote_info['memory_total'], 10287 "reserved_memory": remote_info['memory_dom0'], 10288 "free_memory": remote_info['memory_free'], 10289 "total_disk": remote_info['vg_size'], 10290 "free_disk": remote_info['vg_free'], 10291 "total_cpus": remote_info['cpu_total'], 10292 "i_pri_memory": i_p_mem, 10293 "i_pri_up_memory": i_p_up_mem, 10294 } 10295 pnr.update(pnr_dyn) 10296 10297 node_results[nname] = pnr 10298 data["nodes"] = node_results 10299 10300 # instance data 10301 instance_data = {} 10302 for iinfo, beinfo in i_list: 10303 nic_data = [] 10304 for nic in iinfo.nics: 10305 filled_params = 
cluster_info.SimpleFillNIC(nic.nicparams) 10306 nic_dict = {"mac": nic.mac, 10307 "ip": nic.ip, 10308 "mode": filled_params[constants.NIC_MODE], 10309 "link": filled_params[constants.NIC_LINK], 10310 } 10311 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 10312 nic_dict["bridge"] = filled_params[constants.NIC_LINK] 10313 nic_data.append(nic_dict) 10314 pir = { 10315 "tags": list(iinfo.GetTags()), 10316 "admin_up": iinfo.admin_up, 10317 "vcpus": beinfo[constants.BE_VCPUS], 10318 "memory": beinfo[constants.BE_MEMORY], 10319 "os": iinfo.os, 10320 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes), 10321 "nics": nic_data, 10322 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks], 10323 "disk_template": iinfo.disk_template, 10324 "hypervisor": iinfo.hypervisor, 10325 } 10326 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template, 10327 pir["disks"]) 10328 instance_data[iinfo.name] = pir 10329 10330 data["instances"] = instance_data 10331 10332 self.in_data = data
10333
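# Worked example of the free-memory correction in _ComputeClusterData above:
# for every primary instance, the node's reported memory_free is reduced by
# the part of the instance's configured memory that the hypervisor does not
# yet report as used, i.e. max(0, BE_MEMORY - reported_instance_memory).
# With hypothetical numbers, an instance configured for 1024 MiB that
# currently uses 256 MiB shrinks a reported memory_free of 4096 MiB to 3328:
be_memory, i_used_mem, reported_free = 1024, 256, 4096
adjusted_free = reported_free - max(0, be_memory - i_used_mem)
assert adjusted_free == 3328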
10334 - def _AddNewInstance(self):
10335 """Add new instance data to allocator structure. 10336 10337 This in combination with _AllocatorGetClusterData will create the 10338 correct structure needed as input for the allocator. 10339 10340 The checks for the completeness of the opcode must have already been 10341 done. 10342 10343 """ 10344 disk_space = _ComputeDiskSize(self.disk_template, self.disks) 10345 10346 if self.disk_template in constants.DTS_NET_MIRROR: 10347 self.required_nodes = 2 10348 else: 10349 self.required_nodes = 1 10350 request = { 10351 "name": self.name, 10352 "disk_template": self.disk_template, 10353 "tags": self.tags, 10354 "os": self.os, 10355 "vcpus": self.vcpus, 10356 "memory": self.mem_size, 10357 "disks": self.disks, 10358 "disk_space_total": disk_space, 10359 "nics": self.nics, 10360 "required_nodes": self.required_nodes, 10361 } 10362 return request
10363
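# Example of the request dictionary built by _AddNewInstance above, with
# purely hypothetical values; required_nodes is 2 because a network-mirrored
# template (constants.DTS_NET_MIRROR) needs a primary and a secondary node:
example_alloc_request = {
  "name": "inst1.example.com",
  "disk_template": constants.DT_DRBD8,
  "tags": ["example"],
  "os": "debian-image",
  "vcpus": 2,
  "memory": 1024,
  "disks": [{"size": 10240, "mode": "w"}],
  "disk_space_total": 10368,  # hypothetical; the real code takes this
                              # from _ComputeDiskSize for the disks above
  "nics": [{"mac": "aa:00:00:35:bd:01", "ip": None, "bridge": "xen-br0"}],
  "required_nodes": 2,
}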
10364 - def _AddRelocateInstance(self):
10365 """Add relocate instance data to allocator structure. 10366 10367 This in combination with _IAllocatorGetClusterData will create the 10368 correct structure needed as input for the allocator. 10369 10370 The checks for the completeness of the opcode must have already been 10371 done. 10372 10373 """ 10374 instance = self.cfg.GetInstanceInfo(self.name) 10375 if instance is None: 10376 raise errors.ProgrammerError("Unknown instance '%s' passed to" 10377 " IAllocator" % self.name) 10378 10379 if instance.disk_template not in constants.DTS_NET_MIRROR: 10380 raise errors.OpPrereqError("Can't relocate non-mirrored instances", 10381 errors.ECODE_INVAL) 10382 10383 if len(instance.secondary_nodes) != 1: 10384 raise errors.OpPrereqError("Instance has not exactly one secondary node", 10385 errors.ECODE_STATE) 10386 10387 self.required_nodes = 1 10388 disk_sizes = [{'size': disk.size} for disk in instance.disks] 10389 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes) 10390 10391 request = { 10392 "name": self.name, 10393 "disk_space_total": disk_space, 10394 "required_nodes": self.required_nodes, 10395 "relocate_from": self.relocate_from, 10396 } 10397 return request
10398
10399 - def _AddEvacuateNodes(self):
10400 """Add evacuate nodes data to allocator structure. 10401 10402 """ 10403 request = { 10404 "evac_nodes": self.evac_nodes 10405 } 10406 return request
10407
10408 - def _BuildInputData(self, fn):
10409 """Build input data structures. 10410 10411 """ 10412 self._ComputeClusterData() 10413 10414 request = fn() 10415 request["type"] = self.mode 10416 self.in_data["request"] = request 10417 10418 self.in_text = serializer.Dump(self.in_data)
10419
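# The structure serialized by _BuildInputData above combines the cluster
# snapshot from _ComputeClusterData with the mode-specific request; abridged
# shape with hypothetical values (the real node and instance dictionaries
# carry many more keys, as built above):
example_in_data = {
  "version": constants.IALLOCATOR_VERSION,
  "cluster_name": "cluster.example.com",
  "cluster_tags": [],
  "enabled_hypervisors": ["xen-pvm"],
  "nodes": {
    "node1.example.com": {"total_memory": 16384, "free_memory": 8192},
  },
  "instances": {
    "inst1.example.com": {"memory": 1024, "vcpus": 2},
  },
  "request": {
    "type": constants.IALLOCATOR_MODE_ALLOC,
    "name": "inst1.example.com",
  },
}
# self.in_text is then serializer.Dump(example_in_data), i.e. the JSON text
# handed to the external allocator script.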
10420 - def Run(self, name, validate=True, call_fn=None):
10421 """Run an instance allocator and return the results. 10422 10423 """ 10424 if call_fn is None: 10425 call_fn = self.rpc.call_iallocator_runner 10426 10427 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) 10428 result.Raise("Failure while running the iallocator script") 10429 10430 self.out_text = result.payload 10431 if validate: 10432 self._ValidateResult()
10433
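# Run() above takes an optional call_fn precisely so that the RPC round-trip
# can be replaced, for example in tests.  A stand-in only has to mimic the
# Raise()/payload interface used on the result (hypothetical sketch):
class _FakeAllocatorResult(object):
  def __init__(self, payload):
    self.payload = payload
  def Raise(self, msg):
    # this stub never reports an RPC failure
    pass

def _FakeIAllocatorRunner(node, name, in_text):
  """Pretend the allocator script succeeded without allocating anything."""
  return _FakeAllocatorResult('{"success": true, "info": "stub",'
                              ' "result": []}')

# ial.Run("hail", call_fn=_FakeIAllocatorRunner) would then feed the canned
# answer through _ValidateResult instead of contacting the master node.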
10434 - def _ValidateResult(self):
10435 """Process the allocator results. 10436 10437 This will process and if successful save the result in 10438 self.out_data and the other parameters. 10439 10440 """ 10441 try: 10442 rdict = serializer.Load(self.out_text) 10443 except Exception, err: 10444 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err)) 10445 10446 if not isinstance(rdict, dict): 10447 raise errors.OpExecError("Can't parse iallocator results: not a dict") 10448 10449 # TODO: remove backwards compatiblity in later versions 10450 if "nodes" in rdict and "result" not in rdict: 10451 rdict["result"] = rdict["nodes"] 10452 del rdict["nodes"] 10453 10454 for key in "success", "info", "result": 10455 if key not in rdict: 10456 raise errors.OpExecError("Can't parse iallocator results:" 10457 " missing key '%s'" % key) 10458 setattr(self, key, rdict[key]) 10459 10460 if not isinstance(rdict["result"], list): 10461 raise errors.OpExecError("Can't parse iallocator results: 'result' key" 10462 " is not a list") 10463 self.out_data = rdict
10464
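# _ValidateResult above expects a JSON object with at least "success", "info"
# and "result" (a list); older scripts returning "nodes" are rewritten for
# backwards compatibility.  A minimal external allocator satisfying that
# contract could look like the sketch below.  Assumption: per the iallocator
# protocol (documented outside this module), the script receives the input
# file name as its only argument and answers on stdout.
import sys
import simplejson

def main():
  request = simplejson.load(open(sys.argv[1]))
  # Refuse every request: a valid shape, but an unsuccessful answer.
  answer = {
    "success": False,
    "info": "refusing request of type %s" % request["request"]["type"],
    "result": [],
  }
  print simplejson.dumps(answer)

if __name__ == "__main__":
  main()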
10465 10466 -class LUTestAllocator(NoHooksLU):
10467 """Run allocator tests. 10468 10469 This LU runs the allocator tests 10470 10471 """ 10472 _OP_PARAMS = [ 10473 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)), 10474 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)), 10475 ("name", _NoDefault, _TNonEmptyString), 10476 ("nics", _NoDefault, _TOr(_TNone, _TListOf( 10477 _TDictOf(_TElemOf(["mac", "ip", "bridge"]), 10478 _TOr(_TNone, _TNonEmptyString))))), 10479 ("disks", _NoDefault, _TOr(_TNone, _TList)), 10480 ("hypervisor", None, _TMaybeString), 10481 ("allocator", None, _TMaybeString), 10482 ("tags", _EmptyList, _TListOf(_TNonEmptyString)), 10483 ("mem_size", None, _TOr(_TNone, _TPositiveInt)), 10484 ("vcpus", None, _TOr(_TNone, _TPositiveInt)), 10485 ("os", None, _TMaybeString), 10486 ("disk_template", None, _TMaybeString), 10487 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))), 10488 ] 10489
10490 - def CheckPrereq(self):
10491 """Check prerequisites. 10492 10493 This checks the opcode parameters depending on the director and mode test. 10494 10495 """ 10496 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 10497 for attr in ["mem_size", "disks", "disk_template", 10498 "os", "tags", "nics", "vcpus"]: 10499 if not hasattr(self.op, attr): 10500 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" % 10501 attr, errors.ECODE_INVAL) 10502 iname = self.cfg.ExpandInstanceName(self.op.name) 10503 if iname is not None: 10504 raise errors.OpPrereqError("Instance '%s' already in the cluster" % 10505 iname, errors.ECODE_EXISTS) 10506 if not isinstance(self.op.nics, list): 10507 raise errors.OpPrereqError("Invalid parameter 'nics'", 10508 errors.ECODE_INVAL) 10509 if not isinstance(self.op.disks, list): 10510 raise errors.OpPrereqError("Invalid parameter 'disks'", 10511 errors.ECODE_INVAL) 10512 for row in self.op.disks: 10513 if (not isinstance(row, dict) or 10514 "size" not in row or 10515 not isinstance(row["size"], int) or 10516 "mode" not in row or 10517 row["mode"] not in ['r', 'w']): 10518 raise errors.OpPrereqError("Invalid contents of the 'disks'" 10519 " parameter", errors.ECODE_INVAL) 10520 if self.op.hypervisor is None: 10521 self.op.hypervisor = self.cfg.GetHypervisorType() 10522 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 10523 fname = _ExpandInstanceName(self.cfg, self.op.name) 10524 self.op.name = fname 10525 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes 10526 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC: 10527 if not hasattr(self.op, "evac_nodes"): 10528 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on" 10529 " opcode input", errors.ECODE_INVAL) 10530 else: 10531 raise errors.OpPrereqError("Invalid test allocator mode '%s'" % 10532 self.op.mode, errors.ECODE_INVAL) 10533 10534 if self.op.direction == constants.IALLOCATOR_DIR_OUT: 10535 if self.op.allocator is None: 10536 raise errors.OpPrereqError("Missing allocator name", 10537 errors.ECODE_INVAL) 10538 elif self.op.direction != constants.IALLOCATOR_DIR_IN: 10539 raise errors.OpPrereqError("Wrong allocator test '%s'" % 10540 self.op.direction, errors.ECODE_INVAL)
10541
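# Parameter shapes accepted by CheckPrereq above for the allocation direction,
# with hypothetical values: each disk needs an integer "size" and a "mode" of
# 'r' or 'w', and each NIC may only use the "mac", "ip" and "bridge" keys
# allowed by _OP_PARAMS:
valid_test_disks = [
  {"size": 1024, "mode": "w"},
  {"size": 4096, "mode": "r"},
]
valid_test_nics = [
  {"mac": "aa:00:00:35:bd:01", "ip": None, "bridge": "xen-br0"},
]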
10542 - def Exec(self, feedback_fn):
10543 """Run the allocator test. 10544 10545 """ 10546 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 10547 ial = IAllocator(self.cfg, self.rpc, 10548 mode=self.op.mode, 10549 name=self.op.name, 10550 mem_size=self.op.mem_size, 10551 disks=self.op.disks, 10552 disk_template=self.op.disk_template, 10553 os=self.op.os, 10554 tags=self.op.tags, 10555 nics=self.op.nics, 10556 vcpus=self.op.vcpus, 10557 hypervisor=self.op.hypervisor, 10558 ) 10559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 10560 ial = IAllocator(self.cfg, self.rpc, 10561 mode=self.op.mode, 10562 name=self.op.name, 10563 relocate_from=list(self.relocate_from), 10564 ) 10565 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC: 10566 ial = IAllocator(self.cfg, self.rpc, 10567 mode=self.op.mode, 10568 evac_nodes=self.op.evac_nodes) 10569 else: 10570 raise errors.ProgrammerError("Uncatched mode %s in" 10571 " LUTestAllocator.Exec", self.op.mode) 10572 10573 if self.op.direction == constants.IALLOCATOR_DIR_IN: 10574 result = ial.in_text 10575 else: 10576 ial.Run(self.op.allocator, validate=False) 10577 result = ial.out_text 10578 return result
10579