
Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable-msg=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have way too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import platform 
   36  import logging 
   37  import copy 
   38  import OpenSSL 
   39  import socket 
   40  import tempfile 
   41  import shutil 
   42   
   43  from ganeti import ssh 
   44  from ganeti import utils 
   45  from ganeti import errors 
   46  from ganeti import hypervisor 
   47  from ganeti import locking 
   48  from ganeti import constants 
   49  from ganeti import objects 
   50  from ganeti import serializer 
   51  from ganeti import ssconf 
   52  from ganeti import uidpool 
   53  from ganeti import compat 
   54  from ganeti import masterd 
   55  from ganeti import netutils 
   56  from ganeti import ht 
   57   
   58  import ganeti.masterd.instance # pylint: disable-msg=W0611 
   59   
   60  # Common opcode attributes 
   61   
   62  #: output fields for a query operation 
   63  _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)) 
   64   
   65   
   66  #: the shutdown timeout 
   67  _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, 
   68                       ht.TPositiveInt) 
   69   
   70  #: the force parameter 
   71  _PForce = ("force", False, ht.TBool) 
   72   
   73  #: a required instance name (for single-instance LUs) 
   74  _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString) 
   75   
   76  #: Whether to ignore offline nodes 
   77  _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool) 
   78   
   79  #: a required node name (for single-node LUs) 
   80  _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString) 
   81   
   82  #: the migration type (live/non-live) 
   83  _PMigrationMode = ("mode", None, 
   84                     ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES))) 
   85   
   86  #: the obsolete 'live' mode (boolean) 
   87  _PMigrationLive = ("live", None, ht.TMaybeBool) 
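# Illustrative sketch (not part of the original module): each of the common
# parameters above is a (name, default, type-check) tuple, and a hypothetical LU
# would reuse them in its _OP_PARAMS list, e.g.:
#
#   class LUExampleShutdown(LogicalUnit):   # hypothetical LU, for illustration only
#     _OP_PARAMS = [
#       _PInstanceName,                     # required: default is ht.NoDefault
#       _PShutdownTimeout,                  # optional: falls back to the constant
#       _PForce,                            # optional boolean, defaults to False
#       ("custom_flag", False, ht.TBool),   # hypothetical LU-specific parameter
#     ]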
   88  
   89  
   90  # End types 
   91  class LogicalUnit(object): 
   92    """Logical Unit base class. 
   93   
   94    Subclasses must follow these rules: 
   95      - implement ExpandNames 
   96      - implement CheckPrereq (except when tasklets are used) 
   97      - implement Exec (except when tasklets are used) 
   98      - implement BuildHooksEnv 
   99      - redefine HPATH and HTYPE 
  100      - optionally redefine their run requirements: 
  101          REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 
  102   
  103    Note that all commands require root permissions. 
  104   
  105    @ivar dry_run_result: the value (if any) that will be returned to the caller 
  106        in dry-run mode (signalled by the opcode dry_run parameter) 
  107    @cvar _OP_PARAMS: a list of opcode attributes, the default values 
  108        they should get if not already defined, and the types they must match 
  109   
  110    """ 
  111    HPATH = None 
  112    HTYPE = None 
  113    _OP_PARAMS = [] 
  114    REQ_BGL = True 
  115   
  116    def __init__(self, processor, op, context, rpc): 
  117      """Constructor for LogicalUnit. 
  118   
  119      This needs to be overridden in derived classes in order to check op 
  120      validity. 
  121   
  122      """ 
  123      self.proc = processor 
  124      self.op = op 
  125      self.cfg = context.cfg 
  126      self.context = context 
  127      self.rpc = rpc 
  128      # Dicts used to declare locking needs to mcpu 
  129      self.needed_locks = None 
  130      self.acquired_locks = {} 
  131      self.share_locks = dict.fromkeys(locking.LEVELS, 0) 
  132      self.add_locks = {} 
  133      self.remove_locks = {} 
  134      # Used to force good behavior when calling helper functions 
  135      self.recalculate_locks = {} 
  136      self.__ssh = None 
  137      # logging 
  138      self.Log = processor.Log # pylint: disable-msg=C0103 
  139      self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103 
  140      self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103 
  141      self.LogStep = processor.LogStep # pylint: disable-msg=C0103 
  142      # support for dry-run 
  143      self.dry_run_result = None 
  144      # support for generic debug attribute 
  145      if (not hasattr(self.op, "debug_level") or 
  146          not isinstance(self.op.debug_level, int)): 
  147        self.op.debug_level = 0 
  148   
  149      # Tasklets 
  150      self.tasklets = None 
  151   
  152      # The new kind-of-type-system 
  153      op_id = self.op.OP_ID 
  154      for attr_name, aval, test in self._OP_PARAMS: 
  155        if not hasattr(op, attr_name): 
  156          if aval == ht.NoDefault: 
  157            raise errors.OpPrereqError("Required parameter '%s.%s' missing" % 
  158                                       (op_id, attr_name), errors.ECODE_INVAL) 
  159          else: 
  160            if callable(aval): 
  161              dval = aval() 
  162            else: 
  163              dval = aval 
  164            setattr(self.op, attr_name, dval) 
  165        attr_val = getattr(op, attr_name) 
  166        if test == ht.NoType: 
  167          # no tests here 
  168          continue 
  169        if not callable(test): 
  170          raise errors.ProgrammerError("Validation for parameter '%s.%s' failed," 
  171                                       " given type is not a proper type (%s)" % 
  172                                       (op_id, attr_name, test)) 
  173        if not test(attr_val): 
  174          logging.error("OpCode %s, parameter %s, has invalid type %s/value %s", 
  175                        self.op.OP_ID, attr_name, type(attr_val), attr_val) 
  176          raise errors.OpPrereqError("Parameter '%s.%s' fails validation" % 
  177                                     (op_id, attr_name), errors.ECODE_INVAL) 
  178   
  179      self.CheckArguments() 
180
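# Illustrative note (not part of the original module): given the validation loop
# above, a hypothetical LU declaring _OP_PARAMS = [_PInstanceName] behaves as follows:
#   - opcode submitted without instance_name:
#       raises OpPrereqError("Required parameter '<OP_ID>.instance_name' missing")
#   - opcode with an instance_name that fails the ht.TNonEmptyString check:
#       raises OpPrereqError("Parameter '<OP_ID>.instance_name' fails validation")
#   - a valid opcode then proceeds to self.CheckArguments()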
  181    def __GetSSH(self): 
  182      """Returns the SshRunner object 
  183   
  184      """ 
  185      if not self.__ssh: 
  186        self.__ssh = ssh.SshRunner(self.cfg.GetClusterName()) 
  187      return self.__ssh 
  188   
  189    ssh = property(fget=__GetSSH) 
  190   
  191    def CheckArguments(self): 
  192      """Check syntactic validity for the opcode arguments. 
  193   
  194      This method is for doing a simple syntactic check and ensuring the 
  195      validity of opcode parameters, without any cluster-related 
  196      checks. While the same can be accomplished in ExpandNames and/or 
  197      CheckPrereq, doing these separately is better because: 
  198   
  199        - ExpandNames is left as purely a lock-related function 
  200        - CheckPrereq is run after we have acquired locks (and possibly 
  201          waited for them) 
  202   
  203      The function is allowed to change the self.op attribute so that 
  204      later methods no longer need to worry about missing parameters. 
  205   
  206      """ 
  207      pass 
208
  209    def ExpandNames(self): 
  210      """Expand names for this LU. 
  211   
  212      This method is called before starting to execute the opcode, and it should 
  213      update all the parameters of the opcode to their canonical form (e.g. a 
  214      short node name must be fully expanded after this method has successfully 
  215      completed). This way locking, hooks, logging, etc. can work correctly. 
  216   
  217      LUs which implement this method must also populate the self.needed_locks 
  218      member, as a dict with lock levels as keys, and a list of needed lock names 
  219      as values. Rules: 
  220   
  221        - use an empty dict if you don't need any lock 
  222        - if you don't need any lock at a particular level, omit that level 
  223        - don't put anything for the BGL level 
  224        - if you want all locks at a level, use locking.ALL_SET as a value 
  225   
  226      If you need to share locks (rather than acquire them exclusively) at one 
  227      level you can modify self.share_locks, setting a true value (usually 1) for 
  228      that level. By default locks are not shared. 
  229   
  230      This function can also define a list of tasklets, which then will be 
  231      executed in order instead of the usual LU-level CheckPrereq and Exec 
  232      functions, if those are not defined by the LU. 
  233   
  234      Examples:: 
  235   
  236        # Acquire all nodes and one instance 
  237        self.needed_locks = { 
  238          locking.LEVEL_NODE: locking.ALL_SET, 
  239          locking.LEVEL_INSTANCE: ['instance1.example.com'], 
  240        } 
  241        # Acquire just two nodes 
  242        self.needed_locks = { 
  243          locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 
  244        } 
  245        # Acquire no locks 
  246        self.needed_locks = {} # No, you can't leave it to the default value None 
  247   
  248      """ 
  249      # The implementation of this method is mandatory only if the new LU is 
  250      # concurrent, so that old LUs don't need to be changed all at the same 
  251      # time. 
  252      if self.REQ_BGL: 
  253        self.needed_locks = {} # Exclusive LUs don't need locks. 
  254      else: 
  255        raise NotImplementedError 
256
  257    def DeclareLocks(self, level): 
  258      """Declare LU locking needs for a level 
  259   
  260      While most LUs can just declare their locking needs at ExpandNames time, 
  261      sometimes there's the need to calculate some locks after having acquired 
  262      the ones before. This function is called just before acquiring locks at a 
  263      particular level, but after acquiring the ones at lower levels, and permits 
  264      such calculations. It can be used to modify self.needed_locks, and by 
  265      default it does nothing. 
  266   
  267      This function is only called if you have something already set in 
  268      self.needed_locks for the level. 
  269   
  270      @param level: Locking level which is going to be locked 
  271      @type level: member of ganeti.locking.LEVELS 
  272   
  273      """ 
274
  275    def CheckPrereq(self): 
  276      """Check prerequisites for this LU. 
  277   
  278      This method should check that the prerequisites for the execution 
  279      of this LU are fulfilled. It can do internode communication, but 
  280      it should be idempotent - no cluster or system changes are 
  281      allowed. 
  282   
  283      The method should raise errors.OpPrereqError in case something is 
  284      not fulfilled. Its return value is ignored. 
  285   
  286      This method should also update all the parameters of the opcode to 
  287      their canonical form if it hasn't been done by ExpandNames before. 
  288   
  289      """ 
  290      if self.tasklets is not None: 
  291        for (idx, tl) in enumerate(self.tasklets): 
  292          logging.debug("Checking prerequisites for tasklet %s/%s", 
  293                        idx + 1, len(self.tasklets)) 
  294          tl.CheckPrereq() 
  295      else: 
  296        pass 
297
  298    def Exec(self, feedback_fn): 
  299      """Execute the LU. 
  300   
  301      This method should implement the actual work. It should raise 
  302      errors.OpExecError for failures that are somewhat dealt with in 
  303      code, or expected. 
  304   
  305      """ 
  306      if self.tasklets is not None: 
  307        for (idx, tl) in enumerate(self.tasklets): 
  308          logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 
  309          tl.Exec(feedback_fn) 
  310      else: 
  311        raise NotImplementedError 
312
  313    def BuildHooksEnv(self): 
  314      """Build hooks environment for this LU. 
  315   
  316      This method should return a three-element tuple consisting of: a dict 
  317      containing the environment that will be used for running the 
  318      specific hook for this LU, a list of node names on which the hook 
  319      should run before the execution, and a list of node names on which 
  320      the hook should run after the execution. 
  321   
  322      The keys of the dict must not have the 'GANETI_' prefix, as this will 
  323      be handled in the hooks runner. Also note that additional keys will be 
  324      added by the hooks runner. If the LU doesn't define any 
  325      environment, an empty dict (and not None) should be returned. 
  326   
  327      If there are no nodes, an empty list (and not None) should be returned. 
  328   
  329      Note that if the HPATH for a LU class is None, this function will 
  330      not be called. 
  331   
  332      """ 
  333      raise NotImplementedError 
334
  335    def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result): 
  336      """Notify the LU about the results of its hooks. 
  337   
  338      This method is called every time a hooks phase is executed, and notifies 
  339      the Logical Unit about the hooks' result. The LU can then use it to alter 
  340      its result based on the hooks. By default the method does nothing and the 
  341      previous result is passed back unchanged, but any LU can override it if it 
  342      wants to use the local cluster hook-scripts somehow. 
  343   
  344      @param phase: one of L{constants.HOOKS_PHASE_POST} or 
  345          L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 
  346      @param hook_results: the results of the multi-node hooks rpc call 
  347      @param feedback_fn: function used to send feedback back to the caller 
  348      @param lu_result: the previous Exec result this LU had, or None 
  349          in the PRE phase 
  350      @return: the new Exec result, based on the previous result 
  351          and hook results 
  352   
  353      """ 
  354      # API must be kept, thus we ignore the unused-argument and 
  355      # could-be-a-function warnings 
  356      # pylint: disable-msg=W0613,R0201 
  357      return lu_result 
358
  359    def _ExpandAndLockInstance(self): 
  360      """Helper function to expand and lock an instance. 
  361   
  362      Many LUs that work on an instance take its name in self.op.instance_name 
  363      and need to expand it and then declare the expanded name for locking. This 
  364      function does it, and then updates self.op.instance_name to the expanded 
  365      name. It also initializes needed_locks as a dict, if this hasn't been done 
  366      before. 
  367   
  368      """ 
  369      if self.needed_locks is None: 
  370        self.needed_locks = {} 
  371      else: 
  372        assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 
  373          "_ExpandAndLockInstance called with instance-level locks set" 
  374      self.op.instance_name = _ExpandInstanceName(self.cfg, 
  375                                                  self.op.instance_name) 
  376      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name 
377
  378    def _LockInstancesNodes(self, primary_only=False): 
  379      """Helper function to declare instances' nodes for locking. 
  380   
  381      This function should be called after locking one or more instances to lock 
  382      their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 
  383      with all primary or secondary nodes for instances already locked and 
  384      present in self.needed_locks[locking.LEVEL_INSTANCE]. 
  385   
  386      It should be called from DeclareLocks, and for safety only works if 
  387      self.recalculate_locks[locking.LEVEL_NODE] is set. 
  388   
  389      In the future it may grow parameters to just lock some instance's nodes, or 
  390      to just lock primaries or secondary nodes, if needed. 
  391   
  392      It should be called in DeclareLocks in a way similar to:: 
  393   
  394        if level == locking.LEVEL_NODE: 
  395          self._LockInstancesNodes() 
  396   
  397      @type primary_only: boolean 
  398      @param primary_only: only lock primary nodes of locked instances 
  399   
  400      """ 
  401      assert locking.LEVEL_NODE in self.recalculate_locks, \ 
  402        "_LockInstancesNodes helper function called with no nodes to recalculate" 
  403   
  404      # TODO: check if we've really been called with the instance locks held 
  405   
  406      # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 
  407      # future we might want to have different behaviors depending on the value 
  408      # of self.recalculate_locks[locking.LEVEL_NODE] 
  409      wanted_nodes = [] 
  410      for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 
  411        instance = self.context.cfg.GetInstanceInfo(instance_name) 
  412        wanted_nodes.append(instance.primary_node) 
  413        if not primary_only: 
  414          wanted_nodes.extend(instance.secondary_nodes) 
  415   
  416      if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE: 
  417        self.needed_locks[locking.LEVEL_NODE] = wanted_nodes 
  418      elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND: 
  419        self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes) 
  420   
  421      del self.recalculate_locks[locking.LEVEL_NODE] 
422
  423   
  424  class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223 
  425    """Simple LU which runs no hooks. 
  426   
  427    This LU is intended as a parent for other LogicalUnits which will 
  428    run no hooks, in order to reduce duplicate code. 
  429   
  430    """ 
  431    HPATH = None 
  432    HTYPE = None 
  433   
  434    def BuildHooksEnv(self): 
  435      """Empty BuildHooksEnv for NoHooksLU. 
  436   
  437      This just raises an error. 
  438   
  439      """ 
  440      assert False, "BuildHooksEnv called for NoHooksLUs" 
441
  442   
  443  class Tasklet: 
  444    """Tasklet base class. 
  445   
  446    Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 
  447    they can mix legacy code with tasklets. Locking needs to be done in the LU, 
  448    tasklets know nothing about locks. 
  449   
  450    Subclasses must follow these rules: 
  451      - Implement CheckPrereq 
  452      - Implement Exec 
  453   
  454    """ 
  455    def __init__(self, lu): 
  456      self.lu = lu 
  457   
  458      # Shortcuts 
  459      self.cfg = lu.cfg 
  460      self.rpc = lu.rpc 
  461   
  462    def CheckPrereq(self): 
  463      """Check prerequisites for this tasklet. 
  464   
  465      This method should check whether the prerequisites for the execution of 
  466      this tasklet are fulfilled. It can do internode communication, but it 
  467      should be idempotent - no cluster or system changes are allowed. 
  468   
  469      The method should raise errors.OpPrereqError in case something is not 
  470      fulfilled. Its return value is ignored. 
  471   
  472      This method should also update all parameters to their canonical form if it 
  473      hasn't been done before. 
  474   
  475      """ 
  476      pass 
  477   
  478    def Exec(self, feedback_fn): 
  479      """Execute the tasklet. 
  480   
  481      This method should implement the actual work. It should raise 
  482      errors.OpExecError for failures that are somewhat dealt with in code, or 
  483      expected. 
  484   
  485      """ 
  486      raise NotImplementedError 
487
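# Illustrative sketch (not part of the original module): a minimal tasklet-based LU.
# The names below are hypothetical; only the contract shown above (CheckPrereq/Exec
# on Tasklet, and self.tasklets being set by the LU) comes from this module.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       pass  # idempotent checks only; raise errors.OpPrereqError on failure
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance_name)
#
#   # Inside a hypothetical LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]
#   # LogicalUnit.CheckPrereq and Exec will then iterate over self.tasklets.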
  488   
  489  def _GetWantedNodes(lu, nodes): 
  490    """Returns list of checked and expanded node names. 
  491   
  492    @type lu: L{LogicalUnit} 
  493    @param lu: the logical unit on whose behalf we execute 
  494    @type nodes: list 
  495    @param nodes: non-empty list of node names to expand 
  496    @rtype: list 
  497    @return: the list of nodes, sorted 
  498    @raise errors.ProgrammerError: if the nodes parameter is empty 
  499   
  500    """ 
  501    if not nodes: 
  502      raise errors.ProgrammerError("_GetWantedNodes should only be called with a" 
  503                                   " non-empty list of nodes whose name is to be expanded.") 
  504   
  505    wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes] 
  506    return utils.NiceSort(wanted) 
507
  508   
  509  def _GetWantedInstances(lu, instances): 
  510    """Returns list of checked and expanded instance names. 
  511   
  512    @type lu: L{LogicalUnit} 
  513    @param lu: the logical unit on whose behalf we execute 
  514    @type instances: list 
  515    @param instances: list of instance names or None for all instances 
  516    @rtype: list 
  517    @return: the list of instances, sorted 
  518    @raise errors.OpPrereqError: if the instances parameter is wrong type 
  519    @raise errors.OpPrereqError: if any of the passed instances is not found 
  520   
  521    """ 
  522    if instances: 
  523      wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 
  524    else: 
  525      wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 
  526    return wanted 
527
  528   
  529  def _GetUpdatedParams(old_params, update_dict, 
  530                        use_default=True, use_none=False): 
  531    """Return the new version of a parameter dictionary. 
  532   
  533    @type old_params: dict 
  534    @param old_params: old parameters 
  535    @type update_dict: dict 
  536    @param update_dict: dict containing new parameter values, or 
  537        constants.VALUE_DEFAULT to reset the parameter to its default 
  538        value 
  539    @type use_default: boolean 
  540    @param use_default: whether to recognise L{constants.VALUE_DEFAULT} 
  541        values as 'to be deleted' values 
  542    @type use_none: boolean 
  543    @param use_none: whether to recognise C{None} values as 'to be 
  544        deleted' values 
  545    @rtype: dict 
  546    @return: the new parameter dictionary 
  547   
  548    """ 
  549    params_copy = copy.deepcopy(old_params) 
  550    for key, val in update_dict.iteritems(): 
  551      if ((use_default and val == constants.VALUE_DEFAULT) or 
  552          (use_none and val is None)): 
  553        try: 
  554          del params_copy[key] 
  555        except KeyError: 
  556          pass 
  557      else: 
  558        params_copy[key] = val 
  559    return params_copy 
560
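# Illustrative sketch (not part of the original module) of the merge semantics above;
# the parameter names are made up:
#
#   old = {"mem": 512, "vcpus": 2}
#   upd = {"mem": constants.VALUE_DEFAULT,   # reset: key is removed
#          "vcpus": 4,                       # overridden
#          "nic_type": "bridged"}            # added
#   _GetUpdatedParams(old, upd)
#   # -> {"vcpus": 4, "nic_type": "bridged"}
#   # With use_none=True, a value of None is likewise treated as "delete this key".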
  561   
  562  def _CheckOutputFields(static, dynamic, selected): 
  563    """Checks whether all selected fields are valid. 
  564   
  565    @type static: L{utils.FieldSet} 
  566    @param static: static fields set 
  567    @type dynamic: L{utils.FieldSet} 
  568    @param dynamic: dynamic fields set 
  569   
  570    """ 
  571    f = utils.FieldSet() 
  572    f.Extend(static) 
  573    f.Extend(dynamic) 
  574   
  575    delta = f.NonMatching(selected) 
  576    if delta: 
  577      raise errors.OpPrereqError("Unknown output fields selected: %s" 
  578                                 % ",".join(delta), errors.ECODE_INVAL) 
579
580 581 -def _CheckGlobalHvParams(params):
582 """Validates that given hypervisor params are not global ones. 583 584 This will ensure that instances don't get customised versions of 585 global params. 586 587 """ 588 used_globals = constants.HVC_GLOBALS.intersection(params) 589 if used_globals: 590 msg = ("The following hypervisor parameters are global and cannot" 591 " be customized at instance level, please modify them at" 592 " cluster level: %s" % utils.CommaJoin(used_globals)) 593 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
594
595 596 -def _CheckNodeOnline(lu, node, msg=None):
597 """Ensure that a given node is online. 598 599 @param lu: the LU on behalf of which we make the check 600 @param node: the node to check 601 @param msg: if passed, should be a message to replace the default one 602 @raise errors.OpPrereqError: if the node is offline 603 604 """ 605 if msg is None: 606 msg = "Can't use offline node" 607 if lu.cfg.GetNodeInfo(node).offline: 608 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
609
610 611 -def _CheckNodeNotDrained(lu, node):
612 """Ensure that a given node is not drained. 613 614 @param lu: the LU on behalf of which we make the check 615 @param node: the node to check 616 @raise errors.OpPrereqError: if the node is drained 617 618 """ 619 if lu.cfg.GetNodeInfo(node).drained: 620 raise errors.OpPrereqError("Can't use drained node %s" % node, 621 errors.ECODE_STATE)
622
623 624 -def _CheckNodeVmCapable(lu, node):
625 """Ensure that a given node is vm capable. 626 627 @param lu: the LU on behalf of which we make the check 628 @param node: the node to check 629 @raise errors.OpPrereqError: if the node is not vm capable 630 631 """ 632 if not lu.cfg.GetNodeInfo(node).vm_capable: 633 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 634 errors.ECODE_STATE)
635
636 637 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
638 """Ensure that a node supports a given OS. 639 640 @param lu: the LU on behalf of which we make the check 641 @param node: the node to check 642 @param os_name: the OS to query about 643 @param force_variant: whether to ignore variant errors 644 @raise errors.OpPrereqError: if the node is not supporting the OS 645 646 """ 647 result = lu.rpc.call_os_get(node, os_name) 648 result.Raise("OS '%s' not in supported OS list for node %s" % 649 (os_name, node), 650 prereq=True, ecode=errors.ECODE_INVAL) 651 if not force_variant: 652 _CheckOSVariant(result.payload, os_name)
653
654 655 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
656 """Ensure that a node has the given secondary ip. 657 658 @type lu: L{LogicalUnit} 659 @param lu: the LU on behalf of which we make the check 660 @type node: string 661 @param node: the node to check 662 @type secondary_ip: string 663 @param secondary_ip: the ip to check 664 @type prereq: boolean 665 @param prereq: whether to throw a prerequisite or an execute error 666 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 667 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 668 669 """ 670 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 671 result.Raise("Failure checking secondary ip on node %s" % node, 672 prereq=prereq, ecode=errors.ECODE_ENVIRON) 673 if not result.payload: 674 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 675 " please fix and re-run this command" % secondary_ip) 676 if prereq: 677 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 678 else: 679 raise errors.OpExecError(msg)
680
681 682 -def _RequireFileStorage():
683 """Checks that file storage is enabled. 684 685 @raise errors.OpPrereqError: when file storage is disabled 686 687 """ 688 if not constants.ENABLE_FILE_STORAGE: 689 raise errors.OpPrereqError("File storage disabled at configure time", 690 errors.ECODE_INVAL)
691
692 693 -def _CheckDiskTemplate(template):
694 """Ensure a given disk template is valid. 695 696 """ 697 if template not in constants.DISK_TEMPLATES: 698 msg = ("Invalid disk template name '%s', valid templates are: %s" % 699 (template, utils.CommaJoin(constants.DISK_TEMPLATES))) 700 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 701 if template == constants.DT_FILE: 702 _RequireFileStorage() 703 return True
704
705 706 -def _CheckStorageType(storage_type):
707 """Ensure a given storage type is valid. 708 709 """ 710 if storage_type not in constants.VALID_STORAGE_TYPES: 711 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type, 712 errors.ECODE_INVAL) 713 if storage_type == constants.ST_FILE: 714 _RequireFileStorage() 715 return True
716
717 718 -def _GetClusterDomainSecret():
719 """Reads the cluster domain secret. 720 721 """ 722 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 723 strict=True)
724
725 726 -def _CheckInstanceDown(lu, instance, reason):
727 """Ensure that an instance is not running.""" 728 if instance.admin_up: 729 raise errors.OpPrereqError("Instance %s is marked to be up, %s" % 730 (instance.name, reason), errors.ECODE_STATE) 731 732 pnode = instance.primary_node 733 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 734 ins_l.Raise("Can't contact node %s for instance information" % pnode, 735 prereq=True, ecode=errors.ECODE_ENVIRON) 736 737 if instance.name in ins_l.payload: 738 raise errors.OpPrereqError("Instance %s is running, %s" % 739 (instance.name, reason), errors.ECODE_STATE)
740
741 742 -def _ExpandItemName(fn, name, kind):
743 """Expand an item name. 744 745 @param fn: the function to use for expansion 746 @param name: requested item name 747 @param kind: text description ('Node' or 'Instance') 748 @return: the resolved (full) name 749 @raise errors.OpPrereqError: if the item is not found 750 751 """ 752 full_name = fn(name) 753 if full_name is None: 754 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 755 errors.ECODE_NOENT) 756 return full_name
757
758 759 -def _ExpandNodeName(cfg, name):
760 """Wrapper over L{_ExpandItemName} for nodes.""" 761 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
762
763 764 -def _ExpandInstanceName(cfg, name):
765 """Wrapper over L{_ExpandItemName} for instance.""" 766 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
767
768 769 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 770 memory, vcpus, nics, disk_template, disks, 771 bep, hvp, hypervisor_name):
772 """Builds instance related env variables for hooks 773 774 This builds the hook environment from individual variables. 775 776 @type name: string 777 @param name: the name of the instance 778 @type primary_node: string 779 @param primary_node: the name of the instance's primary node 780 @type secondary_nodes: list 781 @param secondary_nodes: list of secondary nodes as strings 782 @type os_type: string 783 @param os_type: the name of the instance's OS 784 @type status: boolean 785 @param status: the should_run status of the instance 786 @type memory: string 787 @param memory: the memory size of the instance 788 @type vcpus: string 789 @param vcpus: the count of VCPUs the instance has 790 @type nics: list 791 @param nics: list of tuples (ip, mac, mode, link) representing 792 the NICs the instance has 793 @type disk_template: string 794 @param disk_template: the disk template of the instance 795 @type disks: list 796 @param disks: the list of (size, mode) pairs 797 @type bep: dict 798 @param bep: the backend parameters for the instance 799 @type hvp: dict 800 @param hvp: the hypervisor parameters for the instance 801 @type hypervisor_name: string 802 @param hypervisor_name: the hypervisor for the instance 803 @rtype: dict 804 @return: the hook environment for this instance 805 806 """ 807 if status: 808 str_status = "up" 809 else: 810 str_status = "down" 811 env = { 812 "OP_TARGET": name, 813 "INSTANCE_NAME": name, 814 "INSTANCE_PRIMARY": primary_node, 815 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 816 "INSTANCE_OS_TYPE": os_type, 817 "INSTANCE_STATUS": str_status, 818 "INSTANCE_MEMORY": memory, 819 "INSTANCE_VCPUS": vcpus, 820 "INSTANCE_DISK_TEMPLATE": disk_template, 821 "INSTANCE_HYPERVISOR": hypervisor_name, 822 } 823 824 if nics: 825 nic_count = len(nics) 826 for idx, (ip, mac, mode, link) in enumerate(nics): 827 if ip is None: 828 ip = "" 829 env["INSTANCE_NIC%d_IP" % idx] = ip 830 env["INSTANCE_NIC%d_MAC" % idx] = mac 831 env["INSTANCE_NIC%d_MODE" % idx] = mode 832 env["INSTANCE_NIC%d_LINK" % idx] = link 833 if mode == constants.NIC_MODE_BRIDGED: 834 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 835 else: 836 nic_count = 0 837 838 env["INSTANCE_NIC_COUNT"] = nic_count 839 840 if disks: 841 disk_count = len(disks) 842 for idx, (size, mode) in enumerate(disks): 843 env["INSTANCE_DISK%d_SIZE" % idx] = size 844 env["INSTANCE_DISK%d_MODE" % idx] = mode 845 else: 846 disk_count = 0 847 848 env["INSTANCE_DISK_COUNT"] = disk_count 849 850 for source, kind in [(bep, "BE"), (hvp, "HV")]: 851 for key, value in source.items(): 852 env["INSTANCE_%s_%s" % (kind, key)] = value 853 854 return env
855
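# Illustrative note (not part of the original module): for a hypothetical instance
# with one bridged NIC and one disk, the function above returns keys such as
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_OS_TYPE, INSTANCE_STATUS ("up"/"down"), INSTANCE_MEMORY,
#   INSTANCE_VCPUS, INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR,
#   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_IP/_MAC/_MODE/_LINK/_BRIDGE,
#   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE/_MODE,
#   plus INSTANCE_BE_* and INSTANCE_HV_* entries for each backend/hypervisor
#   parameter (per BuildHooksEnv, the GANETI_ prefix is added by the hooks runner).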
856 857 -def _NICListToTuple(lu, nics):
858 """Build a list of nic information tuples. 859 860 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 861 value in LUQueryInstanceData. 862 863 @type lu: L{LogicalUnit} 864 @param lu: the logical unit on whose behalf we execute 865 @type nics: list of L{objects.NIC} 866 @param nics: list of nics to convert to hooks tuples 867 868 """ 869 hooks_nics = [] 870 cluster = lu.cfg.GetClusterInfo() 871 for nic in nics: 872 ip = nic.ip 873 mac = nic.mac 874 filled_params = cluster.SimpleFillNIC(nic.nicparams) 875 mode = filled_params[constants.NIC_MODE] 876 link = filled_params[constants.NIC_LINK] 877 hooks_nics.append((ip, mac, mode, link)) 878 return hooks_nics
879
880 881 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
882 """Builds instance related env variables for hooks from an object. 883 884 @type lu: L{LogicalUnit} 885 @param lu: the logical unit on whose behalf we execute 886 @type instance: L{objects.Instance} 887 @param instance: the instance for which we should build the 888 environment 889 @type override: dict 890 @param override: dictionary with key/values that will override 891 our values 892 @rtype: dict 893 @return: the hook environment dictionary 894 895 """ 896 cluster = lu.cfg.GetClusterInfo() 897 bep = cluster.FillBE(instance) 898 hvp = cluster.FillHV(instance) 899 args = { 900 'name': instance.name, 901 'primary_node': instance.primary_node, 902 'secondary_nodes': instance.secondary_nodes, 903 'os_type': instance.os, 904 'status': instance.admin_up, 905 'memory': bep[constants.BE_MEMORY], 906 'vcpus': bep[constants.BE_VCPUS], 907 'nics': _NICListToTuple(lu, instance.nics), 908 'disk_template': instance.disk_template, 909 'disks': [(disk.size, disk.mode) for disk in instance.disks], 910 'bep': bep, 911 'hvp': hvp, 912 'hypervisor_name': instance.hypervisor, 913 } 914 if override: 915 args.update(override) 916 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
917
918 919 -def _AdjustCandidatePool(lu, exceptions):
920 """Adjust the candidate pool after node operations. 921 922 """ 923 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 924 if mod_list: 925 lu.LogInfo("Promoted nodes to master candidate role: %s", 926 utils.CommaJoin(node.name for node in mod_list)) 927 for name in mod_list: 928 lu.context.ReaddNode(name) 929 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 930 if mc_now > mc_max: 931 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 932 (mc_now, mc_max))
933
934 935 -def _DecideSelfPromotion(lu, exceptions=None):
936 """Decide whether I should promote myself as a master candidate. 937 938 """ 939 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 940 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 941 # the new node will increase mc_max with one, so: 942 mc_should = min(mc_should + 1, cp_size) 943 return mc_now < mc_should
944
945 946 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
947 """Check that the brigdes needed by a list of nics exist. 948 949 """ 950 cluster = lu.cfg.GetClusterInfo() 951 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 952 brlist = [params[constants.NIC_LINK] for params in paramslist 953 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 954 if brlist: 955 result = lu.rpc.call_bridges_exist(target_node, brlist) 956 result.Raise("Error checking bridges on destination node '%s'" % 957 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
958
959 960 -def _CheckInstanceBridgesExist(lu, instance, node=None):
961 """Check that the brigdes needed by an instance exist. 962 963 """ 964 if node is None: 965 node = instance.primary_node 966 _CheckNicsBridgesExist(lu, instance.nics, node)
967
968 969 -def _CheckOSVariant(os_obj, name):
970 """Check whether an OS name conforms to the os variants specification. 971 972 @type os_obj: L{objects.OS} 973 @param os_obj: OS object to check 974 @type name: string 975 @param name: OS name passed by the user, to check for validity 976 977 """ 978 if not os_obj.supported_variants: 979 return 980 variant = objects.OS.GetVariant(name) 981 if not variant: 982 raise errors.OpPrereqError("OS name must include a variant", 983 errors.ECODE_INVAL) 984 985 if variant not in os_obj.supported_variants: 986 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
987
988 989 -def _GetNodeInstancesInner(cfg, fn):
990 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
991
992 993 -def _GetNodeInstances(cfg, node_name):
994 """Returns a list of all primary and secondary instances on a node. 995 996 """ 997 998 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
999
1000 1001 -def _GetNodePrimaryInstances(cfg, node_name):
1002 """Returns primary instances on a node. 1003 1004 """ 1005 return _GetNodeInstancesInner(cfg, 1006 lambda inst: node_name == inst.primary_node)
1007
1008 1009 -def _GetNodeSecondaryInstances(cfg, node_name):
1010 """Returns secondary instances on a node. 1011 1012 """ 1013 return _GetNodeInstancesInner(cfg, 1014 lambda inst: node_name in inst.secondary_nodes)
1015
1016 1017 -def _GetStorageTypeArgs(cfg, storage_type):
1018 """Returns the arguments for a storage type. 1019 1020 """ 1021 # Special case for file storage 1022 if storage_type == constants.ST_FILE: 1023 # storage.FileStorage wants a list of storage directories 1024 return [[cfg.GetFileStorageDir()]] 1025 1026 return []
1027
1028 1029 -def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1030 faulty = [] 1031 1032 for dev in instance.disks: 1033 cfg.SetDiskID(dev, node_name) 1034 1035 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks) 1036 result.Raise("Failed to get disk status from node %s" % node_name, 1037 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1038 1039 for idx, bdev_status in enumerate(result.payload): 1040 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1041 faulty.append(idx) 1042 1043 return faulty
1044
1045 1046 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1047 """Check the sanity of iallocator and node arguments and use the 1048 cluster-wide iallocator if appropriate. 1049 1050 Check that at most one of (iallocator, node) is specified. If none is 1051 specified, then the LU's opcode's iallocator slot is filled with the 1052 cluster-wide default iallocator. 1053 1054 @type iallocator_slot: string 1055 @param iallocator_slot: the name of the opcode iallocator slot 1056 @type node_slot: string 1057 @param node_slot: the name of the opcode target node slot 1058 1059 """ 1060 node = getattr(lu.op, node_slot, None) 1061 iallocator = getattr(lu.op, iallocator_slot, None) 1062 1063 if node is not None and iallocator is not None: 1064 raise errors.OpPrereqError("Do not specify both, iallocator and node.", 1065 errors.ECODE_INVAL) 1066 elif node is None and iallocator is None: 1067 default_iallocator = lu.cfg.GetDefaultIAllocator() 1068 if default_iallocator: 1069 setattr(lu.op, iallocator_slot, default_iallocator) 1070 else: 1071 raise errors.OpPrereqError("No iallocator or node given and no" 1072 " cluster-wide default iallocator found." 1073 " Please specify either an iallocator or a" 1074 " node, or set a cluster-wide default" 1075 " iallocator.")
1076
1077 1078 -class LUPostInitCluster(LogicalUnit):
1079 """Logical unit for running hooks after cluster initialization. 1080 1081 """ 1082 HPATH = "cluster-init" 1083 HTYPE = constants.HTYPE_CLUSTER 1084
1085 - def BuildHooksEnv(self):
1086 """Build hooks env. 1087 1088 """ 1089 env = {"OP_TARGET": self.cfg.GetClusterName()} 1090 mn = self.cfg.GetMasterNode() 1091 return env, [], [mn]
1092
1093 - def Exec(self, feedback_fn):
1094 """Nothing to do. 1095 1096 """ 1097 return True
1098
1099 1100 -class LUDestroyCluster(LogicalUnit):
1101 """Logical unit for destroying the cluster. 1102 1103 """ 1104 HPATH = "cluster-destroy" 1105 HTYPE = constants.HTYPE_CLUSTER 1106
1107 - def BuildHooksEnv(self):
1108 """Build hooks env. 1109 1110 """ 1111 env = {"OP_TARGET": self.cfg.GetClusterName()} 1112 return env, [], []
1113
1114 - def CheckPrereq(self):
1115 """Check prerequisites. 1116 1117 This checks whether the cluster is empty. 1118 1119 Any errors are signaled by raising errors.OpPrereqError. 1120 1121 """ 1122 master = self.cfg.GetMasterNode() 1123 1124 nodelist = self.cfg.GetNodeList() 1125 if len(nodelist) != 1 or nodelist[0] != master: 1126 raise errors.OpPrereqError("There are still %d node(s) in" 1127 " this cluster." % (len(nodelist) - 1), 1128 errors.ECODE_INVAL) 1129 instancelist = self.cfg.GetInstanceList() 1130 if instancelist: 1131 raise errors.OpPrereqError("There are still %d instance(s) in" 1132 " this cluster." % len(instancelist), 1133 errors.ECODE_INVAL)
1134
1135 - def Exec(self, feedback_fn):
1136 """Destroys the cluster. 1137 1138 """ 1139 master = self.cfg.GetMasterNode() 1140 1141 # Run post hooks on master node before it's removed 1142 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 1143 try: 1144 hm.RunPhase(constants.HOOKS_PHASE_POST, [master]) 1145 except: 1146 # pylint: disable-msg=W0702 1147 self.LogWarning("Errors occurred running hooks on %s" % master) 1148 1149 result = self.rpc.call_node_stop_master(master, False) 1150 result.Raise("Could not disable the master role") 1151 1152 return master
1153
1154 1155 -def _VerifyCertificate(filename):
1156 """Verifies a certificate for LUVerifyCluster. 1157 1158 @type filename: string 1159 @param filename: Path to PEM file 1160 1161 """ 1162 try: 1163 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1164 utils.ReadFile(filename)) 1165 except Exception, err: # pylint: disable-msg=W0703 1166 return (LUVerifyCluster.ETYPE_ERROR, 1167 "Failed to load X509 certificate %s: %s" % (filename, err)) 1168 1169 (errcode, msg) = \ 1170 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1171 constants.SSL_CERT_EXPIRATION_ERROR) 1172 1173 if msg: 1174 fnamemsg = "While verifying %s: %s" % (filename, msg) 1175 else: 1176 fnamemsg = None 1177 1178 if errcode is None: 1179 return (None, fnamemsg) 1180 elif errcode == utils.CERT_WARNING: 1181 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg) 1182 elif errcode == utils.CERT_ERROR: 1183 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg) 1184 1185 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1186
1187 1188 -class LUVerifyCluster(LogicalUnit):
1189 """Verifies the cluster status. 1190 1191 """ 1192 HPATH = "cluster-verify" 1193 HTYPE = constants.HTYPE_CLUSTER 1194 _OP_PARAMS = [ 1195 ("skip_checks", ht.EmptyList, 1196 ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))), 1197 ("verbose", False, ht.TBool), 1198 ("error_codes", False, ht.TBool), 1199 ("debug_simulate_errors", False, ht.TBool), 1200 ] 1201 REQ_BGL = False 1202 1203 TCLUSTER = "cluster" 1204 TNODE = "node" 1205 TINSTANCE = "instance" 1206 1207 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG") 1208 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT") 1209 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE") 1210 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN") 1211 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT") 1212 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK") 1213 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK") 1214 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE") 1215 ENODEDRBD = (TNODE, "ENODEDRBD") 1216 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER") 1217 ENODEFILECHECK = (TNODE, "ENODEFILECHECK") 1218 ENODEHOOKS = (TNODE, "ENODEHOOKS") 1219 ENODEHV = (TNODE, "ENODEHV") 1220 ENODELVM = (TNODE, "ENODELVM") 1221 ENODEN1 = (TNODE, "ENODEN1") 1222 ENODENET = (TNODE, "ENODENET") 1223 ENODEOS = (TNODE, "ENODEOS") 1224 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE") 1225 ENODEORPHANLV = (TNODE, "ENODEORPHANLV") 1226 ENODERPC = (TNODE, "ENODERPC") 1227 ENODESSH = (TNODE, "ENODESSH") 1228 ENODEVERSION = (TNODE, "ENODEVERSION") 1229 ENODESETUP = (TNODE, "ENODESETUP") 1230 ENODETIME = (TNODE, "ENODETIME") 1231 1232 ETYPE_FIELD = "code" 1233 ETYPE_ERROR = "ERROR" 1234 ETYPE_WARNING = "WARNING" 1235
1236 - class NodeImage(object):
1237 """A class representing the logical and physical status of a node. 1238 1239 @type name: string 1240 @ivar name: the node name to which this object refers 1241 @ivar volumes: a structure as returned from 1242 L{ganeti.backend.GetVolumeList} (runtime) 1243 @ivar instances: a list of running instances (runtime) 1244 @ivar pinst: list of configured primary instances (config) 1245 @ivar sinst: list of configured secondary instances (config) 1246 @ivar sbp: diction of {secondary-node: list of instances} of all peers 1247 of this node (config) 1248 @ivar mfree: free memory, as reported by hypervisor (runtime) 1249 @ivar dfree: free disk, as reported by the node (runtime) 1250 @ivar offline: the offline status (config) 1251 @type rpc_fail: boolean 1252 @ivar rpc_fail: whether the RPC verify call was successfull (overall, 1253 not whether the individual keys were correct) (runtime) 1254 @type lvm_fail: boolean 1255 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 1256 @type hyp_fail: boolean 1257 @ivar hyp_fail: whether the RPC call didn't return the instance list 1258 @type ghost: boolean 1259 @ivar ghost: whether this is a known node or not (config) 1260 @type os_fail: boolean 1261 @ivar os_fail: whether the RPC call didn't return valid OS data 1262 @type oslist: list 1263 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 1264 @type vm_capable: boolean 1265 @ivar vm_capable: whether the node can host instances 1266 1267 """
1268 - def __init__(self, offline=False, name=None, vm_capable=True):
1269 self.name = name 1270 self.volumes = {} 1271 self.instances = [] 1272 self.pinst = [] 1273 self.sinst = [] 1274 self.sbp = {} 1275 self.mfree = 0 1276 self.dfree = 0 1277 self.offline = offline 1278 self.vm_capable = vm_capable 1279 self.rpc_fail = False 1280 self.lvm_fail = False 1281 self.hyp_fail = False 1282 self.ghost = False 1283 self.os_fail = False 1284 self.oslist = {}
1285
1286 - def ExpandNames(self):
1287 self.needed_locks = { 1288 locking.LEVEL_NODE: locking.ALL_SET, 1289 locking.LEVEL_INSTANCE: locking.ALL_SET, 1290 } 1291 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1292
1293 - def _Error(self, ecode, item, msg, *args, **kwargs):
1294 """Format an error message. 1295 1296 Based on the opcode's error_codes parameter, either format a 1297 parseable error code, or a simpler error string. 1298 1299 This must be called only from Exec and functions called from Exec. 1300 1301 """ 1302 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1303 itype, etxt = ecode 1304 # first complete the msg 1305 if args: 1306 msg = msg % args 1307 # then format the whole message 1308 if self.op.error_codes: 1309 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1310 else: 1311 if item: 1312 item = " " + item 1313 else: 1314 item = "" 1315 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1316 # and finally report it via the feedback_fn 1317 self._feedback_fn(" - %s" % msg)
1318
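# Illustrative note (not part of the original module): with the error_codes opcode
# parameter enabled, the message built above is machine-parseable, e.g. the
# ENODELVM case might render as
#   "ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
# (ltype:etxt:itype:item:msg); without error_codes it would read
#   "ERROR: node node1.example.com: unable to check volume groups".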
1319 - def _ErrorIf(self, cond, *args, **kwargs):
1320 """Log an error message if the passed condition is True. 1321 1322 """ 1323 cond = bool(cond) or self.op.debug_simulate_errors 1324 if cond: 1325 self._Error(*args, **kwargs) 1326 # do not mark the operation as failed for WARN cases only 1327 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1328 self.bad = self.bad or cond
1329
1330 - def _VerifyNode(self, ninfo, nresult):
1331 """Perform some basic validation on data returned from a node. 1332 1333 - check the result data structure is well formed and has all the 1334 mandatory fields 1335 - check ganeti version 1336 1337 @type ninfo: L{objects.Node} 1338 @param ninfo: the node to check 1339 @param nresult: the results from the node 1340 @rtype: boolean 1341 @return: whether overall this call was successful (and we can expect 1342 reasonable values in the respose) 1343 1344 """ 1345 node = ninfo.name 1346 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1347 1348 # main result, nresult should be a non-empty dict 1349 test = not nresult or not isinstance(nresult, dict) 1350 _ErrorIf(test, self.ENODERPC, node, 1351 "unable to verify node: no data returned") 1352 if test: 1353 return False 1354 1355 # compares ganeti version 1356 local_version = constants.PROTOCOL_VERSION 1357 remote_version = nresult.get("version", None) 1358 test = not (remote_version and 1359 isinstance(remote_version, (list, tuple)) and 1360 len(remote_version) == 2) 1361 _ErrorIf(test, self.ENODERPC, node, 1362 "connection to node returned invalid data") 1363 if test: 1364 return False 1365 1366 test = local_version != remote_version[0] 1367 _ErrorIf(test, self.ENODEVERSION, node, 1368 "incompatible protocol versions: master %s," 1369 " node %s", local_version, remote_version[0]) 1370 if test: 1371 return False 1372 1373 # node seems compatible, we can actually try to look into its results 1374 1375 # full package version 1376 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 1377 self.ENODEVERSION, node, 1378 "software version mismatch: master %s, node %s", 1379 constants.RELEASE_VERSION, remote_version[1], 1380 code=self.ETYPE_WARNING) 1381 1382 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 1383 if ninfo.vm_capable and isinstance(hyp_result, dict): 1384 for hv_name, hv_result in hyp_result.iteritems(): 1385 test = hv_result is not None 1386 _ErrorIf(test, self.ENODEHV, node, 1387 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 1388 1389 test = nresult.get(constants.NV_NODESETUP, 1390 ["Missing NODESETUP results"]) 1391 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s", 1392 "; ".join(test)) 1393 1394 return True
1395
1396 - def _VerifyNodeTime(self, ninfo, nresult, 1397 nvinfo_starttime, nvinfo_endtime):
1398 """Check the node time. 1399 1400 @type ninfo: L{objects.Node} 1401 @param ninfo: the node to check 1402 @param nresult: the remote results for the node 1403 @param nvinfo_starttime: the start time of the RPC call 1404 @param nvinfo_endtime: the end time of the RPC call 1405 1406 """ 1407 node = ninfo.name 1408 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1409 1410 ntime = nresult.get(constants.NV_TIME, None) 1411 try: 1412 ntime_merged = utils.MergeTime(ntime) 1413 except (ValueError, TypeError): 1414 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") 1415 return 1416 1417 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 1418 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 1419 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 1420 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 1421 else: 1422 ntime_diff = None 1423 1424 _ErrorIf(ntime_diff is not None, self.ENODETIME, node, 1425 "Node time diverges by at least %s from master node time", 1426 ntime_diff)
1427
1428 - def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1429 """Check the node time. 1430 1431 @type ninfo: L{objects.Node} 1432 @param ninfo: the node to check 1433 @param nresult: the remote results for the node 1434 @param vg_name: the configured VG name 1435 1436 """ 1437 if vg_name is None: 1438 return 1439 1440 node = ninfo.name 1441 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1442 1443 # checks vg existence and size > 20G 1444 vglist = nresult.get(constants.NV_VGLIST, None) 1445 test = not vglist 1446 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups") 1447 if not test: 1448 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 1449 constants.MIN_VG_SIZE) 1450 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus) 1451 1452 # check pv names 1453 pvlist = nresult.get(constants.NV_PVLIST, None) 1454 test = pvlist is None 1455 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node") 1456 if not test: 1457 # check that ':' is not present in PV names, since it's a 1458 # special character for lvcreate (denotes the range of PEs to 1459 # use on the PV) 1460 for _, pvname, owner_vg in pvlist: 1461 test = ":" in pvname 1462 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV" 1463 " '%s' of VG '%s'", pvname, owner_vg)
1464
1465 - def _VerifyNodeNetwork(self, ninfo, nresult):
1466 """Check the node time. 1467 1468 @type ninfo: L{objects.Node} 1469 @param ninfo: the node to check 1470 @param nresult: the remote results for the node 1471 1472 """ 1473 node = ninfo.name 1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1475 1476 test = constants.NV_NODELIST not in nresult 1477 _ErrorIf(test, self.ENODESSH, node, 1478 "node hasn't returned node ssh connectivity data") 1479 if not test: 1480 if nresult[constants.NV_NODELIST]: 1481 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 1482 _ErrorIf(True, self.ENODESSH, node, 1483 "ssh communication with node '%s': %s", a_node, a_msg) 1484 1485 test = constants.NV_NODENETTEST not in nresult 1486 _ErrorIf(test, self.ENODENET, node, 1487 "node hasn't returned node tcp connectivity data") 1488 if not test: 1489 if nresult[constants.NV_NODENETTEST]: 1490 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 1491 for anode in nlist: 1492 _ErrorIf(True, self.ENODENET, node, 1493 "tcp communication with node '%s': %s", 1494 anode, nresult[constants.NV_NODENETTEST][anode]) 1495 1496 test = constants.NV_MASTERIP not in nresult 1497 _ErrorIf(test, self.ENODENET, node, 1498 "node hasn't returned node master IP reachability data") 1499 if not test: 1500 if not nresult[constants.NV_MASTERIP]: 1501 if node == self.master_node: 1502 msg = "the master node cannot reach the master IP (not configured?)" 1503 else: 1504 msg = "cannot reach the master IP" 1505 _ErrorIf(True, self.ENODENET, node, msg)
1506
1507 - def _VerifyInstance(self, instance, instanceconfig, node_image, 1508 diskstatus):
1509 """Verify an instance. 1510 1511 This function checks to see if the required block devices are 1512 available on the instance's node. 1513 1514 """ 1515 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1516 node_current = instanceconfig.primary_node 1517 1518 node_vol_should = {} 1519 instanceconfig.MapLVsByNode(node_vol_should) 1520 1521 for node in node_vol_should: 1522 n_img = node_image[node] 1523 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1524 # ignore missing volumes on offline or broken nodes 1525 continue 1526 for volume in node_vol_should[node]: 1527 test = volume not in n_img.volumes 1528 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance, 1529 "volume %s missing on node %s", volume, node) 1530 1531 if instanceconfig.admin_up: 1532 pri_img = node_image[node_current] 1533 test = instance not in pri_img.instances and not pri_img.offline 1534 _ErrorIf(test, self.EINSTANCEDOWN, instance, 1535 "instance not running on its primary node %s", 1536 node_current) 1537 1538 for node, n_img in node_image.items(): 1539 if (not node == node_current): 1540 test = instance in n_img.instances 1541 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance, 1542 "instance should not run on node %s", node) 1543 1544 diskdata = [(nname, success, status, idx) 1545 for (nname, disks) in diskstatus.items() 1546 for idx, (success, status) in enumerate(disks)] 1547 1548 for nname, success, bdev_status, idx in diskdata: 1549 _ErrorIf(instanceconfig.admin_up and not success, 1550 self.EINSTANCEFAULTYDISK, instance, 1551 "couldn't retrieve status for disk/%s on %s: %s", 1552 idx, nname, bdev_status) 1553 _ErrorIf((instanceconfig.admin_up and success and 1554 bdev_status.ldisk_status == constants.LDS_FAULTY), 1555 self.EINSTANCEFAULTYDISK, instance, 1556 "disk/%s on %s is faulty", idx, nname)
1557
1558 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1559 """Verify if there are any unknown volumes in the cluster. 1560 1561 The .os, .swap and backup volumes are ignored. All other volumes are 1562 reported as unknown. 1563 1564 @type reserved: L{ganeti.utils.FieldSet} 1565 @param reserved: a FieldSet of reserved volume names 1566 1567 """ 1568 for node, n_img in node_image.items(): 1569 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1570 # skip non-healthy nodes 1571 continue 1572 for volume in n_img.volumes: 1573 test = ((node not in node_vol_should or 1574 volume not in node_vol_should[node]) and 1575 not reserved.Matches(volume)) 1576 self._ErrorIf(test, self.ENODEORPHANLV, node, 1577 "volume %s is unknown", volume)
1578
1579 - def _VerifyOrphanInstances(self, instancelist, node_image):
1580 """Verify the list of running instances. 1581 1582 This checks what instances are running but unknown to the cluster. 1583 1584 """ 1585 for node, n_img in node_image.items(): 1586 for o_inst in n_img.instances: 1587 test = o_inst not in instancelist 1588 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node, 1589 "instance %s on node %s should not exist", o_inst, node)
1590
1591 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1592 """Verify N+1 Memory Resilience. 1593 1594 Check that if one single node dies we can still start all the 1595 instances it was primary for. 1596 1597 """ 1598 for node, n_img in node_image.items(): 1599 # This code checks that every node which is now listed as 1600 # secondary has enough memory to host all instances it is 1601 # supposed to should a single other node in the cluster fail. 1602 # FIXME: not ready for failover to an arbitrary node 1603 # FIXME: does not support file-backed instances 1604 # WARNING: we currently take into account down instances as well 1605 # as up ones, considering that even if they're down someone 1606 # might want to start them even in the event of a node failure. 1607 for prinode, instances in n_img.sbp.items(): 1608 needed_mem = 0 1609 for instance in instances: 1610 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance]) 1611 if bep[constants.BE_AUTO_BALANCE]: 1612 needed_mem += bep[constants.BE_MEMORY] 1613 test = n_img.mfree < needed_mem 1614 self._ErrorIf(test, self.ENODEN1, node, 1615 "not enough memory on to accommodate" 1616 " failovers should peer node %s fail", prinode)
1617
1618 - def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum, 1619 master_files):
1620 """Verifies and computes the node required file checksums. 1621 1622 @type ninfo: L{objects.Node} 1623 @param ninfo: the node to check 1624 @param nresult: the remote results for the node 1625 @param file_list: required list of files 1626 @param local_cksum: dictionary of local files and their checksums 1627 @param master_files: list of files that only masters should have 1628 1629 """ 1630 node = ninfo.name 1631 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1632 1633 remote_cksum = nresult.get(constants.NV_FILELIST, None) 1634 test = not isinstance(remote_cksum, dict) 1635 _ErrorIf(test, self.ENODEFILECHECK, node, 1636 "node hasn't returned file checksum data") 1637 if test: 1638 return 1639 1640 for file_name in file_list: 1641 node_is_mc = ninfo.master_candidate 1642 must_have = (file_name not in master_files) or node_is_mc 1643 # missing 1644 test1 = file_name not in remote_cksum 1645 # invalid checksum 1646 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name] 1647 # existing and good 1648 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name] 1649 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node, 1650 "file '%s' missing", file_name) 1651 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node, 1652 "file '%s' has wrong checksum", file_name) 1653 # not candidate and this is not a must-have file 1654 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node, 1655 "file '%s' should not exist on non master" 1656 " candidates (and the file is outdated)", file_name) 1657 # all good, except non-master/non-must have combination 1658 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node, 1659 "file '%s' should not exist" 1660 " on non master candidates", file_name)
1661
1662 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 1663 drbd_map):
1664    """Verifies the node DRBD status. 
1665  
1666    @type ninfo: L{objects.Node} 
1667    @param ninfo: the node to check 
1668    @param nresult: the remote results for the node 
1669    @param instanceinfo: the dict of instances 
1670    @param drbd_helper: the configured DRBD usermode helper 
1671    @param drbd_map: the DRBD map as returned by 
1672        L{ganeti.config.ConfigWriter.ComputeDRBDMap} 
1673  
1674    """ 
1675    node = ninfo.name 
1676    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 
1677  
1678    if drbd_helper: 
1679      helper_result = nresult.get(constants.NV_DRBDHELPER, None) 
1680      test = (helper_result is None) 
1681      _ErrorIf(test, self.ENODEDRBDHELPER, node, 
1682               "no drbd usermode helper returned") 
1683      if helper_result: 
1684        status, payload = helper_result 
1685        test = not status 
1686        _ErrorIf(test, self.ENODEDRBDHELPER, node, 
1687                 "drbd usermode helper check unsuccessful: %s", payload) 
1688        test = status and (payload != drbd_helper) 
1689        _ErrorIf(test, self.ENODEDRBDHELPER, node, 
1690                 "wrong drbd usermode helper: %s", payload) 
1691  
1692    # compute the DRBD minors 
1693    node_drbd = {} 
1694    for minor, instance in drbd_map[node].items(): 
1695      test = instance not in instanceinfo 
1696      _ErrorIf(test, self.ECLUSTERCFG, None, 
1697               "ghost instance '%s' in temporary DRBD map", instance) 
1698      # ghost instance should not be running, but otherwise we 
1699      # don't give double warnings (both ghost instance and 
1700      # unallocated minor in use) 
1701      if test: 
1702        node_drbd[minor] = (instance, False) 
1703      else: 
1704        instance = instanceinfo[instance] 
1705        node_drbd[minor] = (instance.name, instance.admin_up) 
1706  
1707    # and now check them 
1708    used_minors = nresult.get(constants.NV_DRBDLIST, []) 
1709    test = not isinstance(used_minors, (tuple, list)) 
1710    _ErrorIf(test, self.ENODEDRBD, node, 
1711             "cannot parse drbd status file: %s", str(used_minors)) 
1712    if test: 
1713      # we cannot check drbd status 
1714      return 
1715  
1716    for minor, (iname, must_exist) in node_drbd.items(): 
1717      test = minor not in used_minors and must_exist 
1718      _ErrorIf(test, self.ENODEDRBD, node, 
1719               "drbd minor %d of instance %s is not active", minor, iname) 
1720    for minor in used_minors: 
1721      test = minor not in node_drbd 
1722      _ErrorIf(test, self.ENODEDRBD, node, 
1723               "unallocated drbd minor %d is in use", minor)
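# Illustrative sketch (not part of the module): the DRBD verification is a
# two-way comparison between the minors the configuration expects on a node
# and the minors the node reports as in use.  All data below is invented.
expected = {0: ("inst1", True),   # minor -> (instance, should be active)
            1: ("inst2", False)}
used_minors = [0, 5]              # what the node's DRBD status reports

for minor, (iname, must_exist) in expected.items():
  if must_exist and minor not in used_minors:
    print("drbd minor %d of instance %s is not active" % (minor, iname))
for minor in used_minors:
  if minor not in expected:
    print("unallocated drbd minor %d is in use" % minor)  # -> minor 5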
1724
1725 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
1726 """Builds the node OS structures. 1727 1728 @type ninfo: L{objects.Node} 1729 @param ninfo: the node to check 1730 @param nresult: the remote results for the node 1731 @param nimg: the node image object 1732 1733 """ 1734 node = ninfo.name 1735 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1736 1737 remote_os = nresult.get(constants.NV_OSLIST, None) 1738 test = (not isinstance(remote_os, list) or 1739 not compat.all(isinstance(v, list) and len(v) == 7 1740 for v in remote_os)) 1741 1742 _ErrorIf(test, self.ENODEOS, node, 1743 "node hasn't returned valid OS data") 1744 1745 nimg.os_fail = test 1746 1747 if test: 1748 return 1749 1750 os_dict = {} 1751 1752 for (name, os_path, status, diagnose, 1753 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 1754 1755 if name not in os_dict: 1756 os_dict[name] = [] 1757 1758 # parameters is a list of lists instead of list of tuples due to 1759 # JSON lacking a real tuple type, fix it: 1760 parameters = [tuple(v) for v in parameters] 1761 os_dict[name].append((os_path, status, diagnose, 1762 set(variants), set(parameters), set(api_ver))) 1763 1764 nimg.oslist = os_dict
1765
1766 - def _VerifyNodeOS(self, ninfo, nimg, base):
1767    """Verifies the node OS list. 
1768  
1769    @type ninfo: L{objects.Node} 
1770    @param ninfo: the node to check 
1771    @param nimg: the node image object 
1772    @param base: the 'template' node we match against (e.g. from the master) 
1773  
1774    """ 
1775    node = ninfo.name 
1776    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 
1777  
1778    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 
1779  
1780    for os_name, os_data in nimg.oslist.items(): 
1781      assert os_data, "Empty OS status for OS %s?!" % os_name 
1782      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 
1783      _ErrorIf(not f_status, self.ENODEOS, node, 
1784               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 
1785      _ErrorIf(len(os_data) > 1, self.ENODEOS, node, 
1786               "OS '%s' has multiple entries (first one shadows the rest): %s", 
1787               os_name, utils.CommaJoin([v[0] for v in os_data])) 
1788      # this will be caught in the backend too 
1789      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api) 
1790               and not f_var, self.ENODEOS, node, 
1791               "OS %s with API at least %d does not declare any variant", 
1792               os_name, constants.OS_API_V15) 
1793      # comparisons with the 'base' image 
1794      test = os_name not in base.oslist 
1795      _ErrorIf(test, self.ENODEOS, node, 
1796               "Extra OS %s not present on reference node (%s)", 
1797               os_name, base.name) 
1798      if test: 
1799        continue 
1800      assert base.oslist[os_name], "Base node has empty OS status?" 
1801      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 
1802      if not b_status: 
1803        # base OS is invalid, skipping 
1804        continue 
1805      for kind, a, b in [("API version", f_api, b_api), 
1806                         ("variants list", f_var, b_var), 
1807                         ("parameters", f_param, b_param)]: 
1808        _ErrorIf(a != b, self.ENODEOS, node, 
1809                 "OS %s for %s differs from reference node %s: %s vs. %s", 
1810                 kind, os_name, base.name, 
1811                 utils.CommaJoin(a), utils.CommaJoin(b)) 
1812  
1813    # check any missing OSes 
1814    missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 
1815    _ErrorIf(missing, self.ENODEOS, node, 
1816             "OSes present on reference node %s but missing on this node: %s", 
1817             base.name, utils.CommaJoin(missing))
1818
1819 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1820 """Verifies and updates the node volume data. 1821 1822 This function will update a L{NodeImage}'s internal structures 1823 with data from the remote call. 1824 1825 @type ninfo: L{objects.Node} 1826 @param ninfo: the node to check 1827 @param nresult: the remote results for the node 1828 @param nimg: the node image object 1829 @param vg_name: the configured VG name 1830 1831 """ 1832 node = ninfo.name 1833 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1834 1835 nimg.lvm_fail = True 1836 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 1837 if vg_name is None: 1838 pass 1839 elif isinstance(lvdata, basestring): 1840 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s", 1841 utils.SafeEncode(lvdata)) 1842 elif not isinstance(lvdata, dict): 1843 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)") 1844 else: 1845 nimg.volumes = lvdata 1846 nimg.lvm_fail = False
1847
1848 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1849 """Verifies and updates the node instance list. 1850 1851 If the listing was successful, then updates this node's instance 1852 list. Otherwise, it marks the RPC call as failed for the instance 1853 list key. 1854 1855 @type ninfo: L{objects.Node} 1856 @param ninfo: the node to check 1857 @param nresult: the remote results for the node 1858 @param nimg: the node image object 1859 1860 """ 1861 idata = nresult.get(constants.NV_INSTANCELIST, None) 1862 test = not isinstance(idata, list) 1863 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed" 1864 " (instancelist): %s", utils.SafeEncode(str(idata))) 1865 if test: 1866 nimg.hyp_fail = True 1867 else: 1868 nimg.instances = idata
1869
1870 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1871 """Verifies and computes a node information map 1872 1873 @type ninfo: L{objects.Node} 1874 @param ninfo: the node to check 1875 @param nresult: the remote results for the node 1876 @param nimg: the node image object 1877 @param vg_name: the configured VG name 1878 1879 """ 1880 node = ninfo.name 1881 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1882 1883 # try to read free memory (from the hypervisor) 1884 hv_info = nresult.get(constants.NV_HVINFO, None) 1885 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 1886 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)") 1887 if not test: 1888 try: 1889 nimg.mfree = int(hv_info["memory_free"]) 1890 except (ValueError, TypeError): 1891 _ErrorIf(True, self.ENODERPC, node, 1892 "node returned invalid nodeinfo, check hypervisor") 1893 1894 # FIXME: devise a free space model for file based instances as well 1895 if vg_name is not None: 1896 test = (constants.NV_VGLIST not in nresult or 1897 vg_name not in nresult[constants.NV_VGLIST]) 1898 _ErrorIf(test, self.ENODELVM, node, 1899 "node didn't return data for the volume group '%s'" 1900 " - it is either missing or broken", vg_name) 1901 if not test: 1902 try: 1903 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 1904 except (ValueError, TypeError): 1905 _ErrorIf(True, self.ENODERPC, node, 1906 "node returned invalid LVM info, check LVM status")
1907
1908 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1909 """Gets per-disk status information for all instances. 1910 1911 @type nodelist: list of strings 1912 @param nodelist: Node names 1913 @type node_image: dict of (name, L{objects.Node}) 1914 @param node_image: Node objects 1915 @type instanceinfo: dict of (name, L{objects.Instance}) 1916 @param instanceinfo: Instance objects 1917 @rtype: {instance: {node: [(succes, payload)]}} 1918 @return: a dictionary of per-instance dictionaries with nodes as 1919 keys and disk information as values; the disk information is a 1920 list of tuples (success, payload) 1921 1922 """ 1923 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1924 1925 node_disks = {} 1926 node_disks_devonly = {} 1927 1928 for nname in nodelist: 1929 disks = [(inst, disk) 1930 for instlist in [node_image[nname].pinst, 1931 node_image[nname].sinst] 1932 for inst in instlist 1933 for disk in instanceinfo[inst].disks] 1934 1935 if not disks: 1936 # No need to collect data 1937 continue 1938 1939 node_disks[nname] = disks 1940 1941 # Creating copies as SetDiskID below will modify the objects and that can 1942 # lead to incorrect data returned from nodes 1943 devonly = [dev.Copy() for (_, dev) in disks] 1944 1945 for dev in devonly: 1946 self.cfg.SetDiskID(dev, nname) 1947 1948 node_disks_devonly[nname] = devonly 1949 1950 assert len(node_disks) == len(node_disks_devonly) 1951 1952 # Collect data from all nodes with disks 1953 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 1954 node_disks_devonly) 1955 1956 assert len(result) == len(node_disks) 1957 1958 instdisk = {} 1959 1960 for (nname, nres) in result.items(): 1961 disks = node_disks[nname] 1962 1963 if nres.offline: 1964 # No data from this node 1965 data = len(disks) * [(False, "node offline")] 1966 else: 1967 msg = nres.fail_msg 1968 _ErrorIf(msg, self.ENODERPC, nname, 1969 "while getting disk information: %s", msg) 1970 if msg: 1971 # No data from this node 1972 data = len(disks) * [(False, msg)] 1973 else: 1974 data = [] 1975 for idx, i in enumerate(nres.payload): 1976 if isinstance(i, (tuple, list)) and len(i) == 2: 1977 data.append(i) 1978 else: 1979 logging.warning("Invalid result from node %s, entry %d: %s", 1980 nname, idx, i) 1981 data.append((False, "Invalid result from the remote node")) 1982 1983 for ((inst, _), status) in zip(disks, data): 1984 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 1985 1986 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 1987 len(nnames) <= len(instanceinfo[inst].all_nodes) and 1988 compat.all(isinstance(s, (tuple, list)) and 1989 len(s) == 2 for s in statuses) 1990 for inst, nnames in instdisk.items() 1991 for nname, statuses in nnames.items()) 1992 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure" 1993 1994 return instdisk
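# Illustrative sketch (not part of the module): how the per-node RPC results
# are folded back into a per-instance, per-node dictionary with setdefault().
# The node/instance names and status payloads below are invented.
node_disks = {"node1": [("inst1", "disk0"), ("inst1", "disk1"),
                        ("inst2", "disk0")]}
payloads = {"node1": [(True, "ok"), (True, "ok"), (False, "degraded")]}

instdisk = {}
for nname, disks in node_disks.items():
  for ((inst, _), status) in zip(disks, payloads[nname]):
    instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

print(instdisk["inst1"]["node1"])   # [(True, 'ok'), (True, 'ok')]
print(instdisk["inst2"]["node1"])   # [(False, 'degraded')]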
1995
1996 - def BuildHooksEnv(self):
1997    """Build hooks env. 
1998  
1999    Cluster-Verify hooks are run only in the post phase; their failure is 
2000    logged in the verify output and makes the verification fail. 
2001  
2002    """ 
2003    all_nodes = self.cfg.GetNodeList() 
2004    env = { 
2005      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()) 
2006      } 
2007    for node in self.cfg.GetAllNodesInfo().values(): 
2008      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags()) 
2009  
2010    return env, [], all_nodes
2011
2012 - def Exec(self, feedback_fn):
2013 """Verify integrity of cluster, performing various test on nodes. 2014 2015 """ 2016 self.bad = False 2017 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 2018 verbose = self.op.verbose 2019 self._feedback_fn = feedback_fn 2020 feedback_fn("* Verifying global settings") 2021 for msg in self.cfg.VerifyConfig(): 2022 _ErrorIf(True, self.ECLUSTERCFG, None, msg) 2023 2024 # Check the cluster certificates 2025 for cert_filename in constants.ALL_CERT_FILES: 2026 (errcode, msg) = _VerifyCertificate(cert_filename) 2027 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode) 2028 2029 vg_name = self.cfg.GetVGName() 2030 drbd_helper = self.cfg.GetDRBDHelper() 2031 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors 2032 cluster = self.cfg.GetClusterInfo() 2033 nodelist = utils.NiceSort(self.cfg.GetNodeList()) 2034 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] 2035 instancelist = utils.NiceSort(self.cfg.GetInstanceList()) 2036 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname)) 2037 for iname in instancelist) 2038 i_non_redundant = [] # Non redundant instances 2039 i_non_a_balanced = [] # Non auto-balanced instances 2040 n_offline = 0 # Count of offline nodes 2041 n_drained = 0 # Count of nodes being drained 2042 node_vol_should = {} 2043 2044 # FIXME: verify OS list 2045 # do local checksums 2046 master_files = [constants.CLUSTER_CONF_FILE] 2047 master_node = self.master_node = self.cfg.GetMasterNode() 2048 master_ip = self.cfg.GetMasterIP() 2049 2050 file_names = ssconf.SimpleStore().GetFileList() 2051 file_names.extend(constants.ALL_CERT_FILES) 2052 file_names.extend(master_files) 2053 if cluster.modify_etc_hosts: 2054 file_names.append(constants.ETC_HOSTS) 2055 2056 local_checksums = utils.FingerprintFiles(file_names) 2057 2058 feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) 2059 node_verify_param = { 2060 constants.NV_FILELIST: file_names, 2061 constants.NV_NODELIST: [node.name for node in nodeinfo 2062 if not node.offline], 2063 constants.NV_HYPERVISOR: hypervisors, 2064 constants.NV_NODENETTEST: [(node.name, node.primary_ip, 2065 node.secondary_ip) for node in nodeinfo 2066 if not node.offline], 2067 constants.NV_INSTANCELIST: hypervisors, 2068 constants.NV_VERSION: None, 2069 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 2070 constants.NV_NODESETUP: None, 2071 constants.NV_TIME: None, 2072 constants.NV_MASTERIP: (master_node, master_ip), 2073 constants.NV_OSLIST: None, 2074 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 2075 } 2076 2077 if vg_name is not None: 2078 node_verify_param[constants.NV_VGLIST] = None 2079 node_verify_param[constants.NV_LVLIST] = vg_name 2080 node_verify_param[constants.NV_PVLIST] = [vg_name] 2081 node_verify_param[constants.NV_DRBDLIST] = None 2082 2083 if drbd_helper: 2084 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 2085 2086 # Build our expected cluster state 2087 node_image = dict((node.name, self.NodeImage(offline=node.offline, 2088 name=node.name, 2089 vm_capable=node.vm_capable)) 2090 for node in nodeinfo) 2091 2092 for instance in instancelist: 2093 inst_config = instanceinfo[instance] 2094 2095 for nname in inst_config.all_nodes: 2096 if nname not in node_image: 2097 # ghost node 2098 gnode = self.NodeImage(name=nname) 2099 gnode.ghost = True 2100 node_image[nname] = gnode 2101 2102 inst_config.MapLVsByNode(node_vol_should) 2103 2104 pnode = inst_config.primary_node 2105 node_image[pnode].pinst.append(instance) 2106 2107 for snode in inst_config.secondary_nodes: 2108 
nimg = node_image[snode] 2109 nimg.sinst.append(instance) 2110 if pnode not in nimg.sbp: 2111 nimg.sbp[pnode] = [] 2112 nimg.sbp[pnode].append(instance) 2113 2114 # At this point, we have the in-memory data structures complete, 2115 # except for the runtime information, which we'll gather next 2116 2117 # Due to the way our RPC system works, exact response times cannot be 2118 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 2119 # time before and after executing the request, we can at least have a time 2120 # window. 2121 nvinfo_starttime = time.time() 2122 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, 2123 self.cfg.GetClusterName()) 2124 nvinfo_endtime = time.time() 2125 2126 all_drbd_map = self.cfg.ComputeDRBDMap() 2127 2128 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist)) 2129 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo) 2130 2131 feedback_fn("* Verifying node status") 2132 2133 refos_img = None 2134 2135 for node_i in nodeinfo: 2136 node = node_i.name 2137 nimg = node_image[node] 2138 2139 if node_i.offline: 2140 if verbose: 2141 feedback_fn("* Skipping offline node %s" % (node,)) 2142 n_offline += 1 2143 continue 2144 2145 if node == master_node: 2146 ntype = "master" 2147 elif node_i.master_candidate: 2148 ntype = "master candidate" 2149 elif node_i.drained: 2150 ntype = "drained" 2151 n_drained += 1 2152 else: 2153 ntype = "regular" 2154 if verbose: 2155 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 2156 2157 msg = all_nvinfo[node].fail_msg 2158 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg) 2159 if msg: 2160 nimg.rpc_fail = True 2161 continue 2162 2163 nresult = all_nvinfo[node].payload 2164 2165 nimg.call_ok = self._VerifyNode(node_i, nresult) 2166 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 2167 self._VerifyNodeNetwork(node_i, nresult) 2168 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums, 2169 master_files) 2170 2171 if nimg.vm_capable: 2172 self._VerifyNodeLVM(node_i, nresult, vg_name) 2173 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper, 2174 all_drbd_map) 2175 2176 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 2177 self._UpdateNodeInstances(node_i, nresult, nimg) 2178 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 2179 self._UpdateNodeOS(node_i, nresult, nimg) 2180 if not nimg.os_fail: 2181 if refos_img is None: 2182 refos_img = nimg 2183 self._VerifyNodeOS(node_i, nimg, refos_img) 2184 2185 feedback_fn("* Verifying instance status") 2186 for instance in instancelist: 2187 if verbose: 2188 feedback_fn("* Verifying instance %s" % instance) 2189 inst_config = instanceinfo[instance] 2190 self._VerifyInstance(instance, inst_config, node_image, 2191 instdisk[instance]) 2192 inst_nodes_offline = [] 2193 2194 pnode = inst_config.primary_node 2195 pnode_img = node_image[pnode] 2196 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2197 self.ENODERPC, pnode, "instance %s, connection to" 2198 " primary node failed", instance) 2199 2200 if pnode_img.offline: 2201 inst_nodes_offline.append(pnode) 2202 2203 # If the instance is non-redundant we cannot survive losing its primary 2204 # node, so we are not N+1 compliant. On the other hand we have no disk 2205 # templates with more than one secondary so that situation is not well 2206 # supported either. 
2207 # FIXME: does not support file-backed instances 2208 if not inst_config.secondary_nodes: 2209 i_non_redundant.append(instance) 2210 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT, 2211 instance, "instance has multiple secondary nodes: %s", 2212 utils.CommaJoin(inst_config.secondary_nodes), 2213 code=self.ETYPE_WARNING) 2214 2215 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 2216 i_non_a_balanced.append(instance) 2217 2218 for snode in inst_config.secondary_nodes: 2219 s_img = node_image[snode] 2220 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode, 2221 "instance %s, connection to secondary node failed", instance) 2222 2223 if s_img.offline: 2224 inst_nodes_offline.append(snode) 2225 2226 # warn that the instance lives on offline nodes 2227 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance, 2228 "instance lives on offline node(s) %s", 2229 utils.CommaJoin(inst_nodes_offline)) 2230 # ... or ghost/non-vm_capable nodes 2231 for node in inst_config.all_nodes: 2232 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance, 2233 "instance lives on ghost node %s", node) 2234 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE, 2235 instance, "instance lives on non-vm_capable node %s", node) 2236 2237 feedback_fn("* Verifying orphan volumes") 2238 reserved = utils.FieldSet(*cluster.reserved_lvs) 2239 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 2240 2241 feedback_fn("* Verifying orphan instances") 2242 self._VerifyOrphanInstances(instancelist, node_image) 2243 2244 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 2245 feedback_fn("* Verifying N+1 Memory redundancy") 2246 self._VerifyNPlusOneMemory(node_image, instanceinfo) 2247 2248 feedback_fn("* Other Notes") 2249 if i_non_redundant: 2250 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 2251 % len(i_non_redundant)) 2252 2253 if i_non_a_balanced: 2254 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 2255 % len(i_non_a_balanced)) 2256 2257 if n_offline: 2258 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 2259 2260 if n_drained: 2261 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 2262 2263 return not self.bad
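# Illustrative sketch (not part of the module): how Exec() builds the
# secondary-by-primary ("sbp") map that the N+1 memory check consumes later.
# The _NodeImageSketch class and the instance layout below are invented.
class _NodeImageSketch(object):
  def __init__(self, name):
    self.name = name
    self.pinst = []   # instances with this node as primary
    self.sinst = []   # instances with this node as secondary
    self.sbp = {}     # primary node -> instances we are secondary for

instances = {"inst1": ("node1", ["node2"]),   # name -> (primary, secondaries)
             "inst2": ("node1", ["node2"]),
             "inst3": ("node2", ["node1"])}
node_image = dict((n, _NodeImageSketch(n)) for n in ("node1", "node2"))

for name, (pnode, snodes) in instances.items():
  node_image[pnode].pinst.append(name)
  for snode in snodes:
    nimg = node_image[snode]
    nimg.sinst.append(name)
    nimg.sbp.setdefault(pnode, []).append(name)

print(sorted(node_image["node2"].sbp["node1"]))   # ['inst1', 'inst2']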
2264
2265 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2266    """Analyze the post-hooks' result. 
2267  
2268    This method analyses the hook result, handles it, and sends some 
2269    nicely-formatted feedback back to the user. 
2270  
2271    @param phase: one of L{constants.HOOKS_PHASE_POST} or 
2272        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 
2273    @param hooks_results: the results of the multi-node hooks rpc call 
2274    @param feedback_fn: function used to send feedback back to the caller 
2275    @param lu_result: previous Exec result 
2276    @return: the new Exec result, based on the previous result 
2277        and hook results 
2278  
2279    """ 
2280    # We only really run POST phase hooks, and are only interested in 
2281    # their results 
2282    if phase == constants.HOOKS_PHASE_POST: 
2283      # Used to change hooks' output to proper indentation 
2284      indent_re = re.compile('^', re.M) 
2285      feedback_fn("* Hooks Results") 
2286      assert hooks_results, "invalid result from hooks" 
2287  
2288      for node_name in hooks_results: 
2289        res = hooks_results[node_name] 
2290        msg = res.fail_msg 
2291        test = msg and not res.offline 
2292        self._ErrorIf(test, self.ENODEHOOKS, node_name, 
2293                      "Communication failure in hooks execution: %s", msg) 
2294        if res.offline or msg: 
2295          # No need to investigate payload if node is offline or gave an error. 
2296          # Manually override lu_result here, as _ErrorIf only 
2297          # overrides self.bad 
2298          lu_result = 1 
2299          continue 
2300        for script, hkr, output in res.payload: 
2301          test = hkr == constants.HKR_FAIL 
2302          self._ErrorIf(test, self.ENODEHOOKS, node_name, 
2303                        "Script %s failed, output:", script) 
2304          if test: 
2305            output = indent_re.sub(' ', output) 
2306            feedback_fn("%s" % output) 
2307            lu_result = 0 
2308  
2309    return lu_result
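# Illustrative sketch (not part of the module): the indent_re trick above
# prefixes every line of a failed hook script's output so it nests under the
# "Script ... failed" message.  The sample hook output is invented.
import re

indent_re = re.compile('^', re.M)
output = "ERROR: bridge xen-br0 missing\ncheck your network configuration"
print(indent_re.sub('      ', output))
#       ERROR: bridge xen-br0 missing
#       check your network configuration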
2310
2311 2312 -class LUVerifyDisks(NoHooksLU):
2313 """Verifies the cluster disks status. 2314 2315 """ 2316 REQ_BGL = False 2317
2318 - def ExpandNames(self):
2319 self.needed_locks = { 2320 locking.LEVEL_NODE: locking.ALL_SET, 2321 locking.LEVEL_INSTANCE: locking.ALL_SET, 2322 } 2323 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2324
2325 - def Exec(self, feedback_fn):
2326 """Verify integrity of cluster disks. 2327 2328 @rtype: tuple of three items 2329 @return: a tuple of (dict of node-to-node_error, list of instances 2330 which need activate-disks, dict of instance: (node, volume) for 2331 missing volumes 2332 2333 """ 2334 result = res_nodes, res_instances, res_missing = {}, [], {} 2335 2336 vg_name = self.cfg.GetVGName() 2337 nodes = utils.NiceSort(self.cfg.GetNodeList()) 2338 instances = [self.cfg.GetInstanceInfo(name) 2339 for name in self.cfg.GetInstanceList()] 2340 2341 nv_dict = {} 2342 for inst in instances: 2343 inst_lvs = {} 2344 if (not inst.admin_up or 2345 inst.disk_template not in constants.DTS_NET_MIRROR): 2346 continue 2347 inst.MapLVsByNode(inst_lvs) 2348 # transform { iname: {node: [vol,],},} to {(node, vol): iname} 2349 for node, vol_list in inst_lvs.iteritems(): 2350 for vol in vol_list: 2351 nv_dict[(node, vol)] = inst 2352 2353 if not nv_dict: 2354 return result 2355 2356 node_lvs = self.rpc.call_lv_list(nodes, vg_name) 2357 2358 for node in nodes: 2359 # node_volume 2360 node_res = node_lvs[node] 2361 if node_res.offline: 2362 continue 2363 msg = node_res.fail_msg 2364 if msg: 2365 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 2366 res_nodes[node] = msg 2367 continue 2368 2369 lvs = node_res.payload 2370 for lv_name, (_, _, lv_online) in lvs.items(): 2371 inst = nv_dict.pop((node, lv_name), None) 2372 if (not lv_online and inst is not None 2373 and inst.name not in res_instances): 2374 res_instances.append(inst.name) 2375 2376 # any leftover items in nv_dict are missing LVs, let's arrange the 2377 # data better 2378 for key, inst in nv_dict.iteritems(): 2379 if inst.name not in res_missing: 2380 res_missing[inst.name] = [] 2381 res_missing[inst.name].append(key) 2382 2383 return result
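# Illustrative sketch (not part of the module): LUVerifyDisks builds a reverse
# map of (node, logical volume) -> instance and then pops every LV a node
# actually reports; whatever is left over is missing.  Data below is invented.
nv_dict = {("node1", "xenvg/inst1-disk0"): "inst1",
           ("node2", "xenvg/inst1-disk0"): "inst1",
           ("node1", "xenvg/inst2-disk0"): "inst2"}
reported = {"node1": ["xenvg/inst1-disk0"],           # inst2's LV is gone
            "node2": ["xenvg/inst1-disk0"]}

for node, lvs in reported.items():
  for lv_name in lvs:
    nv_dict.pop((node, lv_name), None)

res_missing = {}
for (node, lv_name), inst in nv_dict.items():
  res_missing.setdefault(inst, []).append((node, lv_name))
print(res_missing)   # {'inst2': [('node1', 'xenvg/inst2-disk0')]}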
2384
2385 2386 -class LURepairDiskSizes(NoHooksLU):
2387 """Verifies the cluster disks sizes. 2388 2389 """ 2390 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))] 2391 REQ_BGL = False 2392
2393 - def ExpandNames(self):
2394 if self.op.instances: 2395 self.wanted_names = [] 2396 for name in self.op.instances: 2397 full_name = _ExpandInstanceName(self.cfg, name) 2398 self.wanted_names.append(full_name) 2399 self.needed_locks = { 2400 locking.LEVEL_NODE: [], 2401 locking.LEVEL_INSTANCE: self.wanted_names, 2402 } 2403 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 2404 else: 2405 self.wanted_names = None 2406 self.needed_locks = { 2407 locking.LEVEL_NODE: locking.ALL_SET, 2408 locking.LEVEL_INSTANCE: locking.ALL_SET, 2409 } 2410 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2411
2412 - def DeclareLocks(self, level):
2413 if level == locking.LEVEL_NODE and self.wanted_names is not None: 2414 self._LockInstancesNodes(primary_only=True)
2415
2416 - def CheckPrereq(self):
2417 """Check prerequisites. 2418 2419 This only checks the optional instance list against the existing names. 2420 2421 """ 2422 if self.wanted_names is None: 2423 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 2424 2425 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 2426 in self.wanted_names]
2427
2428 - def _EnsureChildSizes(self, disk):
2429 """Ensure children of the disk have the needed disk size. 2430 2431 This is valid mainly for DRBD8 and fixes an issue where the 2432 children have smaller disk size. 2433 2434 @param disk: an L{ganeti.objects.Disk} object 2435 2436 """ 2437 if disk.dev_type == constants.LD_DRBD8: 2438 assert disk.children, "Empty children for DRBD8?" 2439 fchild = disk.children[0] 2440 mismatch = fchild.size < disk.size 2441 if mismatch: 2442 self.LogInfo("Child disk has size %d, parent %d, fixing", 2443 fchild.size, disk.size) 2444 fchild.size = disk.size 2445 2446 # and we recurse on this child only, not on the metadev 2447 return self._EnsureChildSizes(fchild) or mismatch 2448 else: 2449 return False
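# Illustrative sketch (not part of the module): a minimal stand-in for the
# DRBD8 child-size fixup, showing why the function recurses only on the data
# child (not the metadata device) and reports whether anything changed.  The
# _DiskSketch class is invented.
class _DiskSketch(object):
  def __init__(self, size, children=None, drbd=False):
    self.size = size
    self.children = children or []
    self.drbd = drbd

def ensure_child_sizes(disk):
  if not disk.drbd:
    return False
  data_child = disk.children[0]     # children[1] would be the metadata device
  mismatch = data_child.size < disk.size
  if mismatch:
    data_child.size = disk.size
  return ensure_child_sizes(data_child) or mismatch

lv = _DiskSketch(size=1000)
drbd = _DiskSketch(size=1024, children=[lv, _DiskSketch(size=128)], drbd=True)
print("%s %s" % (ensure_child_sizes(drbd), lv.size))   # True 1024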
2450
2451 - def Exec(self, feedback_fn):
2452 """Verify the size of cluster disks. 2453 2454 """ 2455 # TODO: check child disks too 2456 # TODO: check differences in size between primary/secondary nodes 2457 per_node_disks = {} 2458 for instance in self.wanted_instances: 2459 pnode = instance.primary_node 2460 if pnode not in per_node_disks: 2461 per_node_disks[pnode] = [] 2462 for idx, disk in enumerate(instance.disks): 2463 per_node_disks[pnode].append((instance, idx, disk)) 2464 2465 changed = [] 2466 for node, dskl in per_node_disks.items(): 2467 newl = [v[2].Copy() for v in dskl] 2468 for dsk in newl: 2469 self.cfg.SetDiskID(dsk, node) 2470 result = self.rpc.call_blockdev_getsizes(node, newl) 2471 if result.fail_msg: 2472 self.LogWarning("Failure in blockdev_getsizes call to node" 2473 " %s, ignoring", node) 2474 continue 2475 if len(result.data) != len(dskl): 2476 self.LogWarning("Invalid result from node %s, ignoring node results", 2477 node) 2478 continue 2479 for ((instance, idx, disk), size) in zip(dskl, result.data): 2480 if size is None: 2481 self.LogWarning("Disk %d of instance %s did not return size" 2482 " information, ignoring", idx, instance.name) 2483 continue 2484 if not isinstance(size, (int, long)): 2485 self.LogWarning("Disk %d of instance %s did not return valid" 2486 " size information, ignoring", idx, instance.name) 2487 continue 2488 size = size >> 20 2489 if size != disk.size: 2490 self.LogInfo("Disk %d of instance %s has mismatched size," 2491 " correcting: recorded %d, actual %d", idx, 2492 instance.name, disk.size, size) 2493 disk.size = size 2494 self.cfg.Update(instance, feedback_fn) 2495 changed.append((instance.name, idx, size)) 2496 if self._EnsureChildSizes(disk): 2497 self.cfg.Update(instance, feedback_fn) 2498 changed.append((instance.name, idx, disk.size)) 2499 return changed
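# Illustrative sketch (not part of the module): the ">> 20" above converts the
# byte count reported by the node into the MiB unit stored in the
# configuration, so the two can be compared directly.  Sizes are invented.
size_bytes = 10737418240          # 10 GiB as reported for one disk
size_mib = size_bytes >> 20       # same as size_bytes // (1024 * 1024)
recorded_mib = 10240
print("%d %s" % (size_mib, size_mib == recorded_mib))   # 10240 True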
2500
2501 2502 -class LURenameCluster(LogicalUnit):
2503 """Rename the cluster. 2504 2505 """ 2506 HPATH = "cluster-rename" 2507 HTYPE = constants.HTYPE_CLUSTER 2508 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)] 2509
2510 - def BuildHooksEnv(self):
2511 """Build hooks env. 2512 2513 """ 2514 env = { 2515 "OP_TARGET": self.cfg.GetClusterName(), 2516 "NEW_NAME": self.op.name, 2517 } 2518 mn = self.cfg.GetMasterNode() 2519 all_nodes = self.cfg.GetNodeList() 2520 return env, [mn], all_nodes
2521
2522 - def CheckPrereq(self):
2523 """Verify that the passed name is a valid one. 2524 2525 """ 2526 hostname = netutils.GetHostname(name=self.op.name, 2527 family=self.cfg.GetPrimaryIPFamily()) 2528 2529 new_name = hostname.name 2530 self.ip = new_ip = hostname.ip 2531 old_name = self.cfg.GetClusterName() 2532 old_ip = self.cfg.GetMasterIP() 2533 if new_name == old_name and new_ip == old_ip: 2534 raise errors.OpPrereqError("Neither the name nor the IP address of the" 2535 " cluster has changed", 2536 errors.ECODE_INVAL) 2537 if new_ip != old_ip: 2538 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 2539 raise errors.OpPrereqError("The given cluster IP address (%s) is" 2540 " reachable on the network" % 2541 new_ip, errors.ECODE_NOTUNIQUE) 2542 2543 self.op.name = new_name
2544
2545 - def Exec(self, feedback_fn):
2546 """Rename the cluster. 2547 2548 """ 2549 clustername = self.op.name 2550 ip = self.ip 2551 2552 # shutdown the master IP 2553 master = self.cfg.GetMasterNode() 2554 result = self.rpc.call_node_stop_master(master, False) 2555 result.Raise("Could not disable the master role") 2556 2557 try: 2558 cluster = self.cfg.GetClusterInfo() 2559 cluster.cluster_name = clustername 2560 cluster.master_ip = ip 2561 self.cfg.Update(cluster, feedback_fn) 2562 2563 # update the known hosts file 2564 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE) 2565 node_list = self.cfg.GetNodeList() 2566 try: 2567 node_list.remove(master) 2568 except ValueError: 2569 pass 2570 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE) 2571 finally: 2572 result = self.rpc.call_node_start_master(master, False, False) 2573 msg = result.fail_msg 2574 if msg: 2575 self.LogWarning("Could not re-enable the master role on" 2576 " the master, please restart manually: %s", msg) 2577 2578 return clustername
2579
2580 2581 -class LUSetClusterParams(LogicalUnit):
2582 """Change the parameters of the cluster. 2583 2584 """ 2585 HPATH = "cluster-modify" 2586 HTYPE = constants.HTYPE_CLUSTER 2587 _OP_PARAMS = [ 2588 ("vg_name", None, ht.TMaybeString), 2589 ("enabled_hypervisors", None, 2590 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue), 2591 ht.TNone)), 2592 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2593 ht.TNone)), 2594 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)), 2595 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2596 ht.TNone)), 2597 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2598 ht.TNone)), 2599 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)), 2600 ("uid_pool", None, ht.NoType), 2601 ("add_uids", None, ht.NoType), 2602 ("remove_uids", None, ht.NoType), 2603 ("maintain_node_health", None, ht.TMaybeBool), 2604 ("prealloc_wipe_disks", None, ht.TMaybeBool), 2605 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)), 2606 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)), 2607 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)), 2608 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)), 2609 ("hidden_os", None, ht.TOr(ht.TListOf(\ 2610 ht.TAnd(ht.TList, 2611 ht.TIsLength(2), 2612 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), 2613 ht.TNone)), 2614 ("blacklisted_os", None, ht.TOr(ht.TListOf(\ 2615 ht.TAnd(ht.TList, 2616 ht.TIsLength(2), 2617 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), 2618 ht.TNone)), 2619 ] 2620 REQ_BGL = False 2621
2622 - def CheckArguments(self):
2623 """Check parameters 2624 2625 """ 2626 if self.op.uid_pool: 2627 uidpool.CheckUidPool(self.op.uid_pool) 2628 2629 if self.op.add_uids: 2630 uidpool.CheckUidPool(self.op.add_uids) 2631 2632 if self.op.remove_uids: 2633 uidpool.CheckUidPool(self.op.remove_uids)
2634
2635 - def ExpandNames(self):
2636 # FIXME: in the future maybe other cluster params won't require checking on 2637 # all nodes to be modified. 2638 self.needed_locks = { 2639 locking.LEVEL_NODE: locking.ALL_SET, 2640 } 2641 self.share_locks[locking.LEVEL_NODE] = 1
2642
2643 - def BuildHooksEnv(self):
2644 """Build hooks env. 2645 2646 """ 2647 env = { 2648 "OP_TARGET": self.cfg.GetClusterName(), 2649 "NEW_VG_NAME": self.op.vg_name, 2650 } 2651 mn = self.cfg.GetMasterNode() 2652 return env, [mn], [mn]
2653
2654 - def CheckPrereq(self):
2655 """Check prerequisites. 2656 2657 This checks whether the given params don't conflict and 2658 if the given volume group is valid. 2659 2660 """ 2661 if self.op.vg_name is not None and not self.op.vg_name: 2662 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 2663 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 2664 " instances exist", errors.ECODE_INVAL) 2665 2666 if self.op.drbd_helper is not None and not self.op.drbd_helper: 2667 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 2668 raise errors.OpPrereqError("Cannot disable drbd helper while" 2669 " drbd-based instances exist", 2670 errors.ECODE_INVAL) 2671 2672 node_list = self.acquired_locks[locking.LEVEL_NODE] 2673 2674 # if vg_name not None, checks given volume group on all nodes 2675 if self.op.vg_name: 2676 vglist = self.rpc.call_vg_list(node_list) 2677 for node in node_list: 2678 msg = vglist[node].fail_msg 2679 if msg: 2680 # ignoring down node 2681 self.LogWarning("Error while gathering data on node %s" 2682 " (ignoring node): %s", node, msg) 2683 continue 2684 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 2685 self.op.vg_name, 2686 constants.MIN_VG_SIZE) 2687 if vgstatus: 2688 raise errors.OpPrereqError("Error on node '%s': %s" % 2689 (node, vgstatus), errors.ECODE_ENVIRON) 2690 2691 if self.op.drbd_helper: 2692 # checks given drbd helper on all nodes 2693 helpers = self.rpc.call_drbd_helper(node_list) 2694 for node in node_list: 2695 ninfo = self.cfg.GetNodeInfo(node) 2696 if ninfo.offline: 2697 self.LogInfo("Not checking drbd helper on offline node %s", node) 2698 continue 2699 msg = helpers[node].fail_msg 2700 if msg: 2701 raise errors.OpPrereqError("Error checking drbd helper on node" 2702 " '%s': %s" % (node, msg), 2703 errors.ECODE_ENVIRON) 2704 node_helper = helpers[node].payload 2705 if node_helper != self.op.drbd_helper: 2706 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 2707 (node, node_helper), errors.ECODE_ENVIRON) 2708 2709 self.cluster = cluster = self.cfg.GetClusterInfo() 2710 # validate params changes 2711 if self.op.beparams: 2712 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 2713 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 2714 2715 if self.op.nicparams: 2716 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 2717 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 2718 objects.NIC.CheckParameterSyntax(self.new_nicparams) 2719 nic_errors = [] 2720 2721 # check all instances for consistency 2722 for instance in self.cfg.GetAllInstancesInfo().values(): 2723 for nic_idx, nic in enumerate(instance.nics): 2724 params_copy = copy.deepcopy(nic.nicparams) 2725 params_filled = objects.FillDict(self.new_nicparams, params_copy) 2726 2727 # check parameter syntax 2728 try: 2729 objects.NIC.CheckParameterSyntax(params_filled) 2730 except errors.ConfigurationError, err: 2731 nic_errors.append("Instance %s, nic/%d: %s" % 2732 (instance.name, nic_idx, err)) 2733 2734 # if we're moving instances to routed, check that they have an ip 2735 target_mode = params_filled[constants.NIC_MODE] 2736 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 2737 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" % 2738 (instance.name, nic_idx)) 2739 if nic_errors: 2740 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 2741 "\n".join(nic_errors)) 2742 2743 # hypervisor list/parameters 2744 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 2745 if 
self.op.hvparams: 2746 for hv_name, hv_dict in self.op.hvparams.items(): 2747 if hv_name not in self.new_hvparams: 2748 self.new_hvparams[hv_name] = hv_dict 2749 else: 2750 self.new_hvparams[hv_name].update(hv_dict) 2751 2752 # os hypervisor parameters 2753 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 2754 if self.op.os_hvp: 2755 for os_name, hvs in self.op.os_hvp.items(): 2756 if os_name not in self.new_os_hvp: 2757 self.new_os_hvp[os_name] = hvs 2758 else: 2759 for hv_name, hv_dict in hvs.items(): 2760 if hv_name not in self.new_os_hvp[os_name]: 2761 self.new_os_hvp[os_name][hv_name] = hv_dict 2762 else: 2763 self.new_os_hvp[os_name][hv_name].update(hv_dict) 2764 2765 # os parameters 2766 self.new_osp = objects.FillDict(cluster.osparams, {}) 2767 if self.op.osparams: 2768 for os_name, osp in self.op.osparams.items(): 2769 if os_name not in self.new_osp: 2770 self.new_osp[os_name] = {} 2771 2772 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 2773 use_none=True) 2774 2775 if not self.new_osp[os_name]: 2776 # we removed all parameters 2777 del self.new_osp[os_name] 2778 else: 2779 # check the parameter validity (remote check) 2780 _CheckOSParams(self, False, [self.cfg.GetMasterNode()], 2781 os_name, self.new_osp[os_name]) 2782 2783 # changes to the hypervisor list 2784 if self.op.enabled_hypervisors is not None: 2785 self.hv_list = self.op.enabled_hypervisors 2786 for hv in self.hv_list: 2787 # if the hypervisor doesn't already exist in the cluster 2788 # hvparams, we initialize it to empty, and then (in both 2789 # cases) we make sure to fill the defaults, as we might not 2790 # have a complete defaults list if the hypervisor wasn't 2791 # enabled before 2792 if hv not in new_hvp: 2793 new_hvp[hv] = {} 2794 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 2795 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 2796 else: 2797 self.hv_list = cluster.enabled_hypervisors 2798 2799 if self.op.hvparams or self.op.enabled_hypervisors is not None: 2800 # either the enabled list has changed, or the parameters have, validate 2801 for hv_name, hv_params in self.new_hvparams.items(): 2802 if ((self.op.hvparams and hv_name in self.op.hvparams) or 2803 (self.op.enabled_hypervisors and 2804 hv_name in self.op.enabled_hypervisors)): 2805 # either this is a new hypervisor, or its parameters have changed 2806 hv_class = hypervisor.GetHypervisor(hv_name) 2807 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2808 hv_class.CheckParameterSyntax(hv_params) 2809 _CheckHVParams(self, node_list, hv_name, hv_params) 2810 2811 if self.op.os_hvp: 2812 # no need to check any newly-enabled hypervisors, since the 2813 # defaults have already been checked in the above code-block 2814 for os_name, os_hvp in self.new_os_hvp.items(): 2815 for hv_name, hv_params in os_hvp.items(): 2816 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2817 # we need to fill in the new os_hvp on top of the actual hv_p 2818 cluster_defaults = self.new_hvparams.get(hv_name, {}) 2819 new_osp = objects.FillDict(cluster_defaults, hv_params) 2820 hv_class = hypervisor.GetHypervisor(hv_name) 2821 hv_class.CheckParameterSyntax(new_osp) 2822 _CheckHVParams(self, node_list, hv_name, new_osp) 2823 2824 if self.op.default_iallocator: 2825 alloc_script = utils.FindFile(self.op.default_iallocator, 2826 constants.IALLOCATOR_SEARCH_PATH, 2827 os.path.isfile) 2828 if alloc_script is None: 2829 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 2830 " 
specified" % self.op.default_iallocator, 2831 errors.ECODE_INVAL)
2832
2833 - def Exec(self, feedback_fn):
2834 """Change the parameters of the cluster. 2835 2836 """ 2837 if self.op.vg_name is not None: 2838 new_volume = self.op.vg_name 2839 if not new_volume: 2840 new_volume = None 2841 if new_volume != self.cfg.GetVGName(): 2842 self.cfg.SetVGName(new_volume) 2843 else: 2844 feedback_fn("Cluster LVM configuration already in desired" 2845 " state, not changing") 2846 if self.op.drbd_helper is not None: 2847 new_helper = self.op.drbd_helper 2848 if not new_helper: 2849 new_helper = None 2850 if new_helper != self.cfg.GetDRBDHelper(): 2851 self.cfg.SetDRBDHelper(new_helper) 2852 else: 2853 feedback_fn("Cluster DRBD helper already in desired state," 2854 " not changing") 2855 if self.op.hvparams: 2856 self.cluster.hvparams = self.new_hvparams 2857 if self.op.os_hvp: 2858 self.cluster.os_hvp = self.new_os_hvp 2859 if self.op.enabled_hypervisors is not None: 2860 self.cluster.hvparams = self.new_hvparams 2861 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 2862 if self.op.beparams: 2863 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 2864 if self.op.nicparams: 2865 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 2866 if self.op.osparams: 2867 self.cluster.osparams = self.new_osp 2868 2869 if self.op.candidate_pool_size is not None: 2870 self.cluster.candidate_pool_size = self.op.candidate_pool_size 2871 # we need to update the pool size here, otherwise the save will fail 2872 _AdjustCandidatePool(self, []) 2873 2874 if self.op.maintain_node_health is not None: 2875 self.cluster.maintain_node_health = self.op.maintain_node_health 2876 2877 if self.op.prealloc_wipe_disks is not None: 2878 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 2879 2880 if self.op.add_uids is not None: 2881 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 2882 2883 if self.op.remove_uids is not None: 2884 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 2885 2886 if self.op.uid_pool is not None: 2887 self.cluster.uid_pool = self.op.uid_pool 2888 2889 if self.op.default_iallocator is not None: 2890 self.cluster.default_iallocator = self.op.default_iallocator 2891 2892 if self.op.reserved_lvs is not None: 2893 self.cluster.reserved_lvs = self.op.reserved_lvs 2894 2895 def helper_os(aname, mods, desc): 2896 desc += " OS list" 2897 lst = getattr(self.cluster, aname) 2898 for key, val in mods: 2899 if key == constants.DDM_ADD: 2900 if val in lst: 2901 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 2902 else: 2903 lst.append(val) 2904 elif key == constants.DDM_REMOVE: 2905 if val in lst: 2906 lst.remove(val) 2907 else: 2908 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 2909 else: 2910 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2911 2912 if self.op.hidden_os: 2913 helper_os("hidden_os", self.op.hidden_os, "hidden") 2914 2915 if self.op.blacklisted_os: 2916 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 2917 2918 self.cfg.Update(self.cluster, feedback_fn)
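# Illustrative sketch (not part of the module): the helper_os closure applies
# a list of (action, value) modifications to a list, which is how the hidden
# and blacklisted OS lists are edited.  DDM_ADD/DDM_REMOVE mirror the
# constants used above; the OS names are invented, and the duplicate/missing
# cases are silently ignored here instead of producing feedback.
DDM_ADD, DDM_REMOVE = "add", "remove"

def apply_mods(lst, mods):
  for key, val in mods:
    if key == DDM_ADD and val not in lst:
      lst.append(val)
    elif key == DDM_REMOVE and val in lst:
      lst.remove(val)
  return lst

hidden_os = ["debootstrap"]
print(apply_mods(hidden_os, [(DDM_ADD, "lenny-image"),
                             (DDM_REMOVE, "debootstrap")]))
# ['lenny-image']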
2919
2920 2921 -def _UploadHelper(lu, nodes, fname):
2922 """Helper for uploading a file and showing warnings. 2923 2924 """ 2925 if os.path.exists(fname): 2926 result = lu.rpc.call_upload_file(nodes, fname) 2927 for to_node, to_result in result.items(): 2928 msg = to_result.fail_msg 2929 if msg: 2930 msg = ("Copy of file %s to node %s failed: %s" % 2931 (fname, to_node, msg)) 2932 lu.proc.LogWarning(msg)
2933
2934 2935 -def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2936 """Distribute additional files which are part of the cluster configuration. 2937 2938 ConfigWriter takes care of distributing the config and ssconf files, but 2939 there are more files which should be distributed to all nodes. This function 2940 makes sure those are copied. 2941 2942 @param lu: calling logical unit 2943 @param additional_nodes: list of nodes not in the config to distribute to 2944 @type additional_vm: boolean 2945 @param additional_vm: whether the additional nodes are vm-capable or not 2946 2947 """ 2948 # 1. Gather target nodes 2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 2950 dist_nodes = lu.cfg.GetOnlineNodeList() 2951 nvm_nodes = lu.cfg.GetNonVmCapableNodeList() 2952 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes] 2953 if additional_nodes is not None: 2954 dist_nodes.extend(additional_nodes) 2955 if additional_vm: 2956 vm_nodes.extend(additional_nodes) 2957 if myself.name in dist_nodes: 2958 dist_nodes.remove(myself.name) 2959 if myself.name in vm_nodes: 2960 vm_nodes.remove(myself.name) 2961 2962 # 2. Gather files to distribute 2963 dist_files = set([constants.ETC_HOSTS, 2964 constants.SSH_KNOWN_HOSTS_FILE, 2965 constants.RAPI_CERT_FILE, 2966 constants.RAPI_USERS_FILE, 2967 constants.CONFD_HMAC_KEY, 2968 constants.CLUSTER_DOMAIN_SECRET_FILE, 2969 ]) 2970 2971 vm_files = set() 2972 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors 2973 for hv_name in enabled_hypervisors: 2974 hv_class = hypervisor.GetHypervisor(hv_name) 2975 vm_files.update(hv_class.GetAncillaryFiles()) 2976 2977 # 3. Perform the files upload 2978 for fname in dist_files: 2979 _UploadHelper(lu, dist_nodes, fname) 2980 for fname in vm_files: 2981 _UploadHelper(lu, vm_nodes, fname)
2982
2983 2984 -class LURedistributeConfig(NoHooksLU):
2985 """Force the redistribution of cluster configuration. 2986 2987 This is a very simple LU. 2988 2989 """ 2990 REQ_BGL = False 2991
2992 - def ExpandNames(self):
2993 self.needed_locks = { 2994 locking.LEVEL_NODE: locking.ALL_SET, 2995 } 2996 self.share_locks[locking.LEVEL_NODE] = 1
2997
2998 - def Exec(self, feedback_fn):
2999 """Redistribute the configuration. 3000 3001 """ 3002 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 3003 _RedistributeAncillaryFiles(self)
3004
3005 3006 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3007 """Sleep and poll for an instance's disk to sync. 3008 3009 """ 3010 if not instance.disks or disks is not None and not disks: 3011 return True 3012 3013 disks = _ExpandCheckDisks(instance, disks) 3014 3015 if not oneshot: 3016 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) 3017 3018 node = instance.primary_node 3019 3020 for dev in disks: 3021 lu.cfg.SetDiskID(dev, node) 3022 3023 # TODO: Convert to utils.Retry 3024 3025 retries = 0 3026 degr_retries = 10 # in seconds, as we sleep 1 second each time 3027 while True: 3028 max_time = 0 3029 done = True 3030 cumul_degraded = False 3031 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks) 3032 msg = rstats.fail_msg 3033 if msg: 3034 lu.LogWarning("Can't get any data from node %s: %s", node, msg) 3035 retries += 1 3036 if retries >= 10: 3037 raise errors.RemoteError("Can't contact node %s for mirror data," 3038 " aborting." % node) 3039 time.sleep(6) 3040 continue 3041 rstats = rstats.payload 3042 retries = 0 3043 for i, mstat in enumerate(rstats): 3044 if mstat is None: 3045 lu.LogWarning("Can't compute data for node %s/%s", 3046 node, disks[i].iv_name) 3047 continue 3048 3049 cumul_degraded = (cumul_degraded or 3050 (mstat.is_degraded and mstat.sync_percent is None)) 3051 if mstat.sync_percent is not None: 3052 done = False 3053 if mstat.estimated_time is not None: 3054 rem_time = ("%s remaining (estimated)" % 3055 utils.FormatSeconds(mstat.estimated_time)) 3056 max_time = mstat.estimated_time 3057 else: 3058 rem_time = "no time estimate" 3059 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % 3060 (disks[i].iv_name, mstat.sync_percent, rem_time)) 3061 3062 # if we're done but degraded, let's do a few small retries, to 3063 # make sure we see a stable and not transient situation; therefore 3064 # we force restart of the loop 3065 if (done or oneshot) and cumul_degraded and degr_retries > 0: 3066 logging.info("Degraded disks found, %d retries left", degr_retries) 3067 degr_retries -= 1 3068 time.sleep(1) 3069 continue 3070 3071 if done or oneshot: 3072 break 3073 3074 time.sleep(min(60, max_time)) 3075 3076 if done: 3077 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name) 3078 return not cumul_degraded
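# Illustrative sketch (not part of the module): the shape of the polling loop
# above - ask for mirror status, track the worst estimated time, sleep, and
# stop once nothing reports a sync percentage any more.  get_mirror_status()
# is a hypothetical stand-in for the blockdev_getmirrorstatus RPC; the retry
# and degraded-disk handling of the real code is omitted.
import time

_fake_progress = [25.0, 75.0, None]   # None means "in sync"

def get_mirror_status(step):
  return [{"sync_percent": _fake_progress[step], "estimated_time": 0.1}]

def wait_for_sync():
  step = 0
  while True:
    done = True
    max_time = 0
    for mstat in get_mirror_status(step):
      if mstat["sync_percent"] is not None:
        done = False
        max_time = max(max_time, mstat["estimated_time"])
    if done:
      return True
    time.sleep(min(60, max_time))
    step += 1

print(wait_for_sync())   # True, after three polls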
3079
3080 3081 -def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3082 """Check that mirrors are not degraded. 3083 3084 The ldisk parameter, if True, will change the test from the 3085 is_degraded attribute (which represents overall non-ok status for 3086 the device(s)) to the ldisk (representing the local storage status). 3087 3088 """ 3089 lu.cfg.SetDiskID(dev, node) 3090 3091 result = True 3092 3093 if on_primary or dev.AssembleOnSecondary(): 3094 rstats = lu.rpc.call_blockdev_find(node, dev) 3095 msg = rstats.fail_msg 3096 if msg: 3097 lu.LogWarning("Can't find disk on node %s: %s", node, msg) 3098 result = False 3099 elif not rstats.payload: 3100 lu.LogWarning("Can't find disk on node %s", node) 3101 result = False 3102 else: 3103 if ldisk: 3104 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY 3105 else: 3106 result = result and not rstats.payload.is_degraded 3107 3108 if dev.children: 3109 for child in dev.children: 3110 result = result and _CheckDiskConsistency(lu, child, node, on_primary) 3111 3112 return result
3113
3114 3115 -class LUDiagnoseOS(NoHooksLU):
3116 """Logical unit for OS diagnose/query. 3117 3118 """ 3119 _OP_PARAMS = [ 3120 _POutputFields, 3121 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 3122 ] 3123 REQ_BGL = False 3124 _HID = "hidden" 3125 _BLK = "blacklisted" 3126 _VLD = "valid" 3127 _FIELDS_STATIC = utils.FieldSet() 3128 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants", 3129 "parameters", "api_versions", _HID, _BLK) 3130
3131 - def CheckArguments(self):
3132 if self.op.names: 3133 raise errors.OpPrereqError("Selective OS query not supported", 3134 errors.ECODE_INVAL) 3135 3136 _CheckOutputFields(static=self._FIELDS_STATIC, 3137 dynamic=self._FIELDS_DYNAMIC, 3138 selected=self.op.output_fields)
3139
3140 - def ExpandNames(self):
3141 # Lock all nodes, in shared mode 3142 # Temporary removal of locks, should be reverted later 3143 # TODO: reintroduce locks when they are lighter-weight 3144 self.needed_locks = {}
3145 #self.share_locks[locking.LEVEL_NODE] = 1 3146 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3147 3148 @staticmethod
3149 - def _DiagnoseByOS(rlist):
3150    """Remaps a per-node return list into a per-os per-node dictionary. 
3151  
3152    @param rlist: a map with node names as keys and OS objects as values 
3153  
3154    @rtype: dict 
3155    @return: a dictionary with osnames as keys and as value another 
3156        map, with nodes as keys and tuples of (path, status, diagnose, 
3157        variants, parameters, api_versions) as values, e.g.:: 
3158  
3159          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []), 
3160                                     (/srv/..., False, "invalid api")], 
3161                           "node2": [(/srv/..., True, "", [], [])]} 
3162          } 
3163  
3164    """ 
3165    all_os = {} 
3166    # we build here the list of nodes that didn't fail the RPC (at RPC 
3167    # level), so that nodes with a non-responding node daemon don't 
3168    # make all OSes invalid 
3169    good_nodes = [node_name for node_name in rlist 
3170                  if not rlist[node_name].fail_msg] 
3171    for node_name, nr in rlist.items(): 
3172      if nr.fail_msg or not nr.payload: 
3173        continue 
3174      for (name, path, status, diagnose, variants, 
3175           params, api_versions) in nr.payload: 
3176        if name not in all_os: 
3177          # build a list of nodes for this os containing empty lists 
3178          # for each node in node_list 
3179          all_os[name] = {} 
3180          for nname in good_nodes: 
3181            all_os[name][nname] = [] 
3182        # convert params from [name, help] to (name, help) 
3183        params = [tuple(v) for v in params] 
3184        all_os[name][node_name].append((path, status, diagnose, 
3185                                        variants, params, api_versions)) 
3186    return all_os
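# Illustrative sketch (not part of the module): the remapping done above - a
# per-node list of OS entries becomes a per-OS, per-node dictionary that is
# pre-seeded with an empty list for every healthy node, so an OS missing from
# a node shows up as an empty list.  Data below is invented and uses plain
# lists instead of RPC result objects.
rlist = {
  "node1": [("debian-edgy", "/srv/os", True, "", ["default"], [], [10])],
  "node2": [],   # node answered, but exports no OSes
}

good_nodes = list(rlist)          # in the LU: nodes whose RPC did not fail
all_os = {}
for node_name, entries in rlist.items():
  for (name, path, status, diagnose, variants, params, api_versions) in entries:
    if name not in all_os:
      all_os[name] = dict((nname, []) for nname in good_nodes)
    all_os[name][node_name].append((path, status, diagnose,
                                    variants, params, api_versions))

print(sorted(all_os["debian-edgy"]))     # ['node1', 'node2']
print(all_os["debian-edgy"]["node2"])    # [] - flags the OS as absent there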
3187
3188 - def Exec(self, feedback_fn):
3189 """Compute the list of OSes. 3190 3191 """ 3192 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()] 3193 node_data = self.rpc.call_os_diagnose(valid_nodes) 3194 pol = self._DiagnoseByOS(node_data) 3195 output = [] 3196 cluster = self.cfg.GetClusterInfo() 3197 3198 for os_name in utils.NiceSort(pol.keys()): 3199 os_data = pol[os_name] 3200 row = [] 3201 valid = True 3202 (variants, params, api_versions) = null_state = (set(), set(), set()) 3203 for idx, osl in enumerate(os_data.values()): 3204 valid = bool(valid and osl and osl[0][1]) 3205 if not valid: 3206 (variants, params, api_versions) = null_state 3207 break 3208 node_variants, node_params, node_api = osl[0][3:6] 3209 if idx == 0: # first entry 3210 variants = set(node_variants) 3211 params = set(node_params) 3212 api_versions = set(node_api) 3213 else: # keep consistency 3214 variants.intersection_update(node_variants) 3215 params.intersection_update(node_params) 3216 api_versions.intersection_update(node_api) 3217 3218 is_hid = os_name in cluster.hidden_os 3219 is_blk = os_name in cluster.blacklisted_os 3220 if ((self._HID not in self.op.output_fields and is_hid) or 3221 (self._BLK not in self.op.output_fields and is_blk) or 3222 (self._VLD not in self.op.output_fields and not valid)): 3223 continue 3224 3225 for field in self.op.output_fields: 3226 if field == "name": 3227 val = os_name 3228 elif field == self._VLD: 3229 val = valid 3230 elif field == "node_status": 3231 # this is just a copy of the dict 3232 val = {} 3233 for node_name, nos_list in os_data.items(): 3234 val[node_name] = nos_list 3235 elif field == "variants": 3236 val = utils.NiceSort(list(variants)) 3237 elif field == "parameters": 3238 val = list(params) 3239 elif field == "api_versions": 3240 val = list(api_versions) 3241 elif field == self._HID: 3242 val = is_hid 3243 elif field == self._BLK: 3244 val = is_blk 3245 else: 3246 raise errors.ParameterError(field) 3247 row.append(val) 3248 output.append(row) 3249 3250 return output
3251
3252 3253 -class LURemoveNode(LogicalUnit):
3254 """Logical unit for removing a node. 3255 3256 """ 3257 HPATH = "node-remove" 3258 HTYPE = constants.HTYPE_NODE 3259 _OP_PARAMS = [ 3260 _PNodeName, 3261 ] 3262
3263 - def BuildHooksEnv(self):
3264 """Build hooks env. 3265 3266 This doesn't run on the target node in the pre phase as a failed 3267 node would then be impossible to remove. 3268 3269 """ 3270 env = { 3271 "OP_TARGET": self.op.node_name, 3272 "NODE_NAME": self.op.node_name, 3273 } 3274 all_nodes = self.cfg.GetNodeList() 3275 try: 3276 all_nodes.remove(self.op.node_name) 3277 except ValueError: 3278 logging.warning("Node %s which is about to be removed not found" 3279 " in the all nodes list", self.op.node_name) 3280 return env, all_nodes, all_nodes
3281
3282 - def CheckPrereq(self):
3283 """Check prerequisites. 3284 3285 This checks: 3286 - the node exists in the configuration 3287 - it does not have primary or secondary instances 3288 - it's not the master 3289 3290 Any errors are signaled by raising errors.OpPrereqError. 3291 3292 """ 3293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3294 node = self.cfg.GetNodeInfo(self.op.node_name) 3295 assert node is not None 3296 3297 instance_list = self.cfg.GetInstanceList() 3298 3299 masternode = self.cfg.GetMasterNode() 3300 if node.name == masternode: 3301 raise errors.OpPrereqError("Node is the master node," 3302 " you need to failover first.", 3303 errors.ECODE_INVAL) 3304 3305 for instance_name in instance_list: 3306 instance = self.cfg.GetInstanceInfo(instance_name) 3307 if node.name in instance.all_nodes: 3308 raise errors.OpPrereqError("Instance %s is still running on the node," 3309 " please remove first." % instance_name, 3310 errors.ECODE_INVAL) 3311 self.op.node_name = node.name 3312 self.node = node
3313
3314 - def Exec(self, feedback_fn):
3315 """Removes the node from the cluster. 3316 3317 """ 3318 node = self.node 3319 logging.info("Stopping the node daemon and removing configs from node %s", 3320 node.name) 3321 3322 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 3323 3324 # Promote nodes to master candidate as needed 3325 _AdjustCandidatePool(self, exceptions=[node.name]) 3326 self.context.RemoveNode(node.name) 3327 3328 # Run post hooks on the node before it's removed 3329 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 3330 try: 3331 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name]) 3332 except: 3333 # pylint: disable-msg=W0702 3334 self.LogWarning("Errors occurred running hooks on %s" % node.name) 3335 3336 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup) 3337 msg = result.fail_msg 3338 if msg: 3339 self.LogWarning("Errors encountered on the remote node while leaving" 3340 " the cluster: %s", msg) 3341 3342 # Remove node from our /etc/hosts 3343 if self.cfg.GetClusterInfo().modify_etc_hosts: 3344 master_node = self.cfg.GetMasterNode() 3345 result = self.rpc.call_etc_hosts_modify(master_node, 3346 constants.ETC_HOSTS_REMOVE, 3347 node.name, None) 3348 result.Raise("Can't update hosts file with new host data") 3349 _RedistributeAncillaryFiles(self)
3350
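The hook node lists returned by BuildHooksEnv above deliberately drop the node being removed, so an unreachable node cannot veto its own removal in the pre phase. A minimal standalone sketch of that list handling, using only plain Python; the helper name is hypothetical and not part of this module:

def _HookNodesWithout(all_nodes, target):
  """Return a copy of all_nodes with target dropped, tolerating absence."""
  nodes = list(all_nodes)
  try:
    nodes.remove(target)
  except ValueError:
    # the target is already missing from the configuration; the hooks
    # simply run on the remaining nodes
    pass
  return nodes

# Example: _HookNodesWithout(["node1", "node2", "node3"], "node2")
# returns ["node1", "node3"]; both the pre and post hook node lists for
# node-remove are built this way.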
3351 3352 -class LUQueryNodes(NoHooksLU):
3353 """Logical unit for querying nodes. 3354 3355 """ 3356 # pylint: disable-msg=W0142 3357 _OP_PARAMS = [ 3358 _POutputFields, 3359 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 3360 ("use_locking", False, ht.TBool), 3361 ] 3362 REQ_BGL = False 3363 3364 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid", 3365 "master_candidate", "offline", "drained", 3366 "master_capable", "vm_capable"] 3367 3368 _FIELDS_DYNAMIC = utils.FieldSet( 3369 "dtotal", "dfree", 3370 "mtotal", "mnode", "mfree", 3371 "bootid", 3372 "ctotal", "cnodes", "csockets", 3373 ) 3374 3375 _FIELDS_STATIC = utils.FieldSet(*[ 3376 "pinst_cnt", "sinst_cnt", 3377 "pinst_list", "sinst_list", 3378 "pip", "sip", "tags", 3379 "master", 3380 "role"] + _SIMPLE_FIELDS 3381 ) 3382
3383 - def CheckArguments(self):
3384 _CheckOutputFields(static=self._FIELDS_STATIC, 3385 dynamic=self._FIELDS_DYNAMIC, 3386 selected=self.op.output_fields)
3387
3388 - def ExpandNames(self):
3389 self.needed_locks = {} 3390 self.share_locks[locking.LEVEL_NODE] = 1 3391 3392 if self.op.names: 3393 self.wanted = _GetWantedNodes(self, self.op.names) 3394 else: 3395 self.wanted = locking.ALL_SET 3396 3397 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields) 3398 self.do_locking = self.do_node_query and self.op.use_locking 3399 if self.do_locking: 3400 # if we don't request only static fields, we need to lock the nodes 3401 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3402
3403 - def Exec(self, feedback_fn):
3404 """Computes the list of nodes and their attributes. 3405 3406 """ 3407 all_info = self.cfg.GetAllNodesInfo() 3408 if self.do_locking: 3409 nodenames = self.acquired_locks[locking.LEVEL_NODE] 3410 elif self.wanted != locking.ALL_SET: 3411 nodenames = self.wanted 3412 missing = set(nodenames).difference(all_info.keys()) 3413 if missing: 3414 raise errors.OpExecError( 3415 "Some nodes were removed before retrieving their data: %s" % missing) 3416 else: 3417 nodenames = all_info.keys() 3418 3419 nodenames = utils.NiceSort(nodenames) 3420 nodelist = [all_info[name] for name in nodenames] 3421 3422 # begin data gathering 3423 3424 if self.do_node_query: 3425 live_data = {} 3426 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(), 3427 self.cfg.GetHypervisorType()) 3428 for name in nodenames: 3429 nodeinfo = node_data[name] 3430 if not nodeinfo.fail_msg and nodeinfo.payload: 3431 nodeinfo = nodeinfo.payload 3432 fn = utils.TryConvert 3433 live_data[name] = { 3434 "mtotal": fn(int, nodeinfo.get('memory_total', None)), 3435 "mnode": fn(int, nodeinfo.get('memory_dom0', None)), 3436 "mfree": fn(int, nodeinfo.get('memory_free', None)), 3437 "dtotal": fn(int, nodeinfo.get('vg_size', None)), 3438 "dfree": fn(int, nodeinfo.get('vg_free', None)), 3439 "ctotal": fn(int, nodeinfo.get('cpu_total', None)), 3440 "bootid": nodeinfo.get('bootid', None), 3441 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)), 3442 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)), 3443 } 3444 else: 3445 live_data[name] = {} 3446 else: 3447 live_data = dict.fromkeys(nodenames, {}) 3448 3449 node_to_primary = dict([(name, set()) for name in nodenames]) 3450 node_to_secondary = dict([(name, set()) for name in nodenames]) 3451 3452 inst_fields = frozenset(("pinst_cnt", "pinst_list", 3453 "sinst_cnt", "sinst_list")) 3454 if inst_fields & frozenset(self.op.output_fields): 3455 inst_data = self.cfg.GetAllInstancesInfo() 3456 3457 for inst in inst_data.values(): 3458 if inst.primary_node in node_to_primary: 3459 node_to_primary[inst.primary_node].add(inst.name) 3460 for secnode in inst.secondary_nodes: 3461 if secnode in node_to_secondary: 3462 node_to_secondary[secnode].add(inst.name) 3463 3464 master_node = self.cfg.GetMasterNode() 3465 3466 # end data gathering 3467 3468 output = [] 3469 for node in nodelist: 3470 node_output = [] 3471 for field in self.op.output_fields: 3472 if field in self._SIMPLE_FIELDS: 3473 val = getattr(node, field) 3474 elif field == "pinst_list": 3475 val = list(node_to_primary[node.name]) 3476 elif field == "sinst_list": 3477 val = list(node_to_secondary[node.name]) 3478 elif field == "pinst_cnt": 3479 val = len(node_to_primary[node.name]) 3480 elif field == "sinst_cnt": 3481 val = len(node_to_secondary[node.name]) 3482 elif field == "pip": 3483 val = node.primary_ip 3484 elif field == "sip": 3485 val = node.secondary_ip 3486 elif field == "tags": 3487 val = list(node.GetTags()) 3488 elif field == "master": 3489 val = node.name == master_node 3490 elif self._FIELDS_DYNAMIC.Matches(field): 3491 val = live_data[node.name].get(field, None) 3492 elif field == "role": 3493 if node.name == master_node: 3494 val = "M" 3495 elif node.master_candidate: 3496 val = "C" 3497 elif node.drained: 3498 val = "D" 3499 elif node.offline: 3500 val = "O" 3501 else: 3502 val = "R" 3503 else: 3504 raise errors.ParameterError(field) 3505 node_output.append(val) 3506 output.append(node_output) 3507 3508 return output
3509
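The "role" output field above collapses a node's flags into one letter with a fixed precedence: master, then master candidate, then drained, then offline, then regular. A standalone restatement of that precedence (illustrative only; the function name is hypothetical):

def _NodeRoleLetter(name, master_name, master_candidate, drained, offline):
  """Return the single-letter role reported by the "role" query field."""
  if name == master_name:
    return "M"
  elif master_candidate:
    return "C"
  elif drained:
    return "D"
  elif offline:
    return "O"
  else:
    return "R"

# A node that is both drained and offline reports "D", because the tests
# are evaluated in this order:
# _NodeRoleLetter("node2", "node1", False, True, True) == "D"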
3510 3511 -class LUQueryNodeVolumes(NoHooksLU):
3512 """Logical unit for getting volumes on node(s). 3513 3514 """ 3515 _OP_PARAMS = [ 3516 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 3517 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 3518 ] 3519 REQ_BGL = False 3520 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") 3521 _FIELDS_STATIC = utils.FieldSet("node") 3522
3523 - def CheckArguments(self):
3524 _CheckOutputFields(static=self._FIELDS_STATIC, 3525 dynamic=self._FIELDS_DYNAMIC, 3526 selected=self.op.output_fields)
3527
3528 - def ExpandNames(self):
3529 self.needed_locks = {} 3530 self.share_locks[locking.LEVEL_NODE] = 1 3531 if not self.op.nodes: 3532 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3533 else: 3534 self.needed_locks[locking.LEVEL_NODE] = \ 3535 _GetWantedNodes(self, self.op.nodes)
3536
3537 - def Exec(self, feedback_fn):
3538 """Computes the list of nodes and their attributes. 3539 3540 """ 3541 nodenames = self.acquired_locks[locking.LEVEL_NODE] 3542 volumes = self.rpc.call_node_volumes(nodenames) 3543 3544 ilist = [self.cfg.GetInstanceInfo(iname) for iname 3545 in self.cfg.GetInstanceList()] 3546 3547 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist]) 3548 3549 output = [] 3550 for node in nodenames: 3551 nresult = volumes[node] 3552 if nresult.offline: 3553 continue 3554 msg = nresult.fail_msg 3555 if msg: 3556 self.LogWarning("Can't compute volume data on node %s: %s", node, msg) 3557 continue 3558 3559 node_vols = nresult.payload[:] 3560 node_vols.sort(key=lambda vol: vol['dev']) 3561 3562 for vol in node_vols: 3563 node_output = [] 3564 for field in self.op.output_fields: 3565 if field == "node": 3566 val = node 3567 elif field == "phys": 3568 val = vol['dev'] 3569 elif field == "vg": 3570 val = vol['vg'] 3571 elif field == "name": 3572 val = vol['name'] 3573 elif field == "size": 3574 val = int(float(vol['size'])) 3575 elif field == "instance": 3576 for inst in ilist: 3577 if node not in lv_by_node[inst]: 3578 continue 3579 if vol['name'] in lv_by_node[inst][node]: 3580 val = inst.name 3581 break 3582 else: 3583 val = '-' 3584 else: 3585 raise errors.ParameterError(field) 3586 node_output.append(str(val)) 3587 3588 output.append(node_output) 3589 3590 return output
3591
3592 3593 -class LUQueryNodeStorage(NoHooksLU):
3594 """Logical unit for getting information on storage units on node(s). 3595 3596 """ 3597 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) 3598 _OP_PARAMS = [ 3599 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 3600 ("storage_type", ht.NoDefault, _CheckStorageType), 3601 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 3602 ("name", None, ht.TMaybeString), 3603 ] 3604 REQ_BGL = False 3605
3606 - def CheckArguments(self):
3607 _CheckOutputFields(static=self._FIELDS_STATIC, 3608 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), 3609 selected=self.op.output_fields)
3610
3611 - def ExpandNames(self):
3612 self.needed_locks = {} 3613 self.share_locks[locking.LEVEL_NODE] = 1 3614 3615 if self.op.nodes: 3616 self.needed_locks[locking.LEVEL_NODE] = \ 3617 _GetWantedNodes(self, self.op.nodes) 3618 else: 3619 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3620
3621 - def Exec(self, feedback_fn):
3622 """Computes the list of nodes and their attributes. 3623 3624 """ 3625 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 3626 3627 # Always get name to sort by 3628 if constants.SF_NAME in self.op.output_fields: 3629 fields = self.op.output_fields[:] 3630 else: 3631 fields = [constants.SF_NAME] + self.op.output_fields 3632 3633 # Never ask for node or type as it's only known to the LU 3634 for extra in [constants.SF_NODE, constants.SF_TYPE]: 3635 while extra in fields: 3636 fields.remove(extra) 3637 3638 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) 3639 name_idx = field_idx[constants.SF_NAME] 3640 3641 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 3642 data = self.rpc.call_storage_list(self.nodes, 3643 self.op.storage_type, st_args, 3644 self.op.name, fields) 3645 3646 result = [] 3647 3648 for node in utils.NiceSort(self.nodes): 3649 nresult = data[node] 3650 if nresult.offline: 3651 continue 3652 3653 msg = nresult.fail_msg 3654 if msg: 3655 self.LogWarning("Can't get storage data from node %s: %s", node, msg) 3656 continue 3657 3658 rows = dict([(row[name_idx], row) for row in nresult.payload]) 3659 3660 for name in utils.NiceSort(rows.keys()): 3661 row = rows[name] 3662 3663 out = [] 3664 3665 for field in self.op.output_fields: 3666 if field == constants.SF_NODE: 3667 val = node 3668 elif field == constants.SF_TYPE: 3669 val = self.op.storage_type 3670 elif field in field_idx: 3671 val = row[field_idx[field]] 3672 else: 3673 raise errors.ParameterError(field) 3674 3675 out.append(val) 3676 3677 result.append(out) 3678 3679 return result
3680
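LUQueryNodeStorage always asks the backend for the name column (so rows can be keyed and sorted) and strips the node and type columns, which only the LU itself can answer. The same field-list normalisation in isolation, with the literals "name", "node" and "type" standing in for constants.SF_NAME, SF_NODE and SF_TYPE:

def _NormalizeStorageFields(output_fields):
  """Return (fields to request from the backend, name -> column index)."""
  if "name" in output_fields:
    fields = output_fields[:]
  else:
    fields = ["name"] + output_fields
  # never ask the backend for columns only the LU knows
  for extra in ["node", "type"]:
    while extra in fields:
      fields.remove(extra)
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  return fields, field_idx

# _NormalizeStorageFields(["node", "size", "used"])
# -> (["name", "size", "used"], {"name": 0, "size": 1, "used": 2})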
3681 3682 -class LUModifyNodeStorage(NoHooksLU):
3683 """Logical unit for modifying a storage volume on a node. 3684 3685 """ 3686 _OP_PARAMS = [ 3687 _PNodeName, 3688 ("storage_type", ht.NoDefault, _CheckStorageType), 3689 ("name", ht.NoDefault, ht.TNonEmptyString), 3690 ("changes", ht.NoDefault, ht.TDict), 3691 ] 3692 REQ_BGL = False 3693
3694 - def CheckArguments(self):
3695 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3696 3697 storage_type = self.op.storage_type 3698 3699 try: 3700 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] 3701 except KeyError: 3702 raise errors.OpPrereqError("Storage units of type '%s' can not be" 3703 " modified" % storage_type, 3704 errors.ECODE_INVAL) 3705 3706 diff = set(self.op.changes.keys()) - modifiable 3707 if diff: 3708 raise errors.OpPrereqError("The following fields can not be modified for" 3709 " storage units of type '%s': %r" % 3710 (storage_type, list(diff)), 3711 errors.ECODE_INVAL)
3712
3713 - def ExpandNames(self):
3714 self.needed_locks = { 3715 locking.LEVEL_NODE: self.op.node_name, 3716 }
3717
3718 - def Exec(self, feedback_fn):
3719 """Computes the list of nodes and their attributes. 3720 3721 """ 3722 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 3723 result = self.rpc.call_storage_modify(self.op.node_name, 3724 self.op.storage_type, st_args, 3725 self.op.name, self.op.changes) 3726 result.Raise("Failed to modify storage unit '%s' on %s" % 3727 (self.op.name, self.op.node_name))
3728
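CheckArguments above accepts a change set only if every key is in the whitelist of modifiable fields for that storage type. The same validation in standalone form, with a made-up whitelist in place of constants.MODIFIABLE_STORAGE_FIELDS and plain ValueError in place of OpPrereqError:

_FAKE_MODIFIABLE = {
  "lvm-pv": frozenset(["allocatable"]),
  }

def _CheckStorageChanges(storage_type, changes):
  """Reject changes to fields the storage backend cannot modify."""
  try:
    modifiable = _FAKE_MODIFIABLE[storage_type]
  except KeyError:
    raise ValueError("Storage units of type '%s' cannot be modified" %
                     storage_type)
  diff = set(changes.keys()) - modifiable
  if diff:
    raise ValueError("Fields %r cannot be modified for type '%s'" %
                     (sorted(diff), storage_type))

# _CheckStorageChanges("lvm-pv", {"allocatable": False})  # passes
# _CheckStorageChanges("lvm-pv", {"size": 10})            # raises ValueError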
3729 3730 -class LUAddNode(LogicalUnit):
3731 """Logical unit for adding node to the cluster. 3732 3733 """ 3734 HPATH = "node-add" 3735 HTYPE = constants.HTYPE_NODE 3736 _OP_PARAMS = [ 3737 _PNodeName, 3738 ("primary_ip", None, ht.NoType), 3739 ("secondary_ip", None, ht.TMaybeString), 3740 ("readd", False, ht.TBool), 3741 ("group", None, ht.TMaybeString), 3742 ("master_capable", None, ht.TMaybeBool), 3743 ("vm_capable", None, ht.TMaybeBool), 3744 ] 3745 _NFLAGS = ["master_capable", "vm_capable"] 3746
3747 - def CheckArguments(self):
3748 self.primary_ip_family = self.cfg.GetPrimaryIPFamily() 3749 # validate/normalize the node name 3750 self.hostname = netutils.GetHostname(name=self.op.node_name, 3751 family=self.primary_ip_family) 3752 self.op.node_name = self.hostname.name 3753 if self.op.readd and self.op.group: 3754 raise errors.OpPrereqError("Cannot pass a node group when a node is" 3755 " being readded", errors.ECODE_INVAL)
3756
3757 - def BuildHooksEnv(self):
3758 """Build hooks env. 3759 3760 This will run on all nodes before, and on all nodes + the new node after. 3761 3762 """ 3763 env = { 3764 "OP_TARGET": self.op.node_name, 3765 "NODE_NAME": self.op.node_name, 3766 "NODE_PIP": self.op.primary_ip, 3767 "NODE_SIP": self.op.secondary_ip, 3768 "MASTER_CAPABLE": str(self.op.master_capable), 3769 "VM_CAPABLE": str(self.op.vm_capable), 3770 } 3771 nodes_0 = self.cfg.GetNodeList() 3772 nodes_1 = nodes_0 + [self.op.node_name, ] 3773 return env, nodes_0, nodes_1
3774
3775 - def CheckPrereq(self):
3776 """Check prerequisites. 3777 3778 This checks: 3779 - the new node is not already in the config 3780 - it is resolvable 3781 - its parameters (single/dual homed) matches the cluster 3782 3783 Any errors are signaled by raising errors.OpPrereqError. 3784 3785 """ 3786 cfg = self.cfg 3787 hostname = self.hostname 3788 node = hostname.name 3789 primary_ip = self.op.primary_ip = hostname.ip 3790 if self.op.secondary_ip is None: 3791 if self.primary_ip_family == netutils.IP6Address.family: 3792 raise errors.OpPrereqError("When using a IPv6 primary address, a valid" 3793 " IPv4 address must be given as secondary", 3794 errors.ECODE_INVAL) 3795 self.op.secondary_ip = primary_ip 3796 3797 secondary_ip = self.op.secondary_ip 3798 if not netutils.IP4Address.IsValid(secondary_ip): 3799 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 3800 " address" % secondary_ip, errors.ECODE_INVAL) 3801 3802 node_list = cfg.GetNodeList() 3803 if not self.op.readd and node in node_list: 3804 raise errors.OpPrereqError("Node %s is already in the configuration" % 3805 node, errors.ECODE_EXISTS) 3806 elif self.op.readd and node not in node_list: 3807 raise errors.OpPrereqError("Node %s is not in the configuration" % node, 3808 errors.ECODE_NOENT) 3809 3810 self.changed_primary_ip = False 3811 3812 for existing_node_name in node_list: 3813 existing_node = cfg.GetNodeInfo(existing_node_name) 3814 3815 if self.op.readd and node == existing_node_name: 3816 if existing_node.secondary_ip != secondary_ip: 3817 raise errors.OpPrereqError("Readded node doesn't have the same IP" 3818 " address configuration as before", 3819 errors.ECODE_INVAL) 3820 if existing_node.primary_ip != primary_ip: 3821 self.changed_primary_ip = True 3822 3823 continue 3824 3825 if (existing_node.primary_ip == primary_ip or 3826 existing_node.secondary_ip == primary_ip or 3827 existing_node.primary_ip == secondary_ip or 3828 existing_node.secondary_ip == secondary_ip): 3829 raise errors.OpPrereqError("New node ip address(es) conflict with" 3830 " existing node %s" % existing_node.name, 3831 errors.ECODE_NOTUNIQUE) 3832 3833 # After this 'if' block, None is no longer a valid value for the 3834 # _capable op attributes 3835 if self.op.readd: 3836 old_node = self.cfg.GetNodeInfo(node) 3837 assert old_node is not None, "Can't retrieve locked node %s" % node 3838 for attr in self._NFLAGS: 3839 if getattr(self.op, attr) is None: 3840 setattr(self.op, attr, getattr(old_node, attr)) 3841 else: 3842 for attr in self._NFLAGS: 3843 if getattr(self.op, attr) is None: 3844 setattr(self.op, attr, True) 3845 3846 if self.op.readd and not self.op.vm_capable: 3847 pri, sec = cfg.GetNodeInstances(node) 3848 if pri or sec: 3849 raise errors.OpPrereqError("Node %s being re-added with vm_capable" 3850 " flag set to false, but it already holds" 3851 " instances" % node, 3852 errors.ECODE_STATE) 3853 3854 # check that the type of the node (single versus dual homed) is the 3855 # same as for the master 3856 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) 3857 master_singlehomed = myself.secondary_ip == myself.primary_ip 3858 newbie_singlehomed = secondary_ip == primary_ip 3859 if master_singlehomed != newbie_singlehomed: 3860 if master_singlehomed: 3861 raise errors.OpPrereqError("The master has no secondary ip but the" 3862 " new node has one", 3863 errors.ECODE_INVAL) 3864 else: 3865 raise errors.OpPrereqError("The master has a secondary ip but the" 3866 " new node doesn't have one", 3867 errors.ECODE_INVAL) 3868 3869 # checks reachability 
3870 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): 3871 raise errors.OpPrereqError("Node not reachable by ping", 3872 errors.ECODE_ENVIRON) 3873 3874 if not newbie_singlehomed: 3875 # check reachability from my secondary ip to newbie's secondary ip 3876 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, 3877 source=myself.secondary_ip): 3878 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 3879 " based ping to node daemon port", 3880 errors.ECODE_ENVIRON) 3881 3882 if self.op.readd: 3883 exceptions = [node] 3884 else: 3885 exceptions = [] 3886 3887 if self.op.master_capable: 3888 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) 3889 else: 3890 self.master_candidate = False 3891 3892 if self.op.readd: 3893 self.new_node = old_node 3894 else: 3895 node_group = cfg.LookupNodeGroup(self.op.group) 3896 self.new_node = objects.Node(name=node, 3897 primary_ip=primary_ip, 3898 secondary_ip=secondary_ip, 3899 master_candidate=self.master_candidate, 3900 offline=False, drained=False, 3901 group=node_group)
3902
3903 - def Exec(self, feedback_fn):
3904 """Adds the new node to the cluster. 3905 3906 """ 3907 new_node = self.new_node 3908 node = new_node.name 3909 3910 # for re-adds, reset the offline/drained/master-candidate flags; 3911 # we need to reset here, otherwise offline would prevent RPC calls 3912 # later in the procedure; this also means that if the re-add 3913 # fails, we are left with a non-offlined, broken node 3914 if self.op.readd: 3915 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201 3916 self.LogInfo("Readding a node, the offline/drained flags were reset") 3917 # if we demote the node, we do cleanup later in the procedure 3918 new_node.master_candidate = self.master_candidate 3919 if self.changed_primary_ip: 3920 new_node.primary_ip = self.op.primary_ip 3921 3922 # copy the master/vm_capable flags 3923 for attr in self._NFLAGS: 3924 setattr(new_node, attr, getattr(self.op, attr)) 3925 3926 # notify the user about any possible mc promotion 3927 if new_node.master_candidate: 3928 self.LogInfo("Node will be a master candidate") 3929 3930 # check connectivity 3931 result = self.rpc.call_version([node])[node] 3932 result.Raise("Can't get version information from node %s" % node) 3933 if constants.PROTOCOL_VERSION == result.payload: 3934 logging.info("Communication to node %s fine, sw version %s match", 3935 node, result.payload) 3936 else: 3937 raise errors.OpExecError("Version mismatch master version %s," 3938 " node version %s" % 3939 (constants.PROTOCOL_VERSION, result.payload)) 3940 3941 # Add node to our /etc/hosts, and add key to known_hosts 3942 if self.cfg.GetClusterInfo().modify_etc_hosts: 3943 master_node = self.cfg.GetMasterNode() 3944 result = self.rpc.call_etc_hosts_modify(master_node, 3945 constants.ETC_HOSTS_ADD, 3946 self.hostname.name, 3947 self.hostname.ip) 3948 result.Raise("Can't update hosts file with new host data") 3949 3950 if new_node.secondary_ip != new_node.primary_ip: 3951 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip, 3952 False) 3953 3954 node_verify_list = [self.cfg.GetMasterNode()] 3955 node_verify_param = { 3956 constants.NV_NODELIST: [node], 3957 # TODO: do a node-net-test as well? 3958 } 3959 3960 result = self.rpc.call_node_verify(node_verify_list, node_verify_param, 3961 self.cfg.GetClusterName()) 3962 for verifier in node_verify_list: 3963 result[verifier].Raise("Cannot communicate with node %s" % verifier) 3964 nl_payload = result[verifier].payload[constants.NV_NODELIST] 3965 if nl_payload: 3966 for failed in nl_payload: 3967 feedback_fn("ssh/hostname verification failed" 3968 " (checking from %s): %s" % 3969 (verifier, nl_payload[failed])) 3970 raise errors.OpExecError("ssh/hostname verification failed.") 3971 3972 if self.op.readd: 3973 _RedistributeAncillaryFiles(self) 3974 self.context.ReaddNode(new_node) 3975 # make sure we redistribute the config 3976 self.cfg.Update(new_node, feedback_fn) 3977 # and make sure the new node will not have old files around 3978 if not new_node.master_candidate: 3979 result = self.rpc.call_node_demote_from_mc(new_node.name) 3980 msg = result.fail_msg 3981 if msg: 3982 self.LogWarning("Node failed to demote itself from master" 3983 " candidate status: %s" % msg) 3984 else: 3985 _RedistributeAncillaryFiles(self, additional_nodes=[node], 3986 additional_vm=self.op.vm_capable) 3987 self.context.AddNode(new_node, self.proc.GetECId())
3988
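CheckPrereq above insists that the new node's addressing mode (single- versus dual-homed) matches the master's. The core comparison in isolation, taking plain IP strings and raising ValueError instead of OpPrereqError; a simplified sketch, not the module's API:

def _CheckHomingMatches(master_pip, master_sip, new_pip, new_sip):
  """Reject a node whose single/dual-homed setup differs from the master."""
  master_singlehomed = master_sip == master_pip
  newbie_singlehomed = new_sip == new_pip
  if master_singlehomed != newbie_singlehomed:
    if master_singlehomed:
      raise ValueError("The master has no secondary ip but the"
                       " new node has one")
    raise ValueError("The master has a secondary ip but the"
                     " new node doesn't have one")

# _CheckHomingMatches("192.0.2.1", "192.0.2.1",
#                     "192.0.2.2", "198.51.100.2")
# raises, because the master is single-homed and the new node is not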
3989 3990 -class LUSetNodeParams(LogicalUnit):
3991 """Modifies the parameters of a node. 3992 3993 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) 3994 to the node role (as _ROLE_*) 3995 @cvar _R2F: a dictionary from node role to tuples of flags 3996 @cvar _FLAGS: a list of attribute names corresponding to the flags 3997 3998 """ 3999 HPATH = "node-modify" 4000 HTYPE = constants.HTYPE_NODE 4001 _OP_PARAMS = [ 4002 _PNodeName, 4003 ("master_candidate", None, ht.TMaybeBool), 4004 ("offline", None, ht.TMaybeBool), 4005 ("drained", None, ht.TMaybeBool), 4006 ("auto_promote", False, ht.TBool), 4007 ("master_capable", None, ht.TMaybeBool), 4008 ("vm_capable", None, ht.TMaybeBool), 4009 ("secondary_ip", None, ht.TMaybeString), 4010 _PForce, 4011 ] 4012 REQ_BGL = False 4013 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) 4014 _F2R = { 4015 (True, False, False): _ROLE_CANDIDATE, 4016 (False, True, False): _ROLE_DRAINED, 4017 (False, False, True): _ROLE_OFFLINE, 4018 (False, False, False): _ROLE_REGULAR, 4019 } 4020 _R2F = dict((v, k) for k, v in _F2R.items()) 4021 _FLAGS = ["master_candidate", "drained", "offline"] 4022
4023 - def CheckArguments(self):
4024 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4025 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, 4026 self.op.master_capable, self.op.vm_capable, 4027 self.op.secondary_ip] 4028 if all_mods.count(None) == len(all_mods): 4029 raise errors.OpPrereqError("Please pass at least one modification", 4030 errors.ECODE_INVAL) 4031 if all_mods.count(True) > 1: 4032 raise errors.OpPrereqError("Can't set the node into more than one" 4033 " state at the same time", 4034 errors.ECODE_INVAL) 4035 4036 # Boolean value that tells us whether we might be demoting from MC 4037 self.might_demote = (self.op.master_candidate == False or 4038 self.op.offline == True or 4039 self.op.drained == True or 4040 self.op.master_capable == False) 4041 4042 if self.op.secondary_ip: 4043 if not netutils.IP4Address.IsValid(self.op.secondary_ip): 4044 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 4045 " address" % self.op.secondary_ip, 4046 errors.ECODE_INVAL) 4047 4048 self.lock_all = self.op.auto_promote and self.might_demote 4049 self.lock_instances = self.op.secondary_ip is not None
4050
4051 - def ExpandNames(self):
4052 if self.lock_all: 4053 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET} 4054 else: 4055 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name} 4056 4057 if self.lock_instances: 4058 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4059
4060 - def DeclareLocks(self, level):
4061 # If we have locked all instances, before waiting to lock nodes, release 4062 # all the ones living on nodes unrelated to the current operation. 4063 if level == locking.LEVEL_NODE and self.lock_instances: 4064 instances_release = [] 4065 instances_keep = [] 4066 self.affected_instances = [] 4067 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 4068 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 4069 instance = self.context.cfg.GetInstanceInfo(instance_name) 4070 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR 4071 if i_mirrored and self.op.node_name in instance.all_nodes: 4072 instances_keep.append(instance_name) 4073 self.affected_instances.append(instance) 4074 else: 4075 instances_release.append(instance_name) 4076 if instances_release: 4077 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release) 4078 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4079
4080 - def BuildHooksEnv(self):
4081 """Build hooks env. 4082 4083 This runs on the master node. 4084 4085 """ 4086 env = { 4087 "OP_TARGET": self.op.node_name, 4088 "MASTER_CANDIDATE": str(self.op.master_candidate), 4089 "OFFLINE": str(self.op.offline), 4090 "DRAINED": str(self.op.drained), 4091 "MASTER_CAPABLE": str(self.op.master_capable), 4092 "VM_CAPABLE": str(self.op.vm_capable), 4093 } 4094 nl = [self.cfg.GetMasterNode(), 4095 self.op.node_name] 4096 return env, nl, nl
4097
4098 - def CheckPrereq(self):
4099 """Check prerequisites. 4100 4101 This only checks the instance list against the existing names. 4102 4103 """ 4104 node = self.node = self.cfg.GetNodeInfo(self.op.node_name) 4105 4106 if (self.op.master_candidate is not None or 4107 self.op.drained is not None or 4108 self.op.offline is not None): 4109 # we can't change the master's node flags 4110 if self.op.node_name == self.cfg.GetMasterNode(): 4111 raise errors.OpPrereqError("The master role can be changed" 4112 " only via master-failover", 4113 errors.ECODE_INVAL) 4114 4115 if self.op.master_candidate and not node.master_capable: 4116 raise errors.OpPrereqError("Node %s is not master capable, cannot make" 4117 " it a master candidate" % node.name, 4118 errors.ECODE_STATE) 4119 4120 if self.op.vm_capable == False: 4121 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name) 4122 if ipri or isec: 4123 raise errors.OpPrereqError("Node %s hosts instances, cannot unset" 4124 " the vm_capable flag" % node.name, 4125 errors.ECODE_STATE) 4126 4127 if node.master_candidate and self.might_demote and not self.lock_all: 4128 assert not self.op.auto_promote, "auto-promote set but lock_all not" 4129 # check if after removing the current node, we're missing master 4130 # candidates 4131 (mc_remaining, mc_should, _) = \ 4132 self.cfg.GetMasterCandidateStats(exceptions=[node.name]) 4133 if mc_remaining < mc_should: 4134 raise errors.OpPrereqError("Not enough master candidates, please" 4135 " pass auto_promote to allow promotion", 4136 errors.ECODE_STATE) 4137 4138 self.old_flags = old_flags = (node.master_candidate, 4139 node.drained, node.offline) 4140 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) 4141 self.old_role = old_role = self._F2R[old_flags] 4142 4143 # Check for ineffective changes 4144 for attr in self._FLAGS: 4145 if (getattr(self.op, attr) == False and getattr(node, attr) == False): 4146 self.LogInfo("Ignoring request to unset flag %s, already unset", attr) 4147 setattr(self.op, attr, None) 4148 4149 # Past this point, any flag change to False means a transition 4150 # away from the respective state, as only real changes are kept 4151 4152 # If we're being deofflined/drained, we'll MC ourself if needed 4153 if (self.op.drained == False or self.op.offline == False or 4154 (self.op.master_capable and not node.master_capable)): 4155 if _DecideSelfPromotion(self): 4156 self.op.master_candidate = True 4157 self.LogInfo("Auto-promoting node to master candidate") 4158 4159 # If we're no longer master capable, we'll demote ourselves from MC 4160 if self.op.master_capable == False and node.master_candidate: 4161 self.LogInfo("Demoting from master candidate") 4162 self.op.master_candidate = False 4163 4164 # Compute new role 4165 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 4166 if self.op.master_candidate: 4167 new_role = self._ROLE_CANDIDATE 4168 elif self.op.drained: 4169 new_role = self._ROLE_DRAINED 4170 elif self.op.offline: 4171 new_role = self._ROLE_OFFLINE 4172 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: 4173 # False is still in new flags, which means we're un-setting (the 4174 # only) True flag 4175 new_role = self._ROLE_REGULAR 4176 else: # no new flags, nothing, keep old role 4177 new_role = old_role 4178 4179 self.new_role = new_role 4180 4181 if old_role == self._ROLE_OFFLINE and new_role != old_role: 4182 # Trying to transition out of offline status 4183 result = self.rpc.call_version([node.name])[node.name] 4184 if result.fail_msg: 4185 
raise errors.OpPrereqError("Node %s is being de-offlined but fails" 4186 " to report its version: %s" % 4187 (node.name, result.fail_msg), 4188 errors.ECODE_STATE) 4189 else: 4190 self.LogWarning("Transitioning node from offline to online state" 4191 " without using re-add. Please make sure the node" 4192 " is healthy!") 4193 4194 if self.op.secondary_ip: 4195 # Ok even without locking, because this can't be changed by any LU 4196 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) 4197 master_singlehomed = master.secondary_ip == master.primary_ip 4198 if master_singlehomed and self.op.secondary_ip: 4199 raise errors.OpPrereqError("Cannot change the secondary ip on a single" 4200 " homed cluster", errors.ECODE_INVAL) 4201 4202 if node.offline: 4203 if self.affected_instances: 4204 raise errors.OpPrereqError("Cannot change secondary ip: offline" 4205 " node has instances (%s) configured" 4206 " to use it" % self.affected_instances) 4207 else: 4208 # On online nodes, check that no instances are running, and that 4209 # the node has the new ip and we can reach it. 4210 for instance in self.affected_instances: 4211 _CheckInstanceDown(self, instance, "cannot change secondary ip") 4212 4213 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True) 4214 if master.name != node.name: 4215 # check reachability from master secondary ip to new secondary ip 4216 if not netutils.TcpPing(self.op.secondary_ip, 4217 constants.DEFAULT_NODED_PORT, 4218 source=master.secondary_ip): 4219 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 4220 " based ping to node daemon port", 4221 errors.ECODE_ENVIRON)
4222
4223 - def Exec(self, feedback_fn):
4224 """Modifies a node. 4225 4226 """ 4227 node = self.node 4228 old_role = self.old_role 4229 new_role = self.new_role 4230 4231 result = [] 4232 4233 for attr in ["master_capable", "vm_capable"]: 4234 val = getattr(self.op, attr) 4235 if val is not None: 4236 setattr(node, attr, val) 4237 result.append((attr, str(val))) 4238 4239 if new_role != old_role: 4240 # Tell the node to demote itself, if no longer MC and not offline 4241 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE: 4242 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg 4243 if msg: 4244 self.LogWarning("Node failed to demote itself: %s", msg) 4245 4246 new_flags = self._R2F[new_role] 4247 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): 4248 if of != nf: 4249 result.append((desc, str(nf))) 4250 (node.master_candidate, node.drained, node.offline) = new_flags 4251 4252 # we locked all nodes, we adjust the CP before updating this node 4253 if self.lock_all: 4254 _AdjustCandidatePool(self, [node.name]) 4255 4256 if self.op.secondary_ip: 4257 node.secondary_ip = self.op.secondary_ip 4258 result.append(("secondary_ip", self.op.secondary_ip)) 4259 4260 # this will trigger configuration file update, if needed 4261 self.cfg.Update(node, feedback_fn) 4262 4263 # this will trigger job queue propagation or cleanup if the mc 4264 # flag changed 4265 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1: 4266 self.context.ReaddNode(node) 4267 4268 return result
4269
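The _F2R/_R2F tables above treat (master_candidate, drained, offline) as mutually exclusive flags and map each legal combination to a single role. A standalone restatement showing why the mapping round-trips; the names mirror the class attributes but live outside the class:

(_CANDIDATE, _DRAINED, _OFFLINE, _REGULAR) = range(4)

_F2R = {
  (True, False, False): _CANDIDATE,
  (False, True, False): _DRAINED,
  (False, False, True): _OFFLINE,
  (False, False, False): _REGULAR,
  }
_R2F = dict((v, k) for k, v in _F2R.items())

# Because at most one flag may be set, flags -> role -> flags is lossless:
# _R2F[_F2R[(False, True, False)]] == (False, True, False)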
4270 4271 -class LUPowercycleNode(NoHooksLU):
4272 """Powercycles a node. 4273 4274 """ 4275 _OP_PARAMS = [ 4276 _PNodeName, 4277 _PForce, 4278 ] 4279 REQ_BGL = False 4280
4281 - def CheckArguments(self):
4282 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4283 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force: 4284 raise errors.OpPrereqError("The node is the master and the force" 4285 " parameter was not set", 4286 errors.ECODE_INVAL)
4287
4288 - def ExpandNames(self):
4289 """Locking for PowercycleNode. 4290 4291 This is a last-resort option and shouldn't block on other 4292 jobs. Therefore, we grab no locks. 4293 4294 """ 4295 self.needed_locks = {}
4296
4297 - def Exec(self, feedback_fn):
4298 """Reboots a node. 4299 4300 """ 4301 result = self.rpc.call_node_powercycle(self.op.node_name, 4302 self.cfg.GetHypervisorType()) 4303 result.Raise("Failed to schedule the reboot") 4304 return result.payload
4305
4306 4307 -class LUQueryClusterInfo(NoHooksLU):
4308 """Query cluster configuration. 4309 4310 """ 4311 REQ_BGL = False 4312
4313 - def ExpandNames(self):
4314 self.needed_locks = {}
4315
4316 - def Exec(self, feedback_fn):
4317 """Return cluster config. 4318 4319 """ 4320 cluster = self.cfg.GetClusterInfo() 4321 os_hvp = {} 4322 4323 # Filter just for enabled hypervisors 4324 for os_name, hv_dict in cluster.os_hvp.items(): 4325 os_hvp[os_name] = {} 4326 for hv_name, hv_params in hv_dict.items(): 4327 if hv_name in cluster.enabled_hypervisors: 4328 os_hvp[os_name][hv_name] = hv_params 4329 4330 # Convert ip_family to ip_version 4331 primary_ip_version = constants.IP4_VERSION 4332 if cluster.primary_ip_family == netutils.IP6Address.family: 4333 primary_ip_version = constants.IP6_VERSION 4334 4335 result = { 4336 "software_version": constants.RELEASE_VERSION, 4337 "protocol_version": constants.PROTOCOL_VERSION, 4338 "config_version": constants.CONFIG_VERSION, 4339 "os_api_version": max(constants.OS_API_VERSIONS), 4340 "export_version": constants.EXPORT_VERSION, 4341 "architecture": (platform.architecture()[0], platform.machine()), 4342 "name": cluster.cluster_name, 4343 "master": cluster.master_node, 4344 "default_hypervisor": cluster.enabled_hypervisors[0], 4345 "enabled_hypervisors": cluster.enabled_hypervisors, 4346 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 4347 for hypervisor_name in cluster.enabled_hypervisors]), 4348 "os_hvp": os_hvp, 4349 "beparams": cluster.beparams, 4350 "osparams": cluster.osparams, 4351 "nicparams": cluster.nicparams, 4352 "candidate_pool_size": cluster.candidate_pool_size, 4353 "master_netdev": cluster.master_netdev, 4354 "volume_group_name": cluster.volume_group_name, 4355 "drbd_usermode_helper": cluster.drbd_usermode_helper, 4356 "file_storage_dir": cluster.file_storage_dir, 4357 "maintain_node_health": cluster.maintain_node_health, 4358 "ctime": cluster.ctime, 4359 "mtime": cluster.mtime, 4360 "uuid": cluster.uuid, 4361 "tags": list(cluster.GetTags()), 4362 "uid_pool": cluster.uid_pool, 4363 "default_iallocator": cluster.default_iallocator, 4364 "reserved_lvs": cluster.reserved_lvs, 4365 "primary_ip_version": primary_ip_version, 4366 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 4367 } 4368 4369 return result
4370
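The os_hvp filtering at the top of Exec keeps only the per-OS hypervisor overrides that belong to enabled hypervisors. The same filter as a small standalone function (illustrative; plain dicts and lists assumed):

def _FilterOsHvp(os_hvp, enabled_hypervisors):
  """Drop per-OS hypervisor parameters for disabled hypervisors."""
  result = {}
  for os_name, hv_dict in os_hvp.items():
    result[os_name] = dict((hv, params) for hv, params in hv_dict.items()
                           if hv in enabled_hypervisors)
  return result

# _FilterOsHvp({"debian": {"xen-pvm": {"kernel_path": "/boot/k"},
#                          "kvm": {}}},
#              ["kvm"])
# -> {"debian": {"kvm": {}}}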
4371 4372 -class LUQueryConfigValues(NoHooksLU):
4373 """Return configuration values. 4374 4375 """ 4376 _OP_PARAMS = [_POutputFields] 4377 REQ_BGL = False 4378 _FIELDS_DYNAMIC = utils.FieldSet() 4379 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag", 4380 "watcher_pause", "volume_group_name") 4381
4382 - def CheckArguments(self):
4383 _CheckOutputFields(static=self._FIELDS_STATIC, 4384 dynamic=self._FIELDS_DYNAMIC, 4385 selected=self.op.output_fields)
4386
4387 - def ExpandNames(self):
4388 self.needed_locks = {}
4389
4390 - def Exec(self, feedback_fn):
4391 """Dump a representation of the cluster config to the standard output. 4392 4393 """ 4394 values = [] 4395 for field in self.op.output_fields: 4396 if field == "cluster_name": 4397 entry = self.cfg.GetClusterName() 4398 elif field == "master_node": 4399 entry = self.cfg.GetMasterNode() 4400 elif field == "drain_flag": 4401 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE) 4402 elif field == "watcher_pause": 4403 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE) 4404 elif field == "volume_group_name": 4405 entry = self.cfg.GetVGName() 4406 else: 4407 raise errors.ParameterError(field) 4408 values.append(entry) 4409 return values
4410
4411 4412 -class LUActivateInstanceDisks(NoHooksLU):
4413 """Bring up an instance's disks. 4414 4415 """ 4416 _OP_PARAMS = [ 4417 _PInstanceName, 4418 ("ignore_size", False, ht.TBool), 4419 ] 4420 REQ_BGL = False 4421
4422 - def ExpandNames(self):
4423 self._ExpandAndLockInstance() 4424 self.needed_locks[locking.LEVEL_NODE] = [] 4425 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4426
4427 - def DeclareLocks(self, level):
4428 if level == locking.LEVEL_NODE: 4429 self._LockInstancesNodes()
4430
4431 - def CheckPrereq(self):
4432 """Check prerequisites. 4433 4434 This checks that the instance is in the cluster. 4435 4436 """ 4437 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4438 assert self.instance is not None, \ 4439 "Cannot retrieve locked instance %s" % self.op.instance_name 4440 _CheckNodeOnline(self, self.instance.primary_node)
4441
4442 - def Exec(self, feedback_fn):
4443 """Activate the disks. 4444 4445 """ 4446 disks_ok, disks_info = \ 4447 _AssembleInstanceDisks(self, self.instance, 4448 ignore_size=self.op.ignore_size) 4449 if not disks_ok: 4450 raise errors.OpExecError("Cannot activate block devices") 4451 4452 return disks_info
4453
4454 4455 -def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, 4456 ignore_size=False):
4457 """Prepare the block devices for an instance. 4458 4459 This sets up the block devices on all nodes. 4460 4461 @type lu: L{LogicalUnit} 4462 @param lu: the logical unit on whose behalf we execute 4463 @type instance: L{objects.Instance} 4464 @param instance: the instance for whose disks we assemble 4465 @type disks: list of L{objects.Disk} or None 4466 @param disks: which disks to assemble (or all, if None) 4467 @type ignore_secondaries: boolean 4468 @param ignore_secondaries: if true, errors on secondary nodes 4469 won't result in an error return from the function 4470 @type ignore_size: boolean 4471 @param ignore_size: if true, the current known size of the disk 4472 will not be used during the disk activation, useful for cases 4473 when the size is wrong 4474 @return: False if the operation failed, otherwise a list of 4475 (host, instance_visible_name, node_visible_name) 4476 with the mapping from node devices to instance devices 4477 4478 """ 4479 device_info = [] 4480 disks_ok = True 4481 iname = instance.name 4482 disks = _ExpandCheckDisks(instance, disks) 4483 4484 # With the two passes mechanism we try to reduce the window of 4485 # opportunity for the race condition of switching DRBD to primary 4486 # before handshaking occured, but we do not eliminate it 4487 4488 # The proper fix would be to wait (with some limits) until the 4489 # connection has been made and drbd transitions from WFConnection 4490 # into any other network-connected state (Connected, SyncTarget, 4491 # SyncSource, etc.) 4492 4493 # 1st pass, assemble on all nodes in secondary mode 4494 for inst_disk in disks: 4495 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4496 if ignore_size: 4497 node_disk = node_disk.Copy() 4498 node_disk.UnsetSize() 4499 lu.cfg.SetDiskID(node_disk, node) 4500 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False) 4501 msg = result.fail_msg 4502 if msg: 4503 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4504 " (is_primary=False, pass=1): %s", 4505 inst_disk.iv_name, node, msg) 4506 if not ignore_secondaries: 4507 disks_ok = False 4508 4509 # FIXME: race condition on drbd migration to primary 4510 4511 # 2nd pass, do only the primary node 4512 for inst_disk in disks: 4513 dev_path = None 4514 4515 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4516 if node != instance.primary_node: 4517 continue 4518 if ignore_size: 4519 node_disk = node_disk.Copy() 4520 node_disk.UnsetSize() 4521 lu.cfg.SetDiskID(node_disk, node) 4522 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True) 4523 msg = result.fail_msg 4524 if msg: 4525 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4526 " (is_primary=True, pass=2): %s", 4527 inst_disk.iv_name, node, msg) 4528 disks_ok = False 4529 else: 4530 dev_path = result.payload 4531 4532 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path)) 4533 4534 # leave the disks configured for the primary node 4535 # this is a workaround that would be fixed better by 4536 # improving the logical/physical id handling 4537 for disk in disks: 4538 lu.cfg.SetDiskID(disk, instance.primary_node) 4539 4540 return disks_ok, device_info
4541
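The two-pass scheme above first assembles every disk on all nodes in secondary mode and only then re-assembles on the primary node as primary, narrowing (but, as the comments note, not closing) the DRBD WFConnection race. A skeleton of that ordering with a hypothetical per-node assemble callback, not the module's RPC layer:

def _TwoPassAssemble(disks_by_node, primary_node, assemble_fn):
  """disks_by_node: dict of node name -> list of disk objects."""
  # pass 1: bring everything up in secondary mode, on every node
  for node, disks in disks_by_node.items():
    for disk in disks:
      assemble_fn(node, disk, as_primary=False)
  # pass 2: only now promote the disks on the primary node
  for disk in disks_by_node.get(primary_node, []):
    assemble_fn(primary_node, disk, as_primary=True)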
4542 4543 -def _StartInstanceDisks(lu, instance, force):
4544 """Start the disks of an instance. 4545 4546 """ 4547 disks_ok, _ = _AssembleInstanceDisks(lu, instance, 4548 ignore_secondaries=force) 4549 if not disks_ok: 4550 _ShutdownInstanceDisks(lu, instance) 4551 if force is not None and not force: 4552 lu.proc.LogWarning("", hint="If the message above refers to a" 4553 " secondary node," 4554 " you can retry the operation using '--force'.") 4555 raise errors.OpExecError("Disk consistency error")
4556
4557 4558 -class LUDeactivateInstanceDisks(NoHooksLU):
4559 """Shutdown an instance's disks. 4560 4561 """ 4562 _OP_PARAMS = [ 4563 _PInstanceName, 4564 ] 4565 REQ_BGL = False 4566
4567 - def ExpandNames(self):
4568 self._ExpandAndLockInstance() 4569 self.needed_locks[locking.LEVEL_NODE] = [] 4570 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4571
4572 - def DeclareLocks(self, level):
4573 if level == locking.LEVEL_NODE: 4574 self._LockInstancesNodes()
4575
4576 - def CheckPrereq(self):
4577 """Check prerequisites. 4578 4579 This checks that the instance is in the cluster. 4580 4581 """ 4582 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4583 assert self.instance is not None, \ 4584 "Cannot retrieve locked instance %s" % self.op.instance_name
4585
4586 - def Exec(self, feedback_fn):
4587 """Deactivate the disks 4588 4589 """ 4590 instance = self.instance 4591 _SafeShutdownInstanceDisks(self, instance)
4592
4593 4594 -def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4595 """Shutdown block devices of an instance. 4596 4597 This function checks if an instance is running, before calling 4598 _ShutdownInstanceDisks. 4599 4600 """ 4601 _CheckInstanceDown(lu, instance, "cannot shutdown disks") 4602 _ShutdownInstanceDisks(lu, instance, disks=disks)
4603
4604 4605 -def _ExpandCheckDisks(instance, disks):
4606 """Return the instance disks selected by the disks list 4607 4608 @type disks: list of L{objects.Disk} or None 4609 @param disks: selected disks 4610 @rtype: list of L{objects.Disk} 4611 @return: selected instance disks to act on 4612 4613 """ 4614 if disks is None: 4615 return instance.disks 4616 else: 4617 if not set(disks).issubset(instance.disks): 4618 raise errors.ProgrammerError("Can only act on disks belonging to the" 4619 " target instance") 4620 return disks
4621
4622 4623 -def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4624 """Shutdown block devices of an instance. 4625 4626 This does the shutdown on all nodes of the instance. 4627 4628 If the ignore_primary is false, errors on the primary node are 4629 ignored. 4630 4631 """ 4632 all_result = True 4633 disks = _ExpandCheckDisks(instance, disks) 4634 4635 for disk in disks: 4636 for node, top_disk in disk.ComputeNodeTree(instance.primary_node): 4637 lu.cfg.SetDiskID(top_disk, node) 4638 result = lu.rpc.call_blockdev_shutdown(node, top_disk) 4639 msg = result.fail_msg 4640 if msg: 4641 lu.LogWarning("Could not shutdown block device %s on node %s: %s", 4642 disk.iv_name, node, msg) 4643 if not ignore_primary or node != instance.primary_node: 4644 all_result = False 4645 return all_result
4646
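The error accounting in _ShutdownInstanceDisks only forgives a failure when it happened on the primary node and ignore_primary was set. The predicate in isolation (a sketch; the helper name is made up):

def _ShutdownFailureCounts(node, primary_node, ignore_primary):
  """Return True if a shutdown failure on this node should fail the call."""
  return not ignore_primary or node != primary_node

# _ShutdownFailureCounts("node1", "node1", ignore_primary=True)   -> False
# _ShutdownFailureCounts("node2", "node1", ignore_primary=True)   -> True
# _ShutdownFailureCounts("node1", "node1", ignore_primary=False)  -> True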
4647 4648 -def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4649 """Checks if a node has enough free memory. 4650 4651 This function check if a given node has the needed amount of free 4652 memory. In case the node has less memory or we cannot get the 4653 information from the node, this function raise an OpPrereqError 4654 exception. 4655 4656 @type lu: C{LogicalUnit} 4657 @param lu: a logical unit from which we get configuration data 4658 @type node: C{str} 4659 @param node: the node to check 4660 @type reason: C{str} 4661 @param reason: string to use in the error message 4662 @type requested: C{int} 4663 @param requested: the amount of memory in MiB to check for 4664 @type hypervisor_name: C{str} 4665 @param hypervisor_name: the hypervisor to ask for memory stats 4666 @raise errors.OpPrereqError: if the node doesn't have enough memory, or 4667 we cannot check the node 4668 4669 """ 4670 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name) 4671 nodeinfo[node].Raise("Can't get data from node %s" % node, 4672 prereq=True, ecode=errors.ECODE_ENVIRON) 4673 free_mem = nodeinfo[node].payload.get('memory_free', None) 4674 if not isinstance(free_mem, int): 4675 raise errors.OpPrereqError("Can't compute free memory on node %s, result" 4676 " was '%s'" % (node, free_mem), 4677 errors.ECODE_ENVIRON) 4678 if requested > free_mem: 4679 raise errors.OpPrereqError("Not enough memory on node %s for %s:" 4680 " needed %s MiB, available %s MiB" % 4681 (node, reason, requested, free_mem), 4682 errors.ECODE_NORES)
4683
4684 4685 -def _CheckNodesFreeDisk(lu, nodenames, requested):
4686 """Checks if nodes have enough free disk space in the default VG. 4687 4688 This function check if all given nodes have the needed amount of 4689 free disk. In case any node has less disk or we cannot get the 4690 information from the node, this function raise an OpPrereqError 4691 exception. 4692 4693 @type lu: C{LogicalUnit} 4694 @param lu: a logical unit from which we get configuration data 4695 @type nodenames: C{list} 4696 @param nodenames: the list of node names to check 4697 @type requested: C{int} 4698 @param requested: the amount of disk in MiB to check for 4699 @raise errors.OpPrereqError: if the node doesn't have enough disk, or 4700 we cannot check the node 4701 4702 """ 4703 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(), 4704 lu.cfg.GetHypervisorType()) 4705 for node in nodenames: 4706 info = nodeinfo[node] 4707 info.Raise("Cannot get current information from node %s" % node, 4708 prereq=True, ecode=errors.ECODE_ENVIRON) 4709 vg_free = info.payload.get("vg_free", None) 4710 if not isinstance(vg_free, int): 4711 raise errors.OpPrereqError("Can't compute free disk space on node %s," 4712 " result was '%s'" % (node, vg_free), 4713 errors.ECODE_ENVIRON) 4714 if requested > vg_free: 4715 raise errors.OpPrereqError("Not enough disk space on target node %s:" 4716 " required %d MiB, available %d MiB" % 4717 (node, requested, vg_free), 4718 errors.ECODE_NORES)
4719
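Both resource checks above follow the same shape: reject a payload that is not an integer, then compare the requested amount against what the node reports. A standalone version of that pattern with plain exceptions (illustrative only):

def _CheckResource(node, kind, requested_mib, available_mib):
  """Raise if a node cannot provide the requested amount of a resource."""
  if not isinstance(available_mib, int):
    raise ValueError("Can't compute free %s on node %s, result was '%s'" %
                     (kind, node, available_mib))
  if requested_mib > available_mib:
    raise ValueError("Not enough %s on node %s: needed %d MiB,"
                     " available %d MiB" % (kind, node, requested_mib,
                                            available_mib))

# _CheckResource("node1", "memory", 4096, 2048)  # raises: only 2048 MiB free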
4720 4721 -class LUStartupInstance(LogicalUnit):
4722 """Starts an instance. 4723 4724 """ 4725 HPATH = "instance-start" 4726 HTYPE = constants.HTYPE_INSTANCE 4727 _OP_PARAMS = [ 4728 _PInstanceName, 4729 _PForce, 4730 _PIgnoreOfflineNodes, 4731 ("hvparams", ht.EmptyDict, ht.TDict), 4732 ("beparams", ht.EmptyDict, ht.TDict), 4733 ] 4734 REQ_BGL = False 4735
4736 - def CheckArguments(self):
4737 # extra beparams 4738 if self.op.beparams: 4739 # fill the beparams dict 4740 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4741
4742 - def ExpandNames(self):
4743     self._ExpandAndLockInstance()
4744
4745 - def BuildHooksEnv(self):
4746 """Build hooks env. 4747 4748 This runs on master, primary and secondary nodes of the instance. 4749 4750 """ 4751 env = { 4752 "FORCE": self.op.force, 4753 } 4754 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 4755 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4756 return env, nl, nl
4757
4758 - def CheckPrereq(self):
4759 """Check prerequisites. 4760 4761 This checks that the instance is in the cluster. 4762 4763 """ 4764 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4765 assert self.instance is not None, \ 4766 "Cannot retrieve locked instance %s" % self.op.instance_name 4767 4768 # extra hvparams 4769 if self.op.hvparams: 4770 # check hypervisor parameter syntax (locally) 4771 cluster = self.cfg.GetClusterInfo() 4772 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 4773 filled_hvp = cluster.FillHV(instance) 4774 filled_hvp.update(self.op.hvparams) 4775 hv_type = hypervisor.GetHypervisor(instance.hypervisor) 4776 hv_type.CheckParameterSyntax(filled_hvp) 4777 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) 4778 4779 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline 4780 4781 if self.primary_offline and self.op.ignore_offline_nodes: 4782 self.proc.LogWarning("Ignoring offline primary node") 4783 4784 if self.op.hvparams or self.op.beparams: 4785 self.proc.LogWarning("Overridden parameters are ignored") 4786 else: 4787 _CheckNodeOnline(self, instance.primary_node) 4788 4789 bep = self.cfg.GetClusterInfo().FillBE(instance) 4790 4791 # check bridges existence 4792 _CheckInstanceBridgesExist(self, instance) 4793 4794 remote_info = self.rpc.call_instance_info(instance.primary_node, 4795 instance.name, 4796 instance.hypervisor) 4797 remote_info.Raise("Error checking node %s" % instance.primary_node, 4798 prereq=True, ecode=errors.ECODE_ENVIRON) 4799 if not remote_info.payload: # not running already 4800 _CheckNodeFreeMemory(self, instance.primary_node, 4801 "starting instance %s" % instance.name, 4802 bep[constants.BE_MEMORY], instance.hypervisor)
4803
4804 - def Exec(self, feedback_fn):
4805 """Start the instance. 4806 4807 """ 4808 instance = self.instance 4809 force = self.op.force 4810 4811 self.cfg.MarkInstanceUp(instance.name) 4812 4813 if self.primary_offline: 4814 assert self.op.ignore_offline_nodes 4815 self.proc.LogInfo("Primary node offline, marked instance as started") 4816 else: 4817 node_current = instance.primary_node 4818 4819 _StartInstanceDisks(self, instance, force) 4820 4821 result = self.rpc.call_instance_start(node_current, instance, 4822 self.op.hvparams, self.op.beparams) 4823 msg = result.fail_msg 4824 if msg: 4825 _ShutdownInstanceDisks(self, instance) 4826 raise errors.OpExecError("Could not start instance: %s" % msg)
4827
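When extra hvparams are passed to a start request, CheckPrereq above validates them against the instance's fully-filled hypervisor parameters: the dict returned by cluster.FillHV(instance) updated with the one-off values. A simplified sketch of that merge order, using plain dicts in place of the cluster object:

def _FilledWithOverrides(filled_from_cluster, one_off_overrides):
  """Apply one-off parameter overrides on top of the filled defaults."""
  filled = dict(filled_from_cluster)
  filled.update(one_off_overrides)
  return filled

# _FilledWithOverrides({"boot_order": "c", "acpi": True},
#                      {"boot_order": "d"})
# -> {"boot_order": "d", "acpi": True}
# The merged dict is what the syntax check and _CheckHVParams then see.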
4828 4829 -class LURebootInstance(LogicalUnit):
4830 """Reboot an instance. 4831 4832 """ 4833 HPATH = "instance-reboot" 4834 HTYPE = constants.HTYPE_INSTANCE 4835 _OP_PARAMS = [ 4836 _PInstanceName, 4837 ("ignore_secondaries", False, ht.TBool), 4838 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)), 4839 _PShutdownTimeout, 4840 ] 4841 REQ_BGL = False 4842
4843 - def ExpandNames(self):
4844     self._ExpandAndLockInstance()
4845
4846 - def BuildHooksEnv(self):
4847 """Build hooks env. 4848 4849 This runs on master, primary and secondary nodes of the instance. 4850 4851 """ 4852 env = { 4853 "IGNORE_SECONDARIES": self.op.ignore_secondaries, 4854 "REBOOT_TYPE": self.op.reboot_type, 4855 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 4856 } 4857 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 4858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4859 return env, nl, nl
4860
4861 - def CheckPrereq(self):
4862 """Check prerequisites. 4863 4864 This checks that the instance is in the cluster. 4865 4866 """ 4867 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4868 assert self.instance is not None, \ 4869 "Cannot retrieve locked instance %s" % self.op.instance_name 4870 4871 _CheckNodeOnline(self, instance.primary_node) 4872 4873 # check bridges existence 4874 _CheckInstanceBridgesExist(self, instance)
4875
4876 - def Exec(self, feedback_fn):
4877 """Reboot the instance. 4878 4879 """ 4880 instance = self.instance 4881 ignore_secondaries = self.op.ignore_secondaries 4882 reboot_type = self.op.reboot_type 4883 4884 node_current = instance.primary_node 4885 4886 if reboot_type in [constants.INSTANCE_REBOOT_SOFT, 4887 constants.INSTANCE_REBOOT_HARD]: 4888 for disk in instance.disks: 4889 self.cfg.SetDiskID(disk, node_current) 4890 result = self.rpc.call_instance_reboot(node_current, instance, 4891 reboot_type, 4892 self.op.shutdown_timeout) 4893 result.Raise("Could not reboot instance") 4894 else: 4895 result = self.rpc.call_instance_shutdown(node_current, instance, 4896 self.op.shutdown_timeout) 4897 result.Raise("Could not shutdown instance for full reboot") 4898 _ShutdownInstanceDisks(self, instance) 4899 _StartInstanceDisks(self, instance, ignore_secondaries) 4900 result = self.rpc.call_instance_start(node_current, instance, None, None) 4901 msg = result.fail_msg 4902 if msg: 4903 _ShutdownInstanceDisks(self, instance) 4904 raise errors.OpExecError("Could not start instance for" 4905 " full reboot: %s" % msg) 4906 4907 self.cfg.MarkInstanceUp(instance.name)
4908
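LURebootInstance.Exec above treats soft and hard reboots as a single in-hypervisor call, while any other reboot type becomes an explicit shutdown, disk restart and fresh start. The dispatch shape with hypothetical callbacks (a sketch, not the module's RPC interface):

def _RebootDispatch(reboot_type, soft_types, reboot_fn, shutdown_fn,
                    start_disks_fn, start_fn):
  """Choose between an in-hypervisor reboot and a full stop/start cycle."""
  if reboot_type in soft_types:
    # soft/hard reboots stay inside the hypervisor
    reboot_fn(reboot_type)
  else:
    # a full reboot tears the instance down and brings it back up
    shutdown_fn()
    start_disks_fn()
    start_fn()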
4909 4910 -class LUShutdownInstance(LogicalUnit):
4911 """Shutdown an instance. 4912 4913 """ 4914 HPATH = "instance-stop" 4915 HTYPE = constants.HTYPE_INSTANCE 4916 _OP_PARAMS = [ 4917 _PInstanceName, 4918 _PIgnoreOfflineNodes, 4919 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt), 4920 ] 4921 REQ_BGL = False 4922
4923 - def ExpandNames(self):
4924     self._ExpandAndLockInstance()
4925
4926 - def BuildHooksEnv(self):
4927 """Build hooks env. 4928 4929 This runs on master, primary and secondary nodes of the instance. 4930 4931 """ 4932 env = _BuildInstanceHookEnvByObject(self, self.instance) 4933 env["TIMEOUT"] = self.op.timeout 4934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 4935 return env, nl, nl
4936
4937 - def CheckPrereq(self):
4938 """Check prerequisites. 4939 4940 This checks that the instance is in the cluster. 4941 4942 """ 4943 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4944 assert self.instance is not None, \ 4945 "Cannot retrieve locked instance %s" % self.op.instance_name 4946 4947 self.primary_offline = \ 4948 self.cfg.GetNodeInfo(self.instance.primary_node).offline 4949 4950 if self.primary_offline and self.op.ignore_offline_nodes: 4951 self.proc.LogWarning("Ignoring offline primary node") 4952 else: 4953 _CheckNodeOnline(self, self.instance.primary_node)
4954
4955 - def Exec(self, feedback_fn):
4956 """Shutdown the instance. 4957 4958 """ 4959 instance = self.instance 4960 node_current = instance.primary_node 4961 timeout = self.op.timeout 4962 4963 self.cfg.MarkInstanceDown(instance.name) 4964 4965 if self.primary_offline: 4966 assert self.op.ignore_offline_nodes 4967 self.proc.LogInfo("Primary node offline, marked instance as stopped") 4968 else: 4969 result = self.rpc.call_instance_shutdown(node_current, instance, timeout) 4970 msg = result.fail_msg 4971 if msg: 4972 self.proc.LogWarning("Could not shutdown instance: %s" % msg) 4973 4974 _ShutdownInstanceDisks(self, instance)
4975
4976 4977 -class LUReinstallInstance(LogicalUnit):
4978 """Reinstall an instance. 4979 4980 """ 4981 HPATH = "instance-reinstall" 4982 HTYPE = constants.HTYPE_INSTANCE 4983 _OP_PARAMS = [ 4984 _PInstanceName, 4985 ("os_type", None, ht.TMaybeString), 4986 ("force_variant", False, ht.TBool), 4987 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)), 4988 ] 4989 REQ_BGL = False 4990
4991 - def ExpandNames(self):
4992 self._ExpandAndLockInstance()
4993
4994 - def BuildHooksEnv(self):
4995 """Build hooks env. 4996 4997 This runs on master, primary and secondary nodes of the instance. 4998 4999 """ 5000 env = _BuildInstanceHookEnvByObject(self, self.instance) 5001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5002 return env, nl, nl
5003
5004 - def CheckPrereq(self):
5005 """Check prerequisites. 5006 5007 This checks that the instance is in the cluster and is not running. 5008 5009 """ 5010 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5011 assert instance is not None, \ 5012 "Cannot retrieve locked instance %s" % self.op.instance_name 5013 _CheckNodeOnline(self, instance.primary_node, "Instance primary node" 5014 " offline, cannot reinstall") 5015 for node in instance.secondary_nodes: 5016 _CheckNodeOnline(self, node, "Instance secondary node offline," 5017 " cannot reinstall") 5018 5019 if instance.disk_template == constants.DT_DISKLESS: 5020 raise errors.OpPrereqError("Instance '%s' has no disks" % 5021 self.op.instance_name, 5022 errors.ECODE_INVAL) 5023 _CheckInstanceDown(self, instance, "cannot reinstall") 5024 5025 if self.op.os_type is not None: 5026 # OS verification 5027 pnode = _ExpandNodeName(self.cfg, instance.primary_node) 5028 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) 5029 instance_os = self.op.os_type 5030 else: 5031 instance_os = instance.os 5032 5033 nodelist = list(instance.all_nodes) 5034 5035 if self.op.osparams: 5036 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 5037 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 5038 self.os_inst = i_osdict # the new dict (without defaults) 5039 else: 5040 self.os_inst = None 5041 5042 self.instance = instance
5043
5044 - def Exec(self, feedback_fn):
5045 """Reinstall the instance. 5046 5047 """ 5048 inst = self.instance 5049 5050 if self.op.os_type is not None: 5051 feedback_fn("Changing OS to '%s'..." % self.op.os_type) 5052 inst.os = self.op.os_type 5053 # Write to configuration 5054 self.cfg.Update(inst, feedback_fn) 5055 5056 _StartInstanceDisks(self, inst, None) 5057 try: 5058 feedback_fn("Running the instance OS create scripts...") 5059 # FIXME: pass debug option from opcode to backend 5060 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 5061 self.op.debug_level, 5062 osparams=self.os_inst) 5063 result.Raise("Could not install OS for instance %s on node %s" % 5064 (inst.name, inst.primary_node)) 5065 finally: 5066 _ShutdownInstanceDisks(self, inst)
5067
5068 5069 -class LURecreateInstanceDisks(LogicalUnit):
5070 """Recreate an instance's missing disks. 5071 5072 """ 5073 HPATH = "instance-recreate-disks" 5074 HTYPE = constants.HTYPE_INSTANCE 5075 _OP_PARAMS = [ 5076 _PInstanceName, 5077 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)), 5078 ] 5079 REQ_BGL = False 5080
5081 - def ExpandNames(self):
5082 self._ExpandAndLockInstance()
5083
5084 - def BuildHooksEnv(self):
5085 """Build hooks env. 5086 5087 This runs on master, primary and secondary nodes of the instance. 5088 5089 """ 5090 env = _BuildInstanceHookEnvByObject(self, self.instance) 5091 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5092 return env, nl, nl
5093
5094 - def CheckPrereq(self):
5095 """Check prerequisites. 5096 5097 This checks that the instance is in the cluster and is not running. 5098 5099 """ 5100 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5101 assert instance is not None, \ 5102 "Cannot retrieve locked instance %s" % self.op.instance_name 5103 _CheckNodeOnline(self, instance.primary_node) 5104 5105 if instance.disk_template == constants.DT_DISKLESS: 5106 raise errors.OpPrereqError("Instance '%s' has no disks" % 5107 self.op.instance_name, errors.ECODE_INVAL) 5108 _CheckInstanceDown(self, instance, "cannot recreate disks") 5109 5110 if not self.op.disks: 5111 self.op.disks = range(len(instance.disks)) 5112 else: 5113 for idx in self.op.disks: 5114 if idx >= len(instance.disks): 5115 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx, 5116 errors.ECODE_INVAL) 5117 5118 self.instance = instance
5119
5120 - def Exec(self, feedback_fn):
5121 """Recreate the disks. 5122 5123 """ 5124 to_skip = [] 5125 for idx, _ in enumerate(self.instance.disks): 5126 if idx not in self.op.disks: # disk idx has not been passed in 5127 to_skip.append(idx) 5128 continue 5129 5130 _CreateDisks(self, self.instance, to_skip=to_skip)
5131
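The loop above builds to_skip as the complement of the requested indices: any disk index not named in the opcode's "disks" parameter is skipped by _CreateDisks, and with the default (op.disks == range(len(instance.disks))) nothing is skipped at all. A minimal standalone sketch of that selection, separate from the module source (compute_to_skip is a made-up helper, not a Ganeti function):

# Illustrative sketch only -- mirrors the to_skip computation above.
def compute_to_skip(disk_count, requested):
    """Return the disk indices that _CreateDisks would be told to skip."""
    return [idx for idx in range(disk_count) if idx not in requested]

# An instance with three disks, recreating only disk 1:
assert compute_to_skip(3, [1]) == [0, 2]
# Default case (all indices requested) -> nothing is skipped:
assert compute_to_skip(3, range(3)) == []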
5132 5133 -class LURenameInstance(LogicalUnit):
5134 """Rename an instance. 5135 5136 """ 5137 HPATH = "instance-rename" 5138 HTYPE = constants.HTYPE_INSTANCE 5139 _OP_PARAMS = [ 5140 _PInstanceName, 5141 ("new_name", ht.NoDefault, ht.TNonEmptyString), 5142 ("ip_check", False, ht.TBool), 5143 ("name_check", True, ht.TBool), 5144 ] 5145
5146 - def CheckArguments(self):
5147 """Check arguments. 5148 5149 """ 5150 if self.op.ip_check and not self.op.name_check: 5151 # TODO: make the ip check more flexible and not depend on the name check 5152 raise errors.OpPrereqError("Cannot do ip check without a name check", 5153 errors.ECODE_INVAL)
5154
5155 - def BuildHooksEnv(self):
5156 """Build hooks env. 5157 5158 This runs on master, primary and secondary nodes of the instance. 5159 5160 """ 5161 env = _BuildInstanceHookEnvByObject(self, self.instance) 5162 env["INSTANCE_NEW_NAME"] = self.op.new_name 5163 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5164 return env, nl, nl
5165
5166 - def CheckPrereq(self):
5167 """Check prerequisites. 5168 5169 This checks that the instance is in the cluster and is not running. 5170 5171 """ 5172 self.op.instance_name = _ExpandInstanceName(self.cfg, 5173 self.op.instance_name) 5174 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5175 assert instance is not None 5176 _CheckNodeOnline(self, instance.primary_node) 5177 _CheckInstanceDown(self, instance, "cannot rename") 5178 self.instance = instance 5179 5180 new_name = self.op.new_name 5181 if self.op.name_check: 5182 hostname = netutils.GetHostname(name=new_name) 5183 new_name = self.op.new_name = hostname.name 5184 if (self.op.ip_check and 5185 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)): 5186 raise errors.OpPrereqError("IP %s of instance %s already in use" % 5187 (hostname.ip, new_name), 5188 errors.ECODE_NOTUNIQUE) 5189 5190 instance_list = self.cfg.GetInstanceList() 5191 if new_name in instance_list: 5192 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 5193 new_name, errors.ECODE_EXISTS)
5194
5195 - def Exec(self, feedback_fn):
5196 """Reinstall the instance. 5197 5198 """ 5199 inst = self.instance 5200 old_name = inst.name 5201 5202 if inst.disk_template == constants.DT_FILE: 5203 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5204 5205 self.cfg.RenameInstance(inst.name, self.op.new_name) 5206 # Change the instance lock. This is definitely safe while we hold the BGL 5207 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name) 5208 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) 5209 5210 # re-read the instance from the configuration after rename 5211 inst = self.cfg.GetInstanceInfo(self.op.new_name) 5212 5213 if inst.disk_template == constants.DT_FILE: 5214 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5215 result = self.rpc.call_file_storage_dir_rename(inst.primary_node, 5216 old_file_storage_dir, 5217 new_file_storage_dir) 5218 result.Raise("Could not rename on node %s directory '%s' to '%s'" 5219 " (but the instance has been renamed in Ganeti)" % 5220 (inst.primary_node, old_file_storage_dir, 5221 new_file_storage_dir)) 5222 5223 _StartInstanceDisks(self, inst, None) 5224 try: 5225 result = self.rpc.call_instance_run_rename(inst.primary_node, inst, 5226 old_name, self.op.debug_level) 5227 msg = result.fail_msg 5228 if msg: 5229 msg = ("Could not run OS rename script for instance %s on node %s" 5230 " (but the instance has been renamed in Ganeti): %s" % 5231 (inst.name, inst.primary_node, msg)) 5232 self.proc.LogWarning(msg) 5233 finally: 5234 _ShutdownInstanceDisks(self, inst) 5235 5236 return inst.name
5237
5238 5239 -class LURemoveInstance(LogicalUnit):
5240 """Remove an instance. 5241 5242 """ 5243 HPATH = "instance-remove" 5244 HTYPE = constants.HTYPE_INSTANCE 5245 _OP_PARAMS = [ 5246 _PInstanceName, 5247 ("ignore_failures", False, ht.TBool), 5248 _PShutdownTimeout, 5249 ] 5250 REQ_BGL = False 5251
5252 - def ExpandNames(self):
5253 self._ExpandAndLockInstance() 5254 self.needed_locks[locking.LEVEL_NODE] = [] 5255 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5256
5257 - def DeclareLocks(self, level):
5258 if level == locking.LEVEL_NODE: 5259 self._LockInstancesNodes()
5260
5261 - def BuildHooksEnv(self):
5262 """Build hooks env. 5263 5264 This runs on master, primary and secondary nodes of the instance. 5265 5266 """ 5267 env = _BuildInstanceHookEnvByObject(self, self.instance) 5268 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout 5269 nl = [self.cfg.GetMasterNode()] 5270 nl_post = list(self.instance.all_nodes) + nl 5271 return env, nl, nl_post
5272
5273 - def CheckPrereq(self):
5274 """Check prerequisites. 5275 5276 This checks that the instance is in the cluster. 5277 5278 """ 5279 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5280 assert self.instance is not None, \ 5281 "Cannot retrieve locked instance %s" % self.op.instance_name
5282
5283 - def Exec(self, feedback_fn):
5284 """Remove the instance. 5285 5286 """ 5287 instance = self.instance 5288 logging.info("Shutting down instance %s on node %s", 5289 instance.name, instance.primary_node) 5290 5291 result = self.rpc.call_instance_shutdown(instance.primary_node, instance, 5292 self.op.shutdown_timeout) 5293 msg = result.fail_msg 5294 if msg: 5295 if self.op.ignore_failures: 5296 feedback_fn("Warning: can't shutdown instance: %s" % msg) 5297 else: 5298 raise errors.OpExecError("Could not shutdown instance %s on" 5299 " node %s: %s" % 5300 (instance.name, instance.primary_node, msg)) 5301 5302 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5303
5304 5305 -def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5306 """Utility function to remove an instance. 5307 5308 """ 5309 logging.info("Removing block devices for instance %s", instance.name) 5310 5311 if not _RemoveDisks(lu, instance): 5312 if not ignore_failures: 5313 raise errors.OpExecError("Can't remove instance's disks") 5314 feedback_fn("Warning: can't remove instance's disks") 5315 5316 logging.info("Removing instance %s out of cluster config", instance.name) 5317 5318 lu.cfg.RemoveInstance(instance.name) 5319 5320 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ 5321 "Instance lock removal conflict" 5322 5323 # Remove lock for the instance 5324 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5325
5326 5327 -class LUQueryInstances(NoHooksLU):
5328 """Logical unit for querying instances. 5329 5330 """ 5331 # pylint: disable-msg=W0142 5332 _OP_PARAMS = [ 5333 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 5334 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 5335 ("use_locking", False, ht.TBool), 5336 ] 5337 REQ_BGL = False 5338 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor", 5339 "serial_no", "ctime", "mtime", "uuid"] 5340 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes", 5341 "admin_state", 5342 "disk_template", "ip", "mac", "bridge", 5343 "nic_mode", "nic_link", 5344 "sda_size", "sdb_size", "vcpus", "tags", 5345 "network_port", "beparams", 5346 r"(disk)\.(size)/([0-9]+)", 5347 r"(disk)\.(sizes)", "disk_usage", 5348 r"(nic)\.(mac|ip|mode|link)/([0-9]+)", 5349 r"(nic)\.(bridge)/([0-9]+)", 5350 r"(nic)\.(macs|ips|modes|links|bridges)", 5351 r"(disk|nic)\.(count)", 5352 "hvparams", "custom_hvparams", 5353 "custom_beparams", "custom_nicparams", 5354 ] + _SIMPLE_FIELDS + 5355 ["hv/%s" % name 5356 for name in constants.HVS_PARAMETERS 5357 if name not in constants.HVC_GLOBALS] + 5358 ["be/%s" % name 5359 for name in constants.BES_PARAMETERS]) 5360 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", 5361 "oper_ram", 5362 "oper_vcpus", 5363 "status") 5364 5365
5366 - def CheckArguments(self):
5367 _CheckOutputFields(static=self._FIELDS_STATIC, 5368 dynamic=self._FIELDS_DYNAMIC, 5369 selected=self.op.output_fields)
5370
5371 - def ExpandNames(self):
5372 self.needed_locks = {} 5373 self.share_locks[locking.LEVEL_INSTANCE] = 1 5374 self.share_locks[locking.LEVEL_NODE] = 1 5375 5376 if self.op.names: 5377 self.wanted = _GetWantedInstances(self, self.op.names) 5378 else: 5379 self.wanted = locking.ALL_SET 5380 5381 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields) 5382 self.do_locking = self.do_node_query and self.op.use_locking 5383 if self.do_locking: 5384 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted 5385 self.needed_locks[locking.LEVEL_NODE] = [] 5386 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5387
5388 - def DeclareLocks(self, level):
5389 if level == locking.LEVEL_NODE and self.do_locking: 5390 self._LockInstancesNodes()
5391
5392 - def Exec(self, feedback_fn):
5393 """Computes the list of nodes and their attributes. 5394 5395 """ 5396 # pylint: disable-msg=R0912 5397 # way too many branches here 5398 all_info = self.cfg.GetAllInstancesInfo() 5399 if self.wanted == locking.ALL_SET: 5400 # caller didn't specify instance names, so ordering is not important 5401 if self.do_locking: 5402 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE] 5403 else: 5404 instance_names = all_info.keys() 5405 instance_names = utils.NiceSort(instance_names) 5406 else: 5407 # caller did specify names, so we must keep the ordering 5408 if self.do_locking: 5409 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE] 5410 else: 5411 tgt_set = all_info.keys() 5412 missing = set(self.wanted).difference(tgt_set) 5413 if missing: 5414 raise errors.OpExecError("Some instances were removed before" 5415 " retrieving their data: %s" % missing) 5416 instance_names = self.wanted 5417 5418 instance_list = [all_info[iname] for iname in instance_names] 5419 5420 # begin data gathering 5421 5422 nodes = frozenset([inst.primary_node for inst in instance_list]) 5423 hv_list = list(set([inst.hypervisor for inst in instance_list])) 5424 5425 bad_nodes = [] 5426 off_nodes = [] 5427 if self.do_node_query: 5428 live_data = {} 5429 node_data = self.rpc.call_all_instances_info(nodes, hv_list) 5430 for name in nodes: 5431 result = node_data[name] 5432 if result.offline: 5433 # offline nodes will be in both lists 5434 off_nodes.append(name) 5435 if result.fail_msg: 5436 bad_nodes.append(name) 5437 else: 5438 if result.payload: 5439 live_data.update(result.payload) 5440 # else no instance is alive 5441 else: 5442 live_data = dict([(name, {}) for name in instance_names]) 5443 5444 # end data gathering 5445 5446 HVPREFIX = "hv/" 5447 BEPREFIX = "be/" 5448 output = [] 5449 cluster = self.cfg.GetClusterInfo() 5450 for instance in instance_list: 5451 iout = [] 5452 i_hv = cluster.FillHV(instance, skip_globals=True) 5453 i_be = cluster.FillBE(instance) 5454 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics] 5455 for field in self.op.output_fields: 5456 st_match = self._FIELDS_STATIC.Matches(field) 5457 if field in self._SIMPLE_FIELDS: 5458 val = getattr(instance, field) 5459 elif field == "pnode": 5460 val = instance.primary_node 5461 elif field == "snodes": 5462 val = list(instance.secondary_nodes) 5463 elif field == "admin_state": 5464 val = instance.admin_up 5465 elif field == "oper_state": 5466 if instance.primary_node in bad_nodes: 5467 val = None 5468 else: 5469 val = bool(live_data.get(instance.name)) 5470 elif field == "status": 5471 if instance.primary_node in off_nodes: 5472 val = "ERROR_nodeoffline" 5473 elif instance.primary_node in bad_nodes: 5474 val = "ERROR_nodedown" 5475 else: 5476 running = bool(live_data.get(instance.name)) 5477 if running: 5478 if instance.admin_up: 5479 val = "running" 5480 else: 5481 val = "ERROR_up" 5482 else: 5483 if instance.admin_up: 5484 val = "ERROR_down" 5485 else: 5486 val = "ADMIN_down" 5487 elif field == "oper_ram": 5488 if instance.primary_node in bad_nodes: 5489 val = None 5490 elif instance.name in live_data: 5491 val = live_data[instance.name].get("memory", "?") 5492 else: 5493 val = "-" 5494 elif field == "oper_vcpus": 5495 if instance.primary_node in bad_nodes: 5496 val = None 5497 elif instance.name in live_data: 5498 val = live_data[instance.name].get("vcpus", "?") 5499 else: 5500 val = "-" 5501 elif field == "vcpus": 5502 val = i_be[constants.BE_VCPUS] 5503 elif field == "disk_template": 5504 val = 
instance.disk_template 5505 elif field == "ip": 5506 if instance.nics: 5507 val = instance.nics[0].ip 5508 else: 5509 val = None 5510 elif field == "nic_mode": 5511 if instance.nics: 5512 val = i_nicp[0][constants.NIC_MODE] 5513 else: 5514 val = None 5515 elif field == "nic_link": 5516 if instance.nics: 5517 val = i_nicp[0][constants.NIC_LINK] 5518 else: 5519 val = None 5520 elif field == "bridge": 5521 if (instance.nics and 5522 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED): 5523 val = i_nicp[0][constants.NIC_LINK] 5524 else: 5525 val = None 5526 elif field == "mac": 5527 if instance.nics: 5528 val = instance.nics[0].mac 5529 else: 5530 val = None 5531 elif field == "custom_nicparams": 5532 val = [nic.nicparams for nic in instance.nics] 5533 elif field == "sda_size" or field == "sdb_size": 5534 idx = ord(field[2]) - ord('a') 5535 try: 5536 val = instance.FindDisk(idx).size 5537 except errors.OpPrereqError: 5538 val = None 5539 elif field == "disk_usage": # total disk usage per node 5540 disk_sizes = [{'size': disk.size} for disk in instance.disks] 5541 val = _ComputeDiskSize(instance.disk_template, disk_sizes) 5542 elif field == "tags": 5543 val = list(instance.GetTags()) 5544 elif field == "custom_hvparams": 5545 val = instance.hvparams # not filled! 5546 elif field == "hvparams": 5547 val = i_hv 5548 elif (field.startswith(HVPREFIX) and 5549 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and 5550 field[len(HVPREFIX):] not in constants.HVC_GLOBALS): 5551 val = i_hv.get(field[len(HVPREFIX):], None) 5552 elif field == "custom_beparams": 5553 val = instance.beparams 5554 elif field == "beparams": 5555 val = i_be 5556 elif (field.startswith(BEPREFIX) and 5557 field[len(BEPREFIX):] in constants.BES_PARAMETERS): 5558 val = i_be.get(field[len(BEPREFIX):], None) 5559 elif st_match and st_match.groups(): 5560 # matches a variable list 5561 st_groups = st_match.groups() 5562 if st_groups and st_groups[0] == "disk": 5563 if st_groups[1] == "count": 5564 val = len(instance.disks) 5565 elif st_groups[1] == "sizes": 5566 val = [disk.size for disk in instance.disks] 5567 elif st_groups[1] == "size": 5568 try: 5569 val = instance.FindDisk(st_groups[2]).size 5570 except errors.OpPrereqError: 5571 val = None 5572 else: 5573 assert False, "Unhandled disk parameter" 5574 elif st_groups[0] == "nic": 5575 if st_groups[1] == "count": 5576 val = len(instance.nics) 5577 elif st_groups[1] == "macs": 5578 val = [nic.mac for nic in instance.nics] 5579 elif st_groups[1] == "ips": 5580 val = [nic.ip for nic in instance.nics] 5581 elif st_groups[1] == "modes": 5582 val = [nicp[constants.NIC_MODE] for nicp in i_nicp] 5583 elif st_groups[1] == "links": 5584 val = [nicp[constants.NIC_LINK] for nicp in i_nicp] 5585 elif st_groups[1] == "bridges": 5586 val = [] 5587 for nicp in i_nicp: 5588 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 5589 val.append(nicp[constants.NIC_LINK]) 5590 else: 5591 val.append(None) 5592 else: 5593 # index-based item 5594 nic_idx = int(st_groups[2]) 5595 if nic_idx >= len(instance.nics): 5596 val = None 5597 else: 5598 if st_groups[1] == "mac": 5599 val = instance.nics[nic_idx].mac 5600 elif st_groups[1] == "ip": 5601 val = instance.nics[nic_idx].ip 5602 elif st_groups[1] == "mode": 5603 val = i_nicp[nic_idx][constants.NIC_MODE] 5604 elif st_groups[1] == "link": 5605 val = i_nicp[nic_idx][constants.NIC_LINK] 5606 elif st_groups[1] == "bridge": 5607 nic_mode = i_nicp[nic_idx][constants.NIC_MODE] 5608 if nic_mode == constants.NIC_MODE_BRIDGED: 5609 val = 
i_nicp[nic_idx][constants.NIC_LINK] 5610 else: 5611 val = None 5612 else: 5613 assert False, "Unhandled NIC parameter" 5614 else: 5615 assert False, ("Declared but unhandled variable parameter '%s'" % 5616 field) 5617 else: 5618 assert False, "Declared but unhandled parameter '%s'" % field 5619 iout.append(val) 5620 output.append(iout) 5621 5622 return output
5623
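The indexed fields declared in _FIELDS_STATIC above (r"(nic)\.(mac|ip|mode|link)/([0-9]+)" and friends) are what st_match.groups() decomposes in the long elif chain: the first group selects disk or nic, the second the attribute, and the optional third group the index. A rough standalone illustration of that dispatch using plain re; it only assumes that utils.FieldSet behaves like a list of anchored regexes, and simplified_field_re is invented for the example:

import re

# Illustrative sketch only -- how a query field decomposes into the groups
# the Exec loop above switches on.
simplified_field_re = re.compile(r"^(disk|nic)\.([a-z]+)(?:/([0-9]+))?$")

for field in ("nic.mac/0", "nic.count", "disk.size/1", "disk.sizes"):
    kind, attr, idx = simplified_field_re.match(field).groups()
    print("%s -> kind=%s, attr=%s, index=%s" % (field, kind, attr, idx))
# nic.mac/0 -> kind=nic, attr=mac, index=0
# nic.count -> kind=nic, attr=count, index=None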
5624 5625 -class LUFailoverInstance(LogicalUnit):
5626 """Failover an instance. 5627 5628 """ 5629 HPATH = "instance-failover" 5630 HTYPE = constants.HTYPE_INSTANCE 5631 _OP_PARAMS = [ 5632 _PInstanceName, 5633 ("ignore_consistency", False, ht.TBool), 5634 _PShutdownTimeout, 5635 ] 5636 REQ_BGL = False 5637
5638 - def ExpandNames(self):
5639 self._ExpandAndLockInstance() 5640 self.needed_locks[locking.LEVEL_NODE] = [] 5641 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5642
5643 - def DeclareLocks(self, level):
5644 if level == locking.LEVEL_NODE: 5645 self._LockInstancesNodes()
5646
5647 - def BuildHooksEnv(self):
5648 """Build hooks env. 5649 5650 This runs on master, primary and secondary nodes of the instance. 5651 5652 """ 5653 instance = self.instance 5654 source_node = instance.primary_node 5655 target_node = instance.secondary_nodes[0] 5656 env = { 5657 "IGNORE_CONSISTENCY": self.op.ignore_consistency, 5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5659 "OLD_PRIMARY": source_node, 5660 "OLD_SECONDARY": target_node, 5661 "NEW_PRIMARY": target_node, 5662 "NEW_SECONDARY": source_node, 5663 } 5664 env.update(_BuildInstanceHookEnvByObject(self, instance)) 5665 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5666 nl_post = list(nl) 5667 nl_post.append(source_node) 5668 return env, nl, nl_post
5669
5670 - def CheckPrereq(self):
5671 """Check prerequisites. 5672 5673 This checks that the instance is in the cluster. 5674 5675 """ 5676 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5677 assert self.instance is not None, \ 5678 "Cannot retrieve locked instance %s" % self.op.instance_name 5679 5680 bep = self.cfg.GetClusterInfo().FillBE(instance) 5681 if instance.disk_template not in constants.DTS_NET_MIRROR: 5682 raise errors.OpPrereqError("Instance's disk layout is not" 5683 " network mirrored, cannot failover.", 5684 errors.ECODE_STATE) 5685 5686 secondary_nodes = instance.secondary_nodes 5687 if not secondary_nodes: 5688 raise errors.ProgrammerError("no secondary node but using " 5689 "a mirrored disk template") 5690 5691 target_node = secondary_nodes[0] 5692 _CheckNodeOnline(self, target_node) 5693 _CheckNodeNotDrained(self, target_node) 5694 if instance.admin_up: 5695 # check memory requirements on the secondary node 5696 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 5697 instance.name, bep[constants.BE_MEMORY], 5698 instance.hypervisor) 5699 else: 5700 self.LogInfo("Not checking memory on the secondary node as" 5701 " instance will not be started") 5702 5703 # check bridge existance 5704 _CheckInstanceBridgesExist(self, instance, node=target_node)
5705
5706 - def Exec(self, feedback_fn):
5707 """Failover an instance. 5708 5709 The failover is done by shutting it down on its present node and 5710 starting it on the secondary. 5711 5712 """ 5713 instance = self.instance 5714 primary_node = self.cfg.GetNodeInfo(instance.primary_node) 5715 5716 source_node = instance.primary_node 5717 target_node = instance.secondary_nodes[0] 5718 5719 if instance.admin_up: 5720 feedback_fn("* checking disk consistency between source and target") 5721 for dev in instance.disks: 5722 # for drbd, these are drbd over lvm 5723 if not _CheckDiskConsistency(self, dev, target_node, False): 5724 if not self.op.ignore_consistency: 5725 raise errors.OpExecError("Disk %s is degraded on target node," 5726 " aborting failover." % dev.iv_name) 5727 else: 5728 feedback_fn("* not checking disk consistency as instance is not running") 5729 5730 feedback_fn("* shutting down instance on source node") 5731 logging.info("Shutting down instance %s on node %s", 5732 instance.name, source_node) 5733 5734 result = self.rpc.call_instance_shutdown(source_node, instance, 5735 self.op.shutdown_timeout) 5736 msg = result.fail_msg 5737 if msg: 5738 if self.op.ignore_consistency or primary_node.offline: 5739 self.proc.LogWarning("Could not shutdown instance %s on node %s." 5740 " Proceeding anyway. Please make sure node" 5741 " %s is down. Error details: %s", 5742 instance.name, source_node, source_node, msg) 5743 else: 5744 raise errors.OpExecError("Could not shutdown instance %s on" 5745 " node %s: %s" % 5746 (instance.name, source_node, msg)) 5747 5748 feedback_fn("* deactivating the instance's disks on source node") 5749 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True): 5750 raise errors.OpExecError("Can't shut down the instance's disks.") 5751 5752 instance.primary_node = target_node 5753 # distribute new instance config to the other nodes 5754 self.cfg.Update(instance, feedback_fn) 5755 5756 # Only start the instance if it's marked as up 5757 if instance.admin_up: 5758 feedback_fn("* activating the instance's disks on target node") 5759 logging.info("Starting instance %s on node %s", 5760 instance.name, target_node) 5761 5762 disks_ok, _ = _AssembleInstanceDisks(self, instance, 5763 ignore_secondaries=True) 5764 if not disks_ok: 5765 _ShutdownInstanceDisks(self, instance) 5766 raise errors.OpExecError("Can't activate the instance's disks") 5767 5768 feedback_fn("* starting the instance on the target node") 5769 result = self.rpc.call_instance_start(target_node, instance, None, None) 5770 msg = result.fail_msg 5771 if msg: 5772 _ShutdownInstanceDisks(self, instance) 5773 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 5774 (instance.name, target_node, msg))
5775
5776 5777 -class LUMigrateInstance(LogicalUnit):
5778 """Migrate an instance. 5779 5780 This is migration without shutting down, compared to the failover, 5781 which is done with shutdown. 5782 5783 """ 5784 HPATH = "instance-migrate" 5785 HTYPE = constants.HTYPE_INSTANCE 5786 _OP_PARAMS = [ 5787 _PInstanceName, 5788 _PMigrationMode, 5789 _PMigrationLive, 5790 ("cleanup", False, ht.TBool), 5791 ] 5792 5793 REQ_BGL = False 5794
5795 - def ExpandNames(self):
5796 self._ExpandAndLockInstance() 5797 5798 self.needed_locks[locking.LEVEL_NODE] = [] 5799 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 5800 5801 self._migrater = TLMigrateInstance(self, self.op.instance_name, 5802 self.op.cleanup) 5803 self.tasklets = [self._migrater]
5804
5805 - def DeclareLocks(self, level):
5806 if level == locking.LEVEL_NODE: 5807 self._LockInstancesNodes()
5808
5809 - def BuildHooksEnv(self):
5810 """Build hooks env. 5811 5812 This runs on master, primary and secondary nodes of the instance. 5813 5814 """ 5815 instance = self._migrater.instance 5816 source_node = instance.primary_node 5817 target_node = instance.secondary_nodes[0] 5818 env = _BuildInstanceHookEnvByObject(self, instance) 5819 env["MIGRATE_LIVE"] = self._migrater.live 5820 env["MIGRATE_CLEANUP"] = self.op.cleanup 5821 env.update({ 5822 "OLD_PRIMARY": source_node, 5823 "OLD_SECONDARY": target_node, 5824 "NEW_PRIMARY": target_node, 5825 "NEW_SECONDARY": source_node, 5826 }) 5827 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5828 nl_post = list(nl) 5829 nl_post.append(source_node) 5830 return env, nl, nl_post
5831
5832 5833 -class LUMoveInstance(LogicalUnit):
5834 """Move an instance by data-copying. 5835 5836 """ 5837 HPATH = "instance-move" 5838 HTYPE = constants.HTYPE_INSTANCE 5839 _OP_PARAMS = [ 5840 _PInstanceName, 5841 ("target_node", ht.NoDefault, ht.TNonEmptyString), 5842 _PShutdownTimeout, 5843 ] 5844 REQ_BGL = False 5845
5846 - def ExpandNames(self):
5847 self._ExpandAndLockInstance() 5848 target_node = _ExpandNodeName(self.cfg, self.op.target_node) 5849 self.op.target_node = target_node 5850 self.needed_locks[locking.LEVEL_NODE] = [target_node] 5851 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5852
5853 - def DeclareLocks(self, level):
5854 if level == locking.LEVEL_NODE: 5855 self._LockInstancesNodes(primary_only=True)
5856
5857 - def BuildHooksEnv(self):
5858 """Build hooks env. 5859 5860 This runs on master, primary and secondary nodes of the instance. 5861 5862 """ 5863 env = { 5864 "TARGET_NODE": self.op.target_node, 5865 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5866 } 5867 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 5868 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node, 5869 self.op.target_node] 5870 return env, nl, nl
5871
5872 - def CheckPrereq(self):
5873 """Check prerequisites. 5874 5875 This checks that the instance is in the cluster. 5876 5877 """ 5878 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5879 assert self.instance is not None, \ 5880 "Cannot retrieve locked instance %s" % self.op.instance_name 5881 5882 node = self.cfg.GetNodeInfo(self.op.target_node) 5883 assert node is not None, \ 5884 "Cannot retrieve locked node %s" % self.op.target_node 5885 5886 self.target_node = target_node = node.name 5887 5888 if target_node == instance.primary_node: 5889 raise errors.OpPrereqError("Instance %s is already on the node %s" % 5890 (instance.name, target_node), 5891 errors.ECODE_STATE) 5892 5893 bep = self.cfg.GetClusterInfo().FillBE(instance) 5894 5895 for idx, dsk in enumerate(instance.disks): 5896 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE): 5897 raise errors.OpPrereqError("Instance disk %d has a complex layout," 5898 " cannot copy" % idx, errors.ECODE_STATE) 5899 5900 _CheckNodeOnline(self, target_node) 5901 _CheckNodeNotDrained(self, target_node) 5902 _CheckNodeVmCapable(self, target_node) 5903 5904 if instance.admin_up: 5905 # check memory requirements on the secondary node 5906 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 5907 instance.name, bep[constants.BE_MEMORY], 5908 instance.hypervisor) 5909 else: 5910 self.LogInfo("Not checking memory on the secondary node as" 5911 " instance will not be started") 5912 5913 # check bridge existance 5914 _CheckInstanceBridgesExist(self, instance, node=target_node)
5915
5916 - def Exec(self, feedback_fn):
5917 """Move an instance. 5918 5919 The move is done by shutting it down on its present node, copying 5920 the data over (slow) and starting it on the new node. 5921 5922 """ 5923 instance = self.instance 5924 5925 source_node = instance.primary_node 5926 target_node = self.target_node 5927 5928 self.LogInfo("Shutting down instance %s on source node %s", 5929 instance.name, source_node) 5930 5931 result = self.rpc.call_instance_shutdown(source_node, instance, 5932 self.op.shutdown_timeout) 5933 msg = result.fail_msg 5934 if msg: 5935 if self.op.ignore_consistency: 5936 self.proc.LogWarning("Could not shutdown instance %s on node %s." 5937 " Proceeding anyway. Please make sure node" 5938 " %s is down. Error details: %s", 5939 instance.name, source_node, source_node, msg) 5940 else: 5941 raise errors.OpExecError("Could not shutdown instance %s on" 5942 " node %s: %s" % 5943 (instance.name, source_node, msg)) 5944 5945 # create the target disks 5946 try: 5947 _CreateDisks(self, instance, target_node=target_node) 5948 except errors.OpExecError: 5949 self.LogWarning("Device creation failed, reverting...") 5950 try: 5951 _RemoveDisks(self, instance, target_node=target_node) 5952 finally: 5953 self.cfg.ReleaseDRBDMinors(instance.name) 5954 raise 5955 5956 cluster_name = self.cfg.GetClusterInfo().cluster_name 5957 5958 errs = [] 5959 # activate, get path, copy the data over 5960 for idx, disk in enumerate(instance.disks): 5961 self.LogInfo("Copying data for disk %d", idx) 5962 result = self.rpc.call_blockdev_assemble(target_node, disk, 5963 instance.name, True) 5964 if result.fail_msg: 5965 self.LogWarning("Can't assemble newly created disk %d: %s", 5966 idx, result.fail_msg) 5967 errs.append(result.fail_msg) 5968 break 5969 dev_path = result.payload 5970 result = self.rpc.call_blockdev_export(source_node, disk, 5971 target_node, dev_path, 5972 cluster_name) 5973 if result.fail_msg: 5974 self.LogWarning("Can't copy data over for disk %d: %s", 5975 idx, result.fail_msg) 5976 errs.append(result.fail_msg) 5977 break 5978 5979 if errs: 5980 self.LogWarning("Some disks failed to copy, aborting") 5981 try: 5982 _RemoveDisks(self, instance, target_node=target_node) 5983 finally: 5984 self.cfg.ReleaseDRBDMinors(instance.name) 5985 raise errors.OpExecError("Errors during disk copy: %s" % 5986 (",".join(errs),)) 5987 5988 instance.primary_node = target_node 5989 self.cfg.Update(instance, feedback_fn) 5990 5991 self.LogInfo("Removing the disks on the original node") 5992 _RemoveDisks(self, instance, target_node=source_node) 5993 5994 # Only start the instance if it's marked as up 5995 if instance.admin_up: 5996 self.LogInfo("Starting instance %s on node %s", 5997 instance.name, target_node) 5998 5999 disks_ok, _ = _AssembleInstanceDisks(self, instance, 6000 ignore_secondaries=True) 6001 if not disks_ok: 6002 _ShutdownInstanceDisks(self, instance) 6003 raise errors.OpExecError("Can't activate the instance's disks") 6004 6005 result = self.rpc.call_instance_start(target_node, instance, None, None) 6006 msg = result.fail_msg 6007 if msg: 6008 _ShutdownInstanceDisks(self, instance) 6009 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 6010 (instance.name, target_node, msg))
6011
6012 6013 -class LUMigrateNode(LogicalUnit):
6014 """Migrate all instances from a node. 6015 6016 """ 6017 HPATH = "node-migrate" 6018 HTYPE = constants.HTYPE_NODE 6019 _OP_PARAMS = [ 6020 _PNodeName, 6021 _PMigrationMode, 6022 _PMigrationLive, 6023 ] 6024 REQ_BGL = False 6025
6026 - def ExpandNames(self):
6027 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 6028 6029 self.needed_locks = { 6030 locking.LEVEL_NODE: [self.op.node_name], 6031 } 6032 6033 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 6034 6035 # Create tasklets for migrating instances for all instances on this node 6036 names = [] 6037 tasklets = [] 6038 6039 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name): 6040 logging.debug("Migrating instance %s", inst.name) 6041 names.append(inst.name) 6042 6043 tasklets.append(TLMigrateInstance(self, inst.name, False)) 6044 6045 self.tasklets = tasklets 6046 6047 # Declare instance locks 6048 self.needed_locks[locking.LEVEL_INSTANCE] = names
6049
6050 - def DeclareLocks(self, level):
6051 if level == locking.LEVEL_NODE: 6052 self._LockInstancesNodes()
6053
6054 - def BuildHooksEnv(self):
6055 """Build hooks env. 6056 6057 This runs on the master, the primary and all the secondaries. 6058 6059 """ 6060 env = { 6061 "NODE_NAME": self.op.node_name, 6062 } 6063 6064 nl = [self.cfg.GetMasterNode()] 6065 6066 return (env, nl, nl)
6067
6068 6069 -class TLMigrateInstance(Tasklet):
6070 """Tasklet class for instance migration. 6071 6072 @type live: boolean 6073 @ivar live: whether the migration will be done live or non-live; 6074 this variable is initalized only after CheckPrereq has run 6075 6076 """
6077 - def __init__(self, lu, instance_name, cleanup):
6078 """Initializes this class. 6079 6080 """ 6081 Tasklet.__init__(self, lu) 6082 6083 # Parameters 6084 self.instance_name = instance_name 6085 self.cleanup = cleanup 6086 self.live = False # will be overridden later
6087
6088 - def CheckPrereq(self):
6089 """Check prerequisites. 6090 6091 This checks that the instance is in the cluster. 6092 6093 """ 6094 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name) 6095 instance = self.cfg.GetInstanceInfo(instance_name) 6096 assert instance is not None 6097 6098 if instance.disk_template != constants.DT_DRBD8: 6099 raise errors.OpPrereqError("Instance's disk layout is not" 6100 " drbd8, cannot migrate.", errors.ECODE_STATE) 6101 6102 secondary_nodes = instance.secondary_nodes 6103 if not secondary_nodes: 6104 raise errors.ConfigurationError("No secondary node but using" 6105 " drbd8 disk template") 6106 6107 i_be = self.cfg.GetClusterInfo().FillBE(instance) 6108 6109 target_node = secondary_nodes[0] 6110 # check memory requirements on the secondary node 6111 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" % 6112 instance.name, i_be[constants.BE_MEMORY], 6113 instance.hypervisor) 6114 6115 # check bridge existance 6116 _CheckInstanceBridgesExist(self.lu, instance, node=target_node) 6117 6118 if not self.cleanup: 6119 _CheckNodeNotDrained(self.lu, target_node) 6120 result = self.rpc.call_instance_migratable(instance.primary_node, 6121 instance) 6122 result.Raise("Can't migrate, please use failover", 6123 prereq=True, ecode=errors.ECODE_STATE) 6124 6125 self.instance = instance 6126 6127 if self.lu.op.live is not None and self.lu.op.mode is not None: 6128 raise errors.OpPrereqError("Only one of the 'live' and 'mode'" 6129 " parameters are accepted", 6130 errors.ECODE_INVAL) 6131 if self.lu.op.live is not None: 6132 if self.lu.op.live: 6133 self.lu.op.mode = constants.HT_MIGRATION_LIVE 6134 else: 6135 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE 6136 # reset the 'live' parameter to None so that repeated 6137 # invocations of CheckPrereq do not raise an exception 6138 self.lu.op.live = None 6139 elif self.lu.op.mode is None: 6140 # read the default value from the hypervisor 6141 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False) 6142 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] 6143 6144 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6145
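CheckPrereq above resolves the migration mode with a fixed precedence: the obsolete boolean "live" and the newer "mode" opcode parameters are mutually exclusive, "live" is translated into one of the HT_MIGRATION_* constants, and when neither is given the hypervisor's HV_MIGRATION_MODE default is used. A condensed sketch of that precedence, separate from the tasklet; the constant strings below are placeholders rather than the real values from ganeti.constants:

# Illustrative sketch only -- precedence of 'live', 'mode' and the
# hypervisor default, as resolved in CheckPrereq above.
LIVE, NONLIVE = "live", "non-live"   # stand-ins for constants.HT_MIGRATION_*

def resolve_mode(live, mode, hv_default):
    if live is not None and mode is not None:
        raise ValueError("only one of 'live' and 'mode' may be given")
    if live is not None:
        return LIVE if live else NONLIVE
    if mode is None:
        return hv_default            # from the instance's hypervisor parameters
    return mode

assert resolve_mode(True, None, NONLIVE) == LIVE      # obsolete boolean wins
assert resolve_mode(None, None, NONLIVE) == NONLIVE   # hypervisor default
assert resolve_mode(None, LIVE, NONLIVE) == LIVE      # explicit mode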
6146 - def _WaitUntilSync(self):
6147 """Poll with custom rpc for disk sync. 6148 6149 This uses our own step-based rpc call. 6150 6151 """ 6152 self.feedback_fn("* wait until resync is done") 6153 all_done = False 6154 while not all_done: 6155 all_done = True 6156 result = self.rpc.call_drbd_wait_sync(self.all_nodes, 6157 self.nodes_ip, 6158 self.instance.disks) 6159 min_percent = 100 6160 for node, nres in result.items(): 6161 nres.Raise("Cannot resync disks on node %s" % node) 6162 node_done, node_percent = nres.payload 6163 all_done = all_done and node_done 6164 if node_percent is not None: 6165 min_percent = min(min_percent, node_percent) 6166 if not all_done: 6167 if min_percent < 100: 6168 self.feedback_fn(" - progress: %.1f%%" % min_percent) 6169 time.sleep(2)
6170
6171 - def _EnsureSecondary(self, node):
6172 """Demote a node to secondary. 6173 6174 """ 6175 self.feedback_fn("* switching node %s to secondary mode" % node) 6176 6177 for dev in self.instance.disks: 6178 self.cfg.SetDiskID(dev, node) 6179 6180 result = self.rpc.call_blockdev_close(node, self.instance.name, 6181 self.instance.disks) 6182 result.Raise("Cannot change disk to secondary on node %s" % node)
6183
6184 - def _GoStandalone(self):
6185 """Disconnect from the network. 6186 6187 """ 6188 self.feedback_fn("* changing into standalone mode") 6189 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, 6190 self.instance.disks) 6191 for node, nres in result.items(): 6192 nres.Raise("Cannot disconnect disks node %s" % node)
6193
6194 - def _GoReconnect(self, multimaster):
6195 """Reconnect to the network. 6196 6197 """ 6198 if multimaster: 6199 msg = "dual-master" 6200 else: 6201 msg = "single-master" 6202 self.feedback_fn("* changing disks into %s mode" % msg) 6203 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, 6204 self.instance.disks, 6205 self.instance.name, multimaster) 6206 for node, nres in result.items(): 6207 nres.Raise("Cannot change disks config on node %s" % node)
6208
6209 - def _ExecCleanup(self):
6210 """Try to cleanup after a failed migration. 6211 6212 The cleanup is done by: 6213 - check that the instance is running only on one node 6214 (and update the config if needed) 6215 - change disks on its secondary node to secondary 6216 - wait until disks are fully synchronized 6217 - disconnect from the network 6218 - change disks into single-master mode 6219 - wait again until disks are fully synchronized 6220 6221 """ 6222 instance = self.instance 6223 target_node = self.target_node 6224 source_node = self.source_node 6225 6226 # check running on only one node 6227 self.feedback_fn("* checking where the instance actually runs" 6228 " (if this hangs, the hypervisor might be in" 6229 " a bad state)") 6230 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor]) 6231 for node, result in ins_l.items(): 6232 result.Raise("Can't contact node %s" % node) 6233 6234 runningon_source = instance.name in ins_l[source_node].payload 6235 runningon_target = instance.name in ins_l[target_node].payload 6236 6237 if runningon_source and runningon_target: 6238 raise errors.OpExecError("Instance seems to be running on two nodes," 6239 " or the hypervisor is confused. You will have" 6240 " to ensure manually that it runs only on one" 6241 " and restart this operation.") 6242 6243 if not (runningon_source or runningon_target): 6244 raise errors.OpExecError("Instance does not seem to be running at all." 6245 " In this case, it's safer to repair by" 6246 " running 'gnt-instance stop' to ensure disk" 6247 " shutdown, and then restarting it.") 6248 6249 if runningon_target: 6250 # the migration has actually succeeded, we need to update the config 6251 self.feedback_fn("* instance running on secondary node (%s)," 6252 " updating config" % target_node) 6253 instance.primary_node = target_node 6254 self.cfg.Update(instance, self.feedback_fn) 6255 demoted_node = source_node 6256 else: 6257 self.feedback_fn("* instance confirmed to be running on its" 6258 " primary node (%s)" % source_node) 6259 demoted_node = target_node 6260 6261 self._EnsureSecondary(demoted_node) 6262 try: 6263 self._WaitUntilSync() 6264 except errors.OpExecError: 6265 # we ignore here errors, since if the device is standalone, it 6266 # won't be able to sync 6267 pass 6268 self._GoStandalone() 6269 self._GoReconnect(False) 6270 self._WaitUntilSync() 6271 6272 self.feedback_fn("* done")
6273
6274 - def _RevertDiskStatus(self):
6275 """Try to revert the disk status after a failed migration. 6276 6277 """ 6278 target_node = self.target_node 6279 try: 6280 self._EnsureSecondary(target_node) 6281 self._GoStandalone() 6282 self._GoReconnect(False) 6283 self._WaitUntilSync() 6284 except errors.OpExecError, err: 6285 self.lu.LogWarning("Migration failed and I can't reconnect the" 6286 " drives: error '%s'\n" 6287 "Please look and recover the instance status" % 6288 str(err))
6289
6290 - def _AbortMigration(self):
6291 """Call the hypervisor code to abort a started migration. 6292 6293 """ 6294 instance = self.instance 6295 target_node = self.target_node 6296 migration_info = self.migration_info 6297 6298 abort_result = self.rpc.call_finalize_migration(target_node, 6299 instance, 6300 migration_info, 6301 False) 6302 abort_msg = abort_result.fail_msg 6303 if abort_msg: 6304 logging.error("Aborting migration failed on target node %s: %s", 6305 target_node, abort_msg)
6306 # Don't raise an exception here, as we still have to try to revert the 6307 # disk status, even if this step failed. 6308
6309 - def _ExecMigration(self):
6310 """Migrate an instance. 6311 6312 The migrate is done by: 6313 - change the disks into dual-master mode 6314 - wait until disks are fully synchronized again 6315 - migrate the instance 6316 - change disks on the new secondary node (the old primary) to secondary 6317 - wait until disks are fully synchronized 6318 - change disks into single-master mode 6319 6320 """ 6321 instance = self.instance 6322 target_node = self.target_node 6323 source_node = self.source_node 6324 6325 self.feedback_fn("* checking disk consistency between source and target") 6326 for dev in instance.disks: 6327 if not _CheckDiskConsistency(self.lu, dev, target_node, False): 6328 raise errors.OpExecError("Disk %s is degraded or not fully" 6329 " synchronized on target node," 6330 " aborting migrate." % dev.iv_name) 6331 6332 # First get the migration information from the remote node 6333 result = self.rpc.call_migration_info(source_node, instance) 6334 msg = result.fail_msg 6335 if msg: 6336 log_err = ("Failed fetching source migration information from %s: %s" % 6337 (source_node, msg)) 6338 logging.error(log_err) 6339 raise errors.OpExecError(log_err) 6340 6341 self.migration_info = migration_info = result.payload 6342 6343 # Then switch the disks to master/master mode 6344 self._EnsureSecondary(target_node) 6345 self._GoStandalone() 6346 self._GoReconnect(True) 6347 self._WaitUntilSync() 6348 6349 self.feedback_fn("* preparing %s to accept the instance" % target_node) 6350 result = self.rpc.call_accept_instance(target_node, 6351 instance, 6352 migration_info, 6353 self.nodes_ip[target_node]) 6354 6355 msg = result.fail_msg 6356 if msg: 6357 logging.error("Instance pre-migration failed, trying to revert" 6358 " disk status: %s", msg) 6359 self.feedback_fn("Pre-migration failed, aborting") 6360 self._AbortMigration() 6361 self._RevertDiskStatus() 6362 raise errors.OpExecError("Could not pre-migrate instance %s: %s" % 6363 (instance.name, msg)) 6364 6365 self.feedback_fn("* migrating instance to %s" % target_node) 6366 time.sleep(10) 6367 result = self.rpc.call_instance_migrate(source_node, instance, 6368 self.nodes_ip[target_node], 6369 self.live) 6370 msg = result.fail_msg 6371 if msg: 6372 logging.error("Instance migration failed, trying to revert" 6373 " disk status: %s", msg) 6374 self.feedback_fn("Migration failed, aborting") 6375 self._AbortMigration() 6376 self._RevertDiskStatus() 6377 raise errors.OpExecError("Could not migrate instance %s: %s" % 6378 (instance.name, msg)) 6379 time.sleep(10) 6380 6381 instance.primary_node = target_node 6382 # distribute new instance config to the other nodes 6383 self.cfg.Update(instance, self.feedback_fn) 6384 6385 result = self.rpc.call_finalize_migration(target_node, 6386 instance, 6387 migration_info, 6388 True) 6389 msg = result.fail_msg 6390 if msg: 6391 logging.error("Instance migration succeeded, but finalization failed:" 6392 " %s", msg) 6393 raise errors.OpExecError("Could not finalize instance migration: %s" % 6394 msg) 6395 6396 self._EnsureSecondary(source_node) 6397 self._WaitUntilSync() 6398 self._GoStandalone() 6399 self._GoReconnect(False) 6400 self._WaitUntilSync() 6401 6402 self.feedback_fn("* done")
6403
6404 - def Exec(self, feedback_fn):
6405 """Perform the migration. 6406 6407 """ 6408 feedback_fn("Migrating instance %s" % self.instance.name) 6409 6410 self.feedback_fn = feedback_fn 6411 6412 self.source_node = self.instance.primary_node 6413 self.target_node = self.instance.secondary_nodes[0] 6414 self.all_nodes = [self.source_node, self.target_node] 6415 self.nodes_ip = { 6416 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip, 6417 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip, 6418 } 6419 6420 if self.cleanup: 6421 return self._ExecCleanup() 6422 else: 6423 return self._ExecMigration()
6424
6425 6426 -def _CreateBlockDev(lu, node, instance, device, force_create, 6427 info, force_open):
6428 """Create a tree of block devices on a given node. 6429 6430 If this device type has to be created on secondaries, create it and 6431 all its children. 6432 6433 If not, just recurse to children keeping the same 'force' value. 6434 6435 @param lu: the lu on whose behalf we execute 6436 @param node: the node on which to create the device 6437 @type instance: L{objects.Instance} 6438 @param instance: the instance which owns the device 6439 @type device: L{objects.Disk} 6440 @param device: the device to create 6441 @type force_create: boolean 6442 @param force_create: whether to force creation of this device; this 6443 will be change to True whenever we find a device which has 6444 CreateOnSecondary() attribute 6445 @param info: the extra 'metadata' we should attach to the device 6446 (this will be represented as a LVM tag) 6447 @type force_open: boolean 6448 @param force_open: this parameter will be passes to the 6449 L{backend.BlockdevCreate} function where it specifies 6450 whether we run on primary or not, and it affects both 6451 the child assembly and the device own Open() execution 6452 6453 """ 6454 if device.CreateOnSecondary(): 6455 force_create = True 6456 6457 if device.children: 6458 for child in device.children: 6459 _CreateBlockDev(lu, node, instance, child, force_create, 6460 info, force_open) 6461 6462 if not force_create: 6463 return 6464 6465 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6466
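_CreateBlockDev above recurses into the children before creating the device itself, and force_create flips to True as soon as a device reports CreateOnSecondary(); for a DRBD8 disk that is what makes its data and metadata LVs get created on the secondary node as well. A toy walk-through of that propagation with hypothetical stand-in objects (FakeDisk is not ganeti.objects.Disk and no RPC is involved):

# Illustrative sketch only -- creation order and force_create propagation.
class FakeDisk(object):
    def __init__(self, name, children=(), create_on_secondary=False):
        self.name = name
        self.children = list(children)
        self._cos = create_on_secondary
    def CreateOnSecondary(self):
        return self._cos

def walk(device, force_create, created):
    if device.CreateOnSecondary():
        force_create = True
    for child in device.children:
        walk(child, force_create, created)
    if force_create:
        created.append(device.name)   # stands in for _CreateSingleBlockDev

lvs = [FakeDisk("lv_data"), FakeDisk("lv_meta")]
drbd = FakeDisk("drbd8", children=lvs, create_on_secondary=True)
created = []
walk(drbd, False, created)
assert created == ["lv_data", "lv_meta", "drbd8"]   # children first, device last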
6467 6468 -def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6469 """Create a single block device on a given node. 6470 6471 This will not recurse over children of the device, so they must be 6472 created in advance. 6473 6474 @param lu: the lu on whose behalf we execute 6475 @param node: the node on which to create the device 6476 @type instance: L{objects.Instance} 6477 @param instance: the instance which owns the device 6478 @type device: L{objects.Disk} 6479 @param device: the device to create 6480 @param info: the extra 'metadata' we should attach to the device 6481 (this will be represented as a LVM tag) 6482 @type force_open: boolean 6483 @param force_open: this parameter will be passes to the 6484 L{backend.BlockdevCreate} function where it specifies 6485 whether we run on primary or not, and it affects both 6486 the child assembly and the device own Open() execution 6487 6488 """ 6489 lu.cfg.SetDiskID(device, node) 6490 result = lu.rpc.call_blockdev_create(node, device, device.size, 6491 instance.name, force_open, info) 6492 result.Raise("Can't create block device %s on" 6493 " node %s for instance %s" % (device, node, instance.name)) 6494 if device.physical_id is None: 6495 device.physical_id = result.payload
6496
6497 6498 -def _GenerateUniqueNames(lu, exts):
6499 """Generate a suitable LV name. 6500 6501 This will generate a logical volume name for the given instance. 6502 6503 """ 6504 results = [] 6505 for val in exts: 6506 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) 6507 results.append("%s%s" % (new_id, val)) 6508 return results
6509
6510 6511 -def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name, 6512 p_minor, s_minor):
6513 """Generate a drbd8 device complete with its children. 6514 6515 """ 6516 port = lu.cfg.AllocatePort() 6517 vgname = lu.cfg.GetVGName() 6518 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) 6519 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, 6520 logical_id=(vgname, names[0])) 6521 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 6522 logical_id=(vgname, names[1])) 6523 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, 6524 logical_id=(primary, secondary, port, 6525 p_minor, s_minor, 6526 shared_secret), 6527 children=[dev_data, dev_meta], 6528 iv_name=iv_name) 6529 return drbd_dev
6530
6531 6532 -def _GenerateDiskTemplate(lu, template_name, 6533 instance_name, primary_node, 6534 secondary_nodes, disk_info, 6535 file_storage_dir, file_driver, 6536 base_index):
6537 """Generate the entire disk layout for a given template type. 6538 6539 """ 6540 #TODO: compute space requirements 6541 6542 vgname = lu.cfg.GetVGName() 6543 disk_count = len(disk_info) 6544 disks = [] 6545 if template_name == constants.DT_DISKLESS: 6546 pass 6547 elif template_name == constants.DT_PLAIN: 6548 if len(secondary_nodes) != 0: 6549 raise errors.ProgrammerError("Wrong template configuration") 6550 6551 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6552 for i in range(disk_count)]) 6553 for idx, disk in enumerate(disk_info): 6554 disk_index = idx + base_index 6555 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"], 6556 logical_id=(vgname, names[idx]), 6557 iv_name="disk/%d" % disk_index, 6558 mode=disk["mode"]) 6559 disks.append(disk_dev) 6560 elif template_name == constants.DT_DRBD8: 6561 if len(secondary_nodes) != 1: 6562 raise errors.ProgrammerError("Wrong template configuration") 6563 remote_node = secondary_nodes[0] 6564 minors = lu.cfg.AllocateDRBDMinor( 6565 [primary_node, remote_node] * len(disk_info), instance_name) 6566 6567 names = [] 6568 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6569 for i in range(disk_count)]): 6570 names.append(lv_prefix + "_data") 6571 names.append(lv_prefix + "_meta") 6572 for idx, disk in enumerate(disk_info): 6573 disk_index = idx + base_index 6574 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, 6575 disk["size"], names[idx*2:idx*2+2], 6576 "disk/%d" % disk_index, 6577 minors[idx*2], minors[idx*2+1]) 6578 disk_dev.mode = disk["mode"] 6579 disks.append(disk_dev) 6580 elif template_name == constants.DT_FILE: 6581 if len(secondary_nodes) != 0: 6582 raise errors.ProgrammerError("Wrong template configuration") 6583 6584 _RequireFileStorage() 6585 6586 for idx, disk in enumerate(disk_info): 6587 disk_index = idx + base_index 6588 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"], 6589 iv_name="disk/%d" % disk_index, 6590 logical_id=(file_driver, 6591 "%s/disk%d" % (file_storage_dir, 6592 disk_index)), 6593 mode=disk["mode"]) 6594 disks.append(disk_dev) 6595 else: 6596 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name) 6597 return disks
6598
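For the plain (DT_PLAIN) template the generator above emits one LD_LV disk per entry of disk_info, named "<unique-id>.disk<N>" inside the cluster volume group, while DRBD8 builds a _data/_meta LV pair per disk and stacks the DRBD device on top of it. The dictionary below only sketches the shape of a single plain-template disk using the fields assigned in the code above; the concrete values (volume group name, UUID, size) are invented:

# Illustrative sketch only -- shape of one plain-template disk as built above.
example_plain_disk = {
    "dev_type": "LD_LV",                       # constants.LD_LV on the real object
    "size": 10240,                             # disk_info[idx]["size"]
    "logical_id": ("xenvg", "<uuid>.disk0"),   # (vgname, unique LV name)
    "iv_name": "disk/0",                       # "disk/%d" % disk_index
    "mode": "rw",                              # disk_info[idx]["mode"]
}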
6599 6600 -def _GetInstanceInfoText(instance):
6601 """Compute that text that should be added to the disk's metadata. 6602 6603 """ 6604 return "originstname+%s" % instance.name
6605
6606 6607 -def _CalcEta(time_taken, written, total_size):
6608 """Calculates the ETA based on size written and total size. 6609 6610 @param time_taken: The time taken so far 6611 @param written: amount written so far 6612 @param total_size: The total size of data to be written 6613 @return: The remaining time in seconds 6614 6615 """ 6616 avg_time = time_taken / float(written) 6617 return (total_size - written) * avg_time
6618
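_CalcEta is a plain linear extrapolation: the average time per unit written so far, multiplied by whatever is still left to write. A quick worked example with made-up numbers (the units only have to be consistent):

# Illustrative sketch only -- same formula as _CalcEta above.
def calc_eta(time_taken, written, total_size):
    avg_time = time_taken / float(written)
    return (total_size - written) * avg_time

# 256 MiB written in 30 seconds out of 1024 MiB total:
assert calc_eta(30.0, 256, 1024) == 90.0   # 768 MiB left at ~8.5 MiB/s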
6619 6620 -def _WipeDisks(lu, instance):
6621 """Wipes instance disks. 6622 6623 @type lu: L{LogicalUnit} 6624 @param lu: the logical unit on whose behalf we execute 6625 @type instance: L{objects.Instance} 6626 @param instance: the instance whose disks we should create 6627 @return: the success of the wipe 6628 6629 """ 6630 node = instance.primary_node 6631 for idx, device in enumerate(instance.disks): 6632 lu.LogInfo("* Wiping disk %d", idx) 6633 logging.info("Wiping disk %d for instance %s", idx, instance.name) 6634 6635 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but 6636 # MAX_WIPE_CHUNK at max 6637 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 * 6638 constants.MIN_WIPE_CHUNK_PERCENT) 6639 6640 offset = 0 6641 size = device.size 6642 last_output = 0 6643 start_time = time.time() 6644 6645 while offset < size: 6646 wipe_size = min(wipe_chunk_size, size - offset) 6647 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size) 6648 result.Raise("Could not wipe disk %d at offset %d for size %d" % 6649 (idx, offset, wipe_size)) 6650 now = time.time() 6651 offset += wipe_size 6652 if now - last_output >= 60: 6653 eta = _CalcEta(now - start_time, offset, size) 6654 lu.LogInfo(" - done: %.1f%% ETA: %s" % 6655 (offset / float(size) * 100, utils.FormatSeconds(eta))) 6656 last_output = now
6657
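The wipe loop above works chunk by chunk: each chunk is MIN_WIPE_CHUNK_PERCENT of the disk size, capped at MAX_WIPE_CHUNK, and progress with an ETA from _CalcEta is reported at most once a minute. The real constants live in ganeti.constants; the numbers below (10% and a 1024 MiB cap) are only assumed for the sake of the example:

# Illustrative sketch only -- assumed MIN_WIPE_CHUNK_PERCENT=10, MAX_WIPE_CHUNK=1024.
def wipe_chunk_size(disk_size, min_pct=10, max_chunk=1024):
    return min(max_chunk, disk_size / 100.0 * min_pct)

assert wipe_chunk_size(1000) == 100.0     # small disk: the percentage wins
assert wipe_chunk_size(204800) == 1024    # 200 GiB disk: capped at MAX_WIPE_CHUNK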
6658 6659 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6660 """Create all disks for an instance. 6661 6662 This abstracts away some work from AddInstance. 6663 6664 @type lu: L{LogicalUnit} 6665 @param lu: the logical unit on whose behalf we execute 6666 @type instance: L{objects.Instance} 6667 @param instance: the instance whose disks we should create 6668 @type to_skip: list 6669 @param to_skip: list of indices to skip 6670 @type target_node: string 6671 @param target_node: if passed, overrides the target node for creation 6672 @rtype: boolean 6673 @return: the success of the creation 6674 6675 """ 6676 info = _GetInstanceInfoText(instance) 6677 if target_node is None: 6678 pnode = instance.primary_node 6679 all_nodes = instance.all_nodes 6680 else: 6681 pnode = target_node 6682 all_nodes = [pnode] 6683 6684 if instance.disk_template == constants.DT_FILE: 6685 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 6686 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir) 6687 6688 result.Raise("Failed to create directory '%s' on" 6689 " node %s" % (file_storage_dir, pnode)) 6690 6691 # Note: this needs to be kept in sync with adding of disks in 6692 # LUSetInstanceParams 6693 for idx, device in enumerate(instance.disks): 6694 if to_skip and idx in to_skip: 6695 continue 6696 logging.info("Creating volume %s for instance %s", 6697 device.iv_name, instance.name) 6698 #HARDCODE 6699 for node in all_nodes: 6700 f_create = node == pnode 6701 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6702
6703 6704 -def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node that was actually asked to remove the directory
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
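_RemoveDisks deliberately keeps going when a single device fails and only reports an aggregate result. The keep-going pattern in isolation, with remove_fn standing in for the per-device RPC call:

def remove_all(devices, remove_fn):
  """Try to remove every device; return False if any removal failed."""
  all_result = True
  for dev in devices:
    try:
      remove_fn(dev)
    except EnvironmentError:
      # log and continue with the remaining devices
      all_result = False
  return all_result

seen = []
def fake_remove(dev):
  seen.append(dev)
  if dev == "disk1":
    raise EnvironmentError("boom")

# one failing device does not stop the others from being processed
assert remove_all(["disk0", "disk1", "disk2"], fake_remove) is False
assert seen == ["disk0", "disk1", "disk2"]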
6751
6752 6753 -def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
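As a worked example, two 10240 MiB disks need 20480 MiB of volume group space as plain LVs, and 20736 MiB under DRBD8 because of the per-disk 128 MiB metadata. A self-contained sketch of the same lookup, with plain strings standing in for the DT_* constants:

def compute_disk_size(disk_template, disks):
  # mirrors _ComputeDiskSize above, with string keys instead of constants
  req_size_dict = {
    "diskless": None,
    "plain": sum(d["size"] for d in disks),
    "drbd": sum(d["size"] + 128 for d in disks),  # 128 MiB DRBD metadata
    "file": None,
  }
  return req_size_dict[disk_template]

disks = [{"size": 10240}, {"size": 10240}]
assert compute_disk_size("plain", disks) == 20480
assert compute_disk_size("drbd", disks) == 20736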
6771
6772 6773 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6798
6799 6800 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6827
6828 6829 -class LUCreateInstance(LogicalUnit):
6830 """Create an instance. 6831 6832 """ 6833 HPATH = "instance-add" 6834 HTYPE = constants.HTYPE_INSTANCE 6835 _OP_PARAMS = [ 6836 _PInstanceName, 6837 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)), 6838 ("start", True, ht.TBool), 6839 ("wait_for_sync", True, ht.TBool), 6840 ("ip_check", True, ht.TBool), 6841 ("name_check", True, ht.TBool), 6842 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)), 6843 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)), 6844 ("hvparams", ht.EmptyDict, ht.TDict), 6845 ("beparams", ht.EmptyDict, ht.TDict), 6846 ("osparams", ht.EmptyDict, ht.TDict), 6847 ("no_install", None, ht.TMaybeBool), 6848 ("os_type", None, ht.TMaybeString), 6849 ("force_variant", False, ht.TBool), 6850 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)), 6851 ("source_x509_ca", None, ht.TMaybeString), 6852 ("source_instance_name", None, ht.TMaybeString), 6853 ("src_node", None, ht.TMaybeString), 6854 ("src_path", None, ht.TMaybeString), 6855 ("pnode", None, ht.TMaybeString), 6856 ("snode", None, ht.TMaybeString), 6857 ("iallocator", None, ht.TMaybeString), 6858 ("hypervisor", None, ht.TMaybeString), 6859 ("disk_template", ht.NoDefault, _CheckDiskTemplate), 6860 ("identify_defaults", False, ht.TBool), 6861 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))), 6862 ("file_storage_dir", None, ht.TMaybeString), 6863 ] 6864 REQ_BGL = False 6865
6866 - def CheckArguments(self):
6867 """Check arguments. 6868 6869 """ 6870 # do not require name_check to ease forward/backward compatibility 6871 # for tools 6872 if self.op.no_install and self.op.start: 6873 self.LogInfo("No-installation mode selected, disabling startup") 6874 self.op.start = False 6875 # validate/normalize the instance name 6876 self.op.instance_name = \ 6877 netutils.Hostname.GetNormalizedName(self.op.instance_name) 6878 6879 if self.op.ip_check and not self.op.name_check: 6880 # TODO: make the ip check more flexible and not depend on the name check 6881 raise errors.OpPrereqError("Cannot do ip check without a name check", 6882 errors.ECODE_INVAL) 6883 6884 # check nics' parameter names 6885 for nic in self.op.nics: 6886 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) 6887 6888 # check disks. parameter names and consistent adopt/no-adopt strategy 6889 has_adopt = has_no_adopt = False 6890 for disk in self.op.disks: 6891 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) 6892 if "adopt" in disk: 6893 has_adopt = True 6894 else: 6895 has_no_adopt = True 6896 if has_adopt and has_no_adopt: 6897 raise errors.OpPrereqError("Either all disks are adopted or none is", 6898 errors.ECODE_INVAL) 6899 if has_adopt: 6900 if self.op.disk_template not in constants.DTS_MAY_ADOPT: 6901 raise errors.OpPrereqError("Disk adoption is not supported for the" 6902 " '%s' disk template" % 6903 self.op.disk_template, 6904 errors.ECODE_INVAL) 6905 if self.op.iallocator is not None: 6906 raise errors.OpPrereqError("Disk adoption not allowed with an" 6907 " iallocator script", errors.ECODE_INVAL) 6908 if self.op.mode == constants.INSTANCE_IMPORT: 6909 raise errors.OpPrereqError("Disk adoption not allowed for" 6910 " instance import", errors.ECODE_INVAL) 6911 6912 self.adopt_disks = has_adopt 6913 6914 # instance name verification 6915 if self.op.name_check: 6916 self.hostname1 = netutils.GetHostname(name=self.op.instance_name) 6917 self.op.instance_name = self.hostname1.name 6918 # used in CheckPrereq for ip ping check 6919 self.check_ip = self.hostname1.ip 6920 else: 6921 self.check_ip = None 6922 6923 # file storage checks 6924 if (self.op.file_driver and 6925 not self.op.file_driver in constants.FILE_DRIVER): 6926 raise errors.OpPrereqError("Invalid file driver name '%s'" % 6927 self.op.file_driver, errors.ECODE_INVAL) 6928 6929 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir): 6930 raise errors.OpPrereqError("File storage directory path not absolute", 6931 errors.ECODE_INVAL) 6932 6933 ### Node/iallocator related checks 6934 _CheckIAllocatorOrNode(self, "iallocator", "pnode") 6935 6936 if self.op.pnode is not None: 6937 if self.op.disk_template in constants.DTS_NET_MIRROR: 6938 if self.op.snode is None: 6939 raise errors.OpPrereqError("The networked disk templates need" 6940 " a mirror node", errors.ECODE_INVAL) 6941 elif self.op.snode: 6942 self.LogWarning("Secondary node will be ignored on non-mirrored disk" 6943 " template") 6944 self.op.snode = None 6945 6946 self._cds = _GetClusterDomainSecret() 6947 6948 if self.op.mode == constants.INSTANCE_IMPORT: 6949 # On import force_variant must be True, because if we forced it at 6950 # initial install, our only chance when importing it back is that it 6951 # works again! 
6952 self.op.force_variant = True 6953 6954 if self.op.no_install: 6955 self.LogInfo("No-installation mode has no effect during import") 6956 6957 elif self.op.mode == constants.INSTANCE_CREATE: 6958 if self.op.os_type is None: 6959 raise errors.OpPrereqError("No guest OS specified", 6960 errors.ECODE_INVAL) 6961 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: 6962 raise errors.OpPrereqError("Guest OS '%s' is not allowed for" 6963 " installation" % self.op.os_type, 6964 errors.ECODE_STATE) 6965 if self.op.disk_template is None: 6966 raise errors.OpPrereqError("No disk template specified", 6967 errors.ECODE_INVAL) 6968 6969 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 6970 # Check handshake to ensure both clusters have the same domain secret 6971 src_handshake = self.op.source_handshake 6972 if not src_handshake: 6973 raise errors.OpPrereqError("Missing source handshake", 6974 errors.ECODE_INVAL) 6975 6976 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, 6977 src_handshake) 6978 if errmsg: 6979 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, 6980 errors.ECODE_INVAL) 6981 6982 # Load and check source CA 6983 self.source_x509_ca_pem = self.op.source_x509_ca 6984 if not self.source_x509_ca_pem: 6985 raise errors.OpPrereqError("Missing source X509 CA", 6986 errors.ECODE_INVAL) 6987 6988 try: 6989 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, 6990 self._cds) 6991 except OpenSSL.crypto.Error, err: 6992 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % 6993 (err, ), errors.ECODE_INVAL) 6994 6995 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 6996 if errcode is not None: 6997 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), 6998 errors.ECODE_INVAL) 6999 7000 self.source_x509_ca = cert 7001 7002 src_instance_name = self.op.source_instance_name 7003 if not src_instance_name: 7004 raise errors.OpPrereqError("Missing source instance name", 7005 errors.ECODE_INVAL) 7006 7007 self.source_instance_name = \ 7008 netutils.GetHostname(name=src_instance_name).name 7009 7010 else: 7011 raise errors.OpPrereqError("Invalid instance creation mode %r" % 7012 self.op.mode, errors.ECODE_INVAL)
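One of the checks in CheckArguments above enforces an all-or-nothing adoption policy: either every disk dict carries an "adopt" key or none does. The rule restated as a runnable sketch (the disk dicts are illustrative):

def check_adoption_consistency(disks):
  """Return True iff either every disk or no disk carries an "adopt" key."""
  has_adopt = any("adopt" in d for d in disks)
  has_no_adopt = any("adopt" not in d for d in disks)
  return not (has_adopt and has_no_adopt)

assert check_adoption_consistency([{"adopt": "lv0"}, {"adopt": "lv1"}])
assert check_adoption_consistency([{"size": 1024}, {"size": 2048}])
assert not check_adoption_consistency([{"adopt": "lv0"}, {"size": 2048}])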
7013
7014 - def ExpandNames(self):
7015 """ExpandNames for CreateInstance. 7016 7017 Figure out the right locks for instance creation. 7018 7019 """ 7020 self.needed_locks = {} 7021 7022 instance_name = self.op.instance_name 7023 # this is just a preventive check, but someone might still add this 7024 # instance in the meantime, and creation will fail at lock-add time 7025 if instance_name in self.cfg.GetInstanceList(): 7026 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 7027 instance_name, errors.ECODE_EXISTS) 7028 7029 self.add_locks[locking.LEVEL_INSTANCE] = instance_name 7030 7031 if self.op.iallocator: 7032 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7033 else: 7034 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode) 7035 nodelist = [self.op.pnode] 7036 if self.op.snode is not None: 7037 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode) 7038 nodelist.append(self.op.snode) 7039 self.needed_locks[locking.LEVEL_NODE] = nodelist 7040 7041 # in case of import lock the source node too 7042 if self.op.mode == constants.INSTANCE_IMPORT: 7043 src_node = self.op.src_node 7044 src_path = self.op.src_path 7045 7046 if src_path is None: 7047 self.op.src_path = src_path = self.op.instance_name 7048 7049 if src_node is None: 7050 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7051 self.op.src_node = None 7052 if os.path.isabs(src_path): 7053 raise errors.OpPrereqError("Importing an instance from an absolute" 7054 " path requires a source node option.", 7055 errors.ECODE_INVAL) 7056 else: 7057 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node) 7058 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 7059 self.needed_locks[locking.LEVEL_NODE].append(src_node) 7060 if not os.path.isabs(src_path): 7061 self.op.src_path = src_path = \ 7062 utils.PathJoin(constants.EXPORT_DIR, src_path)
7063
7064 - def _RunAllocator(self):
7065 """Run the allocator based on input opcode. 7066 7067 """ 7068 nics = [n.ToDict() for n in self.nics] 7069 ial = IAllocator(self.cfg, self.rpc, 7070 mode=constants.IALLOCATOR_MODE_ALLOC, 7071 name=self.op.instance_name, 7072 disk_template=self.op.disk_template, 7073 tags=[], 7074 os=self.op.os_type, 7075 vcpus=self.be_full[constants.BE_VCPUS], 7076 mem_size=self.be_full[constants.BE_MEMORY], 7077 disks=self.disks, 7078 nics=nics, 7079 hypervisor=self.op.hypervisor, 7080 ) 7081 7082 ial.Run(self.op.iallocator) 7083 7084 if not ial.success: 7085 raise errors.OpPrereqError("Can't compute nodes using" 7086 " iallocator '%s': %s" % 7087 (self.op.iallocator, ial.info), 7088 errors.ECODE_NORES) 7089 if len(ial.result) != ial.required_nodes: 7090 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 7091 " of nodes (%s), required %s" % 7092 (self.op.iallocator, len(ial.result), 7093 ial.required_nodes), errors.ECODE_FAULT) 7094 self.op.pnode = ial.result[0] 7095 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 7096 self.op.instance_name, self.op.iallocator, 7097 utils.CommaJoin(ial.result)) 7098 if ial.required_nodes == 2: 7099 self.op.snode = ial.result[1]
7100
7101 - def BuildHooksEnv(self):
7102 """Build hooks env. 7103 7104 This runs on master, primary and secondary nodes of the instance. 7105 7106 """ 7107 env = { 7108 "ADD_MODE": self.op.mode, 7109 } 7110 if self.op.mode == constants.INSTANCE_IMPORT: 7111 env["SRC_NODE"] = self.op.src_node 7112 env["SRC_PATH"] = self.op.src_path 7113 env["SRC_IMAGES"] = self.src_images 7114 7115 env.update(_BuildInstanceHookEnv( 7116 name=self.op.instance_name, 7117 primary_node=self.op.pnode, 7118 secondary_nodes=self.secondaries, 7119 status=self.op.start, 7120 os_type=self.op.os_type, 7121 memory=self.be_full[constants.BE_MEMORY], 7122 vcpus=self.be_full[constants.BE_VCPUS], 7123 nics=_NICListToTuple(self, self.nics), 7124 disk_template=self.op.disk_template, 7125 disks=[(d["size"], d["mode"]) for d in self.disks], 7126 bep=self.be_full, 7127 hvp=self.hv_full, 7128 hypervisor_name=self.op.hypervisor, 7129 )) 7130 7131 nl = ([self.cfg.GetMasterNode(), self.op.pnode] + 7132 self.secondaries) 7133 return env, nl, nl
7134
7135 - def _ReadExportInfo(self):
7136 """Reads the export information from disk. 7137 7138 It will override the opcode source node and path with the actual 7139 information, if these two were not specified before. 7140 7141 @return: the export information 7142 7143 """ 7144 assert self.op.mode == constants.INSTANCE_IMPORT 7145 7146 src_node = self.op.src_node 7147 src_path = self.op.src_path 7148 7149 if src_node is None: 7150 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 7151 exp_list = self.rpc.call_export_list(locked_nodes) 7152 found = False 7153 for node in exp_list: 7154 if exp_list[node].fail_msg: 7155 continue 7156 if src_path in exp_list[node].payload: 7157 found = True 7158 self.op.src_node = src_node = node 7159 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR, 7160 src_path) 7161 break 7162 if not found: 7163 raise errors.OpPrereqError("No export found for relative path %s" % 7164 src_path, errors.ECODE_INVAL) 7165 7166 _CheckNodeOnline(self, src_node) 7167 result = self.rpc.call_export_info(src_node, src_path) 7168 result.Raise("No export or invalid export found in dir %s" % src_path) 7169 7170 export_info = objects.SerializableConfigParser.Loads(str(result.payload)) 7171 if not export_info.has_section(constants.INISECT_EXP): 7172 raise errors.ProgrammerError("Corrupted export config", 7173 errors.ECODE_ENVIRON) 7174 7175 ei_version = export_info.get(constants.INISECT_EXP, "version") 7176 if (int(ei_version) != constants.EXPORT_VERSION): 7177 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % 7178 (ei_version, constants.EXPORT_VERSION), 7179 errors.ECODE_ENVIRON) 7180 return export_info
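The export information is an INI-style file, and the version check above boils down to parsing it and comparing an integer option. A hedged sketch using the standard ConfigParser, where the "export" section name and the version value 0 are assumed to mirror constants.INISECT_EXP and constants.EXPORT_VERSION, and the file content is made up:

import ConfigParser  # Python 2, matching the module above
import StringIO

EXPORT_VERSION = 0  # assumed to match constants.EXPORT_VERSION

example = StringIO.StringIO("[export]\nversion = 0\nos = debootstrap\n")
cfg = ConfigParser.SafeConfigParser()
cfg.readfp(example)

assert cfg.has_section("export")
assert cfg.getint("export", "version") == EXPORT_VERSION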
7181
7182 - def _ReadExportParams(self, einfo):
7183 """Use export parameters as defaults. 7184 7185 In case the opcode doesn't specify (as in override) some instance 7186 parameters, then try to use them from the export information, if 7187 that declares them. 7188 7189 """ 7190 self.op.os_type = einfo.get(constants.INISECT_EXP, "os") 7191 7192 if self.op.disk_template is None: 7193 if einfo.has_option(constants.INISECT_INS, "disk_template"): 7194 self.op.disk_template = einfo.get(constants.INISECT_INS, 7195 "disk_template") 7196 else: 7197 raise errors.OpPrereqError("No disk template specified and the export" 7198 " is missing the disk_template information", 7199 errors.ECODE_INVAL) 7200 7201 if not self.op.disks: 7202 if einfo.has_option(constants.INISECT_INS, "disk_count"): 7203 disks = [] 7204 # TODO: import the disk iv_name too 7205 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")): 7206 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) 7207 disks.append({"size": disk_sz}) 7208 self.op.disks = disks 7209 else: 7210 raise errors.OpPrereqError("No disk info specified and the export" 7211 " is missing the disk information", 7212 errors.ECODE_INVAL) 7213 7214 if (not self.op.nics and 7215 einfo.has_option(constants.INISECT_INS, "nic_count")): 7216 nics = [] 7217 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")): 7218 ndict = {} 7219 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: 7220 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name)) 7221 ndict[name] = v 7222 nics.append(ndict) 7223 self.op.nics = nics 7224 7225 if (self.op.hypervisor is None and 7226 einfo.has_option(constants.INISECT_INS, "hypervisor")): 7227 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") 7228 if einfo.has_section(constants.INISECT_HYP): 7229 # use the export parameters but do not override the ones 7230 # specified by the user 7231 for name, value in einfo.items(constants.INISECT_HYP): 7232 if name not in self.op.hvparams: 7233 self.op.hvparams[name] = value 7234 7235 if einfo.has_section(constants.INISECT_BEP): 7236 # use the parameters, without overriding 7237 for name, value in einfo.items(constants.INISECT_BEP): 7238 if name not in self.op.beparams: 7239 self.op.beparams[name] = value 7240 else: 7241 # try to read the parameters old style, from the main section 7242 for name in constants.BES_PARAMETERS: 7243 if (name not in self.op.beparams and 7244 einfo.has_option(constants.INISECT_INS, name)): 7245 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) 7246 7247 if einfo.has_section(constants.INISECT_OSP): 7248 # use the parameters, without overriding 7249 for name, value in einfo.items(constants.INISECT_OSP): 7250 if name not in self.op.osparams: 7251 self.op.osparams[name] = value
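_ReadExportParams repeats one pattern: take the export's value only where the opcode did not already specify one. The pattern on its own, with made-up hypervisor parameters:

def fill_missing(user_params, export_params):
  """Copy export values into user_params without overriding user choices."""
  for name, value in export_params.items():
    if name not in user_params:
      user_params[name] = value
  return user_params

hvparams = {"kernel_path": "/boot/vmlinuz-custom"}
exported = {"kernel_path": "/boot/vmlinuz", "serial_console": "true"}
assert fill_missing(hvparams, exported) == {
  "kernel_path": "/boot/vmlinuz-custom",  # the user value wins
  "serial_console": "true",               # filled in from the export
}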
7252
7253 - def _RevertToDefaults(self, cluster):
7254 """Revert the instance parameters to the default values. 7255 7256 """ 7257 # hvparams 7258 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) 7259 for name in self.op.hvparams.keys(): 7260 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: 7261 del self.op.hvparams[name] 7262 # beparams 7263 be_defs = cluster.SimpleFillBE({}) 7264 for name in self.op.beparams.keys(): 7265 if name in be_defs and be_defs[name] == self.op.beparams[name]: 7266 del self.op.beparams[name] 7267 # nic params 7268 nic_defs = cluster.SimpleFillNIC({}) 7269 for nic in self.op.nics: 7270 for name in constants.NICS_PARAMETERS: 7271 if name in nic and name in nic_defs and nic[name] == nic_defs[name]: 7272 del nic[name] 7273 # osparams 7274 os_defs = cluster.SimpleFillOS(self.op.os_type, {}) 7275 for name in self.op.osparams.keys(): 7276 if name in os_defs and os_defs[name] == self.op.osparams[name]: 7277 del self.op.osparams[name]
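_RevertToDefaults goes the other way: an explicitly given value that happens to equal the cluster default is dropped again, so the instance keeps following the default. A sketch of that pruning with illustrative beparams:

def revert_to_defaults(params, defaults):
  """Drop entries from params whose value equals the corresponding default."""
  for name in list(params.keys()):
    if name in defaults and defaults[name] == params[name]:
      del params[name]
  return params

beparams = {"memory": 128, "vcpus": 4}
be_defaults = {"memory": 128, "vcpus": 1}
# "memory" matches the default and is dropped; "vcpus" stays explicit
assert revert_to_defaults(beparams, be_defaults) == {"vcpus": 4}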
7278
7279 - def CheckPrereq(self):
7280 """Check prerequisites. 7281 7282 """ 7283 if self.op.mode == constants.INSTANCE_IMPORT: 7284 export_info = self._ReadExportInfo() 7285 self._ReadExportParams(export_info) 7286 7287 _CheckDiskTemplate(self.op.disk_template) 7288 7289 if (not self.cfg.GetVGName() and 7290 self.op.disk_template not in constants.DTS_NOT_LVM): 7291 raise errors.OpPrereqError("Cluster does not support lvm-based" 7292 " instances", errors.ECODE_STATE) 7293 7294 if self.op.hypervisor is None: 7295 self.op.hypervisor = self.cfg.GetHypervisorType() 7296 7297 cluster = self.cfg.GetClusterInfo() 7298 enabled_hvs = cluster.enabled_hypervisors 7299 if self.op.hypervisor not in enabled_hvs: 7300 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" 7301 " cluster (%s)" % (self.op.hypervisor, 7302 ",".join(enabled_hvs)), 7303 errors.ECODE_STATE) 7304 7305 # check hypervisor parameter syntax (locally) 7306 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 7307 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, 7308 self.op.hvparams) 7309 hv_type = hypervisor.GetHypervisor(self.op.hypervisor) 7310 hv_type.CheckParameterSyntax(filled_hvp) 7311 self.hv_full = filled_hvp 7312 # check that we don't specify global parameters on an instance 7313 _CheckGlobalHvParams(self.op.hvparams) 7314 7315 # fill and remember the beparams dict 7316 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 7317 self.be_full = cluster.SimpleFillBE(self.op.beparams) 7318 7319 # build os parameters 7320 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) 7321 7322 # now that hvp/bep are in final format, let's reset to defaults, 7323 # if told to do so 7324 if self.op.identify_defaults: 7325 self._RevertToDefaults(cluster) 7326 7327 # NIC buildup 7328 self.nics = [] 7329 for idx, nic in enumerate(self.op.nics): 7330 nic_mode_req = nic.get("mode", None) 7331 nic_mode = nic_mode_req 7332 if nic_mode is None: 7333 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] 7334 7335 # in routed mode, for the first nic, the default ip is 'auto' 7336 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0: 7337 default_ip_mode = constants.VALUE_AUTO 7338 else: 7339 default_ip_mode = constants.VALUE_NONE 7340 7341 # ip validity checks 7342 ip = nic.get("ip", default_ip_mode) 7343 if ip is None or ip.lower() == constants.VALUE_NONE: 7344 nic_ip = None 7345 elif ip.lower() == constants.VALUE_AUTO: 7346 if not self.op.name_check: 7347 raise errors.OpPrereqError("IP address set to auto but name checks" 7348 " have been skipped", 7349 errors.ECODE_INVAL) 7350 nic_ip = self.hostname1.ip 7351 else: 7352 if not netutils.IPAddress.IsValid(ip): 7353 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 7354 errors.ECODE_INVAL) 7355 nic_ip = ip 7356 7357 # TODO: check the ip address for uniqueness 7358 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: 7359 raise errors.OpPrereqError("Routed nic mode requires an ip address", 7360 errors.ECODE_INVAL) 7361 7362 # MAC address verification 7363 mac = nic.get("mac", constants.VALUE_AUTO) 7364 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7365 mac = utils.NormalizeAndValidateMac(mac) 7366 7367 try: 7368 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 7369 except errors.ReservationError: 7370 raise errors.OpPrereqError("MAC address %s already in use" 7371 " in cluster" % mac, 7372 errors.ECODE_NOTUNIQUE) 7373 7374 # bridge verification 7375 bridge = nic.get("bridge", None) 7376 link = 
nic.get("link", None) 7377 if bridge and link: 7378 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 7379 " at the same time", errors.ECODE_INVAL) 7380 elif bridge and nic_mode == constants.NIC_MODE_ROUTED: 7381 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic", 7382 errors.ECODE_INVAL) 7383 elif bridge: 7384 link = bridge 7385 7386 nicparams = {} 7387 if nic_mode_req: 7388 nicparams[constants.NIC_MODE] = nic_mode_req 7389 if link: 7390 nicparams[constants.NIC_LINK] = link 7391 7392 check_params = cluster.SimpleFillNIC(nicparams) 7393 objects.NIC.CheckParameterSyntax(check_params) 7394 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams)) 7395 7396 # disk checks/pre-build 7397 self.disks = [] 7398 for disk in self.op.disks: 7399 mode = disk.get("mode", constants.DISK_RDWR) 7400 if mode not in constants.DISK_ACCESS_SET: 7401 raise errors.OpPrereqError("Invalid disk access mode '%s'" % 7402 mode, errors.ECODE_INVAL) 7403 size = disk.get("size", None) 7404 if size is None: 7405 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) 7406 try: 7407 size = int(size) 7408 except (TypeError, ValueError): 7409 raise errors.OpPrereqError("Invalid disk size '%s'" % size, 7410 errors.ECODE_INVAL) 7411 new_disk = {"size": size, "mode": mode} 7412 if "adopt" in disk: 7413 new_disk["adopt"] = disk["adopt"] 7414 self.disks.append(new_disk) 7415 7416 if self.op.mode == constants.INSTANCE_IMPORT: 7417 7418 # Check that the new instance doesn't have less disks than the export 7419 instance_disks = len(self.disks) 7420 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count') 7421 if instance_disks < export_disks: 7422 raise errors.OpPrereqError("Not enough disks to import." 7423 " (instance: %d, export: %d)" % 7424 (instance_disks, export_disks), 7425 errors.ECODE_INVAL) 7426 7427 disk_images = [] 7428 for idx in range(export_disks): 7429 option = 'disk%d_dump' % idx 7430 if export_info.has_option(constants.INISECT_INS, option): 7431 # FIXME: are the old os-es, disk sizes, etc. useful? 7432 export_name = export_info.get(constants.INISECT_INS, option) 7433 image = utils.PathJoin(self.op.src_path, export_name) 7434 disk_images.append(image) 7435 else: 7436 disk_images.append(False) 7437 7438 self.src_images = disk_images 7439 7440 old_name = export_info.get(constants.INISECT_INS, 'name') 7441 try: 7442 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count') 7443 except (TypeError, ValueError), err: 7444 raise errors.OpPrereqError("Invalid export file, nic_count is not" 7445 " an integer: %s" % str(err), 7446 errors.ECODE_STATE) 7447 if self.op.instance_name == old_name: 7448 for idx, nic in enumerate(self.nics): 7449 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx: 7450 nic_mac_ini = 'nic%d_mac' % idx 7451 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) 7452 7453 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT 7454 7455 # ip ping checks (we use the same ip that was resolved in ExpandNames) 7456 if self.op.ip_check: 7457 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): 7458 raise errors.OpPrereqError("IP %s of instance %s already in use" % 7459 (self.check_ip, self.op.instance_name), 7460 errors.ECODE_NOTUNIQUE) 7461 7462 #### mac address generation 7463 # By generating here the mac address both the allocator and the hooks get 7464 # the real final mac address rather than the 'auto' or 'generate' value. 
7465 # There is a race condition between the generation and the instance object 7466 # creation, which means that we know the mac is valid now, but we're not 7467 # sure it will be when we actually add the instance. If things go bad 7468 # adding the instance will abort because of a duplicate mac, and the 7469 # creation job will fail. 7470 for nic in self.nics: 7471 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7472 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId()) 7473 7474 #### allocator run 7475 7476 if self.op.iallocator is not None: 7477 self._RunAllocator() 7478 7479 #### node related checks 7480 7481 # check primary node 7482 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) 7483 assert self.pnode is not None, \ 7484 "Cannot retrieve locked node %s" % self.op.pnode 7485 if pnode.offline: 7486 raise errors.OpPrereqError("Cannot use offline primary node '%s'" % 7487 pnode.name, errors.ECODE_STATE) 7488 if pnode.drained: 7489 raise errors.OpPrereqError("Cannot use drained primary node '%s'" % 7490 pnode.name, errors.ECODE_STATE) 7491 if not pnode.vm_capable: 7492 raise errors.OpPrereqError("Cannot use non-vm_capable primary node" 7493 " '%s'" % pnode.name, errors.ECODE_STATE) 7494 7495 self.secondaries = [] 7496 7497 # mirror node verification 7498 if self.op.disk_template in constants.DTS_NET_MIRROR: 7499 if self.op.snode == pnode.name: 7500 raise errors.OpPrereqError("The secondary node cannot be the" 7501 " primary node.", errors.ECODE_INVAL) 7502 _CheckNodeOnline(self, self.op.snode) 7503 _CheckNodeNotDrained(self, self.op.snode) 7504 _CheckNodeVmCapable(self, self.op.snode) 7505 self.secondaries.append(self.op.snode) 7506 7507 nodenames = [pnode.name] + self.secondaries 7508 7509 req_size = _ComputeDiskSize(self.op.disk_template, 7510 self.disks) 7511 7512 # Check lv size requirements, if not adopting 7513 if req_size is not None and not self.adopt_disks: 7514 _CheckNodesFreeDisk(self, nodenames, req_size) 7515 7516 if self.adopt_disks: # instead, we must check the adoption data 7517 all_lvs = set([i["adopt"] for i in self.disks]) 7518 if len(all_lvs) != len(self.disks): 7519 raise errors.OpPrereqError("Duplicate volume names given for adoption", 7520 errors.ECODE_INVAL) 7521 for lv_name in all_lvs: 7522 try: 7523 self.cfg.ReserveLV(lv_name, self.proc.GetECId()) 7524 except errors.ReservationError: 7525 raise errors.OpPrereqError("LV named %s used by another instance" % 7526 lv_name, errors.ECODE_NOTUNIQUE) 7527 7528 node_lvs = self.rpc.call_lv_list([pnode.name], 7529 self.cfg.GetVGName())[pnode.name] 7530 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) 7531 node_lvs = node_lvs.payload 7532 delta = all_lvs.difference(node_lvs.keys()) 7533 if delta: 7534 raise errors.OpPrereqError("Missing logical volume(s): %s" % 7535 utils.CommaJoin(delta), 7536 errors.ECODE_INVAL) 7537 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] 7538 if online_lvs: 7539 raise errors.OpPrereqError("Online logical volumes found, cannot" 7540 " adopt: %s" % utils.CommaJoin(online_lvs), 7541 errors.ECODE_STATE) 7542 # update the size of disk based on what is found 7543 for dsk in self.disks: 7544 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0])) 7545 7546 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) 7547 7548 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) 7549 # check OS parameters (remotely) 7550 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full) 7551 7552 _CheckNicsBridgesExist(self, 
self.nics, self.pnode.name) 7553 7554 # memory check on primary node 7555 if self.op.start: 7556 _CheckNodeFreeMemory(self, self.pnode.name, 7557 "creating instance %s" % self.op.instance_name, 7558 self.be_full[constants.BE_MEMORY], 7559 self.op.hypervisor) 7560 7561 self.dry_run_result = list(nodenames)
7562
7563 - def Exec(self, feedback_fn):
7564 """Create and add the instance to the cluster. 7565 7566 """ 7567 instance = self.op.instance_name 7568 pnode_name = self.pnode.name 7569 7570 ht_kind = self.op.hypervisor 7571 if ht_kind in constants.HTS_REQ_PORT: 7572 network_port = self.cfg.AllocatePort() 7573 else: 7574 network_port = None 7575 7576 if constants.ENABLE_FILE_STORAGE: 7577 # this is needed because os.path.join does not accept None arguments 7578 if self.op.file_storage_dir is None: 7579 string_file_storage_dir = "" 7580 else: 7581 string_file_storage_dir = self.op.file_storage_dir 7582 7583 # build the full file storage dir path 7584 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(), 7585 string_file_storage_dir, instance) 7586 else: 7587 file_storage_dir = "" 7588 7589 disks = _GenerateDiskTemplate(self, 7590 self.op.disk_template, 7591 instance, pnode_name, 7592 self.secondaries, 7593 self.disks, 7594 file_storage_dir, 7595 self.op.file_driver, 7596 0) 7597 7598 iobj = objects.Instance(name=instance, os=self.op.os_type, 7599 primary_node=pnode_name, 7600 nics=self.nics, disks=disks, 7601 disk_template=self.op.disk_template, 7602 admin_up=False, 7603 network_port=network_port, 7604 beparams=self.op.beparams, 7605 hvparams=self.op.hvparams, 7606 hypervisor=self.op.hypervisor, 7607 osparams=self.op.osparams, 7608 ) 7609 7610 if self.adopt_disks: 7611 # rename LVs to the newly-generated names; we need to construct 7612 # 'fake' LV disks with the old data, plus the new unique_id 7613 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] 7614 rename_to = [] 7615 for t_dsk, a_dsk in zip (tmp_disks, self.disks): 7616 rename_to.append(t_dsk.logical_id) 7617 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"]) 7618 self.cfg.SetDiskID(t_dsk, pnode_name) 7619 result = self.rpc.call_blockdev_rename(pnode_name, 7620 zip(tmp_disks, rename_to)) 7621 result.Raise("Failed to rename adoped LVs") 7622 else: 7623 feedback_fn("* creating instance disks...") 7624 try: 7625 _CreateDisks(self, iobj) 7626 except errors.OpExecError: 7627 self.LogWarning("Device creation failed, reverting...") 7628 try: 7629 _RemoveDisks(self, iobj) 7630 finally: 7631 self.cfg.ReleaseDRBDMinors(instance) 7632 raise 7633 7634 if self.cfg.GetClusterInfo().prealloc_wipe_disks: 7635 feedback_fn("* wiping instance disks...") 7636 try: 7637 _WipeDisks(self, iobj) 7638 except errors.OpExecError: 7639 self.LogWarning("Device wiping failed, reverting...") 7640 try: 7641 _RemoveDisks(self, iobj) 7642 finally: 7643 self.cfg.ReleaseDRBDMinors(instance) 7644 raise 7645 7646 feedback_fn("adding instance %s to cluster config" % instance) 7647 7648 self.cfg.AddInstance(iobj, self.proc.GetECId()) 7649 7650 # Declare that we don't want to remove the instance lock anymore, as we've 7651 # added the instance to the config 7652 del self.remove_locks[locking.LEVEL_INSTANCE] 7653 # Unlock all the nodes 7654 if self.op.mode == constants.INSTANCE_IMPORT: 7655 nodes_keep = [self.op.src_node] 7656 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE] 7657 if node != self.op.src_node] 7658 self.context.glm.release(locking.LEVEL_NODE, nodes_release) 7659 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep 7660 else: 7661 self.context.glm.release(locking.LEVEL_NODE) 7662 del self.acquired_locks[locking.LEVEL_NODE] 7663 7664 if self.op.wait_for_sync: 7665 disk_abort = not _WaitForSync(self, iobj) 7666 elif iobj.disk_template in constants.DTS_NET_MIRROR: 7667 # make sure the disks are not degraded (still sync-ing is ok) 7668 time.sleep(15) 
7669 feedback_fn("* checking mirrors status") 7670 disk_abort = not _WaitForSync(self, iobj, oneshot=True) 7671 else: 7672 disk_abort = False 7673 7674 if disk_abort: 7675 _RemoveDisks(self, iobj) 7676 self.cfg.RemoveInstance(iobj.name) 7677 # Make sure the instance lock gets removed 7678 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name 7679 raise errors.OpExecError("There are some degraded disks for" 7680 " this instance") 7681 7682 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: 7683 if self.op.mode == constants.INSTANCE_CREATE: 7684 if not self.op.no_install: 7685 feedback_fn("* running the instance OS create scripts...") 7686 # FIXME: pass debug option from opcode to backend 7687 result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 7688 self.op.debug_level) 7689 result.Raise("Could not add os for instance %s" 7690 " on node %s" % (instance, pnode_name)) 7691 7692 elif self.op.mode == constants.INSTANCE_IMPORT: 7693 feedback_fn("* running the instance OS import scripts...") 7694 7695 transfers = [] 7696 7697 for idx, image in enumerate(self.src_images): 7698 if not image: 7699 continue 7700 7701 # FIXME: pass debug option from opcode to backend 7702 dt = masterd.instance.DiskTransfer("disk/%s" % idx, 7703 constants.IEIO_FILE, (image, ), 7704 constants.IEIO_SCRIPT, 7705 (iobj.disks[idx], idx), 7706 None) 7707 transfers.append(dt) 7708 7709 import_result = \ 7710 masterd.instance.TransferInstanceData(self, feedback_fn, 7711 self.op.src_node, pnode_name, 7712 self.pnode.secondary_ip, 7713 iobj, transfers) 7714 if not compat.all(import_result): 7715 self.LogWarning("Some disks for instance %s on node %s were not" 7716 " imported successfully" % (instance, pnode_name)) 7717 7718 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 7719 feedback_fn("* preparing remote import...") 7720 connect_timeout = constants.RIE_CONNECT_TIMEOUT 7721 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 7722 7723 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj, 7724 self.source_x509_ca, 7725 self._cds, timeouts) 7726 if not compat.all(disk_results): 7727 # TODO: Should the instance still be started, even if some disks 7728 # failed to import (valid for local imports, too)? 7729 self.LogWarning("Some disks for instance %s on node %s were not" 7730 " imported successfully" % (instance, pnode_name)) 7731 7732 # Run rename script on newly imported instance 7733 assert iobj.name == instance 7734 feedback_fn("Running rename script for %s" % instance) 7735 result = self.rpc.call_instance_run_rename(pnode_name, iobj, 7736 self.source_instance_name, 7737 self.op.debug_level) 7738 if result.fail_msg: 7739 self.LogWarning("Failed to run rename script for %s on node" 7740 " %s: %s" % (instance, pnode_name, result.fail_msg)) 7741 7742 else: 7743 # also checked in the prereq part 7744 raise errors.ProgrammerError("Unknown OS initialization mode '%s'" 7745 % self.op.mode) 7746 7747 if self.op.start: 7748 iobj.admin_up = True 7749 self.cfg.Update(iobj, feedback_fn) 7750 logging.info("Starting instance %s on node %s", instance, pnode_name) 7751 feedback_fn("* starting instance...") 7752 result = self.rpc.call_instance_start(pnode_name, iobj, None, None) 7753 result.Raise("Could not start instance") 7754 7755 return list(iobj.all_nodes)
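Both the disk-creation and the disk-wiping steps in Exec follow the same shape: on failure, revert what was created, always release the reserved DRBD minors, then re-raise. Stripped of the RPC layer, and with cleanup_fn standing in for _RemoveDisks, the shape looks like this:

def run_with_revert(action_fn, cleanup_fn, release_fn):
  """Run action_fn; on failure run cleanup_fn, always release, re-raise."""
  try:
    action_fn()
  except RuntimeError:
    try:
      cleanup_fn()
    finally:
      release_fn()
    raise

calls = []
def failing_create():
  raise RuntimeError("device creation failed")

try:
  run_with_revert(failing_create,
                  lambda: calls.append("remove_disks"),
                  lambda: calls.append("release_minors"))
except RuntimeError:
  pass
# cleanup ran first, the minors were released even so, the error propagated
assert calls == ["remove_disks", "release_minors"]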
7756
7757 7758 -class LUConnectConsole(NoHooksLU):
7759 """Connect to an instance's console. 7760 7761 This is somewhat special in that it returns the command line that 7762 you need to run on the master node in order to connect to the 7763 console. 7764 7765 """ 7766 _OP_PARAMS = [ 7767 _PInstanceName 7768 ] 7769 REQ_BGL = False 7770
7771 - def ExpandNames(self):
    self._ExpandAndLockInstance()
7774 - def CheckPrereq(self):
7775 """Check prerequisites. 7776 7777 This checks that the instance is in the cluster. 7778 7779 """ 7780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7781 assert self.instance is not None, \ 7782 "Cannot retrieve locked instance %s" % self.op.instance_name 7783 _CheckNodeOnline(self, self.instance.primary_node)
7784
7785 - def Exec(self, feedback_fn):
7786 """Connect to the console of an instance 7787 7788 """ 7789 instance = self.instance 7790 node = instance.primary_node 7791 7792 node_insts = self.rpc.call_instance_list([node], 7793 [instance.hypervisor])[node] 7794 node_insts.Raise("Can't get node information from %s" % node) 7795 7796 if instance.name not in node_insts.payload: 7797 if instance.admin_up: 7798 state = "ERROR_down" 7799 else: 7800 state = "ADMIN_down" 7801 raise errors.OpExecError("Instance %s is not running (state %s)" % 7802 (instance.name, state)) 7803 7804 logging.debug("Connecting to console of %s on %s", instance.name, node) 7805 7806 hyper = hypervisor.GetHypervisor(instance.hypervisor) 7807 cluster = self.cfg.GetClusterInfo() 7808 # beparams and hvparams are passed separately, to avoid editing the 7809 # instance and then saving the defaults in the instance itself. 7810 hvparams = cluster.FillHV(instance) 7811 beparams = cluster.FillBE(instance) 7812 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams) 7813 7814 # build ssh cmdline 7815 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7816
7817 7818 -class LUReplaceDisks(LogicalUnit):
7819 """Replace the disks of an instance. 7820 7821 """ 7822 HPATH = "mirrors-replace" 7823 HTYPE = constants.HTYPE_INSTANCE 7824 _OP_PARAMS = [ 7825 _PInstanceName, 7826 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)), 7827 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)), 7828 ("remote_node", None, ht.TMaybeString), 7829 ("iallocator", None, ht.TMaybeString), 7830 ("early_release", False, ht.TBool), 7831 ] 7832 REQ_BGL = False 7833
7834 - def CheckArguments(self):
7835 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node, 7836 self.op.iallocator)
7837
7838 - def ExpandNames(self):
7839 self._ExpandAndLockInstance() 7840 7841 if self.op.iallocator is not None: 7842 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7843 7844 elif self.op.remote_node is not None: 7845 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 7846 self.op.remote_node = remote_node 7847 7848 # Warning: do not remove the locking of the new secondary here 7849 # unless DRBD8.AddChildren is changed to work in parallel; 7850 # currently it doesn't since parallel invocations of 7851 # FindUnusedMinor will conflict 7852 self.needed_locks[locking.LEVEL_NODE] = [remote_node] 7853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 7854 7855 else: 7856 self.needed_locks[locking.LEVEL_NODE] = [] 7857 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 7858 7859 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, 7860 self.op.iallocator, self.op.remote_node, 7861 self.op.disks, False, self.op.early_release) 7862 7863 self.tasklets = [self.replacer]
7864
7865 - def DeclareLocks(self, level):
7866 # If we're not already locking all nodes in the set we have to declare the 7867 # instance's primary/secondary nodes. 7868 if (level == locking.LEVEL_NODE and 7869 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET): 7870 self._LockInstancesNodes()
7871
7872 - def BuildHooksEnv(self):
7873 """Build hooks env. 7874 7875 This runs on the master, the primary and all the secondaries. 7876 7877 """ 7878 instance = self.replacer.instance 7879 env = { 7880 "MODE": self.op.mode, 7881 "NEW_SECONDARY": self.op.remote_node, 7882 "OLD_SECONDARY": instance.secondary_nodes[0], 7883 } 7884 env.update(_BuildInstanceHookEnvByObject(self, instance)) 7885 nl = [ 7886 self.cfg.GetMasterNode(), 7887 instance.primary_node, 7888 ] 7889 if self.op.remote_node is not None: 7890 nl.append(self.op.remote_node) 7891 return env, nl, nl
7892
7893 7894 -class TLReplaceDisks(Tasklet):
7895 """Replaces disks for an instance. 7896 7897 Note: Locking is not within the scope of this class. 7898 7899 """
7900 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, 7901 disks, delay_iallocator, early_release):
7902 """Initializes this class. 7903 7904 """ 7905 Tasklet.__init__(self, lu) 7906 7907 # Parameters 7908 self.instance_name = instance_name 7909 self.mode = mode 7910 self.iallocator_name = iallocator_name 7911 self.remote_node = remote_node 7912 self.disks = disks 7913 self.delay_iallocator = delay_iallocator 7914 self.early_release = early_release 7915 7916 # Runtime data 7917 self.instance = None 7918 self.new_node = None 7919 self.target_node = None 7920 self.other_node = None 7921 self.remote_node_info = None 7922 self.node_secondary_ip = None
7923 7924 @staticmethod
7925 - def CheckArguments(mode, remote_node, iallocator):
7926 """Helper function for users of this class. 7927 7928 """ 7929 # check for valid parameter combination 7930 if mode == constants.REPLACE_DISK_CHG: 7931 if remote_node is None and iallocator is None: 7932 raise errors.OpPrereqError("When changing the secondary either an" 7933 " iallocator script must be used or the" 7934 " new node given", errors.ECODE_INVAL) 7935 7936 if remote_node is not None and iallocator is not None: 7937 raise errors.OpPrereqError("Give either the iallocator or the new" 7938 " secondary, not both", errors.ECODE_INVAL) 7939 7940 elif remote_node is not None or iallocator is not None: 7941 # Not replacing the secondary 7942 raise errors.OpPrereqError("The iallocator and new node options can" 7943 " only be used when changing the" 7944 " secondary node", errors.ECODE_INVAL)
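The argument rules above reduce to: when changing the secondary, exactly one of remote_node and iallocator must be given; in every other mode, neither may be. The truth table as a runnable sketch ("chg" stands in for constants.REPLACE_DISK_CHG, any other string for the remaining modes):

def replace_disks_args_ok(mode, remote_node, iallocator):
  """Mirror of the parameter-combination rules checked above."""
  if mode == "chg":
    # exactly one of the two must be supplied
    return (remote_node is None) != (iallocator is None)
  # other modes accept neither option
  return remote_node is None and iallocator is None

assert replace_disks_args_ok("chg", "node2.example.com", None)
assert not replace_disks_args_ok("chg", None, None)
assert not replace_disks_args_ok("chg", "node2.example.com", "hail")
assert replace_disks_args_ok("pri", None, None)
assert not replace_disks_args_ok("pri", None, "hail")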
7945 7946 @staticmethod
7947 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7948 """Compute a new secondary node using an IAllocator. 7949 7950 """ 7951 ial = IAllocator(lu.cfg, lu.rpc, 7952 mode=constants.IALLOCATOR_MODE_RELOC, 7953 name=instance_name, 7954 relocate_from=relocate_from) 7955 7956 ial.Run(iallocator_name) 7957 7958 if not ial.success: 7959 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 7960 " %s" % (iallocator_name, ial.info), 7961 errors.ECODE_NORES) 7962 7963 if len(ial.result) != ial.required_nodes: 7964 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 7965 " of nodes (%s), required %s" % 7966 (iallocator_name, 7967 len(ial.result), ial.required_nodes), 7968 errors.ECODE_FAULT) 7969 7970 remote_node_name = ial.result[0] 7971 7972 lu.LogInfo("Selected new secondary for instance '%s': %s", 7973 instance_name, remote_node_name) 7974 7975 return remote_node_name
7976
7977 - def _FindFaultyDisks(self, node_name):
7978 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, 7979 node_name, True)
7980
7981 - def CheckPrereq(self):
7982 """Check prerequisites. 7983 7984 This checks that the instance is in the cluster. 7985 7986 """ 7987 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name) 7988 assert instance is not None, \ 7989 "Cannot retrieve locked instance %s" % self.instance_name 7990 7991 if instance.disk_template != constants.DT_DRBD8: 7992 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" 7993 " instances", errors.ECODE_INVAL) 7994 7995 if len(instance.secondary_nodes) != 1: 7996 raise errors.OpPrereqError("The instance has a strange layout," 7997 " expected one secondary but found %d" % 7998 len(instance.secondary_nodes), 7999 errors.ECODE_FAULT) 8000 8001 if not self.delay_iallocator: 8002 self._CheckPrereq2()
8003
8004 - def _CheckPrereq2(self):
8005 """Check prerequisites, second part. 8006 8007 This function should always be part of CheckPrereq. It was separated and is 8008 now called from Exec because during node evacuation iallocator was only 8009 called with an unmodified cluster model, not taking planned changes into 8010 account. 8011 8012 """ 8013 instance = self.instance 8014 secondary_node = instance.secondary_nodes[0] 8015 8016 if self.iallocator_name is None: 8017 remote_node = self.remote_node 8018 else: 8019 remote_node = self._RunAllocator(self.lu, self.iallocator_name, 8020 instance.name, instance.secondary_nodes) 8021 8022 if remote_node is not None: 8023 self.remote_node_info = self.cfg.GetNodeInfo(remote_node) 8024 assert self.remote_node_info is not None, \ 8025 "Cannot retrieve locked node %s" % remote_node 8026 else: 8027 self.remote_node_info = None 8028 8029 if remote_node == self.instance.primary_node: 8030 raise errors.OpPrereqError("The specified node is the primary node of" 8031 " the instance.", errors.ECODE_INVAL) 8032 8033 if remote_node == secondary_node: 8034 raise errors.OpPrereqError("The specified node is already the" 8035 " secondary node of the instance.", 8036 errors.ECODE_INVAL) 8037 8038 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, 8039 constants.REPLACE_DISK_CHG): 8040 raise errors.OpPrereqError("Cannot specify disks to be replaced", 8041 errors.ECODE_INVAL) 8042 8043 if self.mode == constants.REPLACE_DISK_AUTO: 8044 faulty_primary = self._FindFaultyDisks(instance.primary_node) 8045 faulty_secondary = self._FindFaultyDisks(secondary_node) 8046 8047 if faulty_primary and faulty_secondary: 8048 raise errors.OpPrereqError("Instance %s has faulty disks on more than" 8049 " one node and can not be repaired" 8050 " automatically" % self.instance_name, 8051 errors.ECODE_STATE) 8052 8053 if faulty_primary: 8054 self.disks = faulty_primary 8055 self.target_node = instance.primary_node 8056 self.other_node = secondary_node 8057 check_nodes = [self.target_node, self.other_node] 8058 elif faulty_secondary: 8059 self.disks = faulty_secondary 8060 self.target_node = secondary_node 8061 self.other_node = instance.primary_node 8062 check_nodes = [self.target_node, self.other_node] 8063 else: 8064 self.disks = [] 8065 check_nodes = [] 8066 8067 else: 8068 # Non-automatic modes 8069 if self.mode == constants.REPLACE_DISK_PRI: 8070 self.target_node = instance.primary_node 8071 self.other_node = secondary_node 8072 check_nodes = [self.target_node, self.other_node] 8073 8074 elif self.mode == constants.REPLACE_DISK_SEC: 8075 self.target_node = secondary_node 8076 self.other_node = instance.primary_node 8077 check_nodes = [self.target_node, self.other_node] 8078 8079 elif self.mode == constants.REPLACE_DISK_CHG: 8080 self.new_node = remote_node 8081 self.other_node = instance.primary_node 8082 self.target_node = secondary_node 8083 check_nodes = [self.new_node, self.other_node] 8084 8085 _CheckNodeNotDrained(self.lu, remote_node) 8086 _CheckNodeVmCapable(self.lu, remote_node) 8087 8088 old_node_info = self.cfg.GetNodeInfo(secondary_node) 8089 assert old_node_info is not None 8090 if old_node_info.offline and not self.early_release: 8091 # doesn't make sense to delay the release 8092 self.early_release = True 8093 self.lu.LogInfo("Old secondary %s is offline, automatically enabling" 8094 " early-release mode", secondary_node) 8095 8096 else: 8097 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % 8098 self.mode) 8099 8100 # If not specified all disks should be replaced 8101 if not 
self.disks: 8102 self.disks = range(len(self.instance.disks)) 8103 8104 for node in check_nodes: 8105 _CheckNodeOnline(self.lu, node) 8106 8107 # Check whether disks are valid 8108 for disk_idx in self.disks: 8109 instance.FindDisk(disk_idx) 8110 8111 # Get secondary node IP addresses 8112 node_2nd_ip = {} 8113 8114 for node_name in [self.target_node, self.other_node, self.new_node]: 8115 if node_name is not None: 8116 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip 8117 8118 self.node_secondary_ip = node_2nd_ip
8119
8120 - def Exec(self, feedback_fn):
8121 """Execute disk replacement. 8122 8123 This dispatches the disk replacement to the appropriate handler. 8124 8125 """ 8126 if self.delay_iallocator: 8127 self._CheckPrereq2() 8128 8129 if not self.disks: 8130 feedback_fn("No disks need replacement") 8131 return 8132 8133 feedback_fn("Replacing disk(s) %s for %s" % 8134 (utils.CommaJoin(self.disks), self.instance.name)) 8135 8136 activate_disks = (not self.instance.admin_up) 8137 8138 # Activate the instance disks if we're replacing them on a down instance 8139 if activate_disks: 8140 _StartInstanceDisks(self.lu, self.instance, True) 8141 8142 try: 8143 # Should we replace the secondary node? 8144 if self.new_node is not None: 8145 fn = self._ExecDrbd8Secondary 8146 else: 8147 fn = self._ExecDrbd8DiskOnly 8148 8149 return fn(feedback_fn) 8150 8151 finally: 8152 # Deactivate the instance disks if we're replacing them on a 8153 # down instance 8154 if activate_disks: 8155 _SafeShutdownInstanceDisks(self.lu, self.instance)
8156
8157 - def _CheckVolumeGroup(self, nodes):
8158 self.lu.LogInfo("Checking volume groups") 8159 8160 vgname = self.cfg.GetVGName() 8161 8162 # Make sure volume group exists on all involved nodes 8163 results = self.rpc.call_vg_list(nodes) 8164 if not results: 8165 raise errors.OpExecError("Can't list volume groups on the nodes") 8166 8167 for node in nodes: 8168 res = results[node] 8169 res.Raise("Error checking node %s" % node) 8170 if vgname not in res.payload: 8171 raise errors.OpExecError("Volume group '%s' not found on node %s" % 8172 (vgname, node))
8173
8174 - def _CheckDisksExistence(self, nodes):
8175 # Check disk existence 8176 for idx, dev in enumerate(self.instance.disks): 8177 if idx not in self.disks: 8178 continue 8179 8180 for node in nodes: 8181 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node)) 8182 self.cfg.SetDiskID(dev, node) 8183 8184 result = self.rpc.call_blockdev_find(node, dev) 8185 8186 msg = result.fail_msg 8187 if msg or not result.payload: 8188 if not msg: 8189 msg = "disk not found" 8190 raise errors.OpExecError("Can't find disk/%d on node %s: %s" % 8191 (idx, node, msg))
8192
8193 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8194 for idx, dev in enumerate(self.instance.disks): 8195 if idx not in self.disks: 8196 continue 8197 8198 self.lu.LogInfo("Checking disk/%d consistency on node %s" % 8199 (idx, node_name)) 8200 8201 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary, 8202 ldisk=ldisk): 8203 raise errors.OpExecError("Node %s has degraded storage, unsafe to" 8204 " replace disks for instance %s" % 8205 (node_name, self.instance.name))
8206
8207 - def _CreateNewStorage(self, node_name):
8208 vgname = self.cfg.GetVGName() 8209 iv_names = {} 8210 8211 for idx, dev in enumerate(self.instance.disks): 8212 if idx not in self.disks: 8213 continue 8214 8215 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx)) 8216 8217 self.cfg.SetDiskID(dev, node_name) 8218 8219 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] 8220 names = _GenerateUniqueNames(self.lu, lv_names) 8221 8222 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size, 8223 logical_id=(vgname, names[0])) 8224 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 8225 logical_id=(vgname, names[1])) 8226 8227 new_lvs = [lv_data, lv_meta] 8228 old_lvs = dev.children 8229 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) 8230 8231 # we pass force_create=True to force the LVM creation 8232 for new_lv in new_lvs: 8233 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True, 8234 _GetInstanceInfoText(self.instance), False) 8235 8236 return iv_names
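Each replaced disk gets a fresh data/meta LV pair whose names are derived from the disk index, as above. The naming scheme in isolation:

def new_lv_names(disk_idx):
  """Name stems for the replacement data and metadata LVs of one disk."""
  return [".disk%d_%s" % (disk_idx, suffix) for suffix in ("data", "meta")]

assert new_lv_names(0) == [".disk0_data", ".disk0_meta"]
assert new_lv_names(2) == [".disk2_data", ".disk2_meta"]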
8237
8238 - def _CheckDevices(self, node_name, iv_names):
8239 for name, (dev, _, _) in iv_names.iteritems(): 8240 self.cfg.SetDiskID(dev, node_name) 8241 8242 result = self.rpc.call_blockdev_find(node_name, dev) 8243 8244 msg = result.fail_msg 8245 if msg or not result.payload: 8246 if not msg: 8247 msg = "disk not found" 8248 raise errors.OpExecError("Can't find DRBD device %s: %s" % 8249 (name, msg)) 8250 8251 if result.payload.is_degraded: 8252 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8253
8254 - def _RemoveOldStorage(self, node_name, iv_names):
8255 for name, (_, old_lvs, _) in iv_names.iteritems(): 8256 self.lu.LogInfo("Remove logical volumes for %s" % name) 8257 8258 for lv in old_lvs: 8259 self.cfg.SetDiskID(lv, node_name) 8260 8261 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg 8262 if msg: 8263 self.lu.LogWarning("Can't remove old LV: %s" % msg, 8264 hint="remove unused LVs manually")
8265
8266 - def _ReleaseNodeLock(self, node_name):
    """Releases the node lock(s) for the given node name or list of names."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8269
8270 - def _ExecDrbd8DiskOnly(self, feedback_fn):
8271 """Replace a disk on the primary or secondary for DRBD 8. 8272 8273 The algorithm for replace is quite complicated: 8274 8275 1. for each disk to be replaced: 8276 8277 1. create new LVs on the target node with unique names 8278 1. detach old LVs from the drbd device 8279 1. rename old LVs to name_replaced.<time_t> 8280 1. rename new LVs to old LVs 8281 1. attach the new LVs (with the old names now) to the drbd device 8282 8283 1. wait for sync across all devices 8284 8285 1. for each modified disk: 8286 8287 1. remove old LVs (which have the name name_replaces.<time_t>) 8288 8289 Failures are not very well handled. 8290 8291 """ 8292 steps_total = 6 8293 8294 # Step: check device activation 8295 self.lu.LogStep(1, steps_total, "Check device existence") 8296 self._CheckDisksExistence([self.other_node, self.target_node]) 8297 self._CheckVolumeGroup([self.target_node, self.other_node]) 8298 8299 # Step: check other node consistency 8300 self.lu.LogStep(2, steps_total, "Check peer consistency") 8301 self._CheckDisksConsistency(self.other_node, 8302 self.other_node == self.instance.primary_node, 8303 False) 8304 8305 # Step: create new storage 8306 self.lu.LogStep(3, steps_total, "Allocate new storage") 8307 iv_names = self._CreateNewStorage(self.target_node) 8308 8309 # Step: for each lv, detach+rename*2+attach 8310 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8311 for dev, old_lvs, new_lvs in iv_names.itervalues(): 8312 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name) 8313 8314 result = self.rpc.call_blockdev_removechildren(self.target_node, dev, 8315 old_lvs) 8316 result.Raise("Can't detach drbd from local storage on node" 8317 " %s for device %s" % (self.target_node, dev.iv_name)) 8318 #dev.children = [] 8319 #cfg.Update(instance) 8320 8321 # ok, we created the new LVs, so now we know we have the needed 8322 # storage; as such, we proceed on the target node to rename 8323 # old_lv to _old, and new_lv to old_lv; note that we rename LVs 8324 # using the assumption that logical_id == physical_id (which in 8325 # turn is the unique_id on that node) 8326 8327 # FIXME(iustin): use a better name for the replaced LVs 8328 temp_suffix = int(time.time()) 8329 ren_fn = lambda d, suff: (d.physical_id[0], 8330 d.physical_id[1] + "_replaced-%s" % suff) 8331 8332 # Build the rename list based on what LVs exist on the node 8333 rename_old_to_new = [] 8334 for to_ren in old_lvs: 8335 result = self.rpc.call_blockdev_find(self.target_node, to_ren) 8336 if not result.fail_msg and result.payload: 8337 # device exists 8338 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) 8339 8340 self.lu.LogInfo("Renaming the old LVs on the target node") 8341 result = self.rpc.call_blockdev_rename(self.target_node, 8342 rename_old_to_new) 8343 result.Raise("Can't rename old LVs on node %s" % self.target_node) 8344 8345 # Now we rename the new LVs to the old LVs 8346 self.lu.LogInfo("Renaming the new LVs on the target node") 8347 rename_new_to_old = [(new, old.physical_id) 8348 for old, new in zip(old_lvs, new_lvs)] 8349 result = self.rpc.call_blockdev_rename(self.target_node, 8350 rename_new_to_old) 8351 result.Raise("Can't rename new LVs on node %s" % self.target_node) 8352 8353 for old, new in zip(old_lvs, new_lvs): 8354 new.logical_id = old.logical_id 8355 self.cfg.SetDiskID(new, self.target_node) 8356 8357 for disk in old_lvs: 8358 disk.logical_id = ren_fn(disk, temp_suffix) 8359 self.cfg.SetDiskID(disk, self.target_node) 8360 8361 # Now that the new lvs have the 
old name, we can add them to the device 8362 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node) 8363 result = self.rpc.call_blockdev_addchildren(self.target_node, dev, 8364 new_lvs) 8365 msg = result.fail_msg 8366 if msg: 8367 for new_lv in new_lvs: 8368 msg2 = self.rpc.call_blockdev_remove(self.target_node, 8369 new_lv).fail_msg 8370 if msg2: 8371 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, 8372 hint=("cleanup manually the unused logical" 8373 "volumes")) 8374 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) 8375 8376 dev.children = new_lvs 8377 8378 self.cfg.Update(self.instance, feedback_fn) 8379 8380 cstep = 5 8381 if self.early_release: 8382 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8383 cstep += 1 8384 self._RemoveOldStorage(self.target_node, iv_names) 8385 # WARNING: we release both node locks here, do not do other RPCs 8386 # than WaitForSync to the primary node 8387 self._ReleaseNodeLock([self.target_node, self.other_node]) 8388 8389 # Wait for sync 8390 # This can fail as the old devices are degraded and _WaitForSync 8391 # does a combined result over all disks, so we don't check its return value 8392 self.lu.LogStep(cstep, steps_total, "Sync devices") 8393 cstep += 1 8394 _WaitForSync(self.lu, self.instance) 8395 8396 # Check all devices manually 8397 self._CheckDevices(self.instance.primary_node, iv_names) 8398 8399 # Step: remove old storage 8400 if not self.early_release: 8401 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8402 cstep += 1 8403 self._RemoveOldStorage(self.target_node, iv_names)
8404
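# A minimal, self-contained sketch of the rename bookkeeping performed in
# step 4 above: the old LVs are pushed aside under a timestamped
# "_replaced-<time_t>" name and the new LVs take over the old names, so the
# drbd device can be re-attached to the paths it already knows.  The plain
# (vg, name) tuples and the sample LV names are illustrative stand-ins, not
# the ganeti objects.Disk API.
import time

def build_rename_plans(old_lvs, new_lvs, suffix=None):
    """Return (rename_old_to_new, rename_new_to_old) as lists of pair tuples."""
    if suffix is None:
        suffix = int(time.time())
    # first pass: move the old LVs out of the way
    rename_old_to_new = [((vg, name), (vg, "%s_replaced-%s" % (name, suffix)))
                         for (vg, name) in old_lvs]
    # second pass: give the new LVs the names the drbd device expects
    rename_new_to_old = [(new, old) for old, new in zip(old_lvs, new_lvs)]
    return rename_old_to_new, rename_new_to_old

_old = [("xenvg", "disk0_data"), ("xenvg", "disk0_meta")]
_new = [("xenvg", "uuid1.data_new"), ("xenvg", "uuid1.meta_new")]
_plan_old, _plan_new = build_rename_plans(_old, _new, suffix=1234567890)
assert _plan_old[0][1] == ("xenvg", "disk0_data_replaced-1234567890")
assert _plan_new[0] == (("xenvg", "uuid1.data_new"), ("xenvg", "disk0_data"))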
8405 - def _ExecDrbd8Secondary(self, feedback_fn):
8406 """Replace the secondary node for DRBD 8. 8407 8408 The algorithm for replace is quite complicated: 8409 - for all disks of the instance: 8410 - create new LVs on the new node with same names 8411 - shutdown the drbd device on the old secondary 8412 - disconnect the drbd network on the primary 8413 - create the drbd device on the new secondary 8414 - network attach the drbd on the primary, using an artifice: 8415 the drbd code for Attach() will connect to the network if it 8416 finds a device which is connected to the good local disks but 8417 not network enabled 8418 - wait for sync across all devices 8419 - remove all disks from the old secondary 8420 8421 Failures are not very well handled. 8422 8423 """ 8424 steps_total = 6 8425 8426 # Step: check device activation 8427 self.lu.LogStep(1, steps_total, "Check device existence") 8428 self._CheckDisksExistence([self.instance.primary_node]) 8429 self._CheckVolumeGroup([self.instance.primary_node]) 8430 8431 # Step: check other node consistency 8432 self.lu.LogStep(2, steps_total, "Check peer consistency") 8433 self._CheckDisksConsistency(self.instance.primary_node, True, True) 8434 8435 # Step: create new storage 8436 self.lu.LogStep(3, steps_total, "Allocate new storage") 8437 for idx, dev in enumerate(self.instance.disks): 8438 self.lu.LogInfo("Adding new local storage on %s for disk/%d" % 8439 (self.new_node, idx)) 8440 # we pass force_create=True to force LVM creation 8441 for new_lv in dev.children: 8442 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True, 8443 _GetInstanceInfoText(self.instance), False) 8444 8445 # Step 4: dbrd minors and drbd setups changes 8446 # after this, we must manually remove the drbd minors on both the 8447 # error and the success paths 8448 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8449 minors = self.cfg.AllocateDRBDMinor([self.new_node 8450 for dev in self.instance.disks], 8451 self.instance.name) 8452 logging.debug("Allocated minors %r", minors) 8453 8454 iv_names = {} 8455 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): 8456 self.lu.LogInfo("activating a new drbd on %s for disk/%d" % 8457 (self.new_node, idx)) 8458 # create new devices on new_node; note that we create two IDs: 8459 # one without port, so the drbd will be activated without 8460 # networking information on the new node at this stage, and one 8461 # with network, for the latter activation in step 4 8462 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id 8463 if self.instance.primary_node == o_node1: 8464 p_minor = o_minor1 8465 else: 8466 assert self.instance.primary_node == o_node2, "Three-node instance?" 
8467 p_minor = o_minor2 8468 8469 new_alone_id = (self.instance.primary_node, self.new_node, None, 8470 p_minor, new_minor, o_secret) 8471 new_net_id = (self.instance.primary_node, self.new_node, o_port, 8472 p_minor, new_minor, o_secret) 8473 8474 iv_names[idx] = (dev, dev.children, new_net_id) 8475 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, 8476 new_net_id) 8477 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, 8478 logical_id=new_alone_id, 8479 children=dev.children, 8480 size=dev.size) 8481 try: 8482 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd, 8483 _GetInstanceInfoText(self.instance), False) 8484 except errors.GenericError: 8485 self.cfg.ReleaseDRBDMinors(self.instance.name) 8486 raise 8487 8488 # We have new devices, shutdown the drbd on the old secondary 8489 for idx, dev in enumerate(self.instance.disks): 8490 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx) 8491 self.cfg.SetDiskID(dev, self.target_node) 8492 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg 8493 if msg: 8494 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" 8495 "node: %s" % (idx, msg), 8496 hint=("Please cleanup this device manually as" 8497 " soon as possible")) 8498 8499 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") 8500 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], 8501 self.node_secondary_ip, 8502 self.instance.disks)\ 8503 [self.instance.primary_node] 8504 8505 msg = result.fail_msg 8506 if msg: 8507 # detaches didn't succeed (unlikely) 8508 self.cfg.ReleaseDRBDMinors(self.instance.name) 8509 raise errors.OpExecError("Can't detach the disks from the network on" 8510 " old node: %s" % (msg,)) 8511 8512 # if we managed to detach at least one, we update all the disks of 8513 # the instance to point to the new secondary 8514 self.lu.LogInfo("Updating instance configuration") 8515 for dev, _, new_logical_id in iv_names.itervalues(): 8516 dev.logical_id = new_logical_id 8517 self.cfg.SetDiskID(dev, self.instance.primary_node) 8518 8519 self.cfg.Update(self.instance, feedback_fn) 8520 8521 # and now perform the drbd attach 8522 self.lu.LogInfo("Attaching primary drbds to new secondary" 8523 " (standalone => connected)") 8524 result = self.rpc.call_drbd_attach_net([self.instance.primary_node, 8525 self.new_node], 8526 self.node_secondary_ip, 8527 self.instance.disks, 8528 self.instance.name, 8529 False) 8530 for to_node, to_result in result.items(): 8531 msg = to_result.fail_msg 8532 if msg: 8533 self.lu.LogWarning("Can't attach drbd disks on node %s: %s", 8534 to_node, msg, 8535 hint=("please do a gnt-instance info to see the" 8536 " status of disks")) 8537 cstep = 5 8538 if self.early_release: 8539 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8540 cstep += 1 8541 self._RemoveOldStorage(self.target_node, iv_names) 8542 # WARNING: we release all node locks here, do not do other RPCs 8543 # than WaitForSync to the primary node 8544 self._ReleaseNodeLock([self.instance.primary_node, 8545 self.target_node, 8546 self.new_node]) 8547 8548 # Wait for sync 8549 # This can fail as the old devices are degraded and _WaitForSync 8550 # does a combined result over all disks, so we don't check its return value 8551 self.lu.LogStep(cstep, steps_total, "Sync devices") 8552 cstep += 1 8553 _WaitForSync(self.lu, self.instance) 8554 8555 # Check all devices manually 8556 self._CheckDevices(self.instance.primary_node, iv_names) 8557 8558 # Step: remove 
old storage 8559 if not self.early_release: 8560 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8561 self._RemoveOldStorage(self.target_node, iv_names)
8562
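# Illustrative sketch of the two logical IDs built per disk in step 4 of
# _ExecDrbd8Secondary: the tuple layout (node1, node2, port, minor1,
# minor2, secret) follows the unpacking of dev.logical_id in the method;
# the node names, port and minors below are made up.
def make_replacement_ids(primary_node, new_node, old_logical_id, new_minor):
    """Return (alone_id, net_id) for a DRBD disk moving to new_node."""
    o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret = old_logical_id
    p_minor = o_minor1 if primary_node == o_node1 else o_minor2
    # without a port the device is activated without networking ("alone")
    alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
    # the networked variant is what ends up in the instance configuration
    net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
    return alone_id, net_id

_alone, _net = make_replacement_ids("node1", "node3",
                                    ("node1", "node2", 11000, 0, 1, "secret"),
                                    new_minor=2)
assert _alone == ("node1", "node3", None, 0, 2, "secret")
assert _net == ("node1", "node3", 11000, 0, 2, "secret")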
8563 8564 -class LURepairNodeStorage(NoHooksLU):
8565 """Repairs the volume group on a node. 8566 8567 """ 8568 _OP_PARAMS = [ 8569 _PNodeName, 8570 ("storage_type", ht.NoDefault, _CheckStorageType), 8571 ("name", ht.NoDefault, ht.TNonEmptyString), 8572 ("ignore_consistency", False, ht.TBool), 8573 ] 8574 REQ_BGL = False 8575
8576 - def CheckArguments(self):
8577 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 8578 8579 storage_type = self.op.storage_type 8580 8581 if (constants.SO_FIX_CONSISTENCY not in 8582 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): 8583 raise errors.OpPrereqError("Storage units of type '%s' can not be" 8584 " repaired" % storage_type, 8585 errors.ECODE_INVAL)
8586
8587 - def ExpandNames(self):
8588 self.needed_locks = { 8589 locking.LEVEL_NODE: [self.op.node_name], 8590 }
8591
8592 - def _CheckFaultyDisks(self, instance, node_name):
8593 """Ensure faulty disks abort the opcode or at least warn.""" 8594 try: 8595 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, 8596 node_name, True): 8597 raise errors.OpPrereqError("Instance '%s' has faulty disks on" 8598 " node '%s'" % (instance.name, node_name), 8599 errors.ECODE_STATE) 8600 except errors.OpPrereqError, err: 8601 if self.op.ignore_consistency: 8602 self.proc.LogWarning(str(err.args[0])) 8603 else: 8604 raise
8605
8606 - def CheckPrereq(self):
8607 """Check prerequisites. 8608 8609 """ 8610 # Check whether any instance on this node has faulty disks 8611 for inst in _GetNodeInstances(self.cfg, self.op.node_name): 8612 if not inst.admin_up: 8613 continue 8614 check_nodes = set(inst.all_nodes) 8615 check_nodes.discard(self.op.node_name) 8616 for inst_node_name in check_nodes: 8617 self._CheckFaultyDisks(inst, inst_node_name)
8618
8619 - def Exec(self, feedback_fn):
8620 feedback_fn("Repairing storage unit '%s' on %s ..." % 8621 (self.op.name, self.op.node_name)) 8622 8623 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 8624 result = self.rpc.call_storage_execute(self.op.node_name, 8625 self.op.storage_type, st_args, 8626 self.op.name, 8627 constants.SO_FIX_CONSISTENCY) 8628 result.Raise("Failed to repair storage unit '%s' on %s" % 8629 (self.op.name, self.op.node_name))
8630
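# Condensed model of the storage-type check in CheckArguments above: a
# storage unit can only be repaired if its operation list contains the
# "fix consistency" action.  The table and string constants below are
# stand-ins, not constants.VALID_STORAGE_OPERATIONS.
_SO_FIX_CONSISTENCY = "fix-consistency"
_VALID_STORAGE_OPS = {
    "lvm-vg": [_SO_FIX_CONSISTENCY],  # assumed: volume groups are repairable
    "file": [],                       # assumed: nothing to repair for files
}

def can_repair(storage_type):
    return _SO_FIX_CONSISTENCY in _VALID_STORAGE_OPS.get(storage_type, [])

assert can_repair("lvm-vg")
assert not can_repair("file")
assert not can_repair("unknown-type")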
8631 8632 -class LUNodeEvacuationStrategy(NoHooksLU):
8633 """Computes the node evacuation strategy. 8634 8635 """ 8636 _OP_PARAMS = [ 8637 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 8638 ("remote_node", None, ht.TMaybeString), 8639 ("iallocator", None, ht.TMaybeString), 8640 ] 8641 REQ_BGL = False 8642
8643 - def CheckArguments(self):
8644 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8645
8646 - def ExpandNames(self):
8647 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 8648 self.needed_locks = locks = {} 8649 if self.op.remote_node is None: 8650 locks[locking.LEVEL_NODE] = locking.ALL_SET 8651 else: 8652 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 8653 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8654
8655 - def Exec(self, feedback_fn):
8656 if self.op.remote_node is not None: 8657 instances = [] 8658 for node in self.op.nodes: 8659 instances.extend(_GetNodeSecondaryInstances(self.cfg, node)) 8660 result = [] 8661 for i in instances: 8662 if i.primary_node == self.op.remote_node: 8663 raise errors.OpPrereqError("Node %s is the primary node of" 8664 " instance %s, cannot use it as" 8665 " secondary" % 8666 (self.op.remote_node, i.name), 8667 errors.ECODE_INVAL) 8668 result.append([i.name, self.op.remote_node]) 8669 else: 8670 ial = IAllocator(self.cfg, self.rpc, 8671 mode=constants.IALLOCATOR_MODE_MEVAC, 8672 evac_nodes=self.op.nodes) 8673 ial.Run(self.op.iallocator, validate=True) 8674 if not ial.success: 8675 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info, 8676 errors.ECODE_NORES) 8677 result = ial.result 8678 return result
8679
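# A stripped-down model of the manual branch of Exec above: when a remote
# node is given, every secondary instance of the evacuated nodes is paired
# with it, unless that node is already the instance's primary.  Plain
# dicts stand in for the configuration's instance objects.
def manual_evac_plan(secondary_instances, remote_node):
    plan = []
    for inst in secondary_instances:
        if inst["primary_node"] == remote_node:
            raise ValueError("%s is the primary node of %s, cannot use it as"
                             " secondary" % (remote_node, inst["name"]))
        plan.append([inst["name"], remote_node])
    return plan

_insts = [{"name": "web1", "primary_node": "node2"},
          {"name": "db1", "primary_node": "node4"}]
assert manual_evac_plan(_insts, "node3") == [["web1", "node3"],
                                             ["db1", "node3"]]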
8680 8681 -class LUGrowDisk(LogicalUnit):
8682 """Grow a disk of an instance. 8683 8684 """ 8685 HPATH = "disk-grow" 8686 HTYPE = constants.HTYPE_INSTANCE 8687 _OP_PARAMS = [ 8688 _PInstanceName, 8689 ("disk", ht.NoDefault, ht.TInt), 8690 ("amount", ht.NoDefault, ht.TInt), 8691 ("wait_for_sync", True, ht.TBool), 8692 ] 8693 REQ_BGL = False 8694
8695 - def ExpandNames(self):
8696 self._ExpandAndLockInstance() 8697 self.needed_locks[locking.LEVEL_NODE] = [] 8698 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8699
8700 - def DeclareLocks(self, level):
8701 if level == locking.LEVEL_NODE: 8702 self._LockInstancesNodes()
8703
8704 - def BuildHooksEnv(self):
8705 """Build hooks env. 8706 8707 This runs on the master, the primary and all the secondaries. 8708 8709 """ 8710 env = { 8711 "DISK": self.op.disk, 8712 "AMOUNT": self.op.amount, 8713 } 8714 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 8715 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 8716 return env, nl, nl
8717
8718 - def CheckPrereq(self):
8719 """Check prerequisites. 8720 8721 This checks that the instance is in the cluster. 8722 8723 """ 8724 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8725 assert instance is not None, \ 8726 "Cannot retrieve locked instance %s" % self.op.instance_name 8727 nodenames = list(instance.all_nodes) 8728 for node in nodenames: 8729 _CheckNodeOnline(self, node) 8730 8731 self.instance = instance 8732 8733 if instance.disk_template not in constants.DTS_GROWABLE: 8734 raise errors.OpPrereqError("Instance's disk layout does not support" 8735 " growing.", errors.ECODE_INVAL) 8736 8737 self.disk = instance.FindDisk(self.op.disk) 8738 8739 if instance.disk_template != constants.DT_FILE: 8740 # TODO: check the free disk space for file, when that feature will be 8741 # supported 8742 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8743
8744 - def Exec(self, feedback_fn):
8745 """Execute disk grow. 8746 8747 """ 8748 instance = self.instance 8749 disk = self.disk 8750 8751 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) 8752 if not disks_ok: 8753 raise errors.OpExecError("Cannot activate block device to grow") 8754 8755 for node in instance.all_nodes: 8756 self.cfg.SetDiskID(disk, node) 8757 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount) 8758 result.Raise("Grow request failed to node %s" % node) 8759 8760 # TODO: Rewrite code to work properly 8761 # DRBD goes into sync mode for a short amount of time after executing the 8762 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby 8763 # calling "resize" in sync mode fails. Sleeping for a short amount of 8764 # time is a work-around. 8765 time.sleep(5) 8766 8767 disk.RecordGrow(self.op.amount) 8768 self.cfg.Update(instance, feedback_fn) 8769 if self.op.wait_for_sync: 8770 disk_abort = not _WaitForSync(self, instance, disks=[disk]) 8771 if disk_abort: 8772 self.proc.LogWarning("Warning: disk sync-ing has not returned a good" 8773 " status.\nPlease check the instance.") 8774 if not instance.admin_up: 8775 _SafeShutdownInstanceDisks(self, instance, disks=[disk]) 8776 elif not instance.admin_up: 8777 self.proc.LogWarning("Not shutting down the disk even if the instance is" 8778 " not supposed to be running because no wait for" 8779 " sync mode was requested.")
8780
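# Tiny decision model of the tail of LUGrowDisk.Exec above: after the
# per-node grow calls, what happens depends on the wait_for_sync flag and
# on whether the instance is configured up.  Returns symbolic action names
# and is purely illustrative.
def post_grow_actions(wait_for_sync, instance_up, sync_ok):
    actions = []
    if wait_for_sync:
        actions.append("wait_for_sync")
        if not sync_ok:
            actions.append("warn_degraded")
            if not instance_up:
                actions.append("shutdown_disks")
    elif not instance_up:
        actions.append("warn_disks_left_active")
    return actions

assert post_grow_actions(True, False, False) == ["wait_for_sync",
                                                 "warn_degraded",
                                                 "shutdown_disks"]
assert post_grow_actions(False, False, True) == ["warn_disks_left_active"]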
8781 8782 -class LUQueryInstanceData(NoHooksLU):
8783 """Query runtime instance data. 8784 8785 """ 8786 _OP_PARAMS = [ 8787 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 8788 ("static", False, ht.TBool), 8789 ] 8790 REQ_BGL = False 8791
8792 - def ExpandNames(self):
8793 self.needed_locks = {} 8794 self.share_locks = dict.fromkeys(locking.LEVELS, 1) 8795 8796 if self.op.instances: 8797 self.wanted_names = [] 8798 for name in self.op.instances: 8799 full_name = _ExpandInstanceName(self.cfg, name) 8800 self.wanted_names.append(full_name) 8801 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names 8802 else: 8803 self.wanted_names = None 8804 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 8805 8806 self.needed_locks[locking.LEVEL_NODE] = [] 8807 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8808
8809 - def DeclareLocks(self, level):
8810 if level == locking.LEVEL_NODE: 8811 self._LockInstancesNodes()
8812
8813 - def CheckPrereq(self):
8814 """Check prerequisites. 8815 8816 This only checks the optional instance list against the existing names. 8817 8818 """ 8819 if self.wanted_names is None: 8820 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 8821 8822 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 8823 in self.wanted_names]
8824
8825 - def _ComputeBlockdevStatus(self, node, instance_name, dev):
8826 """Returns the status of a block device 8827 8828 """ 8829 if self.op.static or not node: 8830 return None 8831 8832 self.cfg.SetDiskID(dev, node) 8833 8834 result = self.rpc.call_blockdev_find(node, dev) 8835 if result.offline: 8836 return None 8837 8838 result.Raise("Can't compute disk status for %s" % instance_name) 8839 8840 status = result.payload 8841 if status is None: 8842 return None 8843 8844 return (status.dev_path, status.major, status.minor, 8845 status.sync_percent, status.estimated_time, 8846 status.is_degraded, status.ldisk_status)
8847
8848 - def _ComputeDiskStatus(self, instance, snode, dev):
8849 """Compute block device status. 8850 8851 """ 8852 if dev.dev_type in constants.LDS_DRBD: 8853 # we change the snode then (otherwise we use the one passed in) 8854 if dev.logical_id[0] == instance.primary_node: 8855 snode = dev.logical_id[1] 8856 else: 8857 snode = dev.logical_id[0] 8858 8859 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, 8860 instance.name, dev) 8861 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev) 8862 8863 if dev.children: 8864 dev_children = [self._ComputeDiskStatus(instance, snode, child) 8865 for child in dev.children] 8866 else: 8867 dev_children = [] 8868 8869 data = { 8870 "iv_name": dev.iv_name, 8871 "dev_type": dev.dev_type, 8872 "logical_id": dev.logical_id, 8873 "physical_id": dev.physical_id, 8874 "pstatus": dev_pstatus, 8875 "sstatus": dev_sstatus, 8876 "children": dev_children, 8877 "mode": dev.mode, 8878 "size": dev.size, 8879 } 8880 8881 return data
8882
8883 - def Exec(self, feedback_fn):
8884 """Gather and return data""" 8885 result = {} 8886 8887 cluster = self.cfg.GetClusterInfo() 8888 8889 for instance in self.wanted_instances: 8890 if not self.op.static: 8891 remote_info = self.rpc.call_instance_info(instance.primary_node, 8892 instance.name, 8893 instance.hypervisor) 8894 remote_info.Raise("Error checking node %s" % instance.primary_node) 8895 remote_info = remote_info.payload 8896 if remote_info and "state" in remote_info: 8897 remote_state = "up" 8898 else: 8899 remote_state = "down" 8900 else: 8901 remote_state = None 8902 if instance.admin_up: 8903 config_state = "up" 8904 else: 8905 config_state = "down" 8906 8907 disks = [self._ComputeDiskStatus(instance, None, device) 8908 for device in instance.disks] 8909 8910 idict = { 8911 "name": instance.name, 8912 "config_state": config_state, 8913 "run_state": remote_state, 8914 "pnode": instance.primary_node, 8915 "snodes": instance.secondary_nodes, 8916 "os": instance.os, 8917 # this happens to be the same format used for hooks 8918 "nics": _NICListToTuple(self, instance.nics), 8919 "disk_template": instance.disk_template, 8920 "disks": disks, 8921 "hypervisor": instance.hypervisor, 8922 "network_port": instance.network_port, 8923 "hv_instance": instance.hvparams, 8924 "hv_actual": cluster.FillHV(instance, skip_globals=True), 8925 "be_instance": instance.beparams, 8926 "be_actual": cluster.FillBE(instance), 8927 "os_instance": instance.osparams, 8928 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), 8929 "serial_no": instance.serial_no, 8930 "mtime": instance.mtime, 8931 "ctime": instance.ctime, 8932 "uuid": instance.uuid, 8933 } 8934 8935 result[instance.name] = idict 8936 8937 return result
8938
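# For reference, each per-instance entry returned by LUQueryInstanceData.Exec
# has roughly the shape below; the keys mirror the idict built above, while
# all values are invented examples.  "nics" uses the (ip, mac, mode, link)
# tuples produced by _NICListToTuple and "disks" holds nested
# _ComputeDiskStatus() dictionaries.
_EXAMPLE_IDICT = {
    "name": "web1.example.com",
    "config_state": "up",            # from instance.admin_up
    "run_state": "down",             # None when static=True was requested
    "pnode": "node1.example.com",
    "snodes": ["node2.example.com"],
    "os": "debian-image",
    "nics": [("198.51.100.10", "aa:00:00:11:22:33", "bridged", "xen-br0")],
    "disk_template": "drbd",
    "disks": [],
    "hypervisor": "xen-pvm",
    "network_port": None,
    "hv_instance": {}, "hv_actual": {},
    "be_instance": {}, "be_actual": {},
    "os_instance": {}, "os_actual": {},
    "serial_no": 5, "mtime": 0.0, "ctime": 0.0, "uuid": "a1b2c3",
}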
8939 8940 -class LUSetInstanceParams(LogicalUnit):
8941 """Modifies an instance's parameters. 8942 8943 """ 8944 HPATH = "instance-modify" 8945 HTYPE = constants.HTYPE_INSTANCE 8946 _OP_PARAMS = [ 8947 _PInstanceName, 8948 ("nics", ht.EmptyList, ht.TList), 8949 ("disks", ht.EmptyList, ht.TList), 8950 ("beparams", ht.EmptyDict, ht.TDict), 8951 ("hvparams", ht.EmptyDict, ht.TDict), 8952 ("disk_template", None, ht.TMaybeString), 8953 ("remote_node", None, ht.TMaybeString), 8954 ("os_name", None, ht.TMaybeString), 8955 ("force_variant", False, ht.TBool), 8956 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)), 8957 _PForce, 8958 ] 8959 REQ_BGL = False 8960
8961 - def CheckArguments(self):
8962 if not (self.op.nics or self.op.disks or self.op.disk_template or 8963 self.op.hvparams or self.op.beparams or self.op.os_name): 8964 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) 8965 8966 if self.op.hvparams: 8967 _CheckGlobalHvParams(self.op.hvparams) 8968 8969 # Disk validation 8970 disk_addremove = 0 8971 for disk_op, disk_dict in self.op.disks: 8972 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES) 8973 if disk_op == constants.DDM_REMOVE: 8974 disk_addremove += 1 8975 continue 8976 elif disk_op == constants.DDM_ADD: 8977 disk_addremove += 1 8978 else: 8979 if not isinstance(disk_op, int): 8980 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL) 8981 if not isinstance(disk_dict, dict): 8982 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict 8983 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 8984 8985 if disk_op == constants.DDM_ADD: 8986 mode = disk_dict.setdefault('mode', constants.DISK_RDWR) 8987 if mode not in constants.DISK_ACCESS_SET: 8988 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, 8989 errors.ECODE_INVAL) 8990 size = disk_dict.get('size', None) 8991 if size is None: 8992 raise errors.OpPrereqError("Required disk parameter size missing", 8993 errors.ECODE_INVAL) 8994 try: 8995 size = int(size) 8996 except (TypeError, ValueError), err: 8997 raise errors.OpPrereqError("Invalid disk size parameter: %s" % 8998 str(err), errors.ECODE_INVAL) 8999 disk_dict['size'] = size 9000 else: 9001 # modification of disk 9002 if 'size' in disk_dict: 9003 raise errors.OpPrereqError("Disk size change not possible, use" 9004 " grow-disk", errors.ECODE_INVAL) 9005 9006 if disk_addremove > 1: 9007 raise errors.OpPrereqError("Only one disk add or remove operation" 9008 " supported at a time", errors.ECODE_INVAL) 9009 9010 if self.op.disks and self.op.disk_template is not None: 9011 raise errors.OpPrereqError("Disk template conversion and other disk" 9012 " changes not supported at the same time", 9013 errors.ECODE_INVAL) 9014 9015 if self.op.disk_template: 9016 _CheckDiskTemplate(self.op.disk_template) 9017 if (self.op.disk_template in constants.DTS_NET_MIRROR and 9018 self.op.remote_node is None): 9019 raise errors.OpPrereqError("Changing the disk template to a mirrored" 9020 " one requires specifying a secondary node", 9021 errors.ECODE_INVAL) 9022 9023 # NIC validation 9024 nic_addremove = 0 9025 for nic_op, nic_dict in self.op.nics: 9026 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES) 9027 if nic_op == constants.DDM_REMOVE: 9028 nic_addremove += 1 9029 continue 9030 elif nic_op == constants.DDM_ADD: 9031 nic_addremove += 1 9032 else: 9033 if not isinstance(nic_op, int): 9034 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL) 9035 if not isinstance(nic_dict, dict): 9036 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict 9037 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 9038 9039 # nic_dict should be a dict 9040 nic_ip = nic_dict.get('ip', None) 9041 if nic_ip is not None: 9042 if nic_ip.lower() == constants.VALUE_NONE: 9043 nic_dict['ip'] = None 9044 else: 9045 if not netutils.IPAddress.IsValid(nic_ip): 9046 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip, 9047 errors.ECODE_INVAL) 9048 9049 nic_bridge = nic_dict.get('bridge', None) 9050 nic_link = nic_dict.get('link', None) 9051 if nic_bridge and nic_link: 9052 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 9053 " at the same time", errors.ECODE_INVAL) 9054 elif nic_bridge and 
nic_bridge.lower() == constants.VALUE_NONE: 9055 nic_dict['bridge'] = None 9056 elif nic_link and nic_link.lower() == constants.VALUE_NONE: 9057 nic_dict['link'] = None 9058 9059 if nic_op == constants.DDM_ADD: 9060 nic_mac = nic_dict.get('mac', None) 9061 if nic_mac is None: 9062 nic_dict['mac'] = constants.VALUE_AUTO 9063 9064 if 'mac' in nic_dict: 9065 nic_mac = nic_dict['mac'] 9066 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9067 nic_mac = utils.NormalizeAndValidateMac(nic_mac) 9068 9069 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO: 9070 raise errors.OpPrereqError("'auto' is not a valid MAC address when" 9071 " modifying an existing nic", 9072 errors.ECODE_INVAL) 9073 9074 if nic_addremove > 1: 9075 raise errors.OpPrereqError("Only one NIC add or remove operation" 9076 " supported at a time", errors.ECODE_INVAL)
9077
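# The disks/nics opcode parameters validated above are lists of
# (operation, params) pairs, where the operation is either an add/remove
# marker or the integer index of an existing device.  A stripped-down
# validator for the disk list; the bare "add"/"remove" strings stand in
# for constants.DDM_ADD/DDM_REMOVE.
def check_disk_mods(disk_mods, max_addremove=1):
    addremove = 0
    for op, params in disk_mods:
        if not isinstance(params, dict):
            raise ValueError("Invalid disk value: expected dict, got %r" %
                             (params,))
        if op in ("add", "remove"):
            addremove += 1
        elif not isinstance(op, int):
            raise ValueError("Invalid disk index %r" % (op,))
        if op == "add" and "size" not in params:
            raise ValueError("Required disk parameter size missing")
        if isinstance(op, int) and "size" in params:
            raise ValueError("Disk size change not possible, use grow-disk")
    if addremove > max_addremove:
        raise ValueError("Only one disk add or remove operation at a time")

check_disk_mods([("add", {"size": 1024, "mode": "rw"})])  # accepted
check_disk_mods([(0, {"mode": "ro"})])                    # modify disk 0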
9078 - def ExpandNames(self):
9079 self._ExpandAndLockInstance() 9080 self.needed_locks[locking.LEVEL_NODE] = [] 9081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9082
9083 - def DeclareLocks(self, level):
9084 if level == locking.LEVEL_NODE: 9085 self._LockInstancesNodes() 9086 if self.op.disk_template and self.op.remote_node: 9087 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 9088 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9089
9090 - def BuildHooksEnv(self):
9091 """Build hooks env. 9092 9093 This runs on the master, primary and secondaries. 9094 9095 """ 9096 args = dict() 9097 if constants.BE_MEMORY in self.be_new: 9098 args['memory'] = self.be_new[constants.BE_MEMORY] 9099 if constants.BE_VCPUS in self.be_new: 9100 args['vcpus'] = self.be_new[constants.BE_VCPUS] 9101 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk 9102 # information at all. 9103 if self.op.nics: 9104 args['nics'] = [] 9105 nic_override = dict(self.op.nics) 9106 for idx, nic in enumerate(self.instance.nics): 9107 if idx in nic_override: 9108 this_nic_override = nic_override[idx] 9109 else: 9110 this_nic_override = {} 9111 if 'ip' in this_nic_override: 9112 ip = this_nic_override['ip'] 9113 else: 9114 ip = nic.ip 9115 if 'mac' in this_nic_override: 9116 mac = this_nic_override['mac'] 9117 else: 9118 mac = nic.mac 9119 if idx in self.nic_pnew: 9120 nicparams = self.nic_pnew[idx] 9121 else: 9122 nicparams = self.cluster.SimpleFillNIC(nic.nicparams) 9123 mode = nicparams[constants.NIC_MODE] 9124 link = nicparams[constants.NIC_LINK] 9125 args['nics'].append((ip, mac, mode, link)) 9126 if constants.DDM_ADD in nic_override: 9127 ip = nic_override[constants.DDM_ADD].get('ip', None) 9128 mac = nic_override[constants.DDM_ADD]['mac'] 9129 nicparams = self.nic_pnew[constants.DDM_ADD] 9130 mode = nicparams[constants.NIC_MODE] 9131 link = nicparams[constants.NIC_LINK] 9132 args['nics'].append((ip, mac, mode, link)) 9133 elif constants.DDM_REMOVE in nic_override: 9134 del args['nics'][-1] 9135 9136 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) 9137 if self.op.disk_template: 9138 env["NEW_DISK_TEMPLATE"] = self.op.disk_template 9139 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 9140 return env, nl, nl
9141
9142 - def CheckPrereq(self):
9143 """Check prerequisites. 9144 9145 This only checks the instance list against the existing names. 9146 9147 """ 9148 # checking the new params on the primary/secondary nodes 9149 9150 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 9151 cluster = self.cluster = self.cfg.GetClusterInfo() 9152 assert self.instance is not None, \ 9153 "Cannot retrieve locked instance %s" % self.op.instance_name 9154 pnode = instance.primary_node 9155 nodelist = list(instance.all_nodes) 9156 9157 # OS change 9158 if self.op.os_name and not self.op.force: 9159 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name, 9160 self.op.force_variant) 9161 instance_os = self.op.os_name 9162 else: 9163 instance_os = instance.os 9164 9165 if self.op.disk_template: 9166 if instance.disk_template == self.op.disk_template: 9167 raise errors.OpPrereqError("Instance already has disk template %s" % 9168 instance.disk_template, errors.ECODE_INVAL) 9169 9170 if (instance.disk_template, 9171 self.op.disk_template) not in self._DISK_CONVERSIONS: 9172 raise errors.OpPrereqError("Unsupported disk template conversion from" 9173 " %s to %s" % (instance.disk_template, 9174 self.op.disk_template), 9175 errors.ECODE_INVAL) 9176 _CheckInstanceDown(self, instance, "cannot change disk template") 9177 if self.op.disk_template in constants.DTS_NET_MIRROR: 9178 if self.op.remote_node == pnode: 9179 raise errors.OpPrereqError("Given new secondary node %s is the same" 9180 " as the primary node of the instance" % 9181 self.op.remote_node, errors.ECODE_STATE) 9182 _CheckNodeOnline(self, self.op.remote_node) 9183 _CheckNodeNotDrained(self, self.op.remote_node) 9184 disks = [{"size": d.size} for d in instance.disks] 9185 required = _ComputeDiskSize(self.op.disk_template, disks) 9186 _CheckNodesFreeDisk(self, [self.op.remote_node], required) 9187 9188 # hvparams processing 9189 if self.op.hvparams: 9190 hv_type = instance.hypervisor 9191 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams) 9192 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) 9193 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict) 9194 9195 # local check 9196 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new) 9197 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new) 9198 self.hv_new = hv_new # the new actual values 9199 self.hv_inst = i_hvdict # the new dict (without defaults) 9200 else: 9201 self.hv_new = self.hv_inst = {} 9202 9203 # beparams processing 9204 if self.op.beparams: 9205 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams, 9206 use_none=True) 9207 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) 9208 be_new = cluster.SimpleFillBE(i_bedict) 9209 self.be_new = be_new # the new actual values 9210 self.be_inst = i_bedict # the new dict (without defaults) 9211 else: 9212 self.be_new = self.be_inst = {} 9213 9214 # osparams processing 9215 if self.op.osparams: 9216 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 9217 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 9218 self.os_inst = i_osdict # the new dict (without defaults) 9219 else: 9220 self.os_inst = {} 9221 9222 self.warn = [] 9223 9224 if constants.BE_MEMORY in self.op.beparams and not self.op.force: 9225 mem_check_list = [pnode] 9226 if be_new[constants.BE_AUTO_BALANCE]: 9227 # either we changed auto_balance to yes or it was from before 9228 mem_check_list.extend(instance.secondary_nodes) 9229 instance_info = self.rpc.call_instance_info(pnode, instance.name, 9230 
instance.hypervisor) 9231 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(), 9232 instance.hypervisor) 9233 pninfo = nodeinfo[pnode] 9234 msg = pninfo.fail_msg 9235 if msg: 9236 # Assume the primary node is unreachable and go ahead 9237 self.warn.append("Can't get info from primary node %s: %s" % 9238 (pnode, msg)) 9239 elif not isinstance(pninfo.payload.get('memory_free', None), int): 9240 self.warn.append("Node data from primary node %s doesn't contain" 9241 " free memory information" % pnode) 9242 elif instance_info.fail_msg: 9243 self.warn.append("Can't get instance runtime information: %s" % 9244 instance_info.fail_msg) 9245 else: 9246 if instance_info.payload: 9247 current_mem = int(instance_info.payload['memory']) 9248 else: 9249 # Assume instance not running 9250 # (there is a slight race condition here, but it's not very probable, 9251 # and we have no other way to check) 9252 current_mem = 0 9253 miss_mem = (be_new[constants.BE_MEMORY] - current_mem - 9254 pninfo.payload['memory_free']) 9255 if miss_mem > 0: 9256 raise errors.OpPrereqError("This change will prevent the instance" 9257 " from starting, due to %d MB of memory" 9258 " missing on its primary node" % miss_mem, 9259 errors.ECODE_NORES) 9260 9261 if be_new[constants.BE_AUTO_BALANCE]: 9262 for node, nres in nodeinfo.items(): 9263 if node not in instance.secondary_nodes: 9264 continue 9265 msg = nres.fail_msg 9266 if msg: 9267 self.warn.append("Can't get info from secondary node %s: %s" % 9268 (node, msg)) 9269 elif not isinstance(nres.payload.get('memory_free', None), int): 9270 self.warn.append("Secondary node %s didn't return free" 9271 " memory information" % node) 9272 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']: 9273 self.warn.append("Not enough memory to failover instance to" 9274 " secondary node %s" % node) 9275 9276 # NIC processing 9277 self.nic_pnew = {} 9278 self.nic_pinst = {} 9279 for nic_op, nic_dict in self.op.nics: 9280 if nic_op == constants.DDM_REMOVE: 9281 if not instance.nics: 9282 raise errors.OpPrereqError("Instance has no NICs, cannot remove", 9283 errors.ECODE_INVAL) 9284 continue 9285 if nic_op != constants.DDM_ADD: 9286 # an existing nic 9287 if not instance.nics: 9288 raise errors.OpPrereqError("Invalid NIC index %s, instance has" 9289 " no NICs" % nic_op, 9290 errors.ECODE_INVAL) 9291 if nic_op < 0 or nic_op >= len(instance.nics): 9292 raise errors.OpPrereqError("Invalid NIC index %s, valid values" 9293 " are 0 to %d" % 9294 (nic_op, len(instance.nics) - 1), 9295 errors.ECODE_INVAL) 9296 old_nic_params = instance.nics[nic_op].nicparams 9297 old_nic_ip = instance.nics[nic_op].ip 9298 else: 9299 old_nic_params = {} 9300 old_nic_ip = None 9301 9302 update_params_dict = dict([(key, nic_dict[key]) 9303 for key in constants.NICS_PARAMETERS 9304 if key in nic_dict]) 9305 9306 if 'bridge' in nic_dict: 9307 update_params_dict[constants.NIC_LINK] = nic_dict['bridge'] 9308 9309 new_nic_params = _GetUpdatedParams(old_nic_params, 9310 update_params_dict) 9311 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES) 9312 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params) 9313 objects.NIC.CheckParameterSyntax(new_filled_nic_params) 9314 self.nic_pinst[nic_op] = new_nic_params 9315 self.nic_pnew[nic_op] = new_filled_nic_params 9316 new_nic_mode = new_filled_nic_params[constants.NIC_MODE] 9317 9318 if new_nic_mode == constants.NIC_MODE_BRIDGED: 9319 nic_bridge = new_filled_nic_params[constants.NIC_LINK] 9320 msg = self.rpc.call_bridges_exist(pnode, 
[nic_bridge]).fail_msg 9321 if msg: 9322 msg = "Error checking bridges on node %s: %s" % (pnode, msg) 9323 if self.op.force: 9324 self.warn.append(msg) 9325 else: 9326 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 9327 if new_nic_mode == constants.NIC_MODE_ROUTED: 9328 if 'ip' in nic_dict: 9329 nic_ip = nic_dict['ip'] 9330 else: 9331 nic_ip = old_nic_ip 9332 if nic_ip is None: 9333 raise errors.OpPrereqError('Cannot set the nic ip to None' 9334 ' on a routed nic', errors.ECODE_INVAL) 9335 if 'mac' in nic_dict: 9336 nic_mac = nic_dict['mac'] 9337 if nic_mac is None: 9338 raise errors.OpPrereqError('Cannot set the nic mac to None', 9339 errors.ECODE_INVAL) 9340 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9341 # otherwise generate the mac 9342 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId()) 9343 else: 9344 # or validate/reserve the current one 9345 try: 9346 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId()) 9347 except errors.ReservationError: 9348 raise errors.OpPrereqError("MAC address %s already in use" 9349 " in cluster" % nic_mac, 9350 errors.ECODE_NOTUNIQUE) 9351 9352 # DISK processing 9353 if self.op.disks and instance.disk_template == constants.DT_DISKLESS: 9354 raise errors.OpPrereqError("Disk operations not supported for" 9355 " diskless instances", 9356 errors.ECODE_INVAL) 9357 for disk_op, _ in self.op.disks: 9358 if disk_op == constants.DDM_REMOVE: 9359 if len(instance.disks) == 1: 9360 raise errors.OpPrereqError("Cannot remove the last disk of" 9361 " an instance", errors.ECODE_INVAL) 9362 _CheckInstanceDown(self, instance, "cannot remove disks") 9363 9364 if (disk_op == constants.DDM_ADD and 9365 len(instance.nics) >= constants.MAX_DISKS): 9366 raise errors.OpPrereqError("Instance has too many disks (%d), cannot" 9367 " add more" % constants.MAX_DISKS, 9368 errors.ECODE_STATE) 9369 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE): 9370 # an existing disk 9371 if disk_op < 0 or disk_op >= len(instance.disks): 9372 raise errors.OpPrereqError("Invalid disk index %s, valid values" 9373 " are 0 to %d" % 9374 (disk_op, len(instance.disks)), 9375 errors.ECODE_INVAL) 9376 9377 return
9378
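# The memory feasibility check in CheckPrereq boils down to this
# arithmetic: the requested memory must fit into the primary node's free
# memory plus whatever the instance currently uses, otherwise the change
# is refused.  Values are in MiB and purely hypothetical.
def missing_memory(new_memory, current_instance_memory, node_memory_free):
    return new_memory - current_instance_memory - node_memory_free

# Growing a running 1024 MiB instance to 4096 MiB on a node with only
# 2048 MiB free leaves a 1024 MiB shortfall, so the operation fails.
assert missing_memory(4096, 1024, 2048) == 1024
# With 4096 MiB free the same change fits (a result <= 0 is acceptable).
assert missing_memory(4096, 1024, 4096) == -1024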
9379 - def _ConvertPlainToDrbd(self, feedback_fn):
9380 """Converts an instance from plain to drbd. 9381 9382 """ 9383 feedback_fn("Converting template to drbd") 9384 instance = self.instance 9385 pnode = instance.primary_node 9386 snode = self.op.remote_node 9387 9388 # create a fake disk info for _GenerateDiskTemplate 9389 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks] 9390 new_disks = _GenerateDiskTemplate(self, self.op.disk_template, 9391 instance.name, pnode, [snode], 9392 disk_info, None, None, 0) 9393 info = _GetInstanceInfoText(instance) 9394 feedback_fn("Creating aditional volumes...") 9395 # first, create the missing data and meta devices 9396 for disk in new_disks: 9397 # unfortunately this is... not too nice 9398 _CreateSingleBlockDev(self, pnode, instance, disk.children[1], 9399 info, True) 9400 for child in disk.children: 9401 _CreateSingleBlockDev(self, snode, instance, child, info, True) 9402 # at this stage, all new LVs have been created, we can rename the 9403 # old ones 9404 feedback_fn("Renaming original volumes...") 9405 rename_list = [(o, n.children[0].logical_id) 9406 for (o, n) in zip(instance.disks, new_disks)] 9407 result = self.rpc.call_blockdev_rename(pnode, rename_list) 9408 result.Raise("Failed to rename original LVs") 9409 9410 feedback_fn("Initializing DRBD devices...") 9411 # all child devices are in place, we can now create the DRBD devices 9412 for disk in new_disks: 9413 for node in [pnode, snode]: 9414 f_create = node == pnode 9415 _CreateSingleBlockDev(self, node, instance, disk, info, f_create) 9416 9417 # at this point, the instance has been modified 9418 instance.disk_template = constants.DT_DRBD8 9419 instance.disks = new_disks 9420 self.cfg.Update(instance, feedback_fn) 9421 9422 # disks are created, waiting for sync 9423 disk_abort = not _WaitForSync(self, instance) 9424 if disk_abort: 9425 raise errors.OpExecError("There are some degraded disks for" 9426 " this instance, please cleanup manually")
9427
9428 - def _ConvertDrbdToPlain(self, feedback_fn):
9429 """Converts an instance from drbd to plain. 9430 9431 """ 9432 instance = self.instance 9433 assert len(instance.secondary_nodes) == 1 9434 pnode = instance.primary_node 9435 snode = instance.secondary_nodes[0] 9436 feedback_fn("Converting template to plain") 9437 9438 old_disks = instance.disks 9439 new_disks = [d.children[0] for d in old_disks] 9440 9441 # copy over size and mode 9442 for parent, child in zip(old_disks, new_disks): 9443 child.size = parent.size 9444 child.mode = parent.mode 9445 9446 # update instance structure 9447 instance.disks = new_disks 9448 instance.disk_template = constants.DT_PLAIN 9449 self.cfg.Update(instance, feedback_fn) 9450 9451 feedback_fn("Removing volumes on the secondary node...") 9452 for disk in old_disks: 9453 self.cfg.SetDiskID(disk, snode) 9454 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg 9455 if msg: 9456 self.LogWarning("Could not remove block device %s on node %s," 9457 " continuing anyway: %s", disk.iv_name, snode, msg) 9458 9459 feedback_fn("Removing unneeded volumes on the primary node...") 9460 for idx, disk in enumerate(old_disks): 9461 meta = disk.children[1] 9462 self.cfg.SetDiskID(meta, pnode) 9463 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg 9464 if msg: 9465 self.LogWarning("Could not remove metadata for disk %d on node %s," 9466 " continuing anyway: %s", idx, pnode, msg)
9467 9468
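# The drbd -> plain conversion above keeps only the data child (child 0)
# of each DRBD disk and copies the size/mode bookkeeping over before the
# meta and secondary volumes are removed.  A minimal model with dicts
# standing in for objects.Disk:
def drbd_to_plain(drbd_disks):
    plain_disks = []
    for disk in drbd_disks:
        data_child = dict(disk["children"][0])  # child 0 is the data LV
        data_child["size"] = disk["size"]
        data_child["mode"] = disk["mode"]
        plain_disks.append(data_child)
    return plain_disks

_drbd = [{"size": 2048, "mode": "rw",
          "children": [{"name": "data_lv"}, {"name": "meta_lv"}]}]
assert drbd_to_plain(_drbd) == [{"name": "data_lv", "size": 2048,
                                 "mode": "rw"}]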
9469 - def Exec(self, feedback_fn):
9470 """Modifies an instance. 9471 9472 All parameters take effect only at the next restart of the instance. 9473 9474 """ 9475 # Process here the warnings from CheckPrereq, as we don't have a 9476 # feedback_fn there. 9477 for warn in self.warn: 9478 feedback_fn("WARNING: %s" % warn) 9479 9480 result = [] 9481 instance = self.instance 9482 # disk changes 9483 for disk_op, disk_dict in self.op.disks: 9484 if disk_op == constants.DDM_REMOVE: 9485 # remove the last disk 9486 device = instance.disks.pop() 9487 device_idx = len(instance.disks) 9488 for node, disk in device.ComputeNodeTree(instance.primary_node): 9489 self.cfg.SetDiskID(disk, node) 9490 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg 9491 if msg: 9492 self.LogWarning("Could not remove disk/%d on node %s: %s," 9493 " continuing anyway", device_idx, node, msg) 9494 result.append(("disk/%d" % device_idx, "remove")) 9495 elif disk_op == constants.DDM_ADD: 9496 # add a new disk 9497 if instance.disk_template == constants.DT_FILE: 9498 file_driver, file_path = instance.disks[0].logical_id 9499 file_path = os.path.dirname(file_path) 9500 else: 9501 file_driver = file_path = None 9502 disk_idx_base = len(instance.disks) 9503 new_disk = _GenerateDiskTemplate(self, 9504 instance.disk_template, 9505 instance.name, instance.primary_node, 9506 instance.secondary_nodes, 9507 [disk_dict], 9508 file_path, 9509 file_driver, 9510 disk_idx_base)[0] 9511 instance.disks.append(new_disk) 9512 info = _GetInstanceInfoText(instance) 9513 9514 logging.info("Creating volume %s for instance %s", 9515 new_disk.iv_name, instance.name) 9516 # Note: this needs to be kept in sync with _CreateDisks 9517 #HARDCODE 9518 for node in instance.all_nodes: 9519 f_create = node == instance.primary_node 9520 try: 9521 _CreateBlockDev(self, node, instance, new_disk, 9522 f_create, info, f_create) 9523 except errors.OpExecError, err: 9524 self.LogWarning("Failed to create volume %s (%s) on" 9525 " node %s: %s", 9526 new_disk.iv_name, new_disk, node, err) 9527 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" % 9528 (new_disk.size, new_disk.mode))) 9529 else: 9530 # change a given disk 9531 instance.disks[disk_op].mode = disk_dict['mode'] 9532 result.append(("disk.mode/%d" % disk_op, disk_dict['mode'])) 9533 9534 if self.op.disk_template: 9535 r_shut = _ShutdownInstanceDisks(self, instance) 9536 if not r_shut: 9537 raise errors.OpExecError("Cannot shutdow instance disks, unable to" 9538 " proceed with disk template conversion") 9539 mode = (instance.disk_template, self.op.disk_template) 9540 try: 9541 self._DISK_CONVERSIONS[mode](self, feedback_fn) 9542 except: 9543 self.cfg.ReleaseDRBDMinors(instance.name) 9544 raise 9545 result.append(("disk_template", self.op.disk_template)) 9546 9547 # NIC changes 9548 for nic_op, nic_dict in self.op.nics: 9549 if nic_op == constants.DDM_REMOVE: 9550 # remove the last nic 9551 del instance.nics[-1] 9552 result.append(("nic.%d" % len(instance.nics), "remove")) 9553 elif nic_op == constants.DDM_ADD: 9554 # mac and bridge should be set, by now 9555 mac = nic_dict['mac'] 9556 ip = nic_dict.get('ip', None) 9557 nicparams = self.nic_pinst[constants.DDM_ADD] 9558 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams) 9559 instance.nics.append(new_nic) 9560 result.append(("nic.%d" % (len(instance.nics) - 1), 9561 "add:mac=%s,ip=%s,mode=%s,link=%s" % 9562 (new_nic.mac, new_nic.ip, 9563 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE], 9564 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK] 9565 ))) 9566 else: 
9567 for key in 'mac', 'ip': 9568 if key in nic_dict: 9569 setattr(instance.nics[nic_op], key, nic_dict[key]) 9570 if nic_op in self.nic_pinst: 9571 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op] 9572 for key, val in nic_dict.iteritems(): 9573 result.append(("nic.%s/%d" % (key, nic_op), val)) 9574 9575 # hvparams changes 9576 if self.op.hvparams: 9577 instance.hvparams = self.hv_inst 9578 for key, val in self.op.hvparams.iteritems(): 9579 result.append(("hv/%s" % key, val)) 9580 9581 # beparams changes 9582 if self.op.beparams: 9583 instance.beparams = self.be_inst 9584 for key, val in self.op.beparams.iteritems(): 9585 result.append(("be/%s" % key, val)) 9586 9587 # OS change 9588 if self.op.os_name: 9589 instance.os = self.op.os_name 9590 9591 # osparams changes 9592 if self.op.osparams: 9593 instance.osparams = self.os_inst 9594 for key, val in self.op.osparams.iteritems(): 9595 result.append(("os/%s" % key, val)) 9596 9597 self.cfg.Update(instance, feedback_fn) 9598 9599 return result
9600 9601 _DISK_CONVERSIONS = { 9602 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, 9603 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, 9604 }
9605
9606 9607 -class LUQueryExports(NoHooksLU):
9608 """Query the exports list 9609 9610 """ 9611 _OP_PARAMS = [ 9612 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 9613 ("use_locking", False, ht.TBool), 9614 ] 9615 REQ_BGL = False 9616
9617 - def ExpandNames(self):
9618 self.needed_locks = {} 9619 self.share_locks[locking.LEVEL_NODE] = 1 9620 if not self.op.nodes: 9621 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 9622 else: 9623 self.needed_locks[locking.LEVEL_NODE] = \ 9624 _GetWantedNodes(self, self.op.nodes)
9625
9626 - def Exec(self, feedback_fn):
9627 """Compute the list of all the exported system images. 9628 9629 @rtype: dict 9630 @return: a dictionary with the structure node->(export-list) 9631 where export-list is a list of the instances exported on 9632 that node. 9633 9634 """ 9635 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 9636 rpcresult = self.rpc.call_export_list(self.nodes) 9637 result = {} 9638 for node in rpcresult: 9639 if rpcresult[node].fail_msg: 9640 result[node] = False 9641 else: 9642 result[node] = rpcresult[node].payload 9643 9644 return result
9645
9646 9647 -class LUPrepareExport(NoHooksLU):
9648 """Prepares an instance for an export and returns useful information. 9649 9650 """ 9651 _OP_PARAMS = [ 9652 _PInstanceName, 9653 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)), 9654 ] 9655 REQ_BGL = False 9656
9657 - def ExpandNames(self):
9658 self._ExpandAndLockInstance() 9659
9660 - def CheckPrereq(self):
9661 """Check prerequisites. 9662 9663 """ 9664 instance_name = self.op.instance_name 9665 9666 self.instance = self.cfg.GetInstanceInfo(instance_name) 9667 assert self.instance is not None, \ 9668 "Cannot retrieve locked instance %s" % self.op.instance_name 9669 _CheckNodeOnline(self, self.instance.primary_node) 9670 9671 self._cds = _GetClusterDomainSecret()
9672
9673 - def Exec(self, feedback_fn):
9674 """Prepares an instance for an export. 9675 9676 """ 9677 instance = self.instance 9678 9679 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9680 salt = utils.GenerateSecret(8) 9681 9682 feedback_fn("Generating X509 certificate on %s" % instance.primary_node) 9683 result = self.rpc.call_x509_cert_create(instance.primary_node, 9684 constants.RIE_CERT_VALIDITY) 9685 result.Raise("Can't create X509 key and certificate on %s" % result.node) 9686 9687 (name, cert_pem) = result.payload 9688 9689 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 9690 cert_pem) 9691 9692 return { 9693 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), 9694 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), 9695 salt), 9696 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), 9697 } 9698 9699 return None
9700
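# LUPrepareExport returns, among other things, an HMAC of the X509 key
# name computed with the cluster domain secret and a fresh salt, which the
# receiving side verifies before trusting the certificate.  The snippet
# below illustrates that salted-HMAC pattern in general terms only; the
# exact message layout used by utils.Sha1Hmac is not reproduced here.
import hmac
import hashlib

def sign(secret, message, salt):
    return hmac.new(secret, salt + message, hashlib.sha1).hexdigest()

def verify(secret, message, salt, digest):
    return sign(secret, message, salt) == digest

_secret = b"cluster-domain-secret"
_digest = sign(_secret, b"x509-key-name", b"salt1234")
assert verify(_secret, b"x509-key-name", b"salt1234", _digest)
assert not verify(_secret, b"x509-key-name", b"other-salt", _digest)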
9701 9702 -class LUExportInstance(LogicalUnit):
9703 """Export an instance to an image in the cluster. 9704 9705 """ 9706 HPATH = "instance-export" 9707 HTYPE = constants.HTYPE_INSTANCE 9708 _OP_PARAMS = [ 9709 _PInstanceName, 9710 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)), 9711 ("shutdown", True, ht.TBool), 9712 _PShutdownTimeout, 9713 ("remove_instance", False, ht.TBool), 9714 ("ignore_remove_failures", False, ht.TBool), 9715 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)), 9716 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)), 9717 ("destination_x509_ca", None, ht.TMaybeString), 9718 ] 9719 REQ_BGL = False 9720
9721 - def CheckArguments(self):
9722 """Check the arguments. 9723 9724 """ 9725 self.x509_key_name = self.op.x509_key_name 9726 self.dest_x509_ca_pem = self.op.destination_x509_ca 9727 9728 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9729 if not self.x509_key_name: 9730 raise errors.OpPrereqError("Missing X509 key name for encryption", 9731 errors.ECODE_INVAL) 9732 9733 if not self.dest_x509_ca_pem: 9734 raise errors.OpPrereqError("Missing destination X509 CA", 9735 errors.ECODE_INVAL)
9736
9737 - def ExpandNames(self):
9738 self._ExpandAndLockInstance() 9739 9740 # Lock all nodes for local exports 9741 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9742 # FIXME: lock only instance primary and destination node 9743 # 9744 # Sad but true, for now we have do lock all nodes, as we don't know where 9745 # the previous export might be, and in this LU we search for it and 9746 # remove it from its current node. In the future we could fix this by: 9747 # - making a tasklet to search (share-lock all), then create the 9748 # new one, then one to remove, after 9749 # - removing the removal operation altogether 9750 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9751
9752 - def DeclareLocks(self, level):
9753 """Last minute lock declaration."""
9754 # All nodes are locked anyway, so nothing to do here. 9755
9756 - def BuildHooksEnv(self):
9757 """Build hooks env. 9758 9759 This will run on the master, primary node and target node. 9760 9761 """ 9762 env = { 9763 "EXPORT_MODE": self.op.mode, 9764 "EXPORT_NODE": self.op.target_node, 9765 "EXPORT_DO_SHUTDOWN": self.op.shutdown, 9766 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 9767 # TODO: Generic function for boolean env variables 9768 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), 9769 } 9770 9771 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 9772 9773 nl = [self.cfg.GetMasterNode(), self.instance.primary_node] 9774 9775 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9776 nl.append(self.op.target_node) 9777 9778 return env, nl, nl
9779
9780 - def CheckPrereq(self):
9781 """Check prerequisites. 9782 9783 This checks that the instance and node names are valid. 9784 9785 """ 9786 instance_name = self.op.instance_name 9787 9788 self.instance = self.cfg.GetInstanceInfo(instance_name) 9789 assert self.instance is not None, \ 9790 "Cannot retrieve locked instance %s" % self.op.instance_name 9791 _CheckNodeOnline(self, self.instance.primary_node) 9792 9793 if (self.op.remove_instance and self.instance.admin_up and 9794 not self.op.shutdown): 9795 raise errors.OpPrereqError("Can not remove instance without shutting it" 9796 " down before") 9797 9798 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9799 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 9800 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) 9801 assert self.dst_node is not None 9802 9803 _CheckNodeOnline(self, self.dst_node.name) 9804 _CheckNodeNotDrained(self, self.dst_node.name) 9805 9806 self._cds = None 9807 self.dest_disk_info = None 9808 self.dest_x509_ca = None 9809 9810 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 9811 self.dst_node = None 9812 9813 if len(self.op.target_node) != len(self.instance.disks): 9814 raise errors.OpPrereqError(("Received destination information for %s" 9815 " disks, but instance %s has %s disks") % 9816 (len(self.op.target_node), instance_name, 9817 len(self.instance.disks)), 9818 errors.ECODE_INVAL) 9819 9820 cds = _GetClusterDomainSecret() 9821 9822 # Check X509 key name 9823 try: 9824 (key_name, hmac_digest, hmac_salt) = self.x509_key_name 9825 except (TypeError, ValueError), err: 9826 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err) 9827 9828 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): 9829 raise errors.OpPrereqError("HMAC for X509 key name is wrong", 9830 errors.ECODE_INVAL) 9831 9832 # Load and verify CA 9833 try: 9834 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) 9835 except OpenSSL.crypto.Error, err: 9836 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % 9837 (err, ), errors.ECODE_INVAL) 9838 9839 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 9840 if errcode is not None: 9841 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % 9842 (msg, ), errors.ECODE_INVAL) 9843 9844 self.dest_x509_ca = cert 9845 9846 # Verify target information 9847 disk_info = [] 9848 for idx, disk_data in enumerate(self.op.target_node): 9849 try: 9850 (host, port, magic) = \ 9851 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) 9852 except errors.GenericError, err: 9853 raise errors.OpPrereqError("Target info for disk %s: %s" % 9854 (idx, err), errors.ECODE_INVAL) 9855 9856 disk_info.append((host, port, magic)) 9857 9858 assert len(disk_info) == len(self.op.target_node) 9859 self.dest_disk_info = disk_info 9860 9861 else: 9862 raise errors.ProgrammerError("Unhandled export mode %r" % 9863 self.op.mode) 9864 9865 # instance disk type verification 9866 # TODO: Implement export support for file-based disks 9867 for disk in self.instance.disks: 9868 if disk.dev_type == constants.LD_FILE: 9869 raise errors.OpPrereqError("Export not supported for instances with" 9870 " file-based disks", errors.ECODE_INVAL)
9871
9872 - def _CleanupExports(self, feedback_fn):
9873 """Removes exports of current instance from all other nodes. 9874 9875 If an instance in a cluster with nodes A..D was exported to node C, its 9876 exports will be removed from the nodes A, B and D. 9877 9878 """ 9879 assert self.op.mode != constants.EXPORT_MODE_REMOTE 9880 9881 nodelist = self.cfg.GetNodeList() 9882 nodelist.remove(self.dst_node.name) 9883 9884 # on one-node clusters nodelist will be empty after the removal 9885 # if we proceed the backup would be removed because OpQueryExports 9886 # substitutes an empty list with the full cluster node list. 9887 iname = self.instance.name 9888 if nodelist: 9889 feedback_fn("Removing old exports for instance %s" % iname) 9890 exportlist = self.rpc.call_export_list(nodelist) 9891 for node in exportlist: 9892 if exportlist[node].fail_msg: 9893 continue 9894 if iname in exportlist[node].payload: 9895 msg = self.rpc.call_export_remove(node, iname).fail_msg 9896 if msg: 9897 self.LogWarning("Could not remove older export for instance %s" 9898 " on node %s: %s", iname, node, msg)
9899
9900 - def Exec(self, feedback_fn):
9901 """Export an instance to an image in the cluster. 9902 9903 """ 9904 assert self.op.mode in constants.EXPORT_MODES 9905 9906 instance = self.instance 9907 src_node = instance.primary_node 9908 9909 if self.op.shutdown: 9910 # shutdown the instance, but not the disks 9911 feedback_fn("Shutting down instance %s" % instance.name) 9912 result = self.rpc.call_instance_shutdown(src_node, instance, 9913 self.op.shutdown_timeout) 9914 # TODO: Maybe ignore failures if ignore_remove_failures is set 9915 result.Raise("Could not shutdown instance %s on" 9916 " node %s" % (instance.name, src_node)) 9917 9918 # set the disks ID correctly since call_instance_start needs the 9919 # correct drbd minor to create the symlinks 9920 for disk in instance.disks: 9921 self.cfg.SetDiskID(disk, src_node) 9922 9923 activate_disks = (not instance.admin_up) 9924 9925 if activate_disks: 9926 # Activate the instance disks if we'exporting a stopped instance 9927 feedback_fn("Activating disks for %s" % instance.name) 9928 _StartInstanceDisks(self, instance, None) 9929 9930 try: 9931 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, 9932 instance) 9933 9934 helper.CreateSnapshots() 9935 try: 9936 if (self.op.shutdown and instance.admin_up and 9937 not self.op.remove_instance): 9938 assert not activate_disks 9939 feedback_fn("Starting instance %s" % instance.name) 9940 result = self.rpc.call_instance_start(src_node, instance, None, None) 9941 msg = result.fail_msg 9942 if msg: 9943 feedback_fn("Failed to start instance: %s" % msg) 9944 _ShutdownInstanceDisks(self, instance) 9945 raise errors.OpExecError("Could not start instance: %s" % msg) 9946 9947 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9948 (fin_resu, dresults) = helper.LocalExport(self.dst_node) 9949 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 9950 connect_timeout = constants.RIE_CONNECT_TIMEOUT 9951 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 9952 9953 (key_name, _, _) = self.x509_key_name 9954 9955 dest_ca_pem = \ 9956 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, 9957 self.dest_x509_ca) 9958 9959 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info, 9960 key_name, dest_ca_pem, 9961 timeouts) 9962 finally: 9963 helper.Cleanup() 9964 9965 # Check for backwards compatibility 9966 assert len(dresults) == len(instance.disks) 9967 assert compat.all(isinstance(i, bool) for i in dresults), \ 9968 "Not all results are boolean: %r" % dresults 9969 9970 finally: 9971 if activate_disks: 9972 feedback_fn("Deactivating disks for %s" % instance.name) 9973 _ShutdownInstanceDisks(self, instance) 9974 9975 if not (compat.all(dresults) and fin_resu): 9976 failures = [] 9977 if not fin_resu: 9978 failures.append("export finalization") 9979 if not compat.all(dresults): 9980 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) 9981 if not dsk) 9982 failures.append("disk export: disk(s) %s" % fdsk) 9983 9984 raise errors.OpExecError("Export failed, errors in %s" % 9985 utils.CommaJoin(failures)) 9986 9987 # At this point, the export was successful, we can cleanup/finish 9988 9989 # Remove instance if requested 9990 if self.op.remove_instance: 9991 feedback_fn("Removing instance %s" % instance.name) 9992 _RemoveInstance(self, feedback_fn, instance, 9993 self.op.ignore_remove_failures) 9994 9995 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9996 self._CleanupExports(feedback_fn) 9997 9998 return fin_resu, dresults
9999
10000 10001 -class LURemoveExport(NoHooksLU):
10002 """Remove exports related to the named instance. 10003 10004 """ 10005 _OP_PARAMS = [ 10006 _PInstanceName, 10007 ] 10008 REQ_BGL = False 10009
10010 - def ExpandNames(self):
10011 self.needed_locks = {}
10012 # We need all nodes to be locked in order for RemoveExport to work, but we
10013 # don't need to lock the instance itself, as nothing will happen to it (and
10014 # we can also remove exports for an already-removed instance)
10015 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10016
10017 - def Exec(self, feedback_fn):
10018 """Remove any export. 10019 10020 """ 10021 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) 10022 # If the instance was not found we'll try with the name that was passed in. 10023 # This will only work if it was an FQDN, though. 10024 fqdn_warn = False 10025 if not instance_name: 10026 fqdn_warn = True 10027 instance_name = self.op.instance_name 10028 10029 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 10030 exportlist = self.rpc.call_export_list(locked_nodes) 10031 found = False 10032 for node in exportlist: 10033 msg = exportlist[node].fail_msg 10034 if msg: 10035 self.LogWarning("Failed to query node %s (continuing): %s", node, msg) 10036 continue 10037 if instance_name in exportlist[node].payload: 10038 found = True 10039 result = self.rpc.call_export_remove(node, instance_name) 10040 msg = result.fail_msg 10041 if msg: 10042 logging.error("Could not remove export for instance %s" 10043 " on node %s: %s", instance_name, node, msg) 10044 10045 if fqdn_warn and not found: 10046 feedback_fn("Export not found. If trying to remove an export belonging" 10047 " to a deleted instance please use its Fully Qualified" 10048 " Domain Name.")
10049
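# The fail_msg/payload pattern used in LURemoveExport.Exec above recurs for most
# per-node RPC calls in this module. A small standalone sketch of how such a
# result set is consumed (the result objects are faked with a namedtuple for
# illustration; real results expose the same two attributes used here, and the
# node/instance names are invented):
import collections

FakeResult = collections.namedtuple("FakeResult", ["fail_msg", "payload"])

exportlist = {
  "node1.example.com": FakeResult(fail_msg=None, payload=["web1.example.com"]),
  "node2.example.com": FakeResult(fail_msg="node unreachable", payload=None),
}

for node, result in exportlist.items():
  if result.fail_msg:                       # RPC-level failure: warn and move on
    print "Failed to query node %s: %s" % (node, result.fail_msg)
    continue
  if "web1.example.com" in result.payload:  # per-node list of export names
    print "Export found on %s" % node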
10050 10051 -class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10052 """Generic tags LU. 10053 10054 This is an abstract class which is the parent of all the other tags LUs. 10055 10056 """ 10057
10058 - def ExpandNames(self):
10059 self.needed_locks = {} 10060 if self.op.kind == constants.TAG_NODE: 10061 self.op.name = _ExpandNodeName(self.cfg, self.op.name) 10062 self.needed_locks[locking.LEVEL_NODE] = self.op.name 10063 elif self.op.kind == constants.TAG_INSTANCE: 10064 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) 10065 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10066 10067 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's 10068 # not possible to acquire the BGL based on opcode parameters) 10069
10070 - def CheckPrereq(self):
10071 """Check prerequisites. 10072 10073 """ 10074 if self.op.kind == constants.TAG_CLUSTER: 10075 self.target = self.cfg.GetClusterInfo() 10076 elif self.op.kind == constants.TAG_NODE: 10077 self.target = self.cfg.GetNodeInfo(self.op.name) 10078 elif self.op.kind == constants.TAG_INSTANCE: 10079 self.target = self.cfg.GetInstanceInfo(self.op.name) 10080 else: 10081 raise errors.OpPrereqError("Wrong tag type requested (%s)" % 10082 str(self.op.kind), errors.ECODE_INVAL)
10083
10084 10085 -class LUGetTags(TagsLU):
10086 """Returns the tags of a given object. 10087 10088 """ 10089 _OP_PARAMS = [ 10090 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), 10091 # Name is only meaningful for nodes and instances 10092 ("name", ht.NoDefault, ht.TMaybeString), 10093 ] 10094 REQ_BGL = False 10095
10096 - def ExpandNames(self):
10097 TagsLU.ExpandNames(self) 10098 10099 # Share locks as this is only a read operation 10100 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10101
10102 - def Exec(self, feedback_fn):
10103 """Returns the tag list. 10104 10105 """ 10106 return list(self.target.GetTags())
10107
10108 10109 -class LUSearchTags(NoHooksLU):
10110 """Searches the tags for a given pattern. 10111 10112 """ 10113 _OP_PARAMS = [ 10114 ("pattern", ht.NoDefault, ht.TNonEmptyString), 10115 ] 10116 REQ_BGL = False 10117
10118 - def ExpandNames(self):
10119 self.needed_locks = {}
10120
10121 - def CheckPrereq(self):
10122 """Check prerequisites. 10123 10124 This checks the pattern passed for validity by compiling it. 10125 10126 """ 10127 try: 10128 self.re = re.compile(self.op.pattern) 10129 except re.error, err: 10130 raise errors.OpPrereqError("Invalid search pattern '%s': %s" % 10131 (self.op.pattern, err), errors.ECODE_INVAL)
10132
10133 - def Exec(self, feedback_fn):
10134 """Returns the tag list. 10135 10136 """ 10137 cfg = self.cfg 10138 tgts = [("/cluster", cfg.GetClusterInfo())] 10139 ilist = cfg.GetAllInstancesInfo().values() 10140 tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) 10141 nlist = cfg.GetAllNodesInfo().values() 10142 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) 10143 results = [] 10144 for path, target in tgts: 10145 for tag in target.GetTags(): 10146 if self.re.search(tag): 10147 results.append((path, tag)) 10148 return results
10149
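# LUSearchTags.Exec above returns (path, tag) pairs rather than bare tag strings.
# A minimal, self-contained sketch of the same matching logic; the paths mirror
# the "/cluster", "/nodes/..." and "/instances/..." prefixes built above, while
# the tag values themselves are invented for this example:
import re

tgts = [
  ("/cluster", ["ha:enabled"]),
  ("/nodes/node1.example.com", ["rack:r1"]),
  ("/instances/web1.example.com", ["rack:r1", "role:web"]),
]

pattern = re.compile("^rack:")
results = [(path, tag) for (path, tags) in tgts for tag in tags
           if pattern.search(tag)]
# results == [("/nodes/node1.example.com", "rack:r1"),
#             ("/instances/web1.example.com", "rack:r1")]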
10150 10151 -class LUAddTags(TagsLU):
10152 """Sets a tag on a given object. 10153 10154 """ 10155 _OP_PARAMS = [ 10156 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), 10157 # Name is only meaningful for nodes and instances 10158 ("name", ht.NoDefault, ht.TMaybeString), 10159 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 10160 ] 10161 REQ_BGL = False 10162
10163 - def CheckPrereq(self):
10164 """Check prerequisites. 10165 10166 This checks the type and length of the tag name and value. 10167 10168 """ 10169 TagsLU.CheckPrereq(self) 10170 for tag in self.op.tags: 10171 objects.TaggableObject.ValidateTag(tag)
10172
10173 - def Exec(self, feedback_fn):
10174 """Sets the tag. 10175 10176 """ 10177 try: 10178 for tag in self.op.tags: 10179 self.target.AddTag(tag) 10180 except errors.TagError, err: 10181 raise errors.OpExecError("Error while setting tag: %s" % str(err)) 10182 self.cfg.Update(self.target, feedback_fn)
10183
10184 10185 -class LUDelTags(TagsLU):
10186 """Delete a list of tags from a given object. 10187 10188 """ 10189 _OP_PARAMS = [ 10190 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), 10191 # Name is only meaningful for nodes and instances 10192 ("name", ht.NoDefault, ht.TMaybeString), 10193 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), 10194 ] 10195 REQ_BGL = False 10196
10197 - def CheckPrereq(self):
10198 """Check prerequisites. 10199 10200 This checks that we have the given tag. 10201 10202 """ 10203 TagsLU.CheckPrereq(self) 10204 for tag in self.op.tags: 10205 objects.TaggableObject.ValidateTag(tag) 10206 del_tags = frozenset(self.op.tags) 10207 cur_tags = self.target.GetTags() 10208 10209 diff_tags = del_tags - cur_tags 10210 if diff_tags: 10211 diff_names = ("'%s'" % i for i in sorted(diff_tags)) 10212 raise errors.OpPrereqError("Tag(s) %s not found" % 10213 (utils.CommaJoin(diff_names), ), 10214 errors.ECODE_NOENT)
10215
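# The set difference in LUDelTags.CheckPrereq above is what produces the
# "Tag(s) ... not found" error. A tiny standalone illustration (tag values
# invented):
del_tags = frozenset(["role:web", "rack:r9"])   # tags requested for deletion
cur_tags = set(["role:web", "rack:r1"])         # tags currently on the object

diff_tags = del_tags - cur_tags
# diff_tags == frozenset(["rack:r9"]), so the LU would refuse with
# "Tag(s) 'rack:r9' not found" before touching the object.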
10216 - def Exec(self, feedback_fn):
10217 """Remove the tag from the object. 10218 10219 """ 10220 for tag in self.op.tags: 10221 self.target.RemoveTag(tag) 10222 self.cfg.Update(self.target, feedback_fn)
10223
10224 10225 -class LUTestDelay(NoHooksLU):
10226 """Sleep for a specified amount of time. 10227 10228 This LU sleeps on the master and/or nodes for a specified amount of 10229 time. 10230 10231 """ 10232 _OP_PARAMS = [ 10233 ("duration", ht.NoDefault, ht.TFloat), 10234 ("on_master", True, ht.TBool), 10235 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 10236 ("repeat", 0, ht.TPositiveInt) 10237 ] 10238 REQ_BGL = False 10239
10240 - def ExpandNames(self):
10241 """Expand names and set required locks. 10242 10243 This expands the node list, if any. 10244 10245 """ 10246 self.needed_locks = {} 10247 if self.op.on_nodes: 10248 # _GetWantedNodes can be used here, but is not always appropriate to use 10249 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for 10250 # more information. 10251 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes) 10252 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10253
10254 - def _TestDelay(self):
10255 """Do the actual sleep. 10256 10257 """ 10258 if self.op.on_master: 10259 if not utils.TestDelay(self.op.duration): 10260 raise errors.OpExecError("Error during master delay test") 10261 if self.op.on_nodes: 10262 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration) 10263 for node, node_result in result.items(): 10264 node_result.Raise("Failure during rpc call to node %s" % node)
10265
10266 - def Exec(self, feedback_fn):
10267 """Execute the test delay opcode, with the wanted repetitions. 10268 10269 """ 10270 if self.op.repeat == 0: 10271 self._TestDelay() 10272 else: 10273 top_value = self.op.repeat - 1 10274 for i in range(self.op.repeat): 10275 self.LogInfo("Test delay iteration %d/%d" % (i, top_value)) 10276 self._TestDelay()
10277
10278 10279 -class LUTestJobqueue(NoHooksLU):
10280 """Utility LU to test some aspects of the job queue. 10281 10282 """ 10283 _OP_PARAMS = [ 10284 ("notify_waitlock", False, ht.TBool), 10285 ("notify_exec", False, ht.TBool), 10286 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)), 10287 ("fail", False, ht.TBool), 10288 ] 10289 REQ_BGL = False 10290 10291 # Must be lower than default timeout for WaitForJobChange to see whether it 10292 # notices changed jobs 10293 _CLIENT_CONNECT_TIMEOUT = 20.0 10294 _CLIENT_CONFIRM_TIMEOUT = 60.0 10295 10296 @classmethod
10297 - def _NotifyUsingSocket(cls, cb, errcls):
10298 """Opens a Unix socket and waits for another program to connect. 10299 10300 @type cb: callable 10301 @param cb: Callback to send socket name to client 10302 @type errcls: class 10303 @param errcls: Exception class to use for errors 10304 10305 """ 10306 # Using a temporary directory as there's no easy way to create temporary 10307 # sockets without writing a custom loop around tempfile.mktemp and 10308 # socket.bind 10309 tmpdir = tempfile.mkdtemp() 10310 try: 10311 tmpsock = utils.PathJoin(tmpdir, "sock") 10312 10313 logging.debug("Creating temporary socket at %s", tmpsock) 10314 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 10315 try: 10316 sock.bind(tmpsock) 10317 sock.listen(1) 10318 10319 # Send details to client 10320 cb(tmpsock) 10321 10322 # Wait for client to connect before continuing 10323 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) 10324 try: 10325 (conn, _) = sock.accept() 10326 except socket.error, err: 10327 raise errcls("Client didn't connect in time (%s)" % err) 10328 finally: 10329 sock.close() 10330 finally: 10331 # Remove as soon as client is connected 10332 shutil.rmtree(tmpdir) 10333 10334 # Wait for client to close 10335 try: 10336 try: 10337 # pylint: disable-msg=E1101 10338 # Instance of '_socketobject' has no ... member 10339 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) 10340 conn.recv(1) 10341 except socket.error, err: 10342 raise errcls("Client failed to confirm notification (%s)" % err) 10343 finally: 10344 conn.close()
10345
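# _NotifyUsingSocket above only implements the server side of the handshake:
# the test client is given the socket path, must connect (unblocking accept())
# and then write a single byte to satisfy conn.recv(1). A minimal sketch of
# such a client, using only the standard library; the function name and the
# timeout value are invented for illustration:
import socket

def confirm_notification(sockname, timeout=10.0):
  """Connect to the LU's temporary Unix socket and acknowledge the test."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)   # unblocks the LU's accept() call
    sock.send("\0")          # any single byte satisfies conn.recv(1)
  finally:
    sock.close()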
10346 - def _SendNotification(self, test, arg, sockname):
10347 """Sends a notification to the client. 10348 10349 @type test: string 10350 @param test: Test name 10351 @param arg: Test argument (depends on test) 10352 @type sockname: string 10353 @param sockname: Socket path 10354 10355 """ 10356 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10357
10358 - def _Notify(self, prereq, test, arg):
10359 """Notifies the client of a test. 10360 10361 @type prereq: bool 10362 @param prereq: Whether this is a prereq-phase test 10363 @type test: string 10364 @param test: Test name 10365 @param arg: Test argument (depends on test) 10366 10367 """ 10368 if prereq: 10369 errcls = errors.OpPrereqError 10370 else: 10371 errcls = errors.OpExecError 10372 10373 return self._NotifyUsingSocket(compat.partial(self._SendNotification, 10374 test, arg), 10375 errcls)
10376
10377 - def CheckArguments(self):
10378 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 10379 self.expandnames_calls = 0
10380
10381 - def ExpandNames(self):
10382 checkargs_calls = getattr(self, "checkargs_calls", 0) 10383 if checkargs_calls < 1: 10384 raise errors.ProgrammerError("CheckArguments was not called") 10385 10386 self.expandnames_calls += 1 10387 10388 if self.op.notify_waitlock: 10389 self._Notify(True, constants.JQT_EXPANDNAMES, None) 10390 10391 self.LogInfo("Expanding names") 10392 10393 # Get lock on master node (just to get a lock, not for a particular reason) 10394 self.needed_locks = { 10395 locking.LEVEL_NODE: self.cfg.GetMasterNode(), 10396 }
10397
10398 - def Exec(self, feedback_fn):
10399 if self.expandnames_calls < 1: 10400 raise errors.ProgrammerError("ExpandNames was not called") 10401 10402 if self.op.notify_exec: 10403 self._Notify(False, constants.JQT_EXEC, None) 10404 10405 self.LogInfo("Executing") 10406 10407 if self.op.log_messages: 10408 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) 10409 for idx, msg in enumerate(self.op.log_messages): 10410 self.LogInfo("Sending log message %s", idx + 1) 10411 feedback_fn(constants.JQT_MSGPREFIX + msg) 10412 # Report how many test messages have been sent 10413 self._Notify(False, constants.JQT_LOGMSG, idx + 1) 10414 10415 if self.op.fail: 10416 raise errors.OpExecError("Opcode failure was requested") 10417 10418 return True
10419
10420 10421 -class IAllocator(object):
10422 """IAllocator framework.
10423
10424 An IAllocator instance has four sets of attributes:
10425 - cfg that is needed to query the cluster
10426 - input data (all members of the _KEYS class attribute are required)
10427 - four buffer attributes ((in|out)_(data|text)) that represent the
10428 input (to the external script) in text and data structure format,
10429 and the output from it, again in two formats
10430 - the result variables from the script (success, info, result) for
10431 easy usage
10432
10433 """
10434 # pylint: disable-msg=R0902
10435 # lots of instance attributes
10436 _ALLO_KEYS = [
10437 "name", "mem_size", "disks", "disk_template",
10438 "os", "tags", "nics", "vcpus", "hypervisor",
10439 ]
10440 _RELO_KEYS = [
10441 "name", "relocate_from",
10442 ]
10443 _EVAC_KEYS = [
10444 "evac_nodes",
10445 ]
10446
10447 - def __init__(self, cfg, rpc, mode, **kwargs):
10448 self.cfg = cfg 10449 self.rpc = rpc 10450 # init buffer variables 10451 self.in_text = self.out_text = self.in_data = self.out_data = None 10452 # init all input fields so that pylint is happy 10453 self.mode = mode 10454 self.mem_size = self.disks = self.disk_template = None 10455 self.os = self.tags = self.nics = self.vcpus = None 10456 self.hypervisor = None 10457 self.relocate_from = None 10458 self.name = None 10459 self.evac_nodes = None 10460 # computed fields 10461 self.required_nodes = None 10462 # init result fields 10463 self.success = self.info = self.result = None 10464 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 10465 keyset = self._ALLO_KEYS 10466 fn = self._AddNewInstance 10467 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 10468 keyset = self._RELO_KEYS 10469 fn = self._AddRelocateInstance 10470 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 10471 keyset = self._EVAC_KEYS 10472 fn = self._AddEvacuateNodes 10473 else: 10474 raise errors.ProgrammerError("Unknown mode '%s' passed to the" 10475 " IAllocator" % self.mode) 10476 for key in kwargs: 10477 if key not in keyset: 10478 raise errors.ProgrammerError("Invalid input parameter '%s' to" 10479 " IAllocator" % key) 10480 setattr(self, key, kwargs[key]) 10481 10482 for key in keyset: 10483 if key not in kwargs: 10484 raise errors.ProgrammerError("Missing input parameter '%s' to" 10485 " IAllocator" % key) 10486 self._BuildInputData(fn)
10487
10488 - def _ComputeClusterData(self):
10489 """Compute the generic allocator input data. 10490 10491 This is the data that is independent of the actual operation. 10492 10493 """ 10494 cfg = self.cfg 10495 cluster_info = cfg.GetClusterInfo() 10496 # cluster data 10497 data = { 10498 "version": constants.IALLOCATOR_VERSION, 10499 "cluster_name": cfg.GetClusterName(), 10500 "cluster_tags": list(cluster_info.GetTags()), 10501 "enabled_hypervisors": list(cluster_info.enabled_hypervisors), 10502 # we don't have job IDs 10503 } 10504 iinfo = cfg.GetAllInstancesInfo().values() 10505 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] 10506 10507 # node data 10508 node_list = cfg.GetNodeList() 10509 10510 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 10511 hypervisor_name = self.hypervisor 10512 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 10513 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor 10514 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 10515 hypervisor_name = cluster_info.enabled_hypervisors[0] 10516 10517 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(), 10518 hypervisor_name) 10519 node_iinfo = \ 10520 self.rpc.call_all_instances_info(node_list, 10521 cluster_info.enabled_hypervisors) 10522 10523 data["nodegroups"] = self._ComputeNodeGroupData(cfg) 10524 10525 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list) 10526 10527 data["instances"] = self._ComputeInstanceData(cluster_info, i_list) 10528 10529 self.in_data = data
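# Taken together with _BuildInputData below, the in_data dictionary assembled
# here has roughly the following shape. Illustrative only: the values are
# invented, the keys are the ones this method and _BuildInputData actually set.
example_in_data = {
  "version": 2,                                  # constants.IALLOCATOR_VERSION
  "cluster_name": "cluster.example.com",
  "cluster_tags": [],
  "enabled_hypervisors": ["xen-pvm"],
  "nodegroups": {"uuid-1234": {"name": "default"}},
  "nodes": {},        # filled by _ComputeNodeData, one entry per node
  "instances": {},    # filled by _ComputeInstanceData, one entry per instance
  "request": {},      # filled later by _BuildInputData
}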
10530 10531 @staticmethod
10532 - def _ComputeNodeGroupData(cfg):
10533 """Compute node groups data. 10534 10535 """ 10536 ng = {} 10537 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items(): 10538 ng[guuid] = { "name": gdata.name } 10539 return ng
10540 10541 @staticmethod
10542 - def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10543 """Compute global node data. 10544 10545 """ 10546 node_results = {} 10547 for nname, nresult in node_data.items(): 10548 # first fill in static (config-based) values 10549 ninfo = cfg.GetNodeInfo(nname) 10550 pnr = { 10551 "tags": list(ninfo.GetTags()), 10552 "primary_ip": ninfo.primary_ip, 10553 "secondary_ip": ninfo.secondary_ip, 10554 "offline": ninfo.offline, 10555 "drained": ninfo.drained, 10556 "master_candidate": ninfo.master_candidate, 10557 "group": ninfo.group, 10558 "master_capable": ninfo.master_capable, 10559 "vm_capable": ninfo.vm_capable, 10560 } 10561 10562 if not (ninfo.offline or ninfo.drained): 10563 nresult.Raise("Can't get data for node %s" % nname) 10564 node_iinfo[nname].Raise("Can't get node instance info from node %s" % 10565 nname) 10566 remote_info = nresult.payload 10567 10568 for attr in ['memory_total', 'memory_free', 'memory_dom0', 10569 'vg_size', 'vg_free', 'cpu_total']: 10570 if attr not in remote_info: 10571 raise errors.OpExecError("Node '%s' didn't return attribute" 10572 " '%s'" % (nname, attr)) 10573 if not isinstance(remote_info[attr], int): 10574 raise errors.OpExecError("Node '%s' returned invalid value" 10575 " for '%s': %s" % 10576 (nname, attr, remote_info[attr])) 10577 # compute memory used by primary instances 10578 i_p_mem = i_p_up_mem = 0 10579 for iinfo, beinfo in i_list: 10580 if iinfo.primary_node == nname: 10581 i_p_mem += beinfo[constants.BE_MEMORY] 10582 if iinfo.name not in node_iinfo[nname].payload: 10583 i_used_mem = 0 10584 else: 10585 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory']) 10586 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem 10587 remote_info['memory_free'] -= max(0, i_mem_diff) 10588 10589 if iinfo.admin_up: 10590 i_p_up_mem += beinfo[constants.BE_MEMORY] 10591 10592 # compute memory used by instances 10593 pnr_dyn = { 10594 "total_memory": remote_info['memory_total'], 10595 "reserved_memory": remote_info['memory_dom0'], 10596 "free_memory": remote_info['memory_free'], 10597 "total_disk": remote_info['vg_size'], 10598 "free_disk": remote_info['vg_free'], 10599 "total_cpus": remote_info['cpu_total'], 10600 "i_pri_memory": i_p_mem, 10601 "i_pri_up_memory": i_p_up_mem, 10602 } 10603 pnr.update(pnr_dyn) 10604 10605 node_results[nname] = pnr 10606 10607 return node_results
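# The free-memory adjustment in the loop above corrects the hypervisor-reported
# value for primary instances that are configured with more memory than they
# currently use. A worked example with invented numbers:
be_memory = 512                             # beinfo[constants.BE_MEMORY]
i_used_mem = 384                            # memory currently reported in use

i_mem_diff = be_memory - i_used_mem         # 128 MiB the instance may still claim
reported_free = 2048                        # 'memory_free' as returned by the node
adjusted_free = reported_free - max(0, i_mem_diff)
# adjusted_free == 1920: the allocator treats the memory the instance may
# still grow into as unavailable.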
10608 10609 @staticmethod
10610 - def _ComputeInstanceData(cluster_info, i_list):
10611 """Compute global instance data. 10612 10613 """ 10614 instance_data = {} 10615 for iinfo, beinfo in i_list: 10616 nic_data = [] 10617 for nic in iinfo.nics: 10618 filled_params = cluster_info.SimpleFillNIC(nic.nicparams) 10619 nic_dict = {"mac": nic.mac, 10620 "ip": nic.ip, 10621 "mode": filled_params[constants.NIC_MODE], 10622 "link": filled_params[constants.NIC_LINK], 10623 } 10624 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 10625 nic_dict["bridge"] = filled_params[constants.NIC_LINK] 10626 nic_data.append(nic_dict) 10627 pir = { 10628 "tags": list(iinfo.GetTags()), 10629 "admin_up": iinfo.admin_up, 10630 "vcpus": beinfo[constants.BE_VCPUS], 10631 "memory": beinfo[constants.BE_MEMORY], 10632 "os": iinfo.os, 10633 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes), 10634 "nics": nic_data, 10635 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks], 10636 "disk_template": iinfo.disk_template, 10637 "hypervisor": iinfo.hypervisor, 10638 } 10639 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template, 10640 pir["disks"]) 10641 instance_data[iinfo.name] = pir 10642 10643 return instance_data
10644
10645 - def _AddNewInstance(self):
10646 """Add new instance data to allocator structure.
10647
10648 This, in combination with _ComputeClusterData, will create the
10649 correct structure needed as input for the allocator.
10650
10651 The checks for the completeness of the opcode must have already been
10652 done.
10653
10654 """
10655 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10656
10657 if self.disk_template in constants.DTS_NET_MIRROR:
10658 self.required_nodes = 2
10659 else:
10660 self.required_nodes = 1
10661 request = {
10662 "name": self.name,
10663 "disk_template": self.disk_template,
10664 "tags": self.tags,
10665 "os": self.os,
10666 "vcpus": self.vcpus,
10667 "memory": self.mem_size,
10668 "disks": self.disks,
10669 "disk_space_total": disk_space,
10670 "nics": self.nics,
10671 "required_nodes": self.required_nodes,
10672 }
10673 return request
10674
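# For a network-mirrored template the request built above would look roughly
# like this. Sketch only: every value is invented, required_nodes is 2 because
# the template is in DTS_NET_MIRROR, and disk_space_total would really come
# from _ComputeDiskSize.
example_request = {
  "name": "web1.example.com",
  "disk_template": "drbd",
  "tags": [],
  "os": "debian-image",
  "vcpus": 1,
  "memory": 512,
  "disks": [{"size": 1024, "mode": "w"}],
  "disk_space_total": 1152,
  "nics": [{"mac": "00:11:22:33:44:55", "ip": None, "bridge": None}],
  "required_nodes": 2,
}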
10675 - def _AddRelocateInstance(self):
10676 """Add relocate instance data to allocator structure.
10677
10678 This, in combination with _ComputeClusterData, will create the
10679 correct structure needed as input for the allocator.
10680
10681 The checks for the completeness of the opcode must have already been
10682 done.
10683
10684 """
10685 instance = self.cfg.GetInstanceInfo(self.name)
10686 if instance is None:
10687 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10688 " IAllocator" % self.name)
10689
10690 if instance.disk_template not in constants.DTS_NET_MIRROR:
10691 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10692 errors.ECODE_INVAL)
10693
10694 if len(instance.secondary_nodes) != 1:
10695 raise errors.OpPrereqError("Instance doesn't have exactly one secondary node",
10696 errors.ECODE_STATE)
10697
10698 self.required_nodes = 1
10699 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10700 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10701
10702 request = {
10703 "name": self.name,
10704 "disk_space_total": disk_space,
10705 "required_nodes": self.required_nodes,
10706 "relocate_from": self.relocate_from,
10707 }
10708 return request
10709
10710 - def _AddEvacuateNodes(self):
10711 """Add evacuate nodes data to allocator structure. 10712 10713 """ 10714 request = { 10715 "evac_nodes": self.evac_nodes 10716 } 10717 return request
10718
10719 - def _BuildInputData(self, fn):
10720 """Build input data structures. 10721 10722 """ 10723 self._ComputeClusterData() 10724 10725 request = fn() 10726 request["type"] = self.mode 10727 self.in_data["request"] = request 10728 10729 self.in_text = serializer.Dump(self.in_data)
10730
10731 - def Run(self, name, validate=True, call_fn=None):
10732 """Run an instance allocator and return the results. 10733 10734 """ 10735 if call_fn is None: 10736 call_fn = self.rpc.call_iallocator_runner 10737 10738 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) 10739 result.Raise("Failure while running the iallocator script") 10740 10741 self.out_text = result.payload 10742 if validate: 10743 self._ValidateResult()
10744
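# How the class is typically driven can be seen in LUTestAllocator.Exec further
# down; a condensed sketch for the relocation case. Pseudocode only: cfg and
# rpc would be a LogicalUnit's self.cfg / self.rpc, the allocator name and the
# node/instance names are invented, and the constructor keywords follow
# _RELO_KEYS.
ial = IAllocator(cfg, rpc,
                 mode=constants.IALLOCATOR_MODE_RELOC,
                 name="web1.example.com",
                 relocate_from=["node2.example.com"])
ial.Run("hail")            # runs the external script on the master node
if not ial.success:
  raise errors.OpExecError("Relocation failed: %s" % ial.info)
new_nodes = ial.result     # guaranteed to be a list by _ValidateResult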
10745 - def _ValidateResult(self):
10746 """Process the allocator results.
10747
10748 This will process the results and, if successful, save them in
10749 self.out_data and the other result attributes.
10750
10751 """
10752 try:
10753 rdict = serializer.Load(self.out_text)
10754 except Exception, err:
10755 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10756
10757 if not isinstance(rdict, dict):
10758 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10759
10760 # TODO: remove backwards compatibility in later versions
10761 if "nodes" in rdict and "result" not in rdict:
10762 rdict["result"] = rdict["nodes"]
10763 del rdict["nodes"]
10764
10765 for key in "success", "info", "result":
10766 if key not in rdict:
10767 raise errors.OpExecError("Can't parse iallocator results:"
10768 " missing key '%s'" % key)
10769 setattr(self, key, rdict[key])
10770
10771 if not isinstance(rdict["result"], list):
10772 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10773 " is not a list")
10774 self.out_data = rdict
10775
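# _ValidateResult above accepts any JSON object carrying the three keys it
# checks, plus a legacy spelling. Two illustrative replies (node names
# invented; only the mandatory keys and the list type of "result" are what
# the code actually enforces):
example_reply = {
  "success": True,
  "info": "allocation successful",
  "result": ["node1.example.com", "node3.example.com"],
}

# A pre-"result" reply using the legacy "nodes" key, which the
# backwards-compatibility branch above rewrites into "result":
legacy_reply = {
  "success": True,
  "info": "allocation successful",
  "nodes": ["node1.example.com", "node3.example.com"],
}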
10776 10777 -class LUTestAllocator(NoHooksLU):
10778 """Run allocator tests. 10779 10780 This LU runs the allocator tests 10781 10782 """ 10783 _OP_PARAMS = [ 10784 ("direction", ht.NoDefault, 10785 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)), 10786 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)), 10787 ("name", ht.NoDefault, ht.TNonEmptyString), 10788 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf( 10789 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]), 10790 ht.TOr(ht.TNone, ht.TNonEmptyString))))), 10791 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)), 10792 ("hypervisor", None, ht.TMaybeString), 10793 ("allocator", None, ht.TMaybeString), 10794 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), 10795 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)), 10796 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)), 10797 ("os", None, ht.TMaybeString), 10798 ("disk_template", None, ht.TMaybeString), 10799 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))), 10800 ] 10801
10802 - def CheckPrereq(self):
10803 """Check prerequisites.
10804
10805 This checks the opcode parameters depending on the direction and mode of the test.
10806
10807 """
10808 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10809 for attr in ["mem_size", "disks", "disk_template",
10810 "os", "tags", "nics", "vcpus"]:
10811 if not hasattr(self.op, attr):
10812 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10813 attr, errors.ECODE_INVAL)
10814 iname = self.cfg.ExpandInstanceName(self.op.name)
10815 if iname is not None:
10816 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10817 iname, errors.ECODE_EXISTS)
10818 if not isinstance(self.op.nics, list):
10819 raise errors.OpPrereqError("Invalid parameter 'nics'",
10820 errors.ECODE_INVAL)
10821 if not isinstance(self.op.disks, list):
10822 raise errors.OpPrereqError("Invalid parameter 'disks'",
10823 errors.ECODE_INVAL)
10824 for row in self.op.disks:
10825 if (not isinstance(row, dict) or
10826 "size" not in row or
10827 not isinstance(row["size"], int) or
10828 "mode" not in row or
10829 row["mode"] not in ['r', 'w']):
10830 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10831 " parameter", errors.ECODE_INVAL)
10832 if self.op.hypervisor is None:
10833 self.op.hypervisor = self.cfg.GetHypervisorType()
10834 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10835 fname = _ExpandInstanceName(self.cfg, self.op.name)
10836 self.op.name = fname
10837 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10838 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10839 if not hasattr(self.op, "evac_nodes"):
10840 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10841 " opcode input", errors.ECODE_INVAL)
10842 else:
10843 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10844 self.op.mode, errors.ECODE_INVAL)
10845
10846 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10847 if self.op.allocator is None:
10848 raise errors.OpPrereqError("Missing allocator name",
10849 errors.ECODE_INVAL)
10850 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10851 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10852 self.op.direction, errors.ECODE_INVAL)
10853
10854 - def Exec(self, feedback_fn):
10855 """Run the allocator test.
10856
10857 """
10858 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10859 ial = IAllocator(self.cfg, self.rpc,
10860 mode=self.op.mode,
10861 name=self.op.name,
10862 mem_size=self.op.mem_size,
10863 disks=self.op.disks,
10864 disk_template=self.op.disk_template,
10865 os=self.op.os,
10866 tags=self.op.tags,
10867 nics=self.op.nics,
10868 vcpus=self.op.vcpus,
10869 hypervisor=self.op.hypervisor,
10870 )
10871 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10872 ial = IAllocator(self.cfg, self.rpc,
10873 mode=self.op.mode,
10874 name=self.op.name,
10875 relocate_from=list(self.relocate_from),
10876 )
10877 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10878 ial = IAllocator(self.cfg, self.rpc,
10879 mode=self.op.mode,
10880 evac_nodes=self.op.evac_nodes)
10881 else:
10882 raise errors.ProgrammerError("Unhandled mode %s in"
10883 " LUTestAllocator.Exec", self.op.mode)
10884
10885 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10886 result = ial.in_text
10887 else:
10888 ial.Run(self.op.allocator, validate=False)
10889 result = ial.out_text
10890 return result
10891