
Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable-msg=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import platform 
   36  import logging 
   37  import copy 
   38  import OpenSSL 
   39  import socket 
   40  import tempfile 
   41  import shutil 
   42  import itertools 
   43  import operator 
   44   
   45  from ganeti import ssh 
   46  from ganeti import utils 
   47  from ganeti import errors 
   48  from ganeti import hypervisor 
   49  from ganeti import locking 
   50  from ganeti import constants 
   51  from ganeti import objects 
   52  from ganeti import serializer 
   53  from ganeti import ssconf 
   54  from ganeti import uidpool 
   55  from ganeti import compat 
   56  from ganeti import masterd 
   57  from ganeti import netutils 
   58  from ganeti import query 
   59  from ganeti import qlang 
   60  from ganeti import opcodes 
   61   
   62  import ganeti.masterd.instance # pylint: disable-msg=W0611 
63 64 65 -def _SupportsOob(cfg, node):
66 """Tells if node supports OOB. 67 68 @type cfg: L{config.ConfigWriter} 69 @param cfg: The cluster configuration 70 @type node: L{objects.Node} 71 @param node: The node 72 @return: The OOB script if supported or an empty string otherwise 73 74 """ 75 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76
77 78 # End types 79 -class LogicalUnit(object):
80 """Logical Unit base class. 81 82 Subclasses must follow these rules: 83 - implement ExpandNames 84 - implement CheckPrereq (except when tasklets are used) 85 - implement Exec (except when tasklets are used) 86 - implement BuildHooksEnv 87 - redefine HPATH and HTYPE 88 - optionally redefine their run requirements: 89 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 90 91 Note that all commands require root permissions. 92 93 @ivar dry_run_result: the value (if any) that will be returned to the caller 94 in dry-run mode (signalled by opcode dry_run parameter) 95 96 """ 97 HPATH = None 98 HTYPE = None 99 REQ_BGL = True 100
101 - def __init__(self, processor, op, context, rpc):
102 """Constructor for LogicalUnit. 103 104 This needs to be overridden in derived classes in order to check op 105 validity. 106 107 """ 108 self.proc = processor 109 self.op = op 110 self.cfg = context.cfg 111 self.context = context 112 self.rpc = rpc 113 # Dicts used to declare locking needs to mcpu 114 self.needed_locks = None 115 self.acquired_locks = {} 116 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 117 self.add_locks = {} 118 self.remove_locks = {} 119 # Used to force good behavior when calling helper functions 120 self.recalculate_locks = {} 121 self.__ssh = None 122 # logging 123 self.Log = processor.Log # pylint: disable-msg=C0103 124 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103 125 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103 126 self.LogStep = processor.LogStep # pylint: disable-msg=C0103 127 # support for dry-run 128 self.dry_run_result = None 129 # support for generic debug attribute 130 if (not hasattr(self.op, "debug_level") or 131 not isinstance(self.op.debug_level, int)): 132 self.op.debug_level = 0 133 134 # Tasklets 135 self.tasklets = None 136 137 # Validate opcode parameters and set defaults 138 self.op.Validate(True) 139 140 self.CheckArguments()
141
142 - def __GetSSH(self):
143 """Returns the SshRunner object 144 145 """ 146 if not self.__ssh: 147 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName()) 148 return self.__ssh
149 150 ssh = property(fget=__GetSSH) 151
152 - def CheckArguments(self):
153 """Check syntactic validity for the opcode arguments. 154 155 This method is for doing a simple syntactic check and ensure 156 validity of opcode parameters, without any cluster-related 157 checks. While the same can be accomplished in ExpandNames and/or 158 CheckPrereq, doing these separate is better because: 159 160 - ExpandNames is left as as purely a lock-related function 161 - CheckPrereq is run after we have acquired locks (and possible 162 waited for them) 163 164 The function is allowed to change the self.op attribute so that 165 later methods can no longer worry about missing parameters. 166 167 """ 168 pass
169
170 - def ExpandNames(self):
171 """Expand names for this LU. 172 173 This method is called before starting to execute the opcode, and it should 174 update all the parameters of the opcode to their canonical form (e.g. a 175 short node name must be fully expanded after this method has successfully 176 completed). This way locking, hooks, logging, etc. can work correctly. 177 178 LUs which implement this method must also populate the self.needed_locks 179 member, as a dict with lock levels as keys, and a list of needed lock names 180 as values. Rules: 181 182 - use an empty dict if you don't need any lock 183 - if you don't need any lock at a particular level omit that level 184 - don't put anything for the BGL level 185 - if you want all locks at a level use locking.ALL_SET as a value 186 187 If you need to share locks (rather than acquire them exclusively) at one 188 level you can modify self.share_locks, setting a true value (usually 1) for 189 that level. By default locks are not shared. 190 191 This function can also define a list of tasklets, which then will be 192 executed in order instead of the usual LU-level CheckPrereq and Exec 193 functions, if those are not defined by the LU. 194 195 Examples:: 196 197 # Acquire all nodes and one instance 198 self.needed_locks = { 199 locking.LEVEL_NODE: locking.ALL_SET, 200 locking.LEVEL_INSTANCE: ['instance1.example.com'], 201 } 202 # Acquire just two nodes 203 self.needed_locks = { 204 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 205 } 206 # Acquire no locks 207 self.needed_locks = {} # No, you can't leave it to the default value None 208 209 """ 210 # The implementation of this method is mandatory only if the new LU is 211 # concurrent, so that old LUs don't need to be changed all at the same 212 # time. 213 if self.REQ_BGL: 214 self.needed_locks = {} # Exclusive LUs don't need locks. 215 else: 216 raise NotImplementedError
217
218 - def DeclareLocks(self, level):
219 """Declare LU locking needs for a level 220 221 While most LUs can just declare their locking needs at ExpandNames time, 222 sometimes there's the need to calculate some locks after having acquired 223 the ones before. This function is called just before acquiring locks at a 224 particular level, but after acquiring the ones at lower levels, and permits 225 such calculations. It can be used to modify self.needed_locks, and by 226 default it does nothing. 227 228 This function is only called if you have something already set in 229 self.needed_locks for the level. 230 231 @param level: Locking level which is going to be locked 232 @type level: member of ganeti.locking.LEVELS 233 234 """
235
236 - def CheckPrereq(self):
237 """Check prerequisites for this LU. 238 239 This method should check that the prerequisites for the execution 240 of this LU are fulfilled. It can do internode communication, but 241 it should be idempotent - no cluster or system changes are 242 allowed. 243 244 The method should raise errors.OpPrereqError in case something is 245 not fulfilled. Its return value is ignored. 246 247 This method should also update all the parameters of the opcode to 248 their canonical form if it hasn't been done by ExpandNames before. 249 250 """ 251 if self.tasklets is not None: 252 for (idx, tl) in enumerate(self.tasklets): 253 logging.debug("Checking prerequisites for tasklet %s/%s", 254 idx + 1, len(self.tasklets)) 255 tl.CheckPrereq() 256 else: 257 pass
258
259 - def Exec(self, feedback_fn):
260 """Execute the LU. 261 262 This method should implement the actual work. It should raise 263 errors.OpExecError for failures that are somewhat dealt with in 264 code, or expected. 265 266 """ 267 if self.tasklets is not None: 268 for (idx, tl) in enumerate(self.tasklets): 269 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 270 tl.Exec(feedback_fn) 271 else: 272 raise NotImplementedError
273
274 - def BuildHooksEnv(self):
275 """Build hooks environment for this LU. 276 277 This method should return a three-node tuple consisting of: a dict 278 containing the environment that will be used for running the 279 specific hook for this LU, a list of node names on which the hook 280 should run before the execution, and a list of node names on which 281 the hook should run after the execution. 282 283 The keys of the dict must not have 'GANETI_' prefixed as this will 284 be handled in the hooks runner. Also note additional keys will be 285 added by the hooks runner. If the LU doesn't define any 286 environment, an empty dict (and not None) should be returned. 287 288 No nodes should be returned as an empty list (and not None). 289 290 Note that if the HPATH for a LU class is None, this function will 291 not be called. 292 293 """ 294 raise NotImplementedError
295
296 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
297 """Notify the LU about the results of its hooks. 298 299 This method is called every time a hooks phase is executed, and notifies 300 the Logical Unit about the hooks' result. The LU can then use it to alter 301 its result based on the hooks. By default the method does nothing and the 302 previous result is passed back unchanged but any LU can define it if it 303 wants to use the local cluster hook-scripts somehow. 304 305 @param phase: one of L{constants.HOOKS_PHASE_POST} or 306 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 307 @param hook_results: the results of the multi-node hooks rpc call 308 @param feedback_fn: function used send feedback back to the caller 309 @param lu_result: the previous Exec result this LU had, or None 310 in the PRE phase 311 @return: the new Exec result, based on the previous result 312 and hook results 313 314 """ 315 # API must be kept, thus we ignore the unused argument and could 316 # be a function warnings 317 # pylint: disable-msg=W0613,R0201 318 return lu_result
319
320 - def _ExpandAndLockInstance(self):
321 """Helper function to expand and lock an instance. 322 323 Many LUs that work on an instance take its name in self.op.instance_name 324 and need to expand it and then declare the expanded name for locking. This 325 function does it, and then updates self.op.instance_name to the expanded 326 name. It also initializes needed_locks as a dict, if this hasn't been done 327 before. 328 329 """ 330 if self.needed_locks is None: 331 self.needed_locks = {} 332 else: 333 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 334 "_ExpandAndLockInstance called with instance-level locks set" 335 self.op.instance_name = _ExpandInstanceName(self.cfg, 336 self.op.instance_name) 337 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338
339 - def _LockInstancesNodes(self, primary_only=False):
340 """Helper function to declare instances' nodes for locking. 341 342 This function should be called after locking one or more instances to lock 343 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 344 with all primary or secondary nodes for instances already locked and 345 present in self.needed_locks[locking.LEVEL_INSTANCE]. 346 347 It should be called from DeclareLocks, and for safety only works if 348 self.recalculate_locks[locking.LEVEL_NODE] is set. 349 350 In the future it may grow parameters to just lock some instance's nodes, or 351 to just lock primaries or secondary nodes, if needed. 352 353 If should be called in DeclareLocks in a way similar to:: 354 355 if level == locking.LEVEL_NODE: 356 self._LockInstancesNodes() 357 358 @type primary_only: boolean 359 @param primary_only: only lock primary nodes of locked instances 360 361 """ 362 assert locking.LEVEL_NODE in self.recalculate_locks, \ 363 "_LockInstancesNodes helper function called with no nodes to recalculate" 364 365 # TODO: check if we're really been called with the instance locks held 366 367 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 368 # future we might want to have different behaviors depending on the value 369 # of self.recalculate_locks[locking.LEVEL_NODE] 370 wanted_nodes = [] 371 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 372 instance = self.context.cfg.GetInstanceInfo(instance_name) 373 wanted_nodes.append(instance.primary_node) 374 if not primary_only: 375 wanted_nodes.extend(instance.secondary_nodes) 376 377 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE: 378 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes 379 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND: 380 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes) 381 382 del self.recalculate_locks[locking.LEVEL_NODE]
383
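
To make the contract described in the docstrings above concrete, here is a minimal sketch of an LU subclass; it is not part of the module, and the class name, hooks path and overall behaviour are assumed purely for illustration:

class _ExampleInstanceNoopLU(LogicalUnit):
  """Illustrative-only LU that locks one instance and does nothing."""
  HPATH = "instance-noop"            # hypothetical hooks path
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    # canonicalise self.op.instance_name and declare the instance lock
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    # the name has been expanded, so the instance must exist in the config
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, "Expanded instance not found"

  def BuildHooksEnv(self):
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, nl, nl

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for %s" % self.instance.name)
    return True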
384 385 -class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
386 """Simple LU which runs no hooks. 387 388 This LU is intended as a parent for other LogicalUnits which will 389 run no hooks, in order to reduce duplicate code. 390 391 """ 392 HPATH = None 393 HTYPE = None 394
395 - def BuildHooksEnv(self):
396 """Empty BuildHooksEnv for NoHooksLu. 397 398 This just raises an error. 399 400 """ 401 assert False, "BuildHooksEnv called for NoHooksLUs"
402
403 404 -class Tasklet:
405 """Tasklet base class. 406 407 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 408 they can mix legacy code with tasklets. Locking needs to be done in the LU, 409 tasklets know nothing about locks. 410 411 Subclasses must follow these rules: 412 - Implement CheckPrereq 413 - Implement Exec 414 415 """
416 - def __init__(self, lu):
417 self.lu = lu 418 419 # Shortcuts 420 self.cfg = lu.cfg 421 self.rpc = lu.rpc
422
423 - def CheckPrereq(self):
424 """Check prerequisites for this tasklets. 425 426 This method should check whether the prerequisites for the execution of 427 this tasklet are fulfilled. It can do internode communication, but it 428 should be idempotent - no cluster or system changes are allowed. 429 430 The method should raise errors.OpPrereqError in case something is not 431 fulfilled. Its return value is ignored. 432 433 This method should also update all parameters to their canonical form if it 434 hasn't been done before. 435 436 """ 437 pass
438
439 - def Exec(self, feedback_fn):
440 """Execute the tasklet. 441 442 This method should implement the actual work. It should raise 443 errors.OpExecError for failures that are somewhat dealt with in code, or 444 expected. 445 446 """ 447 raise NotImplementedError
448
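
As a hedged illustration of how tasklets are typically wired up (all names below are invented): a Tasklet subclass implements CheckPrereq and Exec, and the owning LU assigns a list of tasklet instances to self.tasklets, usually from ExpandNames; the base LogicalUnit.CheckPrereq/Exec then drive them in order.

class _ExampleNoopTasklet(Tasklet):
  """Illustrative-only tasklet operating on a single instance name."""
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    if self.cfg.GetInstanceInfo(self.instance_name) is None:
      raise errors.OpPrereqError("Instance %s unknown" % self.instance_name,
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Would act on %s" % self.instance_name)

# In the owning LU, typically at the end of ExpandNames:
#   self.tasklets = [_ExampleNoopTasklet(self, name) for name in names]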
449 450 -class _QueryBase:
451 """Base for query utility classes. 452 453 """ 454 #: Attribute holding field definitions 455 FIELDS = None 456
457 - def __init__(self, names, fields, use_locking):
458 """Initializes this class. 459 460 """ 461 self.names = names 462 self.use_locking = use_locking 463 464 self.query = query.Query(self.FIELDS, fields) 465 self.requested_data = self.query.RequestedData() 466 467 self.do_locking = None 468 self.wanted = None
469
470 - def _GetNames(self, lu, all_names, lock_level):
471 """Helper function to determine names asked for in the query. 472 473 """ 474 if self.do_locking: 475 names = lu.acquired_locks[lock_level] 476 else: 477 names = all_names 478 479 if self.wanted == locking.ALL_SET: 480 assert not self.names 481 # caller didn't specify names, so ordering is not important 482 return utils.NiceSort(names) 483 484 # caller specified names and we must keep the same order 485 assert self.names 486 assert not self.do_locking or lu.acquired_locks[lock_level] 487 488 missing = set(self.wanted).difference(names) 489 if missing: 490 raise errors.OpExecError("Some items were removed before retrieving" 491 " their data: %s" % missing) 492 493 # Return expanded names 494 return self.wanted
495 496 @classmethod
497 - def FieldsQuery(cls, fields):
498 """Returns list of available fields. 499 500 @return: List of L{objects.QueryFieldDefinition} 501 502 """ 503 return query.QueryFields(cls.FIELDS, fields)
504
505 - def ExpandNames(self, lu):
506 """Expand names for this query. 507 508 See L{LogicalUnit.ExpandNames}. 509 510 """ 511 raise NotImplementedError()
512
513 - def DeclareLocks(self, lu, level):
514 """Declare locks for this query. 515 516 See L{LogicalUnit.DeclareLocks}. 517 518 """ 519 raise NotImplementedError()
520
521 - def _GetQueryData(self, lu):
522 """Collects all data for this query. 523 524 @return: Query data object 525 526 """ 527 raise NotImplementedError()
528
529 - def NewStyleQuery(self, lu):
530 """Collect data and execute query. 531 532 """ 533 return query.GetQueryResponse(self.query, self._GetQueryData(lu))
534
535 - def OldStyleQuery(self, lu):
536 """Collect data and execute query. 537 538 """ 539 return self.query.OldStyleQuery(self._GetQueryData(lu))
540
541 542 -def _GetWantedNodes(lu, nodes):
543 """Returns list of checked and expanded node names. 544 545 @type lu: L{LogicalUnit} 546 @param lu: the logical unit on whose behalf we execute 547 @type nodes: list 548 @param nodes: list of node names or None for all nodes 549 @rtype: list 550 @return: the list of nodes, sorted 551 @raise errors.ProgrammerError: if the nodes parameter is wrong type 552 553 """ 554 if nodes: 555 return [_ExpandNodeName(lu.cfg, name) for name in nodes] 556 557 return utils.NiceSort(lu.cfg.GetNodeList())
558
559 560 -def _GetWantedInstances(lu, instances):
561 """Returns list of checked and expanded instance names. 562 563 @type lu: L{LogicalUnit} 564 @param lu: the logical unit on whose behalf we execute 565 @type instances: list 566 @param instances: list of instance names or None for all instances 567 @rtype: list 568 @return: the list of instances, sorted 569 @raise errors.OpPrereqError: if the instances parameter is wrong type 570 @raise errors.OpPrereqError: if any of the passed instances is not found 571 572 """ 573 if instances: 574 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 575 else: 576 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 577 return wanted
578
579 580 -def _GetUpdatedParams(old_params, update_dict, 581 use_default=True, use_none=False):
582 """Return the new version of a parameter dictionary. 583 584 @type old_params: dict 585 @param old_params: old parameters 586 @type update_dict: dict 587 @param update_dict: dict containing new parameter values, or 588 constants.VALUE_DEFAULT to reset the parameter to its default 589 value 590 @param use_default: boolean 591 @type use_default: whether to recognise L{constants.VALUE_DEFAULT} 592 values as 'to be deleted' values 593 @param use_none: boolean 594 @type use_none: whether to recognise C{None} values as 'to be 595 deleted' values 596 @rtype: dict 597 @return: the new parameter dictionary 598 599 """ 600 params_copy = copy.deepcopy(old_params) 601 for key, val in update_dict.iteritems(): 602 if ((use_default and val == constants.VALUE_DEFAULT) or 603 (use_none and val is None)): 604 try: 605 del params_copy[key] 606 except KeyError: 607 pass 608 else: 609 params_copy[key] = val 610 return params_copy
611
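
The interaction of use_default and use_none in _GetUpdatedParams is easiest to see with a small, purely illustrative example (the parameter names are made up):

old = {"mem": 512, "vcpus": 2, "extra": "x"}
upd = {"mem": constants.VALUE_DEFAULT,  # reset to default: key is dropped
       "vcpus": 4,                      # plain override
       "extra": None}                   # dropped only when use_none=True
_GetUpdatedParams(old, upd)                 # -> {"vcpus": 4, "extra": None}
_GetUpdatedParams(old, upd, use_none=True)  # -> {"vcpus": 4}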
612 613 -def _CheckOutputFields(static, dynamic, selected):
614 """Checks whether all selected fields are valid. 615 616 @type static: L{utils.FieldSet} 617 @param static: static fields set 618 @type dynamic: L{utils.FieldSet} 619 @param dynamic: dynamic fields set 620 621 """ 622 f = utils.FieldSet() 623 f.Extend(static) 624 f.Extend(dynamic) 625 626 delta = f.NonMatching(selected) 627 if delta: 628 raise errors.OpPrereqError("Unknown output fields selected: %s" 629 % ",".join(delta), errors.ECODE_INVAL)
630
631 632 -def _CheckGlobalHvParams(params):
633 """Validates that given hypervisor params are not global ones. 634 635 This will ensure that instances don't get customised versions of 636 global params. 637 638 """ 639 used_globals = constants.HVC_GLOBALS.intersection(params) 640 if used_globals: 641 msg = ("The following hypervisor parameters are global and cannot" 642 " be customized at instance level, please modify them at" 643 " cluster level: %s" % utils.CommaJoin(used_globals)) 644 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
645
646 647 -def _CheckNodeOnline(lu, node, msg=None):
648 """Ensure that a given node is online. 649 650 @param lu: the LU on behalf of which we make the check 651 @param node: the node to check 652 @param msg: if passed, should be a message to replace the default one 653 @raise errors.OpPrereqError: if the node is offline 654 655 """ 656 if msg is None: 657 msg = "Can't use offline node" 658 if lu.cfg.GetNodeInfo(node).offline: 659 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
660
661 662 -def _CheckNodeNotDrained(lu, node):
663 """Ensure that a given node is not drained. 664 665 @param lu: the LU on behalf of which we make the check 666 @param node: the node to check 667 @raise errors.OpPrereqError: if the node is drained 668 669 """ 670 if lu.cfg.GetNodeInfo(node).drained: 671 raise errors.OpPrereqError("Can't use drained node %s" % node, 672 errors.ECODE_STATE)
673
674 675 -def _CheckNodeVmCapable(lu, node):
676 """Ensure that a given node is vm capable. 677 678 @param lu: the LU on behalf of which we make the check 679 @param node: the node to check 680 @raise errors.OpPrereqError: if the node is not vm capable 681 682 """ 683 if not lu.cfg.GetNodeInfo(node).vm_capable: 684 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 685 errors.ECODE_STATE)
686
687 688 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
689 """Ensure that a node supports a given OS. 690 691 @param lu: the LU on behalf of which we make the check 692 @param node: the node to check 693 @param os_name: the OS to query about 694 @param force_variant: whether to ignore variant errors 695 @raise errors.OpPrereqError: if the node is not supporting the OS 696 697 """ 698 result = lu.rpc.call_os_get(node, os_name) 699 result.Raise("OS '%s' not in supported OS list for node %s" % 700 (os_name, node), 701 prereq=True, ecode=errors.ECODE_INVAL) 702 if not force_variant: 703 _CheckOSVariant(result.payload, os_name)
704
705 706 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
707 """Ensure that a node has the given secondary ip. 708 709 @type lu: L{LogicalUnit} 710 @param lu: the LU on behalf of which we make the check 711 @type node: string 712 @param node: the node to check 713 @type secondary_ip: string 714 @param secondary_ip: the ip to check 715 @type prereq: boolean 716 @param prereq: whether to throw a prerequisite or an execute error 717 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 718 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 719 720 """ 721 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 722 result.Raise("Failure checking secondary ip on node %s" % node, 723 prereq=prereq, ecode=errors.ECODE_ENVIRON) 724 if not result.payload: 725 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 726 " please fix and re-run this command" % secondary_ip) 727 if prereq: 728 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 729 else: 730 raise errors.OpExecError(msg)
731
732 733 -def _GetClusterDomainSecret():
734 """Reads the cluster domain secret. 735 736 """ 737 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 738 strict=True)
739
740 741 -def _CheckInstanceDown(lu, instance, reason):
742 """Ensure that an instance is not running.""" 743 if instance.admin_up: 744 raise errors.OpPrereqError("Instance %s is marked to be up, %s" % 745 (instance.name, reason), errors.ECODE_STATE) 746 747 pnode = instance.primary_node 748 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 749 ins_l.Raise("Can't contact node %s for instance information" % pnode, 750 prereq=True, ecode=errors.ECODE_ENVIRON) 751 752 if instance.name in ins_l.payload: 753 raise errors.OpPrereqError("Instance %s is running, %s" % 754 (instance.name, reason), errors.ECODE_STATE)
755
756 757 -def _ExpandItemName(fn, name, kind):
758 """Expand an item name. 759 760 @param fn: the function to use for expansion 761 @param name: requested item name 762 @param kind: text description ('Node' or 'Instance') 763 @return: the resolved (full) name 764 @raise errors.OpPrereqError: if the item is not found 765 766 """ 767 full_name = fn(name) 768 if full_name is None: 769 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 770 errors.ECODE_NOENT) 771 return full_name
772
773 774 -def _ExpandNodeName(cfg, name):
775 """Wrapper over L{_ExpandItemName} for nodes.""" 776 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
777
778 779 -def _ExpandInstanceName(cfg, name):
780 """Wrapper over L{_ExpandItemName} for instance.""" 781 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
782
783 784 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 785 memory, vcpus, nics, disk_template, disks, 786 bep, hvp, hypervisor_name):
787 """Builds instance related env variables for hooks 788 789 This builds the hook environment from individual variables. 790 791 @type name: string 792 @param name: the name of the instance 793 @type primary_node: string 794 @param primary_node: the name of the instance's primary node 795 @type secondary_nodes: list 796 @param secondary_nodes: list of secondary nodes as strings 797 @type os_type: string 798 @param os_type: the name of the instance's OS 799 @type status: boolean 800 @param status: the should_run status of the instance 801 @type memory: string 802 @param memory: the memory size of the instance 803 @type vcpus: string 804 @param vcpus: the count of VCPUs the instance has 805 @type nics: list 806 @param nics: list of tuples (ip, mac, mode, link) representing 807 the NICs the instance has 808 @type disk_template: string 809 @param disk_template: the disk template of the instance 810 @type disks: list 811 @param disks: the list of (size, mode) pairs 812 @type bep: dict 813 @param bep: the backend parameters for the instance 814 @type hvp: dict 815 @param hvp: the hypervisor parameters for the instance 816 @type hypervisor_name: string 817 @param hypervisor_name: the hypervisor for the instance 818 @rtype: dict 819 @return: the hook environment for this instance 820 821 """ 822 if status: 823 str_status = "up" 824 else: 825 str_status = "down" 826 env = { 827 "OP_TARGET": name, 828 "INSTANCE_NAME": name, 829 "INSTANCE_PRIMARY": primary_node, 830 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 831 "INSTANCE_OS_TYPE": os_type, 832 "INSTANCE_STATUS": str_status, 833 "INSTANCE_MEMORY": memory, 834 "INSTANCE_VCPUS": vcpus, 835 "INSTANCE_DISK_TEMPLATE": disk_template, 836 "INSTANCE_HYPERVISOR": hypervisor_name, 837 } 838 839 if nics: 840 nic_count = len(nics) 841 for idx, (ip, mac, mode, link) in enumerate(nics): 842 if ip is None: 843 ip = "" 844 env["INSTANCE_NIC%d_IP" % idx] = ip 845 env["INSTANCE_NIC%d_MAC" % idx] = mac 846 env["INSTANCE_NIC%d_MODE" % idx] = mode 847 env["INSTANCE_NIC%d_LINK" % idx] = link 848 if mode == constants.NIC_MODE_BRIDGED: 849 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 850 else: 851 nic_count = 0 852 853 env["INSTANCE_NIC_COUNT"] = nic_count 854 855 if disks: 856 disk_count = len(disks) 857 for idx, (size, mode) in enumerate(disks): 858 env["INSTANCE_DISK%d_SIZE" % idx] = size 859 env["INSTANCE_DISK%d_MODE" % idx] = mode 860 else: 861 disk_count = 0 862 863 env["INSTANCE_DISK_COUNT"] = disk_count 864 865 for source, kind in [(bep, "BE"), (hvp, "HV")]: 866 for key, value in source.items(): 867 env["INSTANCE_%s_%s" % (kind, key)] = value 868 869 return env
870
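
For illustration only (instance, node, OS and bridge names are invented), a call such as the following would produce an environment along the lines sketched in the comments:

env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                            "debootstrap", True, 512, 1,
                            [("198.51.100.10", "aa:00:00:11:22:33",
                              constants.NIC_MODE_BRIDGED, "xen-br0")],
                            "plain", [(10240, "rw")], {}, {}, "xen-pvm")
# env now contains, among other keys:
#   "OP_TARGET": "inst1.example.com"     "INSTANCE_PRIMARY": "node1.example.com"
#   "INSTANCE_STATUS": "up"              "INSTANCE_NIC_COUNT": 1
#   "INSTANCE_NIC0_BRIDGE": "xen-br0"    "INSTANCE_DISK0_SIZE": 10240
#   "INSTANCE_DISK_COUNT": 1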
871 872 -def _NICListToTuple(lu, nics):
873 """Build a list of nic information tuples. 874 875 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 876 value in LUInstanceQueryData. 877 878 @type lu: L{LogicalUnit} 879 @param lu: the logical unit on whose behalf we execute 880 @type nics: list of L{objects.NIC} 881 @param nics: list of nics to convert to hooks tuples 882 883 """ 884 hooks_nics = [] 885 cluster = lu.cfg.GetClusterInfo() 886 for nic in nics: 887 ip = nic.ip 888 mac = nic.mac 889 filled_params = cluster.SimpleFillNIC(nic.nicparams) 890 mode = filled_params[constants.NIC_MODE] 891 link = filled_params[constants.NIC_LINK] 892 hooks_nics.append((ip, mac, mode, link)) 893 return hooks_nics
894
895 896 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
897 """Builds instance related env variables for hooks from an object. 898 899 @type lu: L{LogicalUnit} 900 @param lu: the logical unit on whose behalf we execute 901 @type instance: L{objects.Instance} 902 @param instance: the instance for which we should build the 903 environment 904 @type override: dict 905 @param override: dictionary with key/values that will override 906 our values 907 @rtype: dict 908 @return: the hook environment dictionary 909 910 """ 911 cluster = lu.cfg.GetClusterInfo() 912 bep = cluster.FillBE(instance) 913 hvp = cluster.FillHV(instance) 914 args = { 915 'name': instance.name, 916 'primary_node': instance.primary_node, 917 'secondary_nodes': instance.secondary_nodes, 918 'os_type': instance.os, 919 'status': instance.admin_up, 920 'memory': bep[constants.BE_MEMORY], 921 'vcpus': bep[constants.BE_VCPUS], 922 'nics': _NICListToTuple(lu, instance.nics), 923 'disk_template': instance.disk_template, 924 'disks': [(disk.size, disk.mode) for disk in instance.disks], 925 'bep': bep, 926 'hvp': hvp, 927 'hypervisor_name': instance.hypervisor, 928 } 929 if override: 930 args.update(override) 931 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
932
933 934 -def _AdjustCandidatePool(lu, exceptions):
935 """Adjust the candidate pool after node operations. 936 937 """ 938 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 939 if mod_list: 940 lu.LogInfo("Promoted nodes to master candidate role: %s", 941 utils.CommaJoin(node.name for node in mod_list)) 942 for name in mod_list: 943 lu.context.ReaddNode(name) 944 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 945 if mc_now > mc_max: 946 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 947 (mc_now, mc_max))
948
949 950 -def _DecideSelfPromotion(lu, exceptions=None):
951 """Decide whether I should promote myself as a master candidate. 952 953 """ 954 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 955 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 956 # the new node will increase mc_max with one, so: 957 mc_should = min(mc_should + 1, cp_size) 958 return mc_now < mc_should
959
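
A worked example with illustrative numbers: if candidate_pool_size is 10 and GetMasterCandidateStats reports mc_now == mc_should == 3, the node being added raises mc_should to min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself; with a pool size of 3 the cap keeps mc_should at 3 and no promotion happens.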
960 961 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
962 """Check that the brigdes needed by a list of nics exist. 963 964 """ 965 cluster = lu.cfg.GetClusterInfo() 966 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 967 brlist = [params[constants.NIC_LINK] for params in paramslist 968 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 969 if brlist: 970 result = lu.rpc.call_bridges_exist(target_node, brlist) 971 result.Raise("Error checking bridges on destination node '%s'" % 972 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
973
974 975 -def _CheckInstanceBridgesExist(lu, instance, node=None):
976 """Check that the brigdes needed by an instance exist. 977 978 """ 979 if node is None: 980 node = instance.primary_node 981 _CheckNicsBridgesExist(lu, instance.nics, node)
982
983 984 -def _CheckOSVariant(os_obj, name):
985 """Check whether an OS name conforms to the os variants specification. 986 987 @type os_obj: L{objects.OS} 988 @param os_obj: OS object to check 989 @type name: string 990 @param name: OS name passed by the user, to check for validity 991 992 """ 993 if not os_obj.supported_variants: 994 return 995 variant = objects.OS.GetVariant(name) 996 if not variant: 997 raise errors.OpPrereqError("OS name must include a variant", 998 errors.ECODE_INVAL) 999 1000 if variant not in os_obj.supported_variants: 1001 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1002
1003 1004 -def _GetNodeInstancesInner(cfg, fn):
1005 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1006
1007 1008 -def _GetNodeInstances(cfg, node_name):
1009 """Returns a list of all primary and secondary instances on a node. 1010 1011 """ 1012 1013 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1014
1015 1016 -def _GetNodePrimaryInstances(cfg, node_name):
1017 """Returns primary instances on a node. 1018 1019 """ 1020 return _GetNodeInstancesInner(cfg, 1021 lambda inst: node_name == inst.primary_node)
1022
1023 1024 -def _GetNodeSecondaryInstances(cfg, node_name):
1025 """Returns secondary instances on a node. 1026 1027 """ 1028 return _GetNodeInstancesInner(cfg, 1029 lambda inst: node_name in inst.secondary_nodes)
1030
1031 1032 -def _GetStorageTypeArgs(cfg, storage_type):
1033 """Returns the arguments for a storage type. 1034 1035 """ 1036 # Special case for file storage 1037 if storage_type == constants.ST_FILE: 1038 # storage.FileStorage wants a list of storage directories 1039 return [[cfg.GetFileStorageDir()]] 1040 1041 return []
1042
1043 1044 -def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1045 faulty = [] 1046 1047 for dev in instance.disks: 1048 cfg.SetDiskID(dev, node_name) 1049 1050 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks) 1051 result.Raise("Failed to get disk status from node %s" % node_name, 1052 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1053 1054 for idx, bdev_status in enumerate(result.payload): 1055 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1056 faulty.append(idx) 1057 1058 return faulty
1059
1060 1061 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1062 """Check the sanity of iallocator and node arguments and use the 1063 cluster-wide iallocator if appropriate. 1064 1065 Check that at most one of (iallocator, node) is specified. If none is 1066 specified, then the LU's opcode's iallocator slot is filled with the 1067 cluster-wide default iallocator. 1068 1069 @type iallocator_slot: string 1070 @param iallocator_slot: the name of the opcode iallocator slot 1071 @type node_slot: string 1072 @param node_slot: the name of the opcode target node slot 1073 1074 """ 1075 node = getattr(lu.op, node_slot, None) 1076 iallocator = getattr(lu.op, iallocator_slot, None) 1077 1078 if node is not None and iallocator is not None: 1079 raise errors.OpPrereqError("Do not specify both, iallocator and node.", 1080 errors.ECODE_INVAL) 1081 elif node is None and iallocator is None: 1082 default_iallocator = lu.cfg.GetDefaultIAllocator() 1083 if default_iallocator: 1084 setattr(lu.op, iallocator_slot, default_iallocator) 1085 else: 1086 raise errors.OpPrereqError("No iallocator or node given and no" 1087 " cluster-wide default iallocator found." 1088 " Please specify either an iallocator or a" 1089 " node, or set a cluster-wide default" 1090 " iallocator.")
1091
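
A typical call, sketched here with assumed opcode slot names, would come from an LU's CheckArguments or ExpandNames:

# Hypothetical slot names; the real names depend on the opcode definition.
_CheckIAllocatorOrNode(self, "iallocator", "remote_node")
# Afterwards exactly one of self.op.iallocator / self.op.remote_node is set
# (the former possibly filled in from the cluster-wide default iallocator),
# otherwise an OpPrereqError has been raised.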
1092 1093 -class LUClusterPostInit(LogicalUnit):
1094 """Logical unit for running hooks after cluster initialization. 1095 1096 """ 1097 HPATH = "cluster-init" 1098 HTYPE = constants.HTYPE_CLUSTER 1099
1100 - def BuildHooksEnv(self):
1101 """Build hooks env. 1102 1103 """ 1104 env = {"OP_TARGET": self.cfg.GetClusterName()} 1105 mn = self.cfg.GetMasterNode() 1106 return env, [], [mn]
1107
1108 - def Exec(self, feedback_fn):
1109 """Nothing to do. 1110 1111 """ 1112 return True
1113
1114 1115 -class LUClusterDestroy(LogicalUnit):
1116 """Logical unit for destroying the cluster. 1117 1118 """ 1119 HPATH = "cluster-destroy" 1120 HTYPE = constants.HTYPE_CLUSTER 1121
1122 - def BuildHooksEnv(self):
1123 """Build hooks env. 1124 1125 """ 1126 env = {"OP_TARGET": self.cfg.GetClusterName()} 1127 return env, [], []
1128
1129 - def CheckPrereq(self):
1130 """Check prerequisites. 1131 1132 This checks whether the cluster is empty. 1133 1134 Any errors are signaled by raising errors.OpPrereqError. 1135 1136 """ 1137 master = self.cfg.GetMasterNode() 1138 1139 nodelist = self.cfg.GetNodeList() 1140 if len(nodelist) != 1 or nodelist[0] != master: 1141 raise errors.OpPrereqError("There are still %d node(s) in" 1142 " this cluster." % (len(nodelist) - 1), 1143 errors.ECODE_INVAL) 1144 instancelist = self.cfg.GetInstanceList() 1145 if instancelist: 1146 raise errors.OpPrereqError("There are still %d instance(s) in" 1147 " this cluster." % len(instancelist), 1148 errors.ECODE_INVAL)
1149
1150 - def Exec(self, feedback_fn):
1151 """Destroys the cluster. 1152 1153 """ 1154 master = self.cfg.GetMasterNode() 1155 1156 # Run post hooks on master node before it's removed 1157 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 1158 try: 1159 hm.RunPhase(constants.HOOKS_PHASE_POST, [master]) 1160 except: 1161 # pylint: disable-msg=W0702 1162 self.LogWarning("Errors occurred running hooks on %s" % master) 1163 1164 result = self.rpc.call_node_stop_master(master, False) 1165 result.Raise("Could not disable the master role") 1166 1167 return master
1168
1169 1170 -def _VerifyCertificate(filename):
1171 """Verifies a certificate for LUClusterVerify. 1172 1173 @type filename: string 1174 @param filename: Path to PEM file 1175 1176 """ 1177 try: 1178 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1179 utils.ReadFile(filename)) 1180 except Exception, err: # pylint: disable-msg=W0703 1181 return (LUClusterVerify.ETYPE_ERROR, 1182 "Failed to load X509 certificate %s: %s" % (filename, err)) 1183 1184 (errcode, msg) = \ 1185 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1186 constants.SSL_CERT_EXPIRATION_ERROR) 1187 1188 if msg: 1189 fnamemsg = "While verifying %s: %s" % (filename, msg) 1190 else: 1191 fnamemsg = None 1192 1193 if errcode is None: 1194 return (None, fnamemsg) 1195 elif errcode == utils.CERT_WARNING: 1196 return (LUClusterVerify.ETYPE_WARNING, fnamemsg) 1197 elif errcode == utils.CERT_ERROR: 1198 return (LUClusterVerify.ETYPE_ERROR, fnamemsg) 1199 1200 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1201
1202 1203 -class LUClusterVerify(LogicalUnit):
1204 """Verifies the cluster status. 1205 1206 """ 1207 HPATH = "cluster-verify" 1208 HTYPE = constants.HTYPE_CLUSTER 1209 REQ_BGL = False 1210 1211 TCLUSTER = "cluster" 1212 TNODE = "node" 1213 TINSTANCE = "instance" 1214 1215 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG") 1216 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT") 1217 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE") 1218 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN") 1219 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT") 1220 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK") 1221 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK") 1222 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE") 1223 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS") 1224 ENODEDRBD = (TNODE, "ENODEDRBD") 1225 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER") 1226 ENODEFILECHECK = (TNODE, "ENODEFILECHECK") 1227 ENODEHOOKS = (TNODE, "ENODEHOOKS") 1228 ENODEHV = (TNODE, "ENODEHV") 1229 ENODELVM = (TNODE, "ENODELVM") 1230 ENODEN1 = (TNODE, "ENODEN1") 1231 ENODENET = (TNODE, "ENODENET") 1232 ENODEOS = (TNODE, "ENODEOS") 1233 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE") 1234 ENODEORPHANLV = (TNODE, "ENODEORPHANLV") 1235 ENODERPC = (TNODE, "ENODERPC") 1236 ENODESSH = (TNODE, "ENODESSH") 1237 ENODEVERSION = (TNODE, "ENODEVERSION") 1238 ENODESETUP = (TNODE, "ENODESETUP") 1239 ENODETIME = (TNODE, "ENODETIME") 1240 ENODEOOBPATH = (TNODE, "ENODEOOBPATH") 1241 1242 ETYPE_FIELD = "code" 1243 ETYPE_ERROR = "ERROR" 1244 ETYPE_WARNING = "WARNING" 1245 1246 _HOOKS_INDENT_RE = re.compile("^", re.M) 1247
1248 - class NodeImage(object):
1249 """A class representing the logical and physical status of a node. 1250 1251 @type name: string 1252 @ivar name: the node name to which this object refers 1253 @ivar volumes: a structure as returned from 1254 L{ganeti.backend.GetVolumeList} (runtime) 1255 @ivar instances: a list of running instances (runtime) 1256 @ivar pinst: list of configured primary instances (config) 1257 @ivar sinst: list of configured secondary instances (config) 1258 @ivar sbp: diction of {secondary-node: list of instances} of all peers 1259 of this node (config) 1260 @ivar mfree: free memory, as reported by hypervisor (runtime) 1261 @ivar dfree: free disk, as reported by the node (runtime) 1262 @ivar offline: the offline status (config) 1263 @type rpc_fail: boolean 1264 @ivar rpc_fail: whether the RPC verify call was successfull (overall, 1265 not whether the individual keys were correct) (runtime) 1266 @type lvm_fail: boolean 1267 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 1268 @type hyp_fail: boolean 1269 @ivar hyp_fail: whether the RPC call didn't return the instance list 1270 @type ghost: boolean 1271 @ivar ghost: whether this is a known node or not (config) 1272 @type os_fail: boolean 1273 @ivar os_fail: whether the RPC call didn't return valid OS data 1274 @type oslist: list 1275 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 1276 @type vm_capable: boolean 1277 @ivar vm_capable: whether the node can host instances 1278 1279 """
1280 - def __init__(self, offline=False, name=None, vm_capable=True):
1281 self.name = name 1282 self.volumes = {} 1283 self.instances = [] 1284 self.pinst = [] 1285 self.sinst = [] 1286 self.sbp = {} 1287 self.mfree = 0 1288 self.dfree = 0 1289 self.offline = offline 1290 self.vm_capable = vm_capable 1291 self.rpc_fail = False 1292 self.lvm_fail = False 1293 self.hyp_fail = False 1294 self.ghost = False 1295 self.os_fail = False 1296 self.oslist = {}
1297
1298 - def ExpandNames(self):
1299 self.needed_locks = { 1300 locking.LEVEL_NODE: locking.ALL_SET, 1301 locking.LEVEL_INSTANCE: locking.ALL_SET, 1302 } 1303 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1304
1305 - def _Error(self, ecode, item, msg, *args, **kwargs):
1306 """Format an error message. 1307 1308 Based on the opcode's error_codes parameter, either format a 1309 parseable error code, or a simpler error string. 1310 1311 This must be called only from Exec and functions called from Exec. 1312 1313 """ 1314 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1315 itype, etxt = ecode 1316 # first complete the msg 1317 if args: 1318 msg = msg % args 1319 # then format the whole message 1320 if self.op.error_codes: 1321 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1322 else: 1323 if item: 1324 item = " " + item 1325 else: 1326 item = "" 1327 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1328 # and finally report it via the feedback_fn 1329 self._feedback_fn(" - %s" % msg)
1330
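  # Illustration (made-up values) of the two output formats produced above:
  # with the opcode's error_codes option enabled the line passed to
  # feedback_fn looks like
  #   - ERROR:ENODESSH:node:node1.example.com:ssh communication failed
  # and without it like
  #   - ERROR: node node1.example.com: ssh communication failed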
1331 - def _ErrorIf(self, cond, *args, **kwargs):
1332 """Log an error message if the passed condition is True. 1333 1334 """ 1335 cond = bool(cond) or self.op.debug_simulate_errors 1336 if cond: 1337 self._Error(*args, **kwargs) 1338 # do not mark the operation as failed for WARN cases only 1339 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1340 self.bad = self.bad or cond
1341
1342 - def _VerifyNode(self, ninfo, nresult):
1343 """Perform some basic validation on data returned from a node. 1344 1345 - check the result data structure is well formed and has all the 1346 mandatory fields 1347 - check ganeti version 1348 1349 @type ninfo: L{objects.Node} 1350 @param ninfo: the node to check 1351 @param nresult: the results from the node 1352 @rtype: boolean 1353 @return: whether overall this call was successful (and we can expect 1354 reasonable values in the respose) 1355 1356 """ 1357 node = ninfo.name 1358 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1359 1360 # main result, nresult should be a non-empty dict 1361 test = not nresult or not isinstance(nresult, dict) 1362 _ErrorIf(test, self.ENODERPC, node, 1363 "unable to verify node: no data returned") 1364 if test: 1365 return False 1366 1367 # compares ganeti version 1368 local_version = constants.PROTOCOL_VERSION 1369 remote_version = nresult.get("version", None) 1370 test = not (remote_version and 1371 isinstance(remote_version, (list, tuple)) and 1372 len(remote_version) == 2) 1373 _ErrorIf(test, self.ENODERPC, node, 1374 "connection to node returned invalid data") 1375 if test: 1376 return False 1377 1378 test = local_version != remote_version[0] 1379 _ErrorIf(test, self.ENODEVERSION, node, 1380 "incompatible protocol versions: master %s," 1381 " node %s", local_version, remote_version[0]) 1382 if test: 1383 return False 1384 1385 # node seems compatible, we can actually try to look into its results 1386 1387 # full package version 1388 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 1389 self.ENODEVERSION, node, 1390 "software version mismatch: master %s, node %s", 1391 constants.RELEASE_VERSION, remote_version[1], 1392 code=self.ETYPE_WARNING) 1393 1394 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 1395 if ninfo.vm_capable and isinstance(hyp_result, dict): 1396 for hv_name, hv_result in hyp_result.iteritems(): 1397 test = hv_result is not None 1398 _ErrorIf(test, self.ENODEHV, node, 1399 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 1400 1401 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 1402 if ninfo.vm_capable and isinstance(hvp_result, list): 1403 for item, hv_name, hv_result in hvp_result: 1404 _ErrorIf(True, self.ENODEHV, node, 1405 "hypervisor %s parameter verify failure (source %s): %s", 1406 hv_name, item, hv_result) 1407 1408 test = nresult.get(constants.NV_NODESETUP, 1409 ["Missing NODESETUP results"]) 1410 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s", 1411 "; ".join(test)) 1412 1413 return True
1414
1415 - def _VerifyNodeTime(self, ninfo, nresult, 1416 nvinfo_starttime, nvinfo_endtime):
1417 """Check the node time. 1418 1419 @type ninfo: L{objects.Node} 1420 @param ninfo: the node to check 1421 @param nresult: the remote results for the node 1422 @param nvinfo_starttime: the start time of the RPC call 1423 @param nvinfo_endtime: the end time of the RPC call 1424 1425 """ 1426 node = ninfo.name 1427 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1428 1429 ntime = nresult.get(constants.NV_TIME, None) 1430 try: 1431 ntime_merged = utils.MergeTime(ntime) 1432 except (ValueError, TypeError): 1433 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") 1434 return 1435 1436 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 1437 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 1438 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 1439 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 1440 else: 1441 ntime_diff = None 1442 1443 _ErrorIf(ntime_diff is not None, self.ENODETIME, node, 1444 "Node time diverges by at least %s from master node time", 1445 ntime_diff)
1446
1447 - def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1448 """Check the node LVM results. 1449 1450 @type ninfo: L{objects.Node} 1451 @param ninfo: the node to check 1452 @param nresult: the remote results for the node 1453 @param vg_name: the configured VG name 1454 1455 """ 1456 if vg_name is None: 1457 return 1458 1459 node = ninfo.name 1460 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1461 1462 # checks vg existence and size > 20G 1463 vglist = nresult.get(constants.NV_VGLIST, None) 1464 test = not vglist 1465 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups") 1466 if not test: 1467 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 1468 constants.MIN_VG_SIZE) 1469 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus) 1470 1471 # check pv names 1472 pvlist = nresult.get(constants.NV_PVLIST, None) 1473 test = pvlist is None 1474 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node") 1475 if not test: 1476 # check that ':' is not present in PV names, since it's a 1477 # special character for lvcreate (denotes the range of PEs to 1478 # use on the PV) 1479 for _, pvname, owner_vg in pvlist: 1480 test = ":" in pvname 1481 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV" 1482 " '%s' of VG '%s'", pvname, owner_vg)
1483
1484 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1485 """Check the node bridges. 1486 1487 @type ninfo: L{objects.Node} 1488 @param ninfo: the node to check 1489 @param nresult: the remote results for the node 1490 @param bridges: the expected list of bridges 1491 1492 """ 1493 if not bridges: 1494 return 1495 1496 node = ninfo.name 1497 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1498 1499 missing = nresult.get(constants.NV_BRIDGES, None) 1500 test = not isinstance(missing, list) 1501 _ErrorIf(test, self.ENODENET, node, 1502 "did not return valid bridge information") 1503 if not test: 1504 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" % 1505 utils.CommaJoin(sorted(missing)))
1506
1507 - def _VerifyNodeNetwork(self, ninfo, nresult):
1508 """Check the node network connectivity results. 1509 1510 @type ninfo: L{objects.Node} 1511 @param ninfo: the node to check 1512 @param nresult: the remote results for the node 1513 1514 """ 1515 node = ninfo.name 1516 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1517 1518 test = constants.NV_NODELIST not in nresult 1519 _ErrorIf(test, self.ENODESSH, node, 1520 "node hasn't returned node ssh connectivity data") 1521 if not test: 1522 if nresult[constants.NV_NODELIST]: 1523 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 1524 _ErrorIf(True, self.ENODESSH, node, 1525 "ssh communication with node '%s': %s", a_node, a_msg) 1526 1527 test = constants.NV_NODENETTEST not in nresult 1528 _ErrorIf(test, self.ENODENET, node, 1529 "node hasn't returned node tcp connectivity data") 1530 if not test: 1531 if nresult[constants.NV_NODENETTEST]: 1532 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 1533 for anode in nlist: 1534 _ErrorIf(True, self.ENODENET, node, 1535 "tcp communication with node '%s': %s", 1536 anode, nresult[constants.NV_NODENETTEST][anode]) 1537 1538 test = constants.NV_MASTERIP not in nresult 1539 _ErrorIf(test, self.ENODENET, node, 1540 "node hasn't returned node master IP reachability data") 1541 if not test: 1542 if not nresult[constants.NV_MASTERIP]: 1543 if node == self.master_node: 1544 msg = "the master node cannot reach the master IP (not configured?)" 1545 else: 1546 msg = "cannot reach the master IP" 1547 _ErrorIf(True, self.ENODENET, node, msg)
1548
1549 - def _VerifyInstance(self, instance, instanceconfig, node_image, 1550 diskstatus):
1551 """Verify an instance. 1552 1553 This function checks to see if the required block devices are 1554 available on the instance's node. 1555 1556 """ 1557 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1558 node_current = instanceconfig.primary_node 1559 1560 node_vol_should = {} 1561 instanceconfig.MapLVsByNode(node_vol_should) 1562 1563 for node in node_vol_should: 1564 n_img = node_image[node] 1565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1566 # ignore missing volumes on offline or broken nodes 1567 continue 1568 for volume in node_vol_should[node]: 1569 test = volume not in n_img.volumes 1570 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance, 1571 "volume %s missing on node %s", volume, node) 1572 1573 if instanceconfig.admin_up: 1574 pri_img = node_image[node_current] 1575 test = instance not in pri_img.instances and not pri_img.offline 1576 _ErrorIf(test, self.EINSTANCEDOWN, instance, 1577 "instance not running on its primary node %s", 1578 node_current) 1579 1580 for node, n_img in node_image.items(): 1581 if node != node_current: 1582 test = instance in n_img.instances 1583 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance, 1584 "instance should not run on node %s", node) 1585 1586 diskdata = [(nname, success, status, idx) 1587 for (nname, disks) in diskstatus.items() 1588 for idx, (success, status) in enumerate(disks)] 1589 1590 for nname, success, bdev_status, idx in diskdata: 1591 # the 'ghost node' construction in Exec() ensures that we have a 1592 # node here 1593 snode = node_image[nname] 1594 bad_snode = snode.ghost or snode.offline 1595 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode, 1596 self.EINSTANCEFAULTYDISK, instance, 1597 "couldn't retrieve status for disk/%s on %s: %s", 1598 idx, nname, bdev_status) 1599 _ErrorIf((instanceconfig.admin_up and success and 1600 bdev_status.ldisk_status == constants.LDS_FAULTY), 1601 self.EINSTANCEFAULTYDISK, instance, 1602 "disk/%s on %s is faulty", idx, nname)
1603
1604 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1605 """Verify if there are any unknown volumes in the cluster. 1606 1607 The .os, .swap and backup volumes are ignored. All other volumes are 1608 reported as unknown. 1609 1610 @type reserved: L{ganeti.utils.FieldSet} 1611 @param reserved: a FieldSet of reserved volume names 1612 1613 """ 1614 for node, n_img in node_image.items(): 1615 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1616 # skip non-healthy nodes 1617 continue 1618 for volume in n_img.volumes: 1619 test = ((node not in node_vol_should or 1620 volume not in node_vol_should[node]) and 1621 not reserved.Matches(volume)) 1622 self._ErrorIf(test, self.ENODEORPHANLV, node, 1623 "volume %s is unknown", volume)
1624
1625 - def _VerifyOrphanInstances(self, instancelist, node_image):
1626 """Verify the list of running instances. 1627 1628 This checks what instances are running but unknown to the cluster. 1629 1630 """ 1631 for node, n_img in node_image.items(): 1632 for o_inst in n_img.instances: 1633 test = o_inst not in instancelist 1634 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node, 1635 "instance %s on node %s should not exist", o_inst, node)
1636
1637 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1638 """Verify N+1 Memory Resilience. 1639 1640 Check that if one single node dies we can still start all the 1641 instances it was primary for. 1642 1643 """ 1644 for node, n_img in node_image.items(): 1645 # This code checks that every node which is now listed as 1646 # secondary has enough memory to host all instances it is 1647 # supposed to should a single other node in the cluster fail. 1648 # FIXME: not ready for failover to an arbitrary node 1649 # FIXME: does not support file-backed instances 1650 # WARNING: we currently take into account down instances as well 1651 # as up ones, considering that even if they're down someone 1652 # might want to start them even in the event of a node failure. 1653 if n_img.offline: 1654 # we're skipping offline nodes from the N+1 warning, since 1655 # most likely we don't have good memory infromation from them; 1656 # we already list instances living on such nodes, and that's 1657 # enough warning 1658 continue 1659 for prinode, instances in n_img.sbp.items(): 1660 needed_mem = 0 1661 for instance in instances: 1662 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance]) 1663 if bep[constants.BE_AUTO_BALANCE]: 1664 needed_mem += bep[constants.BE_MEMORY] 1665 test = n_img.mfree < needed_mem 1666 self._ErrorIf(test, self.ENODEN1, node, 1667 "not enough memory to accomodate instance failovers" 1668 " should node %s fail (%dMiB needed, %dMiB available)", 1669 prinode, needed_mem, n_img.mfree)
1670
1671 - def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum, 1672 master_files):
1673 """Verifies and computes the node required file checksums. 1674 1675 @type ninfo: L{objects.Node} 1676 @param ninfo: the node to check 1677 @param nresult: the remote results for the node 1678 @param file_list: required list of files 1679 @param local_cksum: dictionary of local files and their checksums 1680 @param master_files: list of files that only masters should have 1681 1682 """ 1683 node = ninfo.name 1684 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1685 1686 remote_cksum = nresult.get(constants.NV_FILELIST, None) 1687 test = not isinstance(remote_cksum, dict) 1688 _ErrorIf(test, self.ENODEFILECHECK, node, 1689 "node hasn't returned file checksum data") 1690 if test: 1691 return 1692 1693 for file_name in file_list: 1694 node_is_mc = ninfo.master_candidate 1695 must_have = (file_name not in master_files) or node_is_mc 1696 # missing 1697 test1 = file_name not in remote_cksum 1698 # invalid checksum 1699 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name] 1700 # existing and good 1701 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name] 1702 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node, 1703 "file '%s' missing", file_name) 1704 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node, 1705 "file '%s' has wrong checksum", file_name) 1706 # not candidate and this is not a must-have file 1707 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node, 1708 "file '%s' should not exist on non master" 1709 " candidates (and the file is outdated)", file_name) 1710 # all good, except non-master/non-must have combination 1711 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node, 1712 "file '%s' should not exist" 1713 " on non master candidates", file_name)
1714
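The per-file decision in _VerifyNodeFiles combines three observations (file missing, checksum mismatch, checksum match) with whether the node is required to have the file at all. A small self-contained sketch of that decision table, using hypothetical file names and checksums rather than the real cluster file list:

# What the master computed locally vs. what one non-master-candidate node
# reported back (all values invented for illustration).
local_cksum = {"/var/lib/ganeti/config.data": "abc123",
               "/etc/hosts": "def456"}
remote_cksum = {"/etc/hosts": "d3adbe"}      # config.data absent, hosts differs
master_files = ["/var/lib/ganeti/config.data"]
node_is_mc = False                           # not a master candidate

for fname, want in local_cksum.items():
  must_have = (fname not in master_files) or node_is_mc
  missing = fname not in remote_cksum
  wrong = not missing and remote_cksum[fname] != want
  if missing and must_have:
    print("file '%s' missing" % fname)
  elif wrong and must_have:
    print("file '%s' has wrong checksum" % fname)
  elif not missing and not must_have:
    print("file '%s' should not exist on non-master candidates" % fname)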
1715 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 1716 drbd_map):
1717     """Verifies the node DRBD status.
1718 
1719     @type ninfo: L{objects.Node}
1720     @param ninfo: the node to check
1721     @param nresult: the remote results for the node
1722     @param instanceinfo: the dict of instances
1723     @param drbd_helper: the configured DRBD usermode helper
1724     @param drbd_map: the DRBD map as returned by
1725         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1726 
1727     """
1728     node = ninfo.name
1729     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730 
1731     if drbd_helper:
1732       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1733       test = (helper_result is None)
1734       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1735                "no drbd usermode helper returned")
1736       if helper_result:
1737         status, payload = helper_result
1738         test = not status
1739         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1740                  "drbd usermode helper check unsuccessful: %s", payload)
1741         test = status and (payload != drbd_helper)
1742         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1743                  "wrong drbd usermode helper: %s", payload)
1744 
1745     # compute the DRBD minors
1746     node_drbd = {}
1747     for minor, instance in drbd_map[node].items():
1748       test = instance not in instanceinfo
1749       _ErrorIf(test, self.ECLUSTERCFG, None,
1750                "ghost instance '%s' in temporary DRBD map", instance)
1751       # ghost instance should not be running, but otherwise we
1752       # don't give double warnings (both ghost instance and
1753       # unallocated minor in use)
1754       if test:
1755         node_drbd[minor] = (instance, False)
1756       else:
1757         instance = instanceinfo[instance]
1758         node_drbd[minor] = (instance.name, instance.admin_up)
1759 
1760     # and now check them
1761     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1762     test = not isinstance(used_minors, (tuple, list))
1763     _ErrorIf(test, self.ENODEDRBD, node,
1764              "cannot parse drbd status file: %s", str(used_minors))
1765     if test:
1766       # we cannot check drbd status
1767       return
1768 
1769     for minor, (iname, must_exist) in node_drbd.items():
1770       test = minor not in used_minors and must_exist
1771       _ErrorIf(test, self.ENODEDRBD, node,
1772                "drbd minor %d of instance %s is not active", minor, iname)
1773     for minor in used_minors:
1774       test = minor not in node_drbd
1775       _ErrorIf(test, self.ENODEDRBD, node,
1776                "unallocated drbd minor %d is in use", minor)
1777
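Once the expected minors have been derived from the configuration's DRBD map, the verification is a two-way membership check against the minors the node reports as in use. A distilled sketch with toy data (a plain dict and list rather than the real RPC payloads):

# minor -> (instance name, must the minor be active?), as built from drbd_map
node_drbd = {0: ("inst1", True), 1: ("inst2", False)}
used_minors = [0, 2]               # what the node says is actually in use

for minor, (iname, must_exist) in node_drbd.items():
  if must_exist and minor not in used_minors:
    print("drbd minor %d of instance %s is not active" % (minor, iname))
for minor in used_minors:
  if minor not in node_drbd:
    # minor 2 triggers this branch: nothing in the configuration claims it
    print("unallocated drbd minor %d is in use" % minor)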
1778 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
1779 """Builds the node OS structures. 1780 1781 @type ninfo: L{objects.Node} 1782 @param ninfo: the node to check 1783 @param nresult: the remote results for the node 1784 @param nimg: the node image object 1785 1786 """ 1787 node = ninfo.name 1788 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1789 1790 remote_os = nresult.get(constants.NV_OSLIST, None) 1791 test = (not isinstance(remote_os, list) or 1792 not compat.all(isinstance(v, list) and len(v) == 7 1793 for v in remote_os)) 1794 1795 _ErrorIf(test, self.ENODEOS, node, 1796 "node hasn't returned valid OS data") 1797 1798 nimg.os_fail = test 1799 1800 if test: 1801 return 1802 1803 os_dict = {} 1804 1805 for (name, os_path, status, diagnose, 1806 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 1807 1808 if name not in os_dict: 1809 os_dict[name] = [] 1810 1811 # parameters is a list of lists instead of list of tuples due to 1812 # JSON lacking a real tuple type, fix it: 1813 parameters = [tuple(v) for v in parameters] 1814 os_dict[name].append((os_path, status, diagnose, 1815 set(variants), set(parameters), set(api_ver))) 1816 1817 nimg.oslist = os_dict
1818
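The structure built by _UpdateNodeOS groups the seven-element rows returned per OS into a name-keyed dictionary of tuples, turning the JSON lists back into tuples and sets. A standalone sketch with one invented OS entry shows the shape of the result:

# One NV_OSLIST-style row: (name, path, status, diagnose, variants,
# parameters, api_versions); values are made up for illustration.
remote_os = [
  ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
   ["default", "minimal"], [["dhcp", "whether to use dhcp"]], [10, 15]],
]

os_dict = {}
for (name, path, status, diag, variants, params, api_ver) in remote_os:
  params = [tuple(v) for v in params]      # JSON has no tuples, convert back
  os_dict.setdefault(name, []).append(
      (path, status, diag, set(variants), set(params), set(api_ver)))

# os_dict["debootstrap"][0] is now:
#   ("/srv/ganeti/os/debootstrap", True, "",
#    set(["default", "minimal"]), set([("dhcp", "whether to use dhcp")]),
#    set([10, 15]))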
1819 - def _VerifyNodeOS(self, ninfo, nimg, base):
1820     """Verifies the node OS list.
1821 
1822     @type ninfo: L{objects.Node}
1823     @param ninfo: the node to check
1824     @param nimg: the node image object
1825     @param base: the 'template' node we match against (e.g. from the master)
1826 
1827     """
1828     node = ninfo.name
1829     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1830 
1831     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1832 
1833     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1834     for os_name, os_data in nimg.oslist.items():
1835       assert os_data, "Empty OS status for OS %s?!" % os_name
1836       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1837       _ErrorIf(not f_status, self.ENODEOS, node,
1838                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1839       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1840                "OS '%s' has multiple entries (first one shadows the rest): %s",
1841                os_name, utils.CommaJoin([v[0] for v in os_data]))
1842       # this will be caught in the backend too
1843       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1844                and not f_var, self.ENODEOS, node,
1845                "OS %s with API at least %d does not declare any variant",
1846                os_name, constants.OS_API_V15)
1847       # comparisons with the 'base' image
1848       test = os_name not in base.oslist
1849       _ErrorIf(test, self.ENODEOS, node,
1850                "Extra OS %s not present on reference node (%s)",
1851                os_name, base.name)
1852       if test:
1853         continue
1854       assert base.oslist[os_name], "Base node has empty OS status?"
1855       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1856       if not b_status:
1857         # base OS is invalid, skipping
1858         continue
1859       for kind, a, b in [("API version", f_api, b_api),
1860                          ("variants list", f_var, b_var),
1861                          ("parameters", beautify_params(f_param),
1862                           beautify_params(b_param))]:
1863         _ErrorIf(a != b, self.ENODEOS, node,
1864                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1865                  kind, os_name, base.name,
1866                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1867 
1868     # check any missing OSes
1869     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1870     _ErrorIf(missing, self.ENODEOS, node,
1871              "OSes present on reference node %s but missing on this node: %s",
1872              base.name, utils.CommaJoin(missing))
1873
1874 - def _VerifyOob(self, ninfo, nresult):
1875 """Verifies out of band functionality of a node. 1876 1877 @type ninfo: L{objects.Node} 1878 @param ninfo: the node to check 1879 @param nresult: the remote results for the node 1880 1881 """ 1882 node = ninfo.name 1883 # We just have to verify the paths on master and/or master candidates 1884 # as the oob helper is invoked on the master 1885 if ((ninfo.master_candidate or ninfo.master_capable) and 1886 constants.NV_OOB_PATHS in nresult): 1887 for path_result in nresult[constants.NV_OOB_PATHS]: 1888 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1889
1890 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1891 """Verifies and updates the node volume data. 1892 1893 This function will update a L{NodeImage}'s internal structures 1894 with data from the remote call. 1895 1896 @type ninfo: L{objects.Node} 1897 @param ninfo: the node to check 1898 @param nresult: the remote results for the node 1899 @param nimg: the node image object 1900 @param vg_name: the configured VG name 1901 1902 """ 1903 node = ninfo.name 1904 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1905 1906 nimg.lvm_fail = True 1907 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 1908 if vg_name is None: 1909 pass 1910 elif isinstance(lvdata, basestring): 1911 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s", 1912 utils.SafeEncode(lvdata)) 1913 elif not isinstance(lvdata, dict): 1914 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)") 1915 else: 1916 nimg.volumes = lvdata 1917 nimg.lvm_fail = False
1918
1919 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1920 """Verifies and updates the node instance list. 1921 1922 If the listing was successful, then updates this node's instance 1923 list. Otherwise, it marks the RPC call as failed for the instance 1924 list key. 1925 1926 @type ninfo: L{objects.Node} 1927 @param ninfo: the node to check 1928 @param nresult: the remote results for the node 1929 @param nimg: the node image object 1930 1931 """ 1932 idata = nresult.get(constants.NV_INSTANCELIST, None) 1933 test = not isinstance(idata, list) 1934 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed" 1935 " (instancelist): %s", utils.SafeEncode(str(idata))) 1936 if test: 1937 nimg.hyp_fail = True 1938 else: 1939 nimg.instances = idata
1940
1941 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1942 """Verifies and computes a node information map 1943 1944 @type ninfo: L{objects.Node} 1945 @param ninfo: the node to check 1946 @param nresult: the remote results for the node 1947 @param nimg: the node image object 1948 @param vg_name: the configured VG name 1949 1950 """ 1951 node = ninfo.name 1952 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1953 1954 # try to read free memory (from the hypervisor) 1955 hv_info = nresult.get(constants.NV_HVINFO, None) 1956 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 1957 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)") 1958 if not test: 1959 try: 1960 nimg.mfree = int(hv_info["memory_free"]) 1961 except (ValueError, TypeError): 1962 _ErrorIf(True, self.ENODERPC, node, 1963 "node returned invalid nodeinfo, check hypervisor") 1964 1965 # FIXME: devise a free space model for file based instances as well 1966 if vg_name is not None: 1967 test = (constants.NV_VGLIST not in nresult or 1968 vg_name not in nresult[constants.NV_VGLIST]) 1969 _ErrorIf(test, self.ENODELVM, node, 1970 "node didn't return data for the volume group '%s'" 1971 " - it is either missing or broken", vg_name) 1972 if not test: 1973 try: 1974 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 1975 except (ValueError, TypeError): 1976 _ErrorIf(True, self.ENODERPC, node, 1977 "node returned invalid LVM info, check LVM status")
1978
1979 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1980 """Gets per-disk status information for all instances. 1981 1982 @type nodelist: list of strings 1983 @param nodelist: Node names 1984 @type node_image: dict of (name, L{objects.Node}) 1985 @param node_image: Node objects 1986 @type instanceinfo: dict of (name, L{objects.Instance}) 1987 @param instanceinfo: Instance objects 1988 @rtype: {instance: {node: [(succes, payload)]}} 1989 @return: a dictionary of per-instance dictionaries with nodes as 1990 keys and disk information as values; the disk information is a 1991 list of tuples (success, payload) 1992 1993 """ 1994 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1995 1996 node_disks = {} 1997 node_disks_devonly = {} 1998 diskless_instances = set() 1999 diskless = constants.DT_DISKLESS 2000 2001 for nname in nodelist: 2002 node_instances = list(itertools.chain(node_image[nname].pinst, 2003 node_image[nname].sinst)) 2004 diskless_instances.update(inst for inst in node_instances 2005 if instanceinfo[inst].disk_template == diskless) 2006 disks = [(inst, disk) 2007 for inst in node_instances 2008 for disk in instanceinfo[inst].disks] 2009 2010 if not disks: 2011 # No need to collect data 2012 continue 2013 2014 node_disks[nname] = disks 2015 2016 # Creating copies as SetDiskID below will modify the objects and that can 2017 # lead to incorrect data returned from nodes 2018 devonly = [dev.Copy() for (_, dev) in disks] 2019 2020 for dev in devonly: 2021 self.cfg.SetDiskID(dev, nname) 2022 2023 node_disks_devonly[nname] = devonly 2024 2025 assert len(node_disks) == len(node_disks_devonly) 2026 2027 # Collect data from all nodes with disks 2028 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 2029 node_disks_devonly) 2030 2031 assert len(result) == len(node_disks) 2032 2033 instdisk = {} 2034 2035 for (nname, nres) in result.items(): 2036 disks = node_disks[nname] 2037 2038 if nres.offline: 2039 # No data from this node 2040 data = len(disks) * [(False, "node offline")] 2041 else: 2042 msg = nres.fail_msg 2043 _ErrorIf(msg, self.ENODERPC, nname, 2044 "while getting disk information: %s", msg) 2045 if msg: 2046 # No data from this node 2047 data = len(disks) * [(False, msg)] 2048 else: 2049 data = [] 2050 for idx, i in enumerate(nres.payload): 2051 if isinstance(i, (tuple, list)) and len(i) == 2: 2052 data.append(i) 2053 else: 2054 logging.warning("Invalid result from node %s, entry %d: %s", 2055 nname, idx, i) 2056 data.append((False, "Invalid result from the remote node")) 2057 2058 for ((inst, _), status) in zip(disks, data): 2059 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 2060 2061 # Add empty entries for diskless instances. 2062 for inst in diskless_instances: 2063 assert inst not in instdisk 2064 instdisk[inst] = {} 2065 2066 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 2067 len(nnames) <= len(instanceinfo[inst].all_nodes) and 2068 compat.all(isinstance(s, (tuple, list)) and 2069 len(s) == 2 for s in statuses) 2070 for inst, nnames in instdisk.items() 2071 for nname, statuses in nnames.items()) 2072 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure" 2073 2074 return instdisk
2075
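The return value of _CollectDiskInfo is doubly nested: instance name, then node name, then one (success, payload) pair per disk, with diskless instances mapped to an empty dictionary. A purely hypothetical result for a two-disk DRBD instance and a diskless one illustrates the shape that the assertions at the end of the method enforce:

# Illustrative only; the real payload entries are whatever
# call_blockdev_getmirrorstatus_multi returns, shown here as placeholders.
instdisk = {
  "inst1.example.com": {
    "node1.example.com": [(True, "<mirror status disk/0>"),
                          (True, "<mirror status disk/1>")],
    "node2.example.com": [(False, "node offline"),
                          (False, "node offline")],
  },
  "diskless1.example.com": {},     # diskless instances get an empty dict
}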
2076 - def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters. 2078 2079 """ 2080 for item, hv_name, hv_params in hvp_data: 2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 2082 (item, hv_name)) 2083 try: 2084 hv_class = hypervisor.GetHypervisor(hv_name) 2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2086 hv_class.CheckParameterSyntax(hv_params) 2087 except errors.GenericError, err: 2088 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2089 2090
2091 - def BuildHooksEnv(self):
2092     """Build hooks env.
2093 
2094     Cluster-Verify hooks are run only in the post phase; if they fail, their
2095     output is logged in the verify output and the verification fails.
2096 
2097     """
2098     all_nodes = self.cfg.GetNodeList()
2099     env = {
2100       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2101       }
2102     for node in self.cfg.GetAllNodesInfo().values():
2103       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2104 
2105     return env, [], all_nodes
2106
2107 - def Exec(self, feedback_fn):
2108 """Verify integrity of cluster, performing various test on nodes. 2109 2110 """ 2111 # This method has too many local variables. pylint: disable-msg=R0914 2112 self.bad = False 2113 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 2114 verbose = self.op.verbose 2115 self._feedback_fn = feedback_fn 2116 feedback_fn("* Verifying global settings") 2117 for msg in self.cfg.VerifyConfig(): 2118 _ErrorIf(True, self.ECLUSTERCFG, None, msg) 2119 2120 # Check the cluster certificates 2121 for cert_filename in constants.ALL_CERT_FILES: 2122 (errcode, msg) = _VerifyCertificate(cert_filename) 2123 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode) 2124 2125 vg_name = self.cfg.GetVGName() 2126 drbd_helper = self.cfg.GetDRBDHelper() 2127 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors 2128 cluster = self.cfg.GetClusterInfo() 2129 nodeinfo_byname = self.cfg.GetAllNodesInfo() 2130 nodelist = utils.NiceSort(nodeinfo_byname.keys()) 2131 nodeinfo = [nodeinfo_byname[nname] for nname in nodelist] 2132 instanceinfo = self.cfg.GetAllInstancesInfo() 2133 instancelist = utils.NiceSort(instanceinfo.keys()) 2134 groupinfo = self.cfg.GetAllNodeGroupsInfo() 2135 i_non_redundant = [] # Non redundant instances 2136 i_non_a_balanced = [] # Non auto-balanced instances 2137 n_offline = 0 # Count of offline nodes 2138 n_drained = 0 # Count of nodes being drained 2139 node_vol_should = {} 2140 2141 # FIXME: verify OS list 2142 # do local checksums 2143 master_files = [constants.CLUSTER_CONF_FILE] 2144 master_node = self.master_node = self.cfg.GetMasterNode() 2145 master_ip = self.cfg.GetMasterIP() 2146 2147 file_names = ssconf.SimpleStore().GetFileList() 2148 file_names.extend(constants.ALL_CERT_FILES) 2149 file_names.extend(master_files) 2150 if cluster.modify_etc_hosts: 2151 file_names.append(constants.ETC_HOSTS) 2152 2153 local_checksums = utils.FingerprintFiles(file_names) 2154 2155 # Compute the set of hypervisor parameters 2156 hvp_data = [] 2157 for hv_name in hypervisors: 2158 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 2159 for os_name, os_hvp in cluster.os_hvp.items(): 2160 for hv_name, hv_params in os_hvp.items(): 2161 if not hv_params: 2162 continue 2163 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 2164 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 2165 # TODO: collapse identical parameter values in a single one 2166 for instance in instanceinfo.values(): 2167 if not instance.hvparams: 2168 continue 2169 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 2170 cluster.FillHV(instance))) 2171 # and verify them locally 2172 self._VerifyHVP(hvp_data) 2173 2174 feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) 2175 node_verify_param = { 2176 constants.NV_FILELIST: file_names, 2177 constants.NV_NODELIST: [node.name for node in nodeinfo 2178 if not node.offline], 2179 constants.NV_HYPERVISOR: hypervisors, 2180 constants.NV_HVPARAMS: hvp_data, 2181 constants.NV_NODENETTEST: [(node.name, node.primary_ip, 2182 node.secondary_ip) for node in nodeinfo 2183 if not node.offline], 2184 constants.NV_INSTANCELIST: hypervisors, 2185 constants.NV_VERSION: None, 2186 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 2187 constants.NV_NODESETUP: None, 2188 constants.NV_TIME: None, 2189 constants.NV_MASTERIP: (master_node, master_ip), 2190 constants.NV_OSLIST: None, 2191 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 2192 } 2193 2194 if vg_name is not None: 2195 node_verify_param[constants.NV_VGLIST] = None 
2196 node_verify_param[constants.NV_LVLIST] = vg_name 2197 node_verify_param[constants.NV_PVLIST] = [vg_name] 2198 node_verify_param[constants.NV_DRBDLIST] = None 2199 2200 if drbd_helper: 2201 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 2202 2203 # bridge checks 2204 # FIXME: this needs to be changed per node-group, not cluster-wide 2205 bridges = set() 2206 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 2207 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 2208 bridges.add(default_nicpp[constants.NIC_LINK]) 2209 for instance in instanceinfo.values(): 2210 for nic in instance.nics: 2211 full_nic = cluster.SimpleFillNIC(nic.nicparams) 2212 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 2213 bridges.add(full_nic[constants.NIC_LINK]) 2214 2215 if bridges: 2216 node_verify_param[constants.NV_BRIDGES] = list(bridges) 2217 2218 # Build our expected cluster state 2219 node_image = dict((node.name, self.NodeImage(offline=node.offline, 2220 name=node.name, 2221 vm_capable=node.vm_capable)) 2222 for node in nodeinfo) 2223 2224 # Gather OOB paths 2225 oob_paths = [] 2226 for node in nodeinfo: 2227 path = _SupportsOob(self.cfg, node) 2228 if path and path not in oob_paths: 2229 oob_paths.append(path) 2230 2231 if oob_paths: 2232 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 2233 2234 for instance in instancelist: 2235 inst_config = instanceinfo[instance] 2236 2237 for nname in inst_config.all_nodes: 2238 if nname not in node_image: 2239 # ghost node 2240 gnode = self.NodeImage(name=nname) 2241 gnode.ghost = True 2242 node_image[nname] = gnode 2243 2244 inst_config.MapLVsByNode(node_vol_should) 2245 2246 pnode = inst_config.primary_node 2247 node_image[pnode].pinst.append(instance) 2248 2249 for snode in inst_config.secondary_nodes: 2250 nimg = node_image[snode] 2251 nimg.sinst.append(instance) 2252 if pnode not in nimg.sbp: 2253 nimg.sbp[pnode] = [] 2254 nimg.sbp[pnode].append(instance) 2255 2256 # At this point, we have the in-memory data structures complete, 2257 # except for the runtime information, which we'll gather next 2258 2259 # Due to the way our RPC system works, exact response times cannot be 2260 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 2261 # time before and after executing the request, we can at least have a time 2262 # window. 
2263 nvinfo_starttime = time.time() 2264 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, 2265 self.cfg.GetClusterName()) 2266 nvinfo_endtime = time.time() 2267 2268 all_drbd_map = self.cfg.ComputeDRBDMap() 2269 2270 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist)) 2271 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo) 2272 2273 feedback_fn("* Verifying node status") 2274 2275 refos_img = None 2276 2277 for node_i in nodeinfo: 2278 node = node_i.name 2279 nimg = node_image[node] 2280 2281 if node_i.offline: 2282 if verbose: 2283 feedback_fn("* Skipping offline node %s" % (node,)) 2284 n_offline += 1 2285 continue 2286 2287 if node == master_node: 2288 ntype = "master" 2289 elif node_i.master_candidate: 2290 ntype = "master candidate" 2291 elif node_i.drained: 2292 ntype = "drained" 2293 n_drained += 1 2294 else: 2295 ntype = "regular" 2296 if verbose: 2297 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 2298 2299 msg = all_nvinfo[node].fail_msg 2300 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg) 2301 if msg: 2302 nimg.rpc_fail = True 2303 continue 2304 2305 nresult = all_nvinfo[node].payload 2306 2307 nimg.call_ok = self._VerifyNode(node_i, nresult) 2308 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 2309 self._VerifyNodeNetwork(node_i, nresult) 2310 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums, 2311 master_files) 2312 2313 self._VerifyOob(node_i, nresult) 2314 2315 if nimg.vm_capable: 2316 self._VerifyNodeLVM(node_i, nresult, vg_name) 2317 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper, 2318 all_drbd_map) 2319 2320 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 2321 self._UpdateNodeInstances(node_i, nresult, nimg) 2322 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 2323 self._UpdateNodeOS(node_i, nresult, nimg) 2324 if not nimg.os_fail: 2325 if refos_img is None: 2326 refos_img = nimg 2327 self._VerifyNodeOS(node_i, nimg, refos_img) 2328 self._VerifyNodeBridges(node_i, nresult, bridges) 2329 2330 feedback_fn("* Verifying instance status") 2331 for instance in instancelist: 2332 if verbose: 2333 feedback_fn("* Verifying instance %s" % instance) 2334 inst_config = instanceinfo[instance] 2335 self._VerifyInstance(instance, inst_config, node_image, 2336 instdisk[instance]) 2337 inst_nodes_offline = [] 2338 2339 pnode = inst_config.primary_node 2340 pnode_img = node_image[pnode] 2341 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2342 self.ENODERPC, pnode, "instance %s, connection to" 2343 " primary node failed", instance) 2344 2345 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance, 2346 "instance lives on offline node %s", inst_config.primary_node) 2347 2348 # If the instance is non-redundant we cannot survive losing its primary 2349 # node, so we are not N+1 compliant. On the other hand we have no disk 2350 # templates with more than one secondary so that situation is not well 2351 # supported either. 
2352 # FIXME: does not support file-backed instances 2353 if not inst_config.secondary_nodes: 2354 i_non_redundant.append(instance) 2355 2356 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT, 2357 instance, "instance has multiple secondary nodes: %s", 2358 utils.CommaJoin(inst_config.secondary_nodes), 2359 code=self.ETYPE_WARNING) 2360 2361 if inst_config.disk_template in constants.DTS_NET_MIRROR: 2362 pnode = inst_config.primary_node 2363 instance_nodes = utils.NiceSort(inst_config.all_nodes) 2364 instance_groups = {} 2365 2366 for node in instance_nodes: 2367 instance_groups.setdefault(nodeinfo_byname[node].group, 2368 []).append(node) 2369 2370 pretty_list = [ 2371 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) 2372 # Sort so that we always list the primary node first. 2373 for group, nodes in sorted(instance_groups.items(), 2374 key=lambda (_, nodes): pnode in nodes, 2375 reverse=True)] 2376 2377 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS, 2378 instance, "instance has primary and secondary nodes in" 2379 " different groups: %s", utils.CommaJoin(pretty_list), 2380 code=self.ETYPE_WARNING) 2381 2382 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 2383 i_non_a_balanced.append(instance) 2384 2385 for snode in inst_config.secondary_nodes: 2386 s_img = node_image[snode] 2387 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode, 2388 "instance %s, connection to secondary node failed", instance) 2389 2390 if s_img.offline: 2391 inst_nodes_offline.append(snode) 2392 2393 # warn that the instance lives on offline nodes 2394 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance, 2395 "instance has offline secondary node(s) %s", 2396 utils.CommaJoin(inst_nodes_offline)) 2397 # ... or ghost/non-vm_capable nodes 2398 for node in inst_config.all_nodes: 2399 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance, 2400 "instance lives on ghost node %s", node) 2401 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE, 2402 instance, "instance lives on non-vm_capable node %s", node) 2403 2404 feedback_fn("* Verifying orphan volumes") 2405 reserved = utils.FieldSet(*cluster.reserved_lvs) 2406 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 2407 2408 feedback_fn("* Verifying orphan instances") 2409 self._VerifyOrphanInstances(instancelist, node_image) 2410 2411 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 2412 feedback_fn("* Verifying N+1 Memory redundancy") 2413 self._VerifyNPlusOneMemory(node_image, instanceinfo) 2414 2415 feedback_fn("* Other Notes") 2416 if i_non_redundant: 2417 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 2418 % len(i_non_redundant)) 2419 2420 if i_non_a_balanced: 2421 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 2422 % len(i_non_a_balanced)) 2423 2424 if n_offline: 2425 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 2426 2427 if n_drained: 2428 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 2429 2430 return not self.bad
2431
2432 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2433     """Analyze the post-hooks' result
2434 
2435     This method analyses the hook result, handles it, and sends some
2436     nicely-formatted feedback back to the user.
2437 
2438     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2439         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2440     @param hooks_results: the results of the multi-node hooks rpc call
2441     @param feedback_fn: function used to send feedback back to the caller
2442     @param lu_result: previous Exec result
2443     @return: the new Exec result, based on the previous result
2444         and hook results
2445 
2446     """
2447     # We only really run POST phase hooks, and are only interested in
2448     # their results
2449     if phase == constants.HOOKS_PHASE_POST:
2450       # Used to change hooks' output to proper indentation
2451       feedback_fn("* Hooks Results")
2452       assert hooks_results, "invalid result from hooks"
2453 
2454       for node_name in hooks_results:
2455         res = hooks_results[node_name]
2456         msg = res.fail_msg
2457         test = msg and not res.offline
2458         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2459                       "Communication failure in hooks execution: %s", msg)
2460         if res.offline or msg:
2461           # No need to investigate payload if node is offline or gave an error.
2462           # override manually lu_result here as _ErrorIf only
2463           # overrides self.bad
2464           lu_result = 1
2465           continue
2466         for script, hkr, output in res.payload:
2467           test = hkr == constants.HKR_FAIL
2468           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2469                         "Script %s failed, output:", script)
2470           if test:
2471             output = self._HOOKS_INDENT_RE.sub(' ', output)
2472             feedback_fn("%s" % output)
2473             lu_result = 0
2474 
2475     return lu_result
2476
2477 2478 -class LUClusterVerifyDisks(NoHooksLU):
2479 """Verifies the cluster disks status. 2480 2481 """ 2482 REQ_BGL = False 2483
2484 - def ExpandNames(self):
2485 self.needed_locks = { 2486 locking.LEVEL_NODE: locking.ALL_SET, 2487 locking.LEVEL_INSTANCE: locking.ALL_SET, 2488 } 2489 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2490
2491 - def Exec(self, feedback_fn):
2492 """Verify integrity of cluster disks. 2493 2494 @rtype: tuple of three items 2495 @return: a tuple of (dict of node-to-node_error, list of instances 2496 which need activate-disks, dict of instance: (node, volume) for 2497 missing volumes 2498 2499 """ 2500 result = res_nodes, res_instances, res_missing = {}, [], {} 2501 2502 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList()) 2503 instances = self.cfg.GetAllInstancesInfo().values() 2504 2505 nv_dict = {} 2506 for inst in instances: 2507 inst_lvs = {} 2508 if not inst.admin_up: 2509 continue 2510 inst.MapLVsByNode(inst_lvs) 2511 # transform { iname: {node: [vol,],},} to {(node, vol): iname} 2512 for node, vol_list in inst_lvs.iteritems(): 2513 for vol in vol_list: 2514 nv_dict[(node, vol)] = inst 2515 2516 if not nv_dict: 2517 return result 2518 2519 node_lvs = self.rpc.call_lv_list(nodes, []) 2520 for node, node_res in node_lvs.items(): 2521 if node_res.offline: 2522 continue 2523 msg = node_res.fail_msg 2524 if msg: 2525 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 2526 res_nodes[node] = msg 2527 continue 2528 2529 lvs = node_res.payload 2530 for lv_name, (_, _, lv_online) in lvs.items(): 2531 inst = nv_dict.pop((node, lv_name), None) 2532 if (not lv_online and inst is not None 2533 and inst.name not in res_instances): 2534 res_instances.append(inst.name) 2535 2536 # any leftover items in nv_dict are missing LVs, let's arrange the 2537 # data better 2538 for key, inst in nv_dict.iteritems(): 2539 if inst.name not in res_missing: 2540 res_missing[inst.name] = [] 2541 res_missing[inst.name].append(key) 2542 2543 return result
2544
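The comment inside Exec above ("transform { iname: {node: [vol,],},} to {(node, vol): iname}") is the heart of the algorithm: inverting the per-instance LV map lets every (node, volume) pair reported by the nodes be matched with a single dictionary lookup, and whatever keys remain at the end are missing volumes. A standalone sketch of the inversion with made-up names:

# Per-instance LV map, as MapLVsByNode would fill it in (illustrative):
inst_lvs = {
  "inst1": {"node1": ["xenvg/disk0_data", "xenvg/disk0_meta"]},
  "inst2": {"node2": ["xenvg/disk1_data"]},
}

nv_dict = {}
for iname, by_node in inst_lvs.items():
  for node, vols in by_node.items():
    for vol in vols:
      nv_dict[(node, vol)] = iname

# nv_dict[("node1", "xenvg/disk0_data")] == "inst1"; every key that is not
# popped while walking the node LV listings ends up reported as missing.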
2545 2546 -class LUClusterRepairDiskSizes(NoHooksLU):
2547 """Verifies the cluster disks sizes. 2548 2549 """ 2550 REQ_BGL = False 2551
2552 - def ExpandNames(self):
2553 if self.op.instances: 2554 self.wanted_names = [] 2555 for name in self.op.instances: 2556 full_name = _ExpandInstanceName(self.cfg, name) 2557 self.wanted_names.append(full_name) 2558 self.needed_locks = { 2559 locking.LEVEL_NODE: [], 2560 locking.LEVEL_INSTANCE: self.wanted_names, 2561 } 2562 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 2563 else: 2564 self.wanted_names = None 2565 self.needed_locks = { 2566 locking.LEVEL_NODE: locking.ALL_SET, 2567 locking.LEVEL_INSTANCE: locking.ALL_SET, 2568 } 2569 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2570
2571 - def DeclareLocks(self, level):
2572 if level == locking.LEVEL_NODE and self.wanted_names is not None: 2573 self._LockInstancesNodes(primary_only=True)
2574
2575 - def CheckPrereq(self):
2576 """Check prerequisites. 2577 2578 This only checks the optional instance list against the existing names. 2579 2580 """ 2581 if self.wanted_names is None: 2582 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 2583 2584 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 2585 in self.wanted_names]
2586
2587 - def _EnsureChildSizes(self, disk):
2588 """Ensure children of the disk have the needed disk size. 2589 2590 This is valid mainly for DRBD8 and fixes an issue where the 2591 children have smaller disk size. 2592 2593 @param disk: an L{ganeti.objects.Disk} object 2594 2595 """ 2596 if disk.dev_type == constants.LD_DRBD8: 2597 assert disk.children, "Empty children for DRBD8?" 2598 fchild = disk.children[0] 2599 mismatch = fchild.size < disk.size 2600 if mismatch: 2601 self.LogInfo("Child disk has size %d, parent %d, fixing", 2602 fchild.size, disk.size) 2603 fchild.size = disk.size 2604 2605 # and we recurse on this child only, not on the metadev 2606 return self._EnsureChildSizes(fchild) or mismatch 2607 else: 2608 return False
2609
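The size fix in _EnsureChildSizes only ever grows the recorded size of the first (data) child of a DRBD8 disk up to the parent's size, recursing into that child alone and leaving the metadata device untouched. A toy rendering with a plain stand-in class instead of ganeti.objects.Disk:

class FakeDisk(object):
  """Minimal stand-in for a disk object: just a size and children."""
  def __init__(self, size, children=None):
    self.size = size
    self.children = children or []

data = FakeDisk(10236)                 # 4 MiB short of the parent
meta = FakeDisk(128)                   # metadata device is left alone
drbd = FakeDisk(10240, [data, meta])

if data.size < drbd.size:
  data.size = drbd.size                # record the corrected size

assert data.size == 10240 and meta.size == 128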
2610 - def Exec(self, feedback_fn):
2611 """Verify the size of cluster disks. 2612 2613 """ 2614 # TODO: check child disks too 2615 # TODO: check differences in size between primary/secondary nodes 2616 per_node_disks = {} 2617 for instance in self.wanted_instances: 2618 pnode = instance.primary_node 2619 if pnode not in per_node_disks: 2620 per_node_disks[pnode] = [] 2621 for idx, disk in enumerate(instance.disks): 2622 per_node_disks[pnode].append((instance, idx, disk)) 2623 2624 changed = [] 2625 for node, dskl in per_node_disks.items(): 2626 newl = [v[2].Copy() for v in dskl] 2627 for dsk in newl: 2628 self.cfg.SetDiskID(dsk, node) 2629 result = self.rpc.call_blockdev_getsize(node, newl) 2630 if result.fail_msg: 2631 self.LogWarning("Failure in blockdev_getsize call to node" 2632 " %s, ignoring", node) 2633 continue 2634 if len(result.payload) != len(dskl): 2635 logging.warning("Invalid result from node %s: len(dksl)=%d," 2636 " result.payload=%s", node, len(dskl), result.payload) 2637 self.LogWarning("Invalid result from node %s, ignoring node results", 2638 node) 2639 continue 2640 for ((instance, idx, disk), size) in zip(dskl, result.payload): 2641 if size is None: 2642 self.LogWarning("Disk %d of instance %s did not return size" 2643 " information, ignoring", idx, instance.name) 2644 continue 2645 if not isinstance(size, (int, long)): 2646 self.LogWarning("Disk %d of instance %s did not return valid" 2647 " size information, ignoring", idx, instance.name) 2648 continue 2649 size = size >> 20 2650 if size != disk.size: 2651 self.LogInfo("Disk %d of instance %s has mismatched size," 2652 " correcting: recorded %d, actual %d", idx, 2653 instance.name, disk.size, size) 2654 disk.size = size 2655 self.cfg.Update(instance, feedback_fn) 2656 changed.append((instance.name, idx, size)) 2657 if self._EnsureChildSizes(disk): 2658 self.cfg.Update(instance, feedback_fn) 2659 changed.append((instance.name, idx, disk.size)) 2660 return changed
2661
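The "size = size >> 20" step above converts the byte count returned by the blockdev_getsize call into mebibytes, which is the unit the recorded disk.size is compared against; a quick worked example:

size_bytes = 10737418240        # what a node might report for a 10 GiB volume
size_mib = size_bytes >> 20     # same as size_bytes // (1024 * 1024)
assert size_mib == 10240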
2662 2663 -class LUClusterRename(LogicalUnit):
2664 """Rename the cluster. 2665 2666 """ 2667 HPATH = "cluster-rename" 2668 HTYPE = constants.HTYPE_CLUSTER 2669
2670 - def BuildHooksEnv(self):
2671 """Build hooks env. 2672 2673 """ 2674 env = { 2675 "OP_TARGET": self.cfg.GetClusterName(), 2676 "NEW_NAME": self.op.name, 2677 } 2678 mn = self.cfg.GetMasterNode() 2679 all_nodes = self.cfg.GetNodeList() 2680 return env, [mn], all_nodes
2681
2682 - def CheckPrereq(self):
2683 """Verify that the passed name is a valid one. 2684 2685 """ 2686 hostname = netutils.GetHostname(name=self.op.name, 2687 family=self.cfg.GetPrimaryIPFamily()) 2688 2689 new_name = hostname.name 2690 self.ip = new_ip = hostname.ip 2691 old_name = self.cfg.GetClusterName() 2692 old_ip = self.cfg.GetMasterIP() 2693 if new_name == old_name and new_ip == old_ip: 2694 raise errors.OpPrereqError("Neither the name nor the IP address of the" 2695 " cluster has changed", 2696 errors.ECODE_INVAL) 2697 if new_ip != old_ip: 2698 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 2699 raise errors.OpPrereqError("The given cluster IP address (%s) is" 2700 " reachable on the network" % 2701 new_ip, errors.ECODE_NOTUNIQUE) 2702 2703 self.op.name = new_name
2704
2705 - def Exec(self, feedback_fn):
2706 """Rename the cluster. 2707 2708 """ 2709 clustername = self.op.name 2710 ip = self.ip 2711 2712 # shutdown the master IP 2713 master = self.cfg.GetMasterNode() 2714 result = self.rpc.call_node_stop_master(master, False) 2715 result.Raise("Could not disable the master role") 2716 2717 try: 2718 cluster = self.cfg.GetClusterInfo() 2719 cluster.cluster_name = clustername 2720 cluster.master_ip = ip 2721 self.cfg.Update(cluster, feedback_fn) 2722 2723 # update the known hosts file 2724 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE) 2725 node_list = self.cfg.GetOnlineNodeList() 2726 try: 2727 node_list.remove(master) 2728 except ValueError: 2729 pass 2730 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE) 2731 finally: 2732 result = self.rpc.call_node_start_master(master, False, False) 2733 msg = result.fail_msg 2734 if msg: 2735 self.LogWarning("Could not re-enable the master role on" 2736 " the master, please restart manually: %s", msg) 2737 2738 return clustername
2739
2740 2741 -class LUClusterSetParams(LogicalUnit):
2742 """Change the parameters of the cluster. 2743 2744 """ 2745 HPATH = "cluster-modify" 2746 HTYPE = constants.HTYPE_CLUSTER 2747 REQ_BGL = False 2748
2749 - def CheckArguments(self):
2750 """Check parameters 2751 2752 """ 2753 if self.op.uid_pool: 2754 uidpool.CheckUidPool(self.op.uid_pool) 2755 2756 if self.op.add_uids: 2757 uidpool.CheckUidPool(self.op.add_uids) 2758 2759 if self.op.remove_uids: 2760 uidpool.CheckUidPool(self.op.remove_uids)
2761
2762 - def ExpandNames(self):
2763 # FIXME: in the future maybe other cluster params won't require checking on 2764 # all nodes to be modified. 2765 self.needed_locks = { 2766 locking.LEVEL_NODE: locking.ALL_SET, 2767 } 2768 self.share_locks[locking.LEVEL_NODE] = 1
2769
2770 - def BuildHooksEnv(self):
2771 """Build hooks env. 2772 2773 """ 2774 env = { 2775 "OP_TARGET": self.cfg.GetClusterName(), 2776 "NEW_VG_NAME": self.op.vg_name, 2777 } 2778 mn = self.cfg.GetMasterNode() 2779 return env, [mn], [mn]
2780
2781 - def CheckPrereq(self):
2782 """Check prerequisites. 2783 2784 This checks whether the given params don't conflict and 2785 if the given volume group is valid. 2786 2787 """ 2788 if self.op.vg_name is not None and not self.op.vg_name: 2789 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 2790 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 2791 " instances exist", errors.ECODE_INVAL) 2792 2793 if self.op.drbd_helper is not None and not self.op.drbd_helper: 2794 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 2795 raise errors.OpPrereqError("Cannot disable drbd helper while" 2796 " drbd-based instances exist", 2797 errors.ECODE_INVAL) 2798 2799 node_list = self.acquired_locks[locking.LEVEL_NODE] 2800 2801 # if vg_name not None, checks given volume group on all nodes 2802 if self.op.vg_name: 2803 vglist = self.rpc.call_vg_list(node_list) 2804 for node in node_list: 2805 msg = vglist[node].fail_msg 2806 if msg: 2807 # ignoring down node 2808 self.LogWarning("Error while gathering data on node %s" 2809 " (ignoring node): %s", node, msg) 2810 continue 2811 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 2812 self.op.vg_name, 2813 constants.MIN_VG_SIZE) 2814 if vgstatus: 2815 raise errors.OpPrereqError("Error on node '%s': %s" % 2816 (node, vgstatus), errors.ECODE_ENVIRON) 2817 2818 if self.op.drbd_helper: 2819 # checks given drbd helper on all nodes 2820 helpers = self.rpc.call_drbd_helper(node_list) 2821 for node in node_list: 2822 ninfo = self.cfg.GetNodeInfo(node) 2823 if ninfo.offline: 2824 self.LogInfo("Not checking drbd helper on offline node %s", node) 2825 continue 2826 msg = helpers[node].fail_msg 2827 if msg: 2828 raise errors.OpPrereqError("Error checking drbd helper on node" 2829 " '%s': %s" % (node, msg), 2830 errors.ECODE_ENVIRON) 2831 node_helper = helpers[node].payload 2832 if node_helper != self.op.drbd_helper: 2833 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 2834 (node, node_helper), errors.ECODE_ENVIRON) 2835 2836 self.cluster = cluster = self.cfg.GetClusterInfo() 2837 # validate params changes 2838 if self.op.beparams: 2839 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 2840 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 2841 2842 if self.op.ndparams: 2843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 2844 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 2845 2846 # TODO: we need a more general way to handle resetting 2847 # cluster-level parameters to default values 2848 if self.new_ndparams["oob_program"] == "": 2849 self.new_ndparams["oob_program"] = \ 2850 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 2851 2852 if self.op.nicparams: 2853 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 2854 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 2855 objects.NIC.CheckParameterSyntax(self.new_nicparams) 2856 nic_errors = [] 2857 2858 # check all instances for consistency 2859 for instance in self.cfg.GetAllInstancesInfo().values(): 2860 for nic_idx, nic in enumerate(instance.nics): 2861 params_copy = copy.deepcopy(nic.nicparams) 2862 params_filled = objects.FillDict(self.new_nicparams, params_copy) 2863 2864 # check parameter syntax 2865 try: 2866 objects.NIC.CheckParameterSyntax(params_filled) 2867 except errors.ConfigurationError, err: 2868 nic_errors.append("Instance %s, nic/%d: %s" % 2869 (instance.name, nic_idx, err)) 2870 2871 # if we're moving instances to routed, check that they have an ip 2872 target_mode = 
params_filled[constants.NIC_MODE] 2873 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 2874 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 2875 " address" % (instance.name, nic_idx)) 2876 if nic_errors: 2877 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 2878 "\n".join(nic_errors)) 2879 2880 # hypervisor list/parameters 2881 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 2882 if self.op.hvparams: 2883 for hv_name, hv_dict in self.op.hvparams.items(): 2884 if hv_name not in self.new_hvparams: 2885 self.new_hvparams[hv_name] = hv_dict 2886 else: 2887 self.new_hvparams[hv_name].update(hv_dict) 2888 2889 # os hypervisor parameters 2890 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 2891 if self.op.os_hvp: 2892 for os_name, hvs in self.op.os_hvp.items(): 2893 if os_name not in self.new_os_hvp: 2894 self.new_os_hvp[os_name] = hvs 2895 else: 2896 for hv_name, hv_dict in hvs.items(): 2897 if hv_name not in self.new_os_hvp[os_name]: 2898 self.new_os_hvp[os_name][hv_name] = hv_dict 2899 else: 2900 self.new_os_hvp[os_name][hv_name].update(hv_dict) 2901 2902 # os parameters 2903 self.new_osp = objects.FillDict(cluster.osparams, {}) 2904 if self.op.osparams: 2905 for os_name, osp in self.op.osparams.items(): 2906 if os_name not in self.new_osp: 2907 self.new_osp[os_name] = {} 2908 2909 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 2910 use_none=True) 2911 2912 if not self.new_osp[os_name]: 2913 # we removed all parameters 2914 del self.new_osp[os_name] 2915 else: 2916 # check the parameter validity (remote check) 2917 _CheckOSParams(self, False, [self.cfg.GetMasterNode()], 2918 os_name, self.new_osp[os_name]) 2919 2920 # changes to the hypervisor list 2921 if self.op.enabled_hypervisors is not None: 2922 self.hv_list = self.op.enabled_hypervisors 2923 for hv in self.hv_list: 2924 # if the hypervisor doesn't already exist in the cluster 2925 # hvparams, we initialize it to empty, and then (in both 2926 # cases) we make sure to fill the defaults, as we might not 2927 # have a complete defaults list if the hypervisor wasn't 2928 # enabled before 2929 if hv not in new_hvp: 2930 new_hvp[hv] = {} 2931 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 2932 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 2933 else: 2934 self.hv_list = cluster.enabled_hypervisors 2935 2936 if self.op.hvparams or self.op.enabled_hypervisors is not None: 2937 # either the enabled list has changed, or the parameters have, validate 2938 for hv_name, hv_params in self.new_hvparams.items(): 2939 if ((self.op.hvparams and hv_name in self.op.hvparams) or 2940 (self.op.enabled_hypervisors and 2941 hv_name in self.op.enabled_hypervisors)): 2942 # either this is a new hypervisor, or its parameters have changed 2943 hv_class = hypervisor.GetHypervisor(hv_name) 2944 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2945 hv_class.CheckParameterSyntax(hv_params) 2946 _CheckHVParams(self, node_list, hv_name, hv_params) 2947 2948 if self.op.os_hvp: 2949 # no need to check any newly-enabled hypervisors, since the 2950 # defaults have already been checked in the above code-block 2951 for os_name, os_hvp in self.new_os_hvp.items(): 2952 for hv_name, hv_params in os_hvp.items(): 2953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2954 # we need to fill in the new os_hvp on top of the actual hv_p 2955 cluster_defaults = self.new_hvparams.get(hv_name, {}) 2956 new_osp = 
objects.FillDict(cluster_defaults, hv_params) 2957 hv_class = hypervisor.GetHypervisor(hv_name) 2958 hv_class.CheckParameterSyntax(new_osp) 2959 _CheckHVParams(self, node_list, hv_name, new_osp) 2960 2961 if self.op.default_iallocator: 2962 alloc_script = utils.FindFile(self.op.default_iallocator, 2963 constants.IALLOCATOR_SEARCH_PATH, 2964 os.path.isfile) 2965 if alloc_script is None: 2966 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 2967 " specified" % self.op.default_iallocator, 2968 errors.ECODE_INVAL)
2969
2970 - def Exec(self, feedback_fn):
2971 """Change the parameters of the cluster. 2972 2973 """ 2974 if self.op.vg_name is not None: 2975 new_volume = self.op.vg_name 2976 if not new_volume: 2977 new_volume = None 2978 if new_volume != self.cfg.GetVGName(): 2979 self.cfg.SetVGName(new_volume) 2980 else: 2981 feedback_fn("Cluster LVM configuration already in desired" 2982 " state, not changing") 2983 if self.op.drbd_helper is not None: 2984 new_helper = self.op.drbd_helper 2985 if not new_helper: 2986 new_helper = None 2987 if new_helper != self.cfg.GetDRBDHelper(): 2988 self.cfg.SetDRBDHelper(new_helper) 2989 else: 2990 feedback_fn("Cluster DRBD helper already in desired state," 2991 " not changing") 2992 if self.op.hvparams: 2993 self.cluster.hvparams = self.new_hvparams 2994 if self.op.os_hvp: 2995 self.cluster.os_hvp = self.new_os_hvp 2996 if self.op.enabled_hypervisors is not None: 2997 self.cluster.hvparams = self.new_hvparams 2998 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 2999 if self.op.beparams: 3000 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 3001 if self.op.nicparams: 3002 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 3003 if self.op.osparams: 3004 self.cluster.osparams = self.new_osp 3005 if self.op.ndparams: 3006 self.cluster.ndparams = self.new_ndparams 3007 3008 if self.op.candidate_pool_size is not None: 3009 self.cluster.candidate_pool_size = self.op.candidate_pool_size 3010 # we need to update the pool size here, otherwise the save will fail 3011 _AdjustCandidatePool(self, []) 3012 3013 if self.op.maintain_node_health is not None: 3014 self.cluster.maintain_node_health = self.op.maintain_node_health 3015 3016 if self.op.prealloc_wipe_disks is not None: 3017 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 3018 3019 if self.op.add_uids is not None: 3020 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 3021 3022 if self.op.remove_uids is not None: 3023 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 3024 3025 if self.op.uid_pool is not None: 3026 self.cluster.uid_pool = self.op.uid_pool 3027 3028 if self.op.default_iallocator is not None: 3029 self.cluster.default_iallocator = self.op.default_iallocator 3030 3031 if self.op.reserved_lvs is not None: 3032 self.cluster.reserved_lvs = self.op.reserved_lvs 3033 3034 def helper_os(aname, mods, desc): 3035 desc += " OS list" 3036 lst = getattr(self.cluster, aname) 3037 for key, val in mods: 3038 if key == constants.DDM_ADD: 3039 if val in lst: 3040 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 3041 else: 3042 lst.append(val) 3043 elif key == constants.DDM_REMOVE: 3044 if val in lst: 3045 lst.remove(val) 3046 else: 3047 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 3048 else: 3049 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3050 3051 if self.op.hidden_os: 3052 helper_os("hidden_os", self.op.hidden_os, "hidden") 3053 3054 if self.op.blacklisted_os: 3055 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 3056 3057 if self.op.master_netdev: 3058 master = self.cfg.GetMasterNode() 3059 feedback_fn("Shutting down master ip on the current netdev (%s)" % 3060 self.cluster.master_netdev) 3061 result = self.rpc.call_node_stop_master(master, False) 3062 result.Raise("Could not disable the master ip") 3063 feedback_fn("Changing master_netdev from %s to %s" % 3064 (self.cluster.master_netdev, self.op.master_netdev)) 3065 self.cluster.master_netdev = self.op.master_netdev 3066 3067 self.cfg.Update(self.cluster, feedback_fn) 3068 3069 if self.op.master_netdev: 3070 feedback_fn("Starting the master ip on the new master netdev (%s)" % 3071 self.op.master_netdev) 3072 result = self.rpc.call_node_start_master(master, False, False) 3073 if result.fail_msg: 3074 self.LogWarning("Could not re-enable the master ip on" 3075 " the master, please restart manually: %s", 3076 result.fail_msg)
3077
3078 3079 -def _UploadHelper(lu, nodes, fname):
3080 """Helper for uploading a file and showing warnings. 3081 3082 """ 3083 if os.path.exists(fname): 3084 result = lu.rpc.call_upload_file(nodes, fname) 3085 for to_node, to_result in result.items(): 3086 msg = to_result.fail_msg 3087 if msg: 3088 msg = ("Copy of file %s to node %s failed: %s" % 3089 (fname, to_node, msg)) 3090 lu.proc.LogWarning(msg)
3091
3092 3093 -def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3094 """Distribute additional files which are part of the cluster configuration. 3095 3096 ConfigWriter takes care of distributing the config and ssconf files, but 3097 there are more files which should be distributed to all nodes. This function 3098 makes sure those are copied. 3099 3100 @param lu: calling logical unit 3101 @param additional_nodes: list of nodes not in the config to distribute to 3102 @type additional_vm: boolean 3103 @param additional_vm: whether the additional nodes are vm-capable or not 3104 3105 """ 3106 # 1. Gather target nodes 3107 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 3108 dist_nodes = lu.cfg.GetOnlineNodeList() 3109 nvm_nodes = lu.cfg.GetNonVmCapableNodeList() 3110 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes] 3111 if additional_nodes is not None: 3112 dist_nodes.extend(additional_nodes) 3113 if additional_vm: 3114 vm_nodes.extend(additional_nodes) 3115 if myself.name in dist_nodes: 3116 dist_nodes.remove(myself.name) 3117 if myself.name in vm_nodes: 3118 vm_nodes.remove(myself.name) 3119 3120 # 2. Gather files to distribute 3121 dist_files = set([constants.ETC_HOSTS, 3122 constants.SSH_KNOWN_HOSTS_FILE, 3123 constants.RAPI_CERT_FILE, 3124 constants.RAPI_USERS_FILE, 3125 constants.CONFD_HMAC_KEY, 3126 constants.CLUSTER_DOMAIN_SECRET_FILE, 3127 ]) 3128 3129 vm_files = set() 3130 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors 3131 for hv_name in enabled_hypervisors: 3132 hv_class = hypervisor.GetHypervisor(hv_name) 3133 vm_files.update(hv_class.GetAncillaryFiles()) 3134 3135 # 3. Perform the files upload 3136 for fname in dist_files: 3137 _UploadHelper(lu, dist_nodes, fname) 3138 for fname in vm_files: 3139 _UploadHelper(lu, vm_nodes, fname)
3140
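Step 1 of _RedistributeAncillaryFiles reduces to simple list filtering: every online node except the master receives the general ancillary files, and only the vm-capable subset receives the hypervisor-specific ones. A sketch with hypothetical node names:

online = ["master.example.com", "node1.example.com",
          "node2.example.com", "storage1.example.com"]
non_vm_capable = ["storage1.example.com"]
master = "master.example.com"

dist_nodes = [name for name in online if name != master]
vm_nodes = [name for name in dist_nodes if name not in non_vm_capable]

# dist_nodes == ["node1.example.com", "node2.example.com",
#                "storage1.example.com"]
# vm_nodes   == ["node1.example.com", "node2.example.com"]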
3141 3142 -class LUClusterRedistConf(NoHooksLU):
3143 """Force the redistribution of cluster configuration. 3144 3145 This is a very simple LU. 3146 3147 """ 3148 REQ_BGL = False 3149
3150 - def ExpandNames(self):
3151 self.needed_locks = { 3152 locking.LEVEL_NODE: locking.ALL_SET, 3153 } 3154 self.share_locks[locking.LEVEL_NODE] = 1
3155
3156 - def Exec(self, feedback_fn):
3157 """Redistribute the configuration. 3158 3159 """ 3160 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 3161 _RedistributeAncillaryFiles(self)
3162
3163 3164 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
3165 """Sleep and poll for an instance's disk to sync. 3166 3167 """ 3168 if not instance.disks or disks is not None and not disks: 3169 return True 3170 3171 disks = _ExpandCheckDisks(instance, disks) 3172 3173 if not oneshot: 3174 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) 3175 3176 node = instance.primary_node 3177 3178 for dev in disks: 3179 lu.cfg.SetDiskID(dev, node) 3180 3181 # TODO: Convert to utils.Retry 3182 3183 retries = 0 3184 degr_retries = 10 # in seconds, as we sleep 1 second each time 3185 while True: 3186 max_time = 0 3187 done = True 3188 cumul_degraded = False 3189 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks) 3190 msg = rstats.fail_msg 3191 if msg: 3192 lu.LogWarning("Can't get any data from node %s: %s", node, msg) 3193 retries += 1 3194 if retries >= 10: 3195 raise errors.RemoteError("Can't contact node %s for mirror data," 3196 " aborting." % node) 3197 time.sleep(6) 3198 continue 3199 rstats = rstats.payload 3200 retries = 0 3201 for i, mstat in enumerate(rstats): 3202 if mstat is None: 3203 lu.LogWarning("Can't compute data for node %s/%s", 3204 node, disks[i].iv_name) 3205 continue 3206 3207 cumul_degraded = (cumul_degraded or 3208 (mstat.is_degraded and mstat.sync_percent is None)) 3209 if mstat.sync_percent is not None: 3210 done = False 3211 if mstat.estimated_time is not None: 3212 rem_time = ("%s remaining (estimated)" % 3213 utils.FormatSeconds(mstat.estimated_time)) 3214 max_time = mstat.estimated_time 3215 else: 3216 rem_time = "no time estimate" 3217 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % 3218 (disks[i].iv_name, mstat.sync_percent, rem_time)) 3219 3220 # if we're done but degraded, let's do a few small retries, to 3221 # make sure we see a stable and not transient situation; therefore 3222 # we force restart of the loop 3223 if (done or oneshot) and cumul_degraded and degr_retries > 0: 3224 logging.info("Degraded disks found, %d retries left", degr_retries) 3225 degr_retries -= 1 3226 time.sleep(1) 3227 continue 3228 3229 if done or oneshot: 3230 break 3231 3232 time.sleep(min(60, max_time)) 3233 3234 if done: 3235 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name) 3236 return not cumul_degraded
3237
3238 3239 -def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3240 """Check that mirrors are not degraded. 3241 3242 The ldisk parameter, if True, will change the test from the 3243 is_degraded attribute (which represents overall non-ok status for 3244 the device(s)) to the ldisk (representing the local storage status). 3245 3246 """ 3247 lu.cfg.SetDiskID(dev, node) 3248 3249 result = True 3250 3251 if on_primary or dev.AssembleOnSecondary(): 3252 rstats = lu.rpc.call_blockdev_find(node, dev) 3253 msg = rstats.fail_msg 3254 if msg: 3255 lu.LogWarning("Can't find disk on node %s: %s", node, msg) 3256 result = False 3257 elif not rstats.payload: 3258 lu.LogWarning("Can't find disk on node %s", node) 3259 result = False 3260 else: 3261 if ldisk: 3262 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY 3263 else: 3264 result = result and not rstats.payload.is_degraded 3265 3266 if dev.children: 3267 for child in dev.children: 3268 result = result and _CheckDiskConsistency(lu, child, node, on_primary) 3269 3270 return result
3271
3272 3273 -class LUOobCommand(NoHooksLU):
3274   """Logical unit for OOB handling.
3275 
3276   """
3277   REQ_BGL = False
3278 
3279 - def CheckPrereq(self):
3280 """Check prerequisites. 3281 3282 This checks: 3283 - the node exists in the configuration 3284 - OOB is supported 3285 3286 Any errors are signaled by raising errors.OpPrereqError. 3287 3288 """ 3289 self.nodes = [] 3290 for node_name in self.op.node_names: 3291 node = self.cfg.GetNodeInfo(node_name) 3292 3293 if node is None: 3294 raise errors.OpPrereqError("Node %s not found" % node_name, 3295 errors.ECODE_NOENT) 3296 else: 3297 self.nodes.append(node) 3298 3299 if (self.op.command == constants.OOB_POWER_OFF and not node.offline): 3300 raise errors.OpPrereqError(("Cannot power off node %s because it is" 3301 " not marked offline") % node_name, 3302 errors.ECODE_STATE)
3303
3304 - def ExpandNames(self):
3305 """Gather locks we need. 3306 3307 """ 3308 if self.op.node_names: 3309 self.op.node_names = [_ExpandNodeName(self.cfg, name) 3310 for name in self.op.node_names] 3311 else: 3312 self.op.node_names = self.cfg.GetNodeList() 3313 3314 self.needed_locks = { 3315 locking.LEVEL_NODE: self.op.node_names, 3316 }
3317
3318 - def Exec(self, feedback_fn):
3319 """Execute OOB and return result if we expect any. 3320 3321 """ 3322 master_node = self.cfg.GetMasterNode() 3323 ret = [] 3324 3325 for node in self.nodes: 3326 node_entry = [(constants.RS_NORMAL, node.name)] 3327 ret.append(node_entry) 3328 3329 oob_program = _SupportsOob(self.cfg, node) 3330 3331 if not oob_program: 3332 node_entry.append((constants.RS_UNAVAIL, None)) 3333 continue 3334 3335 logging.info("Executing out-of-band command '%s' using '%s' on %s", 3336 self.op.command, oob_program, node.name) 3337 result = self.rpc.call_run_oob(master_node, oob_program, 3338 self.op.command, node.name, 3339 self.op.timeout) 3340 3341 if result.fail_msg: 3342 self.LogWarning("On node '%s' out-of-band RPC failed with: %s", 3343 node.name, result.fail_msg) 3344 node_entry.append((constants.RS_NODATA, None)) 3345 else: 3346 try: 3347 self._CheckPayload(result) 3348 except errors.OpExecError, err: 3349 self.LogWarning("The payload returned by '%s' is not valid: %s", 3350 node.name, err) 3351 node_entry.append((constants.RS_NODATA, None)) 3352 else: 3353 if self.op.command == constants.OOB_HEALTH: 3354 # For health we should log important events 3355 for item, status in result.payload: 3356 if status in [constants.OOB_STATUS_WARNING, 3357 constants.OOB_STATUS_CRITICAL]: 3358 self.LogWarning("On node '%s' item '%s' has status '%s'", 3359 node.name, item, status) 3360 3361 if self.op.command == constants.OOB_POWER_ON: 3362 node.powered = True 3363 elif self.op.command == constants.OOB_POWER_OFF: 3364 node.powered = False 3365 elif self.op.command == constants.OOB_POWER_STATUS: 3366 powered = result.payload[constants.OOB_POWER_STATUS_POWERED] 3367 if powered != node.powered: 3368 logging.warning(("Recorded power state (%s) of node '%s' does not" 3369 " match actual power state (%s)"), node.powered, 3370 node.name, powered) 3371 3372 # For configuration changing commands we should update the node 3373 if self.op.command in (constants.OOB_POWER_ON, 3374 constants.OOB_POWER_OFF): 3375 self.cfg.Update(node, feedback_fn) 3376 3377 node_entry.append((constants.RS_NORMAL, result.payload)) 3378 3379 return ret
3380
3381 - def _CheckPayload(self, result):
3382 """Checks if the payload is valid. 3383 3384 @param result: RPC result 3385 @raises errors.OpExecError: If payload is not valid 3386 3387 """ 3388 errs = [] 3389 if self.op.command == constants.OOB_HEALTH: 3390 if not isinstance(result.payload, list): 3391 errs.append("command 'health' is expected to return a list but got %s" % 3392 type(result.payload)) 3393 else: 3394 for item, status in result.payload: 3395 if status not in constants.OOB_STATUSES: 3396 errs.append("health item '%s' has invalid status '%s'" % 3397 (item, status)) 3398 3399 if self.op.command == constants.OOB_POWER_STATUS: 3400 if not isinstance(result.payload, dict): 3401 errs.append("power-status is expected to return a dict but got %s" % 3402 type(result.payload)) 3403 3404 if self.op.command in [ 3405 constants.OOB_POWER_ON, 3406 constants.OOB_POWER_OFF, 3407 constants.OOB_POWER_CYCLE, 3408 ]: 3409 if result.payload is not None: 3410 errs.append("%s is expected to not return payload but got '%s'" % 3411 (self.op.command, result.payload)) 3412 3413 if errs: 3414 raise errors.OpExecError("Check of out-of-band payload failed due to %s" % 3415 utils.CommaJoin(errs))
3416
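The payload shapes accepted by _CheckPayload can be summarised in a standalone sketch. The OOB_* names and status strings below are invented stand-ins for the real ganeti.constants values, and the helper is illustrative rather than part of the module:

# Stand-in command names and statuses; the real constants live in
# ganeti.constants and may use different values.
OOB_HEALTH, OOB_POWER_STATUS, OOB_POWER_ON = "health", "power-status", "power-on"
OOB_STATUSES = frozenset(["OK", "WARNING", "CRITICAL", "UNKNOWN"])

def check_oob_payload(command, payload):
  """Return a list of error strings; an empty list means the payload is valid."""
  errs = []
  if command == OOB_HEALTH:
    if not isinstance(payload, list):
      errs.append("health must return a list, got %s" % type(payload))
    else:
      errs.extend("item %r has invalid status %r" % (item, status)
                  for (item, status) in payload if status not in OOB_STATUSES)
  elif command == OOB_POWER_STATUS:
    if not isinstance(payload, dict):
      errs.append("power-status must return a dict, got %s" % type(payload))
  elif payload is not None:
    # power-on/power-off/power-cycle are expected to return no payload
    errs.append("%s must not return a payload, got %r" % (command, payload))
  return errs

assert check_oob_payload(OOB_HEALTH, [("disk0", "OK"), ("fan1", "BROKEN")])
assert not check_oob_payload(OOB_POWER_STATUS, {"powered": True})
assert not check_oob_payload(OOB_POWER_ON, None)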
3417 3418 3419 -class LUOsDiagnose(NoHooksLU):
3420 """Logical unit for OS diagnose/query. 3421 3422 """ 3423 REQ_BGL = False 3424 _HID = "hidden" 3425 _BLK = "blacklisted" 3426 _VLD = "valid" 3427 _FIELDS_STATIC = utils.FieldSet() 3428 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants", 3429 "parameters", "api_versions", _HID, _BLK) 3430
3431 - def CheckArguments(self):
3432 if self.op.names: 3433 raise errors.OpPrereqError("Selective OS query not supported", 3434 errors.ECODE_INVAL) 3435 3436 _CheckOutputFields(static=self._FIELDS_STATIC, 3437 dynamic=self._FIELDS_DYNAMIC, 3438 selected=self.op.output_fields)
3439
3440 - def ExpandNames(self):
3441 # Lock all nodes, in shared mode 3442 # Temporary removal of locks, should be reverted later 3443 # TODO: reintroduce locks when they are lighter-weight 3444 self.needed_locks = {}
3445 #self.share_locks[locking.LEVEL_NODE] = 1 3446 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3447 3448 @staticmethod
3449 - def _DiagnoseByOS(rlist):
3450 """Remaps a per-node return list into an a per-os per-node dictionary 3451 3452 @param rlist: a map with node names as keys and OS objects as values 3453 3454 @rtype: dict 3455 @return: a dictionary with osnames as keys and as value another 3456 map, with nodes as keys and tuples of (path, status, diagnose, 3457 variants, parameters, api_versions) as values, eg:: 3458 3459 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []), 3460 (/srv/..., False, "invalid api")], 3461 "node2": [(/srv/..., True, "", [], [])]} 3462 } 3463 3464 """ 3465 all_os = {} 3466 # we build here the list of nodes that didn't fail the RPC (at RPC 3467 # level), so that nodes with a non-responding node daemon don't 3468 # make all OSes invalid 3469 good_nodes = [node_name for node_name in rlist 3470 if not rlist[node_name].fail_msg] 3471 for node_name, nr in rlist.items(): 3472 if nr.fail_msg or not nr.payload: 3473 continue 3474 for (name, path, status, diagnose, variants, 3475 params, api_versions) in nr.payload: 3476 if name not in all_os: 3477 # build a list of nodes for this os containing empty lists 3478 # for each node in node_list 3479 all_os[name] = {} 3480 for nname in good_nodes: 3481 all_os[name][nname] = [] 3482 # convert params from [name, help] to (name, help) 3483 params = [tuple(v) for v in params] 3484 all_os[name][node_name].append((path, status, diagnose, 3485 variants, params, api_versions)) 3486 return all_os
3487
3488 - def Exec(self, feedback_fn):
3489 """Compute the list of OSes. 3490 3491 """ 3492 valid_nodes = [node.name 3493 for node in self.cfg.GetAllNodesInfo().values() 3494 if not node.offline and node.vm_capable] 3495 node_data = self.rpc.call_os_diagnose(valid_nodes) 3496 pol = self._DiagnoseByOS(node_data) 3497 output = [] 3498 cluster = self.cfg.GetClusterInfo() 3499 3500 for os_name in utils.NiceSort(pol.keys()): 3501 os_data = pol[os_name] 3502 row = [] 3503 valid = True 3504 (variants, params, api_versions) = null_state = (set(), set(), set()) 3505 for idx, osl in enumerate(os_data.values()): 3506 valid = bool(valid and osl and osl[0][1]) 3507 if not valid: 3508 (variants, params, api_versions) = null_state 3509 break 3510 node_variants, node_params, node_api = osl[0][3:6] 3511 if idx == 0: # first entry 3512 variants = set(node_variants) 3513 params = set(node_params) 3514 api_versions = set(node_api) 3515 else: # keep consistency 3516 variants.intersection_update(node_variants) 3517 params.intersection_update(node_params) 3518 api_versions.intersection_update(node_api) 3519 3520 is_hid = os_name in cluster.hidden_os 3521 is_blk = os_name in cluster.blacklisted_os 3522 if ((self._HID not in self.op.output_fields and is_hid) or 3523 (self._BLK not in self.op.output_fields and is_blk) or 3524 (self._VLD not in self.op.output_fields and not valid)): 3525 continue 3526 3527 for field in self.op.output_fields: 3528 if field == "name": 3529 val = os_name 3530 elif field == self._VLD: 3531 val = valid 3532 elif field == "node_status": 3533 # this is just a copy of the dict 3534 val = {} 3535 for node_name, nos_list in os_data.items(): 3536 val[node_name] = nos_list 3537 elif field == "variants": 3538 val = utils.NiceSort(list(variants)) 3539 elif field == "parameters": 3540 val = list(params) 3541 elif field == "api_versions": 3542 val = list(api_versions) 3543 elif field == self._HID: 3544 val = is_hid 3545 elif field == self._BLK: 3546 val = is_blk 3547 else: 3548 raise errors.ParameterError(field) 3549 row.append(val) 3550 output.append(row) 3551 3552 return output
3553
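A hedged, self-contained sketch of the per-node to per-OS remapping performed by _DiagnoseByOS; the _FakeResult type and the sample RPC data are invented for illustration:

import collections

# Minimal stand-in for an RPC result with the two attributes the remap uses.
_FakeResult = collections.namedtuple("_FakeResult", ["fail_msg", "payload"])

def diagnose_by_os(rlist):
  # Nodes whose RPC failed are excluded, so a dead node daemon does not make
  # every OS look invalid.
  good_nodes = [n for n in rlist if not rlist[n].fail_msg]
  all_os = {}
  for node_name, nr in rlist.items():
    if nr.fail_msg or not nr.payload:
      continue
    for (name, path, status, diagnose, variants, params, api_versions) in nr.payload:
      per_node = all_os.setdefault(name, dict((n, []) for n in good_nodes))
      per_node[node_name].append((path, status, diagnose, variants,
                                  [tuple(v) for v in params], api_versions))
  return all_os

rlist = {
  "node1": _FakeResult(None, [("debian-etch", "/usr/lib/os/debian-etch",
                               True, "", ["default"], [["dhcp", "use dhcp"]], [20])]),
  "node2": _FakeResult("connection refused", None),   # failed node, ignored
}
print(diagnose_by_os(rlist))
# {'debian-etch': {'node1': [('/usr/lib/os/debian-etch', True, '',
#                             ['default'], [('dhcp', 'use dhcp')], [20])]}}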
3554 3555 -class LUNodeRemove(LogicalUnit):
3556 """Logical unit for removing a node. 3557 3558 """ 3559 HPATH = "node-remove" 3560 HTYPE = constants.HTYPE_NODE 3561
3562 - def BuildHooksEnv(self):
3563 """Build hooks env. 3564 3565 This doesn't run on the target node in the pre phase as a failed 3566 node would then be impossible to remove. 3567 3568 """ 3569 env = { 3570 "OP_TARGET": self.op.node_name, 3571 "NODE_NAME": self.op.node_name, 3572 } 3573 all_nodes = self.cfg.GetNodeList() 3574 try: 3575 all_nodes.remove(self.op.node_name) 3576 except ValueError: 3577 logging.warning("Node %s which is about to be removed not found" 3578 " in the all nodes list", self.op.node_name) 3579 return env, all_nodes, all_nodes
3580
3581 - def CheckPrereq(self):
3582 """Check prerequisites. 3583 3584 This checks: 3585 - the node exists in the configuration 3586 - it does not have primary or secondary instances 3587 - it's not the master 3588 3589 Any errors are signaled by raising errors.OpPrereqError. 3590 3591 """ 3592 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 3593 node = self.cfg.GetNodeInfo(self.op.node_name) 3594 assert node is not None 3595 3596 instance_list = self.cfg.GetInstanceList() 3597 3598 masternode = self.cfg.GetMasterNode() 3599 if node.name == masternode: 3600 raise errors.OpPrereqError("Node is the master node," 3601 " you need to failover first.", 3602 errors.ECODE_INVAL) 3603 3604 for instance_name in instance_list: 3605 instance = self.cfg.GetInstanceInfo(instance_name) 3606 if node.name in instance.all_nodes: 3607 raise errors.OpPrereqError("Instance %s is still running on the node," 3608 " please remove first." % instance_name, 3609 errors.ECODE_INVAL) 3610 self.op.node_name = node.name 3611 self.node = node
3612
3613 - def Exec(self, feedback_fn):
3614 """Removes the node from the cluster. 3615 3616 """ 3617 node = self.node 3618 logging.info("Stopping the node daemon and removing configs from node %s", 3619 node.name) 3620 3621 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 3622 3623 # Promote nodes to master candidate as needed 3624 _AdjustCandidatePool(self, exceptions=[node.name]) 3625 self.context.RemoveNode(node.name) 3626 3627 # Run post hooks on the node before it's removed 3628 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 3629 try: 3630 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name]) 3631 except: 3632 # pylint: disable-msg=W0702 3633 self.LogWarning("Errors occurred running hooks on %s" % node.name) 3634 3635 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup) 3636 msg = result.fail_msg 3637 if msg: 3638 self.LogWarning("Errors encountered on the remote node while leaving" 3639 " the cluster: %s", msg) 3640 3641 # Remove node from our /etc/hosts 3642 if self.cfg.GetClusterInfo().modify_etc_hosts: 3643 master_node = self.cfg.GetMasterNode() 3644 result = self.rpc.call_etc_hosts_modify(master_node, 3645 constants.ETC_HOSTS_REMOVE, 3646 node.name, None) 3647 result.Raise("Can't update hosts file with new host data") 3648 _RedistributeAncillaryFiles(self)
3649
3650 3651 -class _NodeQuery(_QueryBase):
3652 FIELDS = query.NODE_FIELDS 3653
3654 - def ExpandNames(self, lu):
3655 lu.needed_locks = {} 3656 lu.share_locks[locking.LEVEL_NODE] = 1 3657 3658 if self.names: 3659 self.wanted = _GetWantedNodes(lu, self.names) 3660 else: 3661 self.wanted = locking.ALL_SET 3662 3663 self.do_locking = (self.use_locking and 3664 query.NQ_LIVE in self.requested_data) 3665 3666 if self.do_locking: 3667 # if we don't request only static fields, we need to lock the nodes 3668 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3669
3670 - def DeclareLocks(self, lu, level):
3671 pass
3672
3673 - def _GetQueryData(self, lu):
3674 """Computes the list of nodes and their attributes. 3675 3676 """ 3677 all_info = lu.cfg.GetAllNodesInfo() 3678 3679 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE) 3680 3681 # Gather data as requested 3682 if query.NQ_LIVE in self.requested_data: 3683 # filter out non-vm_capable nodes 3684 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable] 3685 3686 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(), 3687 lu.cfg.GetHypervisorType()) 3688 live_data = dict((name, nresult.payload) 3689 for (name, nresult) in node_data.items() 3690 if not nresult.fail_msg and nresult.payload) 3691 else: 3692 live_data = None 3693 3694 if query.NQ_INST in self.requested_data: 3695 node_to_primary = dict([(name, set()) for name in nodenames]) 3696 node_to_secondary = dict([(name, set()) for name in nodenames]) 3697 3698 inst_data = lu.cfg.GetAllInstancesInfo() 3699 3700 for inst in inst_data.values(): 3701 if inst.primary_node in node_to_primary: 3702 node_to_primary[inst.primary_node].add(inst.name) 3703 for secnode in inst.secondary_nodes: 3704 if secnode in node_to_secondary: 3705 node_to_secondary[secnode].add(inst.name) 3706 else: 3707 node_to_primary = None 3708 node_to_secondary = None 3709 3710 if query.NQ_OOB in self.requested_data: 3711 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node))) 3712 for name, node in all_info.iteritems()) 3713 else: 3714 oob_support = None 3715 3716 if query.NQ_GROUP in self.requested_data: 3717 groups = lu.cfg.GetAllNodeGroupsInfo() 3718 else: 3719 groups = {} 3720 3721 return query.NodeQueryData([all_info[name] for name in nodenames], 3722 live_data, lu.cfg.GetMasterNode(), 3723 node_to_primary, node_to_secondary, groups, 3724 oob_support, lu.cfg.GetClusterInfo())
3725
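The node-to-instance maps assembled in _GetQueryData above can be reproduced with plain data; a hedged sketch using invented instance records:

nodenames = ["node1", "node2", "node3"]
# (instance name, primary node, secondary nodes) - invented sample data
instances = [("inst1", "node1", ["node2"]),
             ("inst2", "node2", [])]

node_to_primary = dict((name, set()) for name in nodenames)
node_to_secondary = dict((name, set()) for name in nodenames)
for inst_name, primary, secondaries in instances:
  if primary in node_to_primary:
    node_to_primary[primary].add(inst_name)
  for sec in secondaries:
    if sec in node_to_secondary:
      node_to_secondary[sec].add(inst_name)

print(node_to_primary["node1"])    # contains 'inst1'
print(node_to_secondary["node2"])  # contains 'inst1'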
3726 3727 -class LUNodeQuery(NoHooksLU):
3728 """Logical unit for querying nodes. 3729 3730 """ 3731 # pylint: disable-msg=W0142 3732 REQ_BGL = False 3733
3734 - def CheckArguments(self):
3735 self.nq = _NodeQuery(self.op.names, self.op.output_fields, 3736 self.op.use_locking)
3737
3738 - def ExpandNames(self):
3739 self.nq.ExpandNames(self)
3740
3741 - def Exec(self, feedback_fn):
3742 return self.nq.OldStyleQuery(self)
3743
3744 3745 -class LUNodeQueryvols(NoHooksLU):
3746 """Logical unit for getting volumes on node(s). 3747 3748 """ 3749 REQ_BGL = False 3750 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") 3751 _FIELDS_STATIC = utils.FieldSet("node") 3752
3753 - def CheckArguments(self):
3754 _CheckOutputFields(static=self._FIELDS_STATIC, 3755 dynamic=self._FIELDS_DYNAMIC, 3756 selected=self.op.output_fields)
3757
3758 - def ExpandNames(self):
3759 self.needed_locks = {} 3760 self.share_locks[locking.LEVEL_NODE] = 1 3761 if not self.op.nodes: 3762 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 3763 else: 3764 self.needed_locks[locking.LEVEL_NODE] = \ 3765 _GetWantedNodes(self, self.op.nodes)
3766
3767 - def Exec(self, feedback_fn):
3768 """Computes the list of nodes and their attributes. 3769 3770 """ 3771 nodenames = self.acquired_locks[locking.LEVEL_NODE] 3772 volumes = self.rpc.call_node_volumes(nodenames) 3773 3774 ilist = self.cfg.GetAllInstancesInfo() 3775 3776 vol2inst = dict(((node, vol), inst.name) 3777 for inst in ilist.values() 3778 for (node, vols) in inst.MapLVsByNode().items() 3779 for vol in vols) 3780 3781 output = [] 3782 for node in nodenames: 3783 nresult = volumes[node] 3784 if nresult.offline: 3785 continue 3786 msg = nresult.fail_msg 3787 if msg: 3788 self.LogWarning("Can't compute volume data on node %s: %s", node, msg) 3789 continue 3790 3791 node_vols = sorted(nresult.payload, 3792 key=operator.itemgetter("dev")) 3793 3794 for vol in node_vols: 3795 node_output = [] 3796 for field in self.op.output_fields: 3797 if field == "node": 3798 val = node 3799 elif field == "phys": 3800 val = vol['dev'] 3801 elif field == "vg": 3802 val = vol['vg'] 3803 elif field == "name": 3804 val = vol['name'] 3805 elif field == "size": 3806 val = int(float(vol['size'])) 3807 elif field == "instance": 3808 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-") 3809 else: 3810 raise errors.ParameterError(field) 3811 node_output.append(str(val)) 3812 3813 output.append(node_output) 3814 3815 return output
3816
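The vol2inst lookup table built in LUNodeQueryvols.Exec maps a (node, "vg/name") pair to the instance owning that volume. A small sketch with invented data, using plain dicts instead of objects.Instance and MapLVsByNode():

# instance name -> {node -> list of "vg/lv" volumes} (invented sample data)
instances = {
  "inst1": {"node1": ["xenvg/inst1-disk0"], "node2": ["xenvg/inst1-disk0"]},
  "inst2": {"node1": ["xenvg/inst2-disk0"]},
}
vol2inst = dict(((node, vol), inst_name)
                for inst_name, lvs_by_node in instances.items()
                for node, vols in lvs_by_node.items()
                for vol in vols)

print(vol2inst[("node1", "xenvg/inst2-disk0")])      # inst2
print(vol2inst.get(("node2", "xenvg/orphan"), "-"))  # "-" for unowned volumes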
3817 3818 -class LUNodeQueryStorage(NoHooksLU):
3819 """Logical unit for getting information on storage units on node(s). 3820 3821 """ 3822 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) 3823 REQ_BGL = False 3824
3825 - def CheckArguments(self):
3826 _CheckOutputFields(static=self._FIELDS_STATIC, 3827 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), 3828 selected=self.op.output_fields)
3829
3830 - def ExpandNames(self):
3831 self.needed_locks = {} 3832 self.share_locks[locking.LEVEL_NODE] = 1 3833 3834 if self.op.nodes: 3835 self.needed_locks[locking.LEVEL_NODE] = \ 3836 _GetWantedNodes(self, self.op.nodes) 3837 else: 3838 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3839
3840 - def Exec(self, feedback_fn):
3841 """Computes the list of nodes and their attributes. 3842 3843 """ 3844 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 3845 3846 # Always get name to sort by 3847 if constants.SF_NAME in self.op.output_fields: 3848 fields = self.op.output_fields[:] 3849 else: 3850 fields = [constants.SF_NAME] + self.op.output_fields 3851 3852 # Never ask for node or type as it's only known to the LU 3853 for extra in [constants.SF_NODE, constants.SF_TYPE]: 3854 while extra in fields: 3855 fields.remove(extra) 3856 3857 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) 3858 name_idx = field_idx[constants.SF_NAME] 3859 3860 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 3861 data = self.rpc.call_storage_list(self.nodes, 3862 self.op.storage_type, st_args, 3863 self.op.name, fields) 3864 3865 result = [] 3866 3867 for node in utils.NiceSort(self.nodes): 3868 nresult = data[node] 3869 if nresult.offline: 3870 continue 3871 3872 msg = nresult.fail_msg 3873 if msg: 3874 self.LogWarning("Can't get storage data from node %s: %s", node, msg) 3875 continue 3876 3877 rows = dict([(row[name_idx], row) for row in nresult.payload]) 3878 3879 for name in utils.NiceSort(rows.keys()): 3880 row = rows[name] 3881 3882 out = [] 3883 3884 for field in self.op.output_fields: 3885 if field == constants.SF_NODE: 3886 val = node 3887 elif field == constants.SF_TYPE: 3888 val = self.op.storage_type 3889 elif field in field_idx: 3890 val = row[field_idx[field]] 3891 else: 3892 raise errors.ParameterError(field) 3893 3894 out.append(val) 3895 3896 result.append(out) 3897 3898 return result
3899
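A minimal sketch of the field-list preparation in LUNodeQueryStorage.Exec: the name field is always requested so results can be sorted, while node and type are stripped because only the LU knows them. The helper below is illustrative only and uses stand-in field names:

SF_NAME, SF_NODE, SF_TYPE = "name", "node", "type"

def backend_fields(output_fields):
  # Ensure "name" is queried, and never forward "node"/"type" to the backend.
  fields = list(output_fields)
  if SF_NAME not in fields:
    fields.insert(0, SF_NAME)
  return [f for f in fields if f not in (SF_NODE, SF_TYPE)]

print(backend_fields(["node", "name", "size", "used"]))  # ['name', 'size', 'used']
print(backend_fields(["type", "free"]))                  # ['name', 'free']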
3900 3901 -class _InstanceQuery(_QueryBase):
3902 FIELDS = query.INSTANCE_FIELDS 3903
3904 - def ExpandNames(self, lu):
3905 lu.needed_locks = {} 3906 lu.share_locks[locking.LEVEL_INSTANCE] = 1 3907 lu.share_locks[locking.LEVEL_NODE] = 1 3908 3909 if self.names: 3910 self.wanted = _GetWantedInstances(lu, self.names) 3911 else: 3912 self.wanted = locking.ALL_SET 3913 3914 self.do_locking = (self.use_locking and 3915 query.IQ_LIVE in self.requested_data) 3916 if self.do_locking: 3917 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted 3918 lu.needed_locks[locking.LEVEL_NODE] = [] 3919 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3920
3921 - def DeclareLocks(self, lu, level):
3922 if level == locking.LEVEL_NODE and self.do_locking: 3923 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3924
3925 - def _GetQueryData(self, lu):
3926 """Computes the list of instances and their attributes. 3927 3928 """ 3929 cluster = lu.cfg.GetClusterInfo() 3930 all_info = lu.cfg.GetAllInstancesInfo() 3931 3932 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE) 3933 3934 instance_list = [all_info[name] for name in instance_names] 3935 nodes = frozenset(itertools.chain(*(inst.all_nodes 3936 for inst in instance_list))) 3937 hv_list = list(set([inst.hypervisor for inst in instance_list])) 3938 bad_nodes = [] 3939 offline_nodes = [] 3940 wrongnode_inst = set() 3941 3942 # Gather data as requested 3943 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]): 3944 live_data = {} 3945 node_data = lu.rpc.call_all_instances_info(nodes, hv_list) 3946 for name in nodes: 3947 result = node_data[name] 3948 if result.offline: 3949 # offline nodes will be in both lists 3950 assert result.fail_msg 3951 offline_nodes.append(name) 3952 if result.fail_msg: 3953 bad_nodes.append(name) 3954 elif result.payload: 3955 for inst in result.payload: 3956 if inst in all_info: 3957 if all_info[inst].primary_node == name: 3958 live_data.update(result.payload) 3959 else: 3960 wrongnode_inst.add(inst) 3961 else: 3962 # orphan instance; we don't list it here as we don't 3963 # handle this case yet in the output of instance listing 3964 logging.warning("Orphan instance '%s' found on node %s", 3965 inst, name) 3966 # else no instance is alive 3967 else: 3968 live_data = {} 3969 3970 if query.IQ_DISKUSAGE in self.requested_data: 3971 disk_usage = dict((inst.name, 3972 _ComputeDiskSize(inst.disk_template, 3973 [{"size": disk.size} 3974 for disk in inst.disks])) 3975 for inst in instance_list) 3976 else: 3977 disk_usage = None 3978 3979 if query.IQ_CONSOLE in self.requested_data: 3980 consinfo = {} 3981 for inst in instance_list: 3982 if inst.name in live_data: 3983 # Instance is running 3984 consinfo[inst.name] = _GetInstanceConsole(cluster, inst) 3985 else: 3986 consinfo[inst.name] = None 3987 assert set(consinfo.keys()) == set(instance_names) 3988 else: 3989 consinfo = None 3990 3991 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(), 3992 disk_usage, offline_nodes, bad_nodes, 3993 live_data, wrongnode_inst, consinfo)
3994
3995 3996 -class LUQuery(NoHooksLU):
3997 """Query for resources/items of a certain kind. 3998 3999 """ 4000 # pylint: disable-msg=W0142 4001 REQ_BGL = False 4002
4003 - def CheckArguments(self):
4004 qcls = _GetQueryImplementation(self.op.what) 4005 names = qlang.ReadSimpleFilter("name", self.op.filter) 4006 4007 self.impl = qcls(names, self.op.fields, False)
4008
4009 - def ExpandNames(self):
4010 self.impl.ExpandNames(self)
4011
4012 - def DeclareLocks(self, level):
4013 self.impl.DeclareLocks(self, level)
4014
4015 - def Exec(self, feedback_fn):
4016 return self.impl.NewStyleQuery(self)
4017
4018 4019 -class LUQueryFields(NoHooksLU):
4020 """Query for resources/items of a certain kind. 4021 4022 """ 4023 # pylint: disable-msg=W0142 4024 REQ_BGL = False 4025
4026 - def CheckArguments(self):
4027 self.qcls = _GetQueryImplementation(self.op.what)
4028
4029 - def ExpandNames(self):
4030 self.needed_locks = {}
4031
4032 - def Exec(self, feedback_fn):
4033 return self.qcls.FieldsQuery(self.op.fields)
4034
4035 4036 -class LUNodeModifyStorage(NoHooksLU):
4037 """Logical unit for modifying a storage volume on a node. 4038 4039 """ 4040 REQ_BGL = False 4041
4042 - def CheckArguments(self):
4043 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4044 4045 storage_type = self.op.storage_type 4046 4047 try: 4048 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] 4049 except KeyError: 4050 raise errors.OpPrereqError("Storage units of type '%s' can not be" 4051 " modified" % storage_type, 4052 errors.ECODE_INVAL) 4053 4054 diff = set(self.op.changes.keys()) - modifiable 4055 if diff: 4056 raise errors.OpPrereqError("The following fields can not be modified for" 4057 " storage units of type '%s': %r" % 4058 (storage_type, list(diff)), 4059 errors.ECODE_INVAL)
4060
4061 - def ExpandNames(self):
4062 self.needed_locks = { 4063 locking.LEVEL_NODE: self.op.node_name, 4064 }
4065
4066 - def Exec(self, feedback_fn):
4067 """Computes the list of nodes and their attributes. 4068 4069 """ 4070 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 4071 result = self.rpc.call_storage_modify(self.op.node_name, 4072 self.op.storage_type, st_args, 4073 self.op.name, self.op.changes) 4074 result.Raise("Failed to modify storage unit '%s' on %s" % 4075 (self.op.name, self.op.node_name))
4076
4077 4078 -class LUNodeAdd(LogicalUnit):
4079 """Logical unit for adding node to the cluster. 4080 4081 """ 4082 HPATH = "node-add" 4083 HTYPE = constants.HTYPE_NODE 4084 _NFLAGS = ["master_capable", "vm_capable"] 4085
4086 - def CheckArguments(self):
4087 self.primary_ip_family = self.cfg.GetPrimaryIPFamily() 4088 # validate/normalize the node name 4089 self.hostname = netutils.GetHostname(name=self.op.node_name, 4090 family=self.primary_ip_family) 4091 self.op.node_name = self.hostname.name 4092 4093 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode(): 4094 raise errors.OpPrereqError("Cannot readd the master node", 4095 errors.ECODE_STATE) 4096 4097 if self.op.readd and self.op.group: 4098 raise errors.OpPrereqError("Cannot pass a node group when a node is" 4099 " being readded", errors.ECODE_INVAL)
4100
4101 - def BuildHooksEnv(self):
4102 """Build hooks env. 4103 4104 This will run on all nodes before, and on all nodes + the new node after. 4105 4106 """ 4107 env = { 4108 "OP_TARGET": self.op.node_name, 4109 "NODE_NAME": self.op.node_name, 4110 "NODE_PIP": self.op.primary_ip, 4111 "NODE_SIP": self.op.secondary_ip, 4112 "MASTER_CAPABLE": str(self.op.master_capable), 4113 "VM_CAPABLE": str(self.op.vm_capable), 4114 } 4115 nodes_0 = self.cfg.GetNodeList() 4116 nodes_1 = nodes_0 + [self.op.node_name, ] 4117 return env, nodes_0, nodes_1
4118
4119 - def CheckPrereq(self):
4120 """Check prerequisites. 4121 4122 This checks: 4123 - the new node is not already in the config 4124 - it is resolvable 4125 - its parameters (single/dual homed) matches the cluster 4126 4127 Any errors are signaled by raising errors.OpPrereqError. 4128 4129 """ 4130 cfg = self.cfg 4131 hostname = self.hostname 4132 node = hostname.name 4133 primary_ip = self.op.primary_ip = hostname.ip 4134 if self.op.secondary_ip is None: 4135 if self.primary_ip_family == netutils.IP6Address.family: 4136 raise errors.OpPrereqError("When using a IPv6 primary address, a valid" 4137 " IPv4 address must be given as secondary", 4138 errors.ECODE_INVAL) 4139 self.op.secondary_ip = primary_ip 4140 4141 secondary_ip = self.op.secondary_ip 4142 if not netutils.IP4Address.IsValid(secondary_ip): 4143 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 4144 " address" % secondary_ip, errors.ECODE_INVAL) 4145 4146 node_list = cfg.GetNodeList() 4147 if not self.op.readd and node in node_list: 4148 raise errors.OpPrereqError("Node %s is already in the configuration" % 4149 node, errors.ECODE_EXISTS) 4150 elif self.op.readd and node not in node_list: 4151 raise errors.OpPrereqError("Node %s is not in the configuration" % node, 4152 errors.ECODE_NOENT) 4153 4154 self.changed_primary_ip = False 4155 4156 for existing_node_name in node_list: 4157 existing_node = cfg.GetNodeInfo(existing_node_name) 4158 4159 if self.op.readd and node == existing_node_name: 4160 if existing_node.secondary_ip != secondary_ip: 4161 raise errors.OpPrereqError("Readded node doesn't have the same IP" 4162 " address configuration as before", 4163 errors.ECODE_INVAL) 4164 if existing_node.primary_ip != primary_ip: 4165 self.changed_primary_ip = True 4166 4167 continue 4168 4169 if (existing_node.primary_ip == primary_ip or 4170 existing_node.secondary_ip == primary_ip or 4171 existing_node.primary_ip == secondary_ip or 4172 existing_node.secondary_ip == secondary_ip): 4173 raise errors.OpPrereqError("New node ip address(es) conflict with" 4174 " existing node %s" % existing_node.name, 4175 errors.ECODE_NOTUNIQUE) 4176 4177 # After this 'if' block, None is no longer a valid value for the 4178 # _capable op attributes 4179 if self.op.readd: 4180 old_node = self.cfg.GetNodeInfo(node) 4181 assert old_node is not None, "Can't retrieve locked node %s" % node 4182 for attr in self._NFLAGS: 4183 if getattr(self.op, attr) is None: 4184 setattr(self.op, attr, getattr(old_node, attr)) 4185 else: 4186 for attr in self._NFLAGS: 4187 if getattr(self.op, attr) is None: 4188 setattr(self.op, attr, True) 4189 4190 if self.op.readd and not self.op.vm_capable: 4191 pri, sec = cfg.GetNodeInstances(node) 4192 if pri or sec: 4193 raise errors.OpPrereqError("Node %s being re-added with vm_capable" 4194 " flag set to false, but it already holds" 4195 " instances" % node, 4196 errors.ECODE_STATE) 4197 4198 # check that the type of the node (single versus dual homed) is the 4199 # same as for the master 4200 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) 4201 master_singlehomed = myself.secondary_ip == myself.primary_ip 4202 newbie_singlehomed = secondary_ip == primary_ip 4203 if master_singlehomed != newbie_singlehomed: 4204 if master_singlehomed: 4205 raise errors.OpPrereqError("The master has no secondary ip but the" 4206 " new node has one", 4207 errors.ECODE_INVAL) 4208 else: 4209 raise errors.OpPrereqError("The master has a secondary ip but the" 4210 " new node doesn't have one", 4211 errors.ECODE_INVAL) 4212 4213 # checks reachability 
4214 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): 4215 raise errors.OpPrereqError("Node not reachable by ping", 4216 errors.ECODE_ENVIRON) 4217 4218 if not newbie_singlehomed: 4219 # check reachability from my secondary ip to newbie's secondary ip 4220 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, 4221 source=myself.secondary_ip): 4222 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 4223 " based ping to node daemon port", 4224 errors.ECODE_ENVIRON) 4225 4226 if self.op.readd: 4227 exceptions = [node] 4228 else: 4229 exceptions = [] 4230 4231 if self.op.master_capable: 4232 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) 4233 else: 4234 self.master_candidate = False 4235 4236 if self.op.readd: 4237 self.new_node = old_node 4238 else: 4239 node_group = cfg.LookupNodeGroup(self.op.group) 4240 self.new_node = objects.Node(name=node, 4241 primary_ip=primary_ip, 4242 secondary_ip=secondary_ip, 4243 master_candidate=self.master_candidate, 4244 offline=False, drained=False, 4245 group=node_group) 4246 4247 if self.op.ndparams: 4248 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4249
4250 - def Exec(self, feedback_fn):
4251 """Adds the new node to the cluster. 4252 4253 """ 4254 new_node = self.new_node 4255 node = new_node.name 4256 4257 # We adding a new node so we assume it's powered 4258 new_node.powered = True 4259 4260 # for re-adds, reset the offline/drained/master-candidate flags; 4261 # we need to reset here, otherwise offline would prevent RPC calls 4262 # later in the procedure; this also means that if the re-add 4263 # fails, we are left with a non-offlined, broken node 4264 if self.op.readd: 4265 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201 4266 self.LogInfo("Readding a node, the offline/drained flags were reset") 4267 # if we demote the node, we do cleanup later in the procedure 4268 new_node.master_candidate = self.master_candidate 4269 if self.changed_primary_ip: 4270 new_node.primary_ip = self.op.primary_ip 4271 4272 # copy the master/vm_capable flags 4273 for attr in self._NFLAGS: 4274 setattr(new_node, attr, getattr(self.op, attr)) 4275 4276 # notify the user about any possible mc promotion 4277 if new_node.master_candidate: 4278 self.LogInfo("Node will be a master candidate") 4279 4280 if self.op.ndparams: 4281 new_node.ndparams = self.op.ndparams 4282 else: 4283 new_node.ndparams = {} 4284 4285 # check connectivity 4286 result = self.rpc.call_version([node])[node] 4287 result.Raise("Can't get version information from node %s" % node) 4288 if constants.PROTOCOL_VERSION == result.payload: 4289 logging.info("Communication to node %s fine, sw version %s match", 4290 node, result.payload) 4291 else: 4292 raise errors.OpExecError("Version mismatch master version %s," 4293 " node version %s" % 4294 (constants.PROTOCOL_VERSION, result.payload)) 4295 4296 # Add node to our /etc/hosts, and add key to known_hosts 4297 if self.cfg.GetClusterInfo().modify_etc_hosts: 4298 master_node = self.cfg.GetMasterNode() 4299 result = self.rpc.call_etc_hosts_modify(master_node, 4300 constants.ETC_HOSTS_ADD, 4301 self.hostname.name, 4302 self.hostname.ip) 4303 result.Raise("Can't update hosts file with new host data") 4304 4305 if new_node.secondary_ip != new_node.primary_ip: 4306 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip, 4307 False) 4308 4309 node_verify_list = [self.cfg.GetMasterNode()] 4310 node_verify_param = { 4311 constants.NV_NODELIST: [node], 4312 # TODO: do a node-net-test as well? 
4313 } 4314 4315 result = self.rpc.call_node_verify(node_verify_list, node_verify_param, 4316 self.cfg.GetClusterName()) 4317 for verifier in node_verify_list: 4318 result[verifier].Raise("Cannot communicate with node %s" % verifier) 4319 nl_payload = result[verifier].payload[constants.NV_NODELIST] 4320 if nl_payload: 4321 for failed in nl_payload: 4322 feedback_fn("ssh/hostname verification failed" 4323 " (checking from %s): %s" % 4324 (verifier, nl_payload[failed])) 4325 raise errors.OpExecError("ssh/hostname verification failed") 4326 4327 if self.op.readd: 4328 _RedistributeAncillaryFiles(self) 4329 self.context.ReaddNode(new_node) 4330 # make sure we redistribute the config 4331 self.cfg.Update(new_node, feedback_fn) 4332 # and make sure the new node will not have old files around 4333 if not new_node.master_candidate: 4334 result = self.rpc.call_node_demote_from_mc(new_node.name) 4335 msg = result.fail_msg 4336 if msg: 4337 self.LogWarning("Node failed to demote itself from master" 4338 " candidate status: %s" % msg) 4339 else: 4340 _RedistributeAncillaryFiles(self, additional_nodes=[node], 4341 additional_vm=self.op.vm_capable) 4342 self.context.AddNode(new_node, self.proc.GetECId())
4343
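One of the checks in LUNodeAdd.CheckPrereq above is that the new node's homing (single- vs. dual-homed) matches the master's. A standalone sketch of that comparison, with an invented helper name and example addresses:

def homing_matches(master_primary, master_secondary, new_primary, new_secondary):
  # A node is single-homed when its secondary IP equals its primary IP.
  master_singlehomed = master_secondary == master_primary
  newbie_singlehomed = new_secondary == new_primary
  return master_singlehomed == newbie_singlehomed

print(homing_matches("192.0.2.1", "192.0.2.1", "192.0.2.5", "192.0.2.5"))     # True
print(homing_matches("192.0.2.1", "198.51.100.1", "192.0.2.5", "192.0.2.5"))  # False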
4344 4345 -class LUNodeSetParams(LogicalUnit):
4346 """Modifies the parameters of a node. 4347 4348 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) 4349 to the node role (as _ROLE_*) 4350 @cvar _R2F: a dictionary from node role to tuples of flags 4351 @cvar _FLAGS: a list of attribute names corresponding to the flags 4352 4353 """ 4354 HPATH = "node-modify" 4355 HTYPE = constants.HTYPE_NODE 4356 REQ_BGL = False 4357 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) 4358 _F2R = { 4359 (True, False, False): _ROLE_CANDIDATE, 4360 (False, True, False): _ROLE_DRAINED, 4361 (False, False, True): _ROLE_OFFLINE, 4362 (False, False, False): _ROLE_REGULAR, 4363 } 4364 _R2F = dict((v, k) for k, v in _F2R.items()) 4365 _FLAGS = ["master_candidate", "drained", "offline"] 4366
4367 - def CheckArguments(self):
4368 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4369 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, 4370 self.op.master_capable, self.op.vm_capable, 4371 self.op.secondary_ip, self.op.ndparams] 4372 if all_mods.count(None) == len(all_mods): 4373 raise errors.OpPrereqError("Please pass at least one modification", 4374 errors.ECODE_INVAL) 4375 if all_mods.count(True) > 1: 4376 raise errors.OpPrereqError("Can't set the node into more than one" 4377 " state at the same time", 4378 errors.ECODE_INVAL) 4379 4380 # Boolean value that tells us whether we might be demoting from MC 4381 self.might_demote = (self.op.master_candidate == False or 4382 self.op.offline == True or 4383 self.op.drained == True or 4384 self.op.master_capable == False) 4385 4386 if self.op.secondary_ip: 4387 if not netutils.IP4Address.IsValid(self.op.secondary_ip): 4388 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 4389 " address" % self.op.secondary_ip, 4390 errors.ECODE_INVAL) 4391 4392 self.lock_all = self.op.auto_promote and self.might_demote 4393 self.lock_instances = self.op.secondary_ip is not None
4394
4395 - def ExpandNames(self):
4396 if self.lock_all: 4397 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET} 4398 else: 4399 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name} 4400 4401 if self.lock_instances: 4402 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4403
4404 - def DeclareLocks(self, level):
4405 # If we have locked all instances, before waiting to lock nodes, release 4406 # all the ones living on nodes unrelated to the current operation. 4407 if level == locking.LEVEL_NODE and self.lock_instances: 4408 instances_release = [] 4409 instances_keep = [] 4410 self.affected_instances = [] 4411 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 4412 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 4413 instance = self.context.cfg.GetInstanceInfo(instance_name) 4414 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR 4415 if i_mirrored and self.op.node_name in instance.all_nodes: 4416 instances_keep.append(instance_name) 4417 self.affected_instances.append(instance) 4418 else: 4419 instances_release.append(instance_name) 4420 if instances_release: 4421 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release) 4422 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4423
4424 - def BuildHooksEnv(self):
4425 """Build hooks env. 4426 4427 This runs on the master node. 4428 4429 """ 4430 env = { 4431 "OP_TARGET": self.op.node_name, 4432 "MASTER_CANDIDATE": str(self.op.master_candidate), 4433 "OFFLINE": str(self.op.offline), 4434 "DRAINED": str(self.op.drained), 4435 "MASTER_CAPABLE": str(self.op.master_capable), 4436 "VM_CAPABLE": str(self.op.vm_capable), 4437 } 4438 nl = [self.cfg.GetMasterNode(), 4439 self.op.node_name] 4440 return env, nl, nl
4441
4442 - def CheckPrereq(self):
4443 """Check prerequisites. 4444 4445 This only checks the instance list against the existing names. 4446 4447 """ 4448 node = self.node = self.cfg.GetNodeInfo(self.op.node_name) 4449 4450 if (self.op.master_candidate is not None or 4451 self.op.drained is not None or 4452 self.op.offline is not None): 4453 # we can't change the master's node flags 4454 if self.op.node_name == self.cfg.GetMasterNode(): 4455 raise errors.OpPrereqError("The master role can be changed" 4456 " only via master-failover", 4457 errors.ECODE_INVAL) 4458 4459 if self.op.master_candidate and not node.master_capable: 4460 raise errors.OpPrereqError("Node %s is not master capable, cannot make" 4461 " it a master candidate" % node.name, 4462 errors.ECODE_STATE) 4463 4464 if self.op.vm_capable == False: 4465 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name) 4466 if ipri or isec: 4467 raise errors.OpPrereqError("Node %s hosts instances, cannot unset" 4468 " the vm_capable flag" % node.name, 4469 errors.ECODE_STATE) 4470 4471 if node.master_candidate and self.might_demote and not self.lock_all: 4472 assert not self.op.auto_promote, "auto_promote set but lock_all not" 4473 # check if after removing the current node, we're missing master 4474 # candidates 4475 (mc_remaining, mc_should, _) = \ 4476 self.cfg.GetMasterCandidateStats(exceptions=[node.name]) 4477 if mc_remaining < mc_should: 4478 raise errors.OpPrereqError("Not enough master candidates, please" 4479 " pass auto promote option to allow" 4480 " promotion", errors.ECODE_STATE) 4481 4482 self.old_flags = old_flags = (node.master_candidate, 4483 node.drained, node.offline) 4484 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) 4485 self.old_role = old_role = self._F2R[old_flags] 4486 4487 # Check for ineffective changes 4488 for attr in self._FLAGS: 4489 if (getattr(self.op, attr) == False and getattr(node, attr) == False): 4490 self.LogInfo("Ignoring request to unset flag %s, already unset", attr) 4491 setattr(self.op, attr, None) 4492 4493 # Past this point, any flag change to False means a transition 4494 # away from the respective state, as only real changes are kept 4495 4496 # TODO: We might query the real power state if it supports OOB 4497 if _SupportsOob(self.cfg, node): 4498 if self.op.offline is False and not (node.powered or 4499 self.op.powered == True): 4500 raise errors.OpPrereqError(("Please power on node %s first before you" 4501 " can reset offline state") % 4502 self.op.node_name) 4503 elif self.op.powered is not None: 4504 raise errors.OpPrereqError(("Unable to change powered state for node %s" 4505 " which does not support out-of-band" 4506 " handling") % self.op.node_name) 4507 4508 # If we're being deofflined/drained, we'll MC ourself if needed 4509 if (self.op.drained == False or self.op.offline == False or 4510 (self.op.master_capable and not node.master_capable)): 4511 if _DecideSelfPromotion(self): 4512 self.op.master_candidate = True 4513 self.LogInfo("Auto-promoting node to master candidate") 4514 4515 # If we're no longer master capable, we'll demote ourselves from MC 4516 if self.op.master_capable == False and node.master_candidate: 4517 self.LogInfo("Demoting from master candidate") 4518 self.op.master_candidate = False 4519 4520 # Compute new role 4521 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 4522 if self.op.master_candidate: 4523 new_role = self._ROLE_CANDIDATE 4524 elif self.op.drained: 4525 new_role = self._ROLE_DRAINED 4526 elif self.op.offline: 4527 new_role = 
self._ROLE_OFFLINE 4528 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: 4529 # False is still in new flags, which means we're un-setting (the 4530 # only) True flag 4531 new_role = self._ROLE_REGULAR 4532 else: # no new flags, nothing, keep old role 4533 new_role = old_role 4534 4535 self.new_role = new_role 4536 4537 if old_role == self._ROLE_OFFLINE and new_role != old_role: 4538 # Trying to transition out of offline status 4539 result = self.rpc.call_version([node.name])[node.name] 4540 if result.fail_msg: 4541 raise errors.OpPrereqError("Node %s is being de-offlined but fails" 4542 " to report its version: %s" % 4543 (node.name, result.fail_msg), 4544 errors.ECODE_STATE) 4545 else: 4546 self.LogWarning("Transitioning node from offline to online state" 4547 " without using re-add. Please make sure the node" 4548 " is healthy!") 4549 4550 if self.op.secondary_ip: 4551 # Ok even without locking, because this can't be changed by any LU 4552 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) 4553 master_singlehomed = master.secondary_ip == master.primary_ip 4554 if master_singlehomed and self.op.secondary_ip: 4555 raise errors.OpPrereqError("Cannot change the secondary ip on a single" 4556 " homed cluster", errors.ECODE_INVAL) 4557 4558 if node.offline: 4559 if self.affected_instances: 4560 raise errors.OpPrereqError("Cannot change secondary ip: offline" 4561 " node has instances (%s) configured" 4562 " to use it" % self.affected_instances) 4563 else: 4564 # On online nodes, check that no instances are running, and that 4565 # the node has the new ip and we can reach it. 4566 for instance in self.affected_instances: 4567 _CheckInstanceDown(self, instance, "cannot change secondary ip") 4568 4569 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True) 4570 if master.name != node.name: 4571 # check reachability from master secondary ip to new secondary ip 4572 if not netutils.TcpPing(self.op.secondary_ip, 4573 constants.DEFAULT_NODED_PORT, 4574 source=master.secondary_ip): 4575 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 4576 " based ping to node daemon port", 4577 errors.ECODE_ENVIRON) 4578 4579 if self.op.ndparams: 4580 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams) 4581 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) 4582 self.new_ndparams = new_ndparams
4583
4584 - def Exec(self, feedback_fn):
4585 """Modifies a node. 4586 4587 """ 4588 node = self.node 4589 old_role = self.old_role 4590 new_role = self.new_role 4591 4592 result = [] 4593 4594 if self.op.ndparams: 4595 node.ndparams = self.new_ndparams 4596 4597 if self.op.powered is not None: 4598 node.powered = self.op.powered 4599 4600 for attr in ["master_capable", "vm_capable"]: 4601 val = getattr(self.op, attr) 4602 if val is not None: 4603 setattr(node, attr, val) 4604 result.append((attr, str(val))) 4605 4606 if new_role != old_role: 4607 # Tell the node to demote itself, if no longer MC and not offline 4608 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE: 4609 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg 4610 if msg: 4611 self.LogWarning("Node failed to demote itself: %s", msg) 4612 4613 new_flags = self._R2F[new_role] 4614 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): 4615 if of != nf: 4616 result.append((desc, str(nf))) 4617 (node.master_candidate, node.drained, node.offline) = new_flags 4618 4619 # we locked all nodes, we adjust the CP before updating this node 4620 if self.lock_all: 4621 _AdjustCandidatePool(self, [node.name]) 4622 4623 if self.op.secondary_ip: 4624 node.secondary_ip = self.op.secondary_ip 4625 result.append(("secondary_ip", self.op.secondary_ip)) 4626 4627 # this will trigger configuration file update, if needed 4628 self.cfg.Update(node, feedback_fn) 4629 4630 # this will trigger job queue propagation or cleanup if the mc 4631 # flag changed 4632 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1: 4633 self.context.ReaddNode(node) 4634 4635 return result
4636
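The flag-to-role translation declared at the top of LUNodeSetParams (_F2R and _R2F) can be exercised in isolation; a minimal sketch with stand-in role constants:

ROLE_CANDIDATE, ROLE_DRAINED, ROLE_OFFLINE, ROLE_REGULAR = range(4)
F2R = {
  (True, False, False): ROLE_CANDIDATE,
  (False, True, False): ROLE_DRAINED,
  (False, False, True): ROLE_OFFLINE,
  (False, False, False): ROLE_REGULAR,
}
R2F = dict((role, flags) for flags, role in F2R.items())

assert F2R[(False, False, True)] == ROLE_OFFLINE
assert R2F[ROLE_REGULAR] == (False, False, False)
# Any combination with more than one True flag is rejected earlier by
# CheckArguments, which is why only these four keys are needed.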
4637 4638 -class LUNodePowercycle(NoHooksLU):
4639 """Powercycles a node. 4640 4641 """ 4642 REQ_BGL = False 4643
4644 - def CheckArguments(self):
4645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 4646 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force: 4647 raise errors.OpPrereqError("The node is the master and the force" 4648 " parameter was not set", 4649 errors.ECODE_INVAL)
4650
4651 - def ExpandNames(self):
4652 """Locking for PowercycleNode. 4653 4654 This is a last-resort option and shouldn't block on other 4655 jobs. Therefore, we grab no locks. 4656 4657 """ 4658 self.needed_locks = {}
4659
4660 - def Exec(self, feedback_fn):
4661 """Reboots a node. 4662 4663 """ 4664 result = self.rpc.call_node_powercycle(self.op.node_name, 4665 self.cfg.GetHypervisorType()) 4666 result.Raise("Failed to schedule the reboot") 4667 return result.payload
4668
4669 4670 -class LUClusterQuery(NoHooksLU):
4671 """Query cluster configuration. 4672 4673 """ 4674 REQ_BGL = False 4675
4676 - def ExpandNames(self):
4677 self.needed_locks = {}
4678
4679 - def Exec(self, feedback_fn):
4680 """Return cluster config. 4681 4682 """ 4683 cluster = self.cfg.GetClusterInfo() 4684 os_hvp = {} 4685 4686 # Filter just for enabled hypervisors 4687 for os_name, hv_dict in cluster.os_hvp.items(): 4688 os_hvp[os_name] = {} 4689 for hv_name, hv_params in hv_dict.items(): 4690 if hv_name in cluster.enabled_hypervisors: 4691 os_hvp[os_name][hv_name] = hv_params 4692 4693 # Convert ip_family to ip_version 4694 primary_ip_version = constants.IP4_VERSION 4695 if cluster.primary_ip_family == netutils.IP6Address.family: 4696 primary_ip_version = constants.IP6_VERSION 4697 4698 result = { 4699 "software_version": constants.RELEASE_VERSION, 4700 "protocol_version": constants.PROTOCOL_VERSION, 4701 "config_version": constants.CONFIG_VERSION, 4702 "os_api_version": max(constants.OS_API_VERSIONS), 4703 "export_version": constants.EXPORT_VERSION, 4704 "architecture": (platform.architecture()[0], platform.machine()), 4705 "name": cluster.cluster_name, 4706 "master": cluster.master_node, 4707 "default_hypervisor": cluster.enabled_hypervisors[0], 4708 "enabled_hypervisors": cluster.enabled_hypervisors, 4709 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 4710 for hypervisor_name in cluster.enabled_hypervisors]), 4711 "os_hvp": os_hvp, 4712 "beparams": cluster.beparams, 4713 "osparams": cluster.osparams, 4714 "nicparams": cluster.nicparams, 4715 "ndparams": cluster.ndparams, 4716 "candidate_pool_size": cluster.candidate_pool_size, 4717 "master_netdev": cluster.master_netdev, 4718 "volume_group_name": cluster.volume_group_name, 4719 "drbd_usermode_helper": cluster.drbd_usermode_helper, 4720 "file_storage_dir": cluster.file_storage_dir, 4721 "maintain_node_health": cluster.maintain_node_health, 4722 "ctime": cluster.ctime, 4723 "mtime": cluster.mtime, 4724 "uuid": cluster.uuid, 4725 "tags": list(cluster.GetTags()), 4726 "uid_pool": cluster.uid_pool, 4727 "default_iallocator": cluster.default_iallocator, 4728 "reserved_lvs": cluster.reserved_lvs, 4729 "primary_ip_version": primary_ip_version, 4730 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 4731 "hidden_os": cluster.hidden_os, 4732 "blacklisted_os": cluster.blacklisted_os, 4733 } 4734 4735 return result
4736
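The os_hvp filtering in LUClusterQuery.Exec keeps only the hypervisors enabled on the cluster; a small sketch with invented data:

enabled_hypervisors = ["xen-pvm", "kvm"]
os_hvp_full = {
  "debian-etch": {"xen-pvm": {"kernel_path": "/boot/vmlinuz"},
                  "chroot": {"init_script": "/bin/init"}},
}
os_hvp = dict((os_name, dict((hv, params) for hv, params in hv_dict.items()
                             if hv in enabled_hypervisors))
              for os_name, hv_dict in os_hvp_full.items())
print(os_hvp)  # {'debian-etch': {'xen-pvm': {'kernel_path': '/boot/vmlinuz'}}}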
4737 4738 -class LUClusterConfigQuery(NoHooksLU):
4739 """Return configuration values. 4740 4741 """ 4742 REQ_BGL = False 4743 _FIELDS_DYNAMIC = utils.FieldSet() 4744 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag", 4745 "watcher_pause", "volume_group_name") 4746
4747 - def CheckArguments(self):
4748 _CheckOutputFields(static=self._FIELDS_STATIC, 4749 dynamic=self._FIELDS_DYNAMIC, 4750 selected=self.op.output_fields)
4751
4752 - def ExpandNames(self):
4753 self.needed_locks = {}
4754
4755 - def Exec(self, feedback_fn):
4756 """Dump a representation of the cluster config to the standard output. 4757 4758 """ 4759 values = [] 4760 for field in self.op.output_fields: 4761 if field == "cluster_name": 4762 entry = self.cfg.GetClusterName() 4763 elif field == "master_node": 4764 entry = self.cfg.GetMasterNode() 4765 elif field == "drain_flag": 4766 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE) 4767 elif field == "watcher_pause": 4768 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE) 4769 elif field == "volume_group_name": 4770 entry = self.cfg.GetVGName() 4771 else: 4772 raise errors.ParameterError(field) 4773 values.append(entry) 4774 return values
4775
4776 4777 -class LUInstanceActivateDisks(NoHooksLU):
4778 """Bring up an instance's disks. 4779 4780 """ 4781 REQ_BGL = False 4782
4783 - def ExpandNames(self):
4784 self._ExpandAndLockInstance() 4785 self.needed_locks[locking.LEVEL_NODE] = [] 4786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4787
4788 - def DeclareLocks(self, level):
4789 if level == locking.LEVEL_NODE: 4790 self._LockInstancesNodes()
4791
4792 - def CheckPrereq(self):
4793 """Check prerequisites. 4794 4795 This checks that the instance is in the cluster. 4796 4797 """ 4798 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4799 assert self.instance is not None, \ 4800 "Cannot retrieve locked instance %s" % self.op.instance_name 4801 _CheckNodeOnline(self, self.instance.primary_node)
4802
4803 - def Exec(self, feedback_fn):
4804 """Activate the disks. 4805 4806 """ 4807 disks_ok, disks_info = \ 4808 _AssembleInstanceDisks(self, self.instance, 4809 ignore_size=self.op.ignore_size) 4810 if not disks_ok: 4811 raise errors.OpExecError("Cannot activate block devices") 4812 4813 return disks_info
4814
4815 4816 -def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, 4817 ignore_size=False):
4818 """Prepare the block devices for an instance. 4819 4820 This sets up the block devices on all nodes. 4821 4822 @type lu: L{LogicalUnit} 4823 @param lu: the logical unit on whose behalf we execute 4824 @type instance: L{objects.Instance} 4825 @param instance: the instance for whose disks we assemble 4826 @type disks: list of L{objects.Disk} or None 4827 @param disks: which disks to assemble (or all, if None) 4828 @type ignore_secondaries: boolean 4829 @param ignore_secondaries: if true, errors on secondary nodes 4830 won't result in an error return from the function 4831 @type ignore_size: boolean 4832 @param ignore_size: if true, the current known size of the disk 4833 will not be used during the disk activation, useful for cases 4834 when the size is wrong 4835 @return: False if the operation failed, otherwise a list of 4836 (host, instance_visible_name, node_visible_name) 4837 with the mapping from node devices to instance devices 4838 4839 """ 4840 device_info = [] 4841 disks_ok = True 4842 iname = instance.name 4843 disks = _ExpandCheckDisks(instance, disks) 4844 4845 # With the two passes mechanism we try to reduce the window of 4846 # opportunity for the race condition of switching DRBD to primary 4847 # before handshaking occured, but we do not eliminate it 4848 4849 # The proper fix would be to wait (with some limits) until the 4850 # connection has been made and drbd transitions from WFConnection 4851 # into any other network-connected state (Connected, SyncTarget, 4852 # SyncSource, etc.) 4853 4854 # 1st pass, assemble on all nodes in secondary mode 4855 for idx, inst_disk in enumerate(disks): 4856 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4857 if ignore_size: 4858 node_disk = node_disk.Copy() 4859 node_disk.UnsetSize() 4860 lu.cfg.SetDiskID(node_disk, node) 4861 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx) 4862 msg = result.fail_msg 4863 if msg: 4864 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4865 " (is_primary=False, pass=1): %s", 4866 inst_disk.iv_name, node, msg) 4867 if not ignore_secondaries: 4868 disks_ok = False 4869 4870 # FIXME: race condition on drbd migration to primary 4871 4872 # 2nd pass, do only the primary node 4873 for idx, inst_disk in enumerate(disks): 4874 dev_path = None 4875 4876 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 4877 if node != instance.primary_node: 4878 continue 4879 if ignore_size: 4880 node_disk = node_disk.Copy() 4881 node_disk.UnsetSize() 4882 lu.cfg.SetDiskID(node_disk, node) 4883 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx) 4884 msg = result.fail_msg 4885 if msg: 4886 lu.proc.LogWarning("Could not prepare block device %s on node %s" 4887 " (is_primary=True, pass=2): %s", 4888 inst_disk.iv_name, node, msg) 4889 disks_ok = False 4890 else: 4891 dev_path = result.payload 4892 4893 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path)) 4894 4895 # leave the disks configured for the primary node 4896 # this is a workaround that would be fixed better by 4897 # improving the logical/physical id handling 4898 for disk in disks: 4899 lu.cfg.SetDiskID(disk, instance.primary_node) 4900 4901 return disks_ok, device_info
4902
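A rough illustration (not the real RPC code path) of the call order produced by the two-pass mechanism described in _AssembleInstanceDisks: every disk is first assembled on all its nodes in secondary mode, then re-assembled on the primary node only:

def assembly_calls(primary, nodes_per_disk):
  # Returns (node, disk_index, is_primary) tuples in the order they would be
  # issued; nodes_per_disk lists, per disk, the nodes holding that disk.
  calls = []
  for idx, nodes in enumerate(nodes_per_disk):            # pass 1: secondary mode
    calls.extend((node, idx, False) for node in nodes)
  for idx, nodes in enumerate(nodes_per_disk):            # pass 2: primary only
    calls.extend((node, idx, True) for node in nodes if node == primary)
  return calls

print(assembly_calls("node1", [["node1", "node2"], ["node1", "node2"]]))
# [('node1', 0, False), ('node2', 0, False), ('node1', 1, False),
#  ('node2', 1, False), ('node1', 0, True), ('node1', 1, True)]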
4903 4904 -def _StartInstanceDisks(lu, instance, force):
4905 """Start the disks of an instance. 4906 4907 """ 4908 disks_ok, _ = _AssembleInstanceDisks(lu, instance, 4909 ignore_secondaries=force) 4910 if not disks_ok: 4911 _ShutdownInstanceDisks(lu, instance) 4912 if force is not None and not force: 4913 lu.proc.LogWarning("", hint="If the message above refers to a" 4914 " secondary node," 4915 " you can retry the operation using '--force'.") 4916 raise errors.OpExecError("Disk consistency error")
4917
4918 4919 -class LUInstanceDeactivateDisks(NoHooksLU):
4920 """Shutdown an instance's disks. 4921 4922 """ 4923 REQ_BGL = False 4924
4925 - def ExpandNames(self):
4926 self._ExpandAndLockInstance() 4927 self.needed_locks[locking.LEVEL_NODE] = [] 4928 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4929
4930 - def DeclareLocks(self, level):
4931 if level == locking.LEVEL_NODE: 4932 self._LockInstancesNodes()
4933
4934 - def CheckPrereq(self):
4935 """Check prerequisites. 4936 4937 This checks that the instance is in the cluster. 4938 4939 """ 4940 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 4941 assert self.instance is not None, \ 4942 "Cannot retrieve locked instance %s" % self.op.instance_name
4943
4944 - def Exec(self, feedback_fn):
4945 """Deactivate the disks 4946 4947 """ 4948 instance = self.instance 4949 if self.op.force: 4950 _ShutdownInstanceDisks(self, instance) 4951 else: 4952 _SafeShutdownInstanceDisks(self, instance)
4953
4954 4955 -def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4956 """Shutdown block devices of an instance. 4957 4958 This function checks if an instance is running, before calling 4959 _ShutdownInstanceDisks. 4960 4961 """ 4962 _CheckInstanceDown(lu, instance, "cannot shutdown disks") 4963 _ShutdownInstanceDisks(lu, instance, disks=disks)
4964
4965 4966 -def _ExpandCheckDisks(instance, disks):
4967 """Return the instance disks selected by the disks list 4968 4969 @type disks: list of L{objects.Disk} or None 4970 @param disks: selected disks 4971 @rtype: list of L{objects.Disk} 4972 @return: selected instance disks to act on 4973 4974 """ 4975 if disks is None: 4976 return instance.disks 4977 else: 4978 if not set(disks).issubset(instance.disks): 4979 raise errors.ProgrammerError("Can only act on disks belonging to the" 4980 " target instance") 4981 return disks
4982
4983 4984 -def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4985 """Shutdown block devices of an instance. 4986 4987 This does the shutdown on all nodes of the instance. 4988 4989 If the ignore_primary is false, errors on the primary node are 4990 ignored. 4991 4992 """ 4993 all_result = True 4994 disks = _ExpandCheckDisks(instance, disks) 4995 4996 for disk in disks: 4997 for node, top_disk in disk.ComputeNodeTree(instance.primary_node): 4998 lu.cfg.SetDiskID(top_disk, node) 4999 result = lu.rpc.call_blockdev_shutdown(node, top_disk) 5000 msg = result.fail_msg 5001 if msg: 5002 lu.LogWarning("Could not shutdown block device %s on node %s: %s", 5003 disk.iv_name, node, msg) 5004 if ((node == instance.primary_node and not ignore_primary) or 5005 (node != instance.primary_node and not result.offline)): 5006 all_result = False 5007 return all_result
5008
5009 5010 -def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5011 """Checks if a node has enough free memory. 5012 5013 This function check if a given node has the needed amount of free 5014 memory. In case the node has less memory or we cannot get the 5015 information from the node, this function raise an OpPrereqError 5016 exception. 5017 5018 @type lu: C{LogicalUnit} 5019 @param lu: a logical unit from which we get configuration data 5020 @type node: C{str} 5021 @param node: the node to check 5022 @type reason: C{str} 5023 @param reason: string to use in the error message 5024 @type requested: C{int} 5025 @param requested: the amount of memory in MiB to check for 5026 @type hypervisor_name: C{str} 5027 @param hypervisor_name: the hypervisor to ask for memory stats 5028 @raise errors.OpPrereqError: if the node doesn't have enough memory, or 5029 we cannot check the node 5030 5031 """ 5032 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name) 5033 nodeinfo[node].Raise("Can't get data from node %s" % node, 5034 prereq=True, ecode=errors.ECODE_ENVIRON) 5035 free_mem = nodeinfo[node].payload.get('memory_free', None) 5036 if not isinstance(free_mem, int): 5037 raise errors.OpPrereqError("Can't compute free memory on node %s, result" 5038 " was '%s'" % (node, free_mem), 5039 errors.ECODE_ENVIRON) 5040 if requested > free_mem: 5041 raise errors.OpPrereqError("Not enough memory on node %s for %s:" 5042 " needed %s MiB, available %s MiB" % 5043 (node, reason, requested, free_mem), 5044 errors.ECODE_NORES)
5045
5046 5047 -def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5048 """Checks if nodes have enough free disk space in the all VGs. 5049 5050 This function check if all given nodes have the needed amount of 5051 free disk. In case any node has less disk or we cannot get the 5052 information from the node, this function raise an OpPrereqError 5053 exception. 5054 5055 @type lu: C{LogicalUnit} 5056 @param lu: a logical unit from which we get configuration data 5057 @type nodenames: C{list} 5058 @param nodenames: the list of node names to check 5059 @type req_sizes: C{dict} 5060 @param req_sizes: the hash of vg and corresponding amount of disk in 5061 MiB to check for 5062 @raise errors.OpPrereqError: if the node doesn't have enough disk, 5063 or we cannot check the node 5064 5065 """ 5066 for vg, req_size in req_sizes.items(): 5067 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5068
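The req_sizes hash maps volume group names to the free space (in MiB) required in each; a sketch of a call, with the VG and node names purely illustrative:

  req_sizes = {
    "xenvg": 10240,  # 10 GiB needed in "xenvg"
    "altvg": 2048,   # 2 GiB needed in "altvg"
  }
  _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
                           req_sizes)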
5069 5070 -def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5071    """Checks if nodes have enough free disk space in the specified VG. 5072 5073     This function checks if all given nodes have the needed amount of 5074     free disk. In case any node has less disk or we cannot get the 5075     information from the node, this function raises an OpPrereqError 5076     exception. 5077 5078     @type lu: C{LogicalUnit} 5079     @param lu: a logical unit from which we get configuration data 5080     @type nodenames: C{list} 5081     @param nodenames: the list of node names to check 5082     @type vg: C{str} 5083     @param vg: the volume group to check 5084     @type requested: C{int} 5085     @param requested: the amount of disk in MiB to check for 5086     @raise errors.OpPrereqError: if the node doesn't have enough disk, 5087         or we cannot check the node 5088 5089     """ 5090     nodeinfo = lu.rpc.call_node_info(nodenames, vg, None) 5091     for node in nodenames: 5092       info = nodeinfo[node] 5093       info.Raise("Cannot get current information from node %s" % node, 5094                  prereq=True, ecode=errors.ECODE_ENVIRON) 5095       vg_free = info.payload.get("vg_free", None) 5096       if not isinstance(vg_free, int): 5097         raise errors.OpPrereqError("Can't compute free disk space on node" 5098                                    " %s for vg %s, result was '%s'" % 5099                                    (node, vg, vg_free), errors.ECODE_ENVIRON) 5100       if requested > vg_free: 5101         raise errors.OpPrereqError("Not enough disk space on target node %s" 5102                                    " vg %s: required %d MiB, available %d MiB" % 5103                                    (node, vg, requested, vg_free), 5104                                    errors.ECODE_NORES)
5105
5106 5107 -class LUInstanceStartup(LogicalUnit):
5108 """Starts an instance. 5109 5110 """ 5111 HPATH = "instance-start" 5112 HTYPE = constants.HTYPE_INSTANCE 5113 REQ_BGL = False 5114
5115 - def CheckArguments(self):
5116 # extra beparams 5117 if self.op.beparams: 5118 # fill the beparams dict 5119 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5120
5121 - def ExpandNames(self):
5122    self._ExpandAndLockInstance()
5123
5124 - def BuildHooksEnv(self):
5125 """Build hooks env. 5126 5127 This runs on master, primary and secondary nodes of the instance. 5128 5129 """ 5130 env = { 5131 "FORCE": self.op.force, 5132 } 5133 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 5134 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5135 return env, nl, nl
5136
5137 - def CheckPrereq(self):
5138 """Check prerequisites. 5139 5140 This checks that the instance is in the cluster. 5141 5142 """ 5143 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5144 assert self.instance is not None, \ 5145 "Cannot retrieve locked instance %s" % self.op.instance_name 5146 5147 # extra hvparams 5148 if self.op.hvparams: 5149 # check hypervisor parameter syntax (locally) 5150 cluster = self.cfg.GetClusterInfo() 5151 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 5152 filled_hvp = cluster.FillHV(instance) 5153 filled_hvp.update(self.op.hvparams) 5154 hv_type = hypervisor.GetHypervisor(instance.hypervisor) 5155 hv_type.CheckParameterSyntax(filled_hvp) 5156 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) 5157 5158 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline 5159 5160 if self.primary_offline and self.op.ignore_offline_nodes: 5161 self.proc.LogWarning("Ignoring offline primary node") 5162 5163 if self.op.hvparams or self.op.beparams: 5164 self.proc.LogWarning("Overridden parameters are ignored") 5165 else: 5166 _CheckNodeOnline(self, instance.primary_node) 5167 5168 bep = self.cfg.GetClusterInfo().FillBE(instance) 5169 5170 # check bridges existence 5171 _CheckInstanceBridgesExist(self, instance) 5172 5173 remote_info = self.rpc.call_instance_info(instance.primary_node, 5174 instance.name, 5175 instance.hypervisor) 5176 remote_info.Raise("Error checking node %s" % instance.primary_node, 5177 prereq=True, ecode=errors.ECODE_ENVIRON) 5178 if not remote_info.payload: # not running already 5179 _CheckNodeFreeMemory(self, instance.primary_node, 5180 "starting instance %s" % instance.name, 5181 bep[constants.BE_MEMORY], instance.hypervisor)
5182
5183 - def Exec(self, feedback_fn):
5184 """Start the instance. 5185 5186 """ 5187 instance = self.instance 5188 force = self.op.force 5189 5190 if not self.op.no_remember: 5191 self.cfg.MarkInstanceUp(instance.name) 5192 5193 if self.primary_offline: 5194 assert self.op.ignore_offline_nodes 5195 self.proc.LogInfo("Primary node offline, marked instance as started") 5196 else: 5197 node_current = instance.primary_node 5198 5199 _StartInstanceDisks(self, instance, force) 5200 5201 result = self.rpc.call_instance_start(node_current, instance, 5202 self.op.hvparams, self.op.beparams) 5203 msg = result.fail_msg 5204 if msg: 5205 _ShutdownInstanceDisks(self, instance) 5206 raise errors.OpExecError("Could not start instance: %s" % msg)
5207
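End to end, this LU runs when the matching opcode is submitted to the master daemon; a rough sketch, assuming the opcode is exposed as opcodes.OpInstanceStartup and that the luxi client API of this Ganeti branch is used (both assumptions, neither shown in this file):

  from ganeti import luxi, opcodes  # assumed entry points for this sketch

  op = opcodes.OpInstanceStartup(instance_name="instance1.example.com",
                                 force=False)  # becomes self.op.* above
  cl = luxi.Client()
  job_id = cl.SubmitJob([op])  # the master eventually executes LUInstanceStartup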
5208 5209 -class LUInstanceReboot(LogicalUnit):
5210 """Reboot an instance. 5211 5212 """ 5213 HPATH = "instance-reboot" 5214 HTYPE = constants.HTYPE_INSTANCE 5215 REQ_BGL = False 5216
5217 - def ExpandNames(self):
5218    self._ExpandAndLockInstance()
5219
5220 - def BuildHooksEnv(self):
5221 """Build hooks env. 5222 5223 This runs on master, primary and secondary nodes of the instance. 5224 5225 """ 5226 env = { 5227 "IGNORE_SECONDARIES": self.op.ignore_secondaries, 5228 "REBOOT_TYPE": self.op.reboot_type, 5229 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5230 } 5231 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 5232 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5233 return env, nl, nl
5234
5235 - def CheckPrereq(self):
5236 """Check prerequisites. 5237 5238 This checks that the instance is in the cluster. 5239 5240 """ 5241 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5242 assert self.instance is not None, \ 5243 "Cannot retrieve locked instance %s" % self.op.instance_name 5244 5245 _CheckNodeOnline(self, instance.primary_node) 5246 5247 # check bridges existence 5248 _CheckInstanceBridgesExist(self, instance)
5249
5250 - def Exec(self, feedback_fn):
5251 """Reboot the instance. 5252 5253 """ 5254 instance = self.instance 5255 ignore_secondaries = self.op.ignore_secondaries 5256 reboot_type = self.op.reboot_type 5257 5258 node_current = instance.primary_node 5259 5260 if reboot_type in [constants.INSTANCE_REBOOT_SOFT, 5261 constants.INSTANCE_REBOOT_HARD]: 5262 for disk in instance.disks: 5263 self.cfg.SetDiskID(disk, node_current) 5264 result = self.rpc.call_instance_reboot(node_current, instance, 5265 reboot_type, 5266 self.op.shutdown_timeout) 5267 result.Raise("Could not reboot instance") 5268 else: 5269 result = self.rpc.call_instance_shutdown(node_current, instance, 5270 self.op.shutdown_timeout) 5271 result.Raise("Could not shutdown instance for full reboot") 5272 _ShutdownInstanceDisks(self, instance) 5273 _StartInstanceDisks(self, instance, ignore_secondaries) 5274 result = self.rpc.call_instance_start(node_current, instance, None, None) 5275 msg = result.fail_msg 5276 if msg: 5277 _ShutdownInstanceDisks(self, instance) 5278 raise errors.OpExecError("Could not start instance for" 5279 " full reboot: %s" % msg) 5280 5281 self.cfg.MarkInstanceUp(instance.name)
5282
5283 5284 -class LUInstanceShutdown(LogicalUnit):
5285 """Shutdown an instance. 5286 5287 """ 5288 HPATH = "instance-stop" 5289 HTYPE = constants.HTYPE_INSTANCE 5290 REQ_BGL = False 5291
5292 - def ExpandNames(self):
5293    self._ExpandAndLockInstance()
5294
5295 - def BuildHooksEnv(self):
5296 """Build hooks env. 5297 5298 This runs on master, primary and secondary nodes of the instance. 5299 5300 """ 5301 env = _BuildInstanceHookEnvByObject(self, self.instance) 5302 env["TIMEOUT"] = self.op.timeout 5303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5304 return env, nl, nl
5305
5306 - def CheckPrereq(self):
5307 """Check prerequisites. 5308 5309 This checks that the instance is in the cluster. 5310 5311 """ 5312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5313 assert self.instance is not None, \ 5314 "Cannot retrieve locked instance %s" % self.op.instance_name 5315 5316 self.primary_offline = \ 5317 self.cfg.GetNodeInfo(self.instance.primary_node).offline 5318 5319 if self.primary_offline and self.op.ignore_offline_nodes: 5320 self.proc.LogWarning("Ignoring offline primary node") 5321 else: 5322 _CheckNodeOnline(self, self.instance.primary_node)
5323
5324 - def Exec(self, feedback_fn):
5325 """Shutdown the instance. 5326 5327 """ 5328 instance = self.instance 5329 node_current = instance.primary_node 5330 timeout = self.op.timeout 5331 5332 if not self.op.no_remember: 5333 self.cfg.MarkInstanceDown(instance.name) 5334 5335 if self.primary_offline: 5336 assert self.op.ignore_offline_nodes 5337 self.proc.LogInfo("Primary node offline, marked instance as stopped") 5338 else: 5339 result = self.rpc.call_instance_shutdown(node_current, instance, timeout) 5340 msg = result.fail_msg 5341 if msg: 5342 self.proc.LogWarning("Could not shutdown instance: %s" % msg) 5343 5344 _ShutdownInstanceDisks(self, instance)
5345
5346 5347 -class LUInstanceReinstall(LogicalUnit):
5348 """Reinstall an instance. 5349 5350 """ 5351 HPATH = "instance-reinstall" 5352 HTYPE = constants.HTYPE_INSTANCE 5353 REQ_BGL = False 5354
5355 - def ExpandNames(self):
5356    self._ExpandAndLockInstance()
5357
5358 - def BuildHooksEnv(self):
5359 """Build hooks env. 5360 5361 This runs on master, primary and secondary nodes of the instance. 5362 5363 """ 5364 env = _BuildInstanceHookEnvByObject(self, self.instance) 5365 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5366 return env, nl, nl
5367
5368 - def CheckPrereq(self):
5369 """Check prerequisites. 5370 5371 This checks that the instance is in the cluster and is not running. 5372 5373 """ 5374 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5375 assert instance is not None, \ 5376 "Cannot retrieve locked instance %s" % self.op.instance_name 5377 _CheckNodeOnline(self, instance.primary_node, "Instance primary node" 5378 " offline, cannot reinstall") 5379 for node in instance.secondary_nodes: 5380 _CheckNodeOnline(self, node, "Instance secondary node offline," 5381 " cannot reinstall") 5382 5383 if instance.disk_template == constants.DT_DISKLESS: 5384 raise errors.OpPrereqError("Instance '%s' has no disks" % 5385 self.op.instance_name, 5386 errors.ECODE_INVAL) 5387 _CheckInstanceDown(self, instance, "cannot reinstall") 5388 5389 if self.op.os_type is not None: 5390 # OS verification 5391 pnode = _ExpandNodeName(self.cfg, instance.primary_node) 5392 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) 5393 instance_os = self.op.os_type 5394 else: 5395 instance_os = instance.os 5396 5397 nodelist = list(instance.all_nodes) 5398 5399 if self.op.osparams: 5400 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 5401 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 5402 self.os_inst = i_osdict # the new dict (without defaults) 5403 else: 5404 self.os_inst = None 5405 5406 self.instance = instance
5407
5408 - def Exec(self, feedback_fn):
5409 """Reinstall the instance. 5410 5411 """ 5412 inst = self.instance 5413 5414 if self.op.os_type is not None: 5415 feedback_fn("Changing OS to '%s'..." % self.op.os_type) 5416 inst.os = self.op.os_type 5417 # Write to configuration 5418 self.cfg.Update(inst, feedback_fn) 5419 5420 _StartInstanceDisks(self, inst, None) 5421 try: 5422 feedback_fn("Running the instance OS create scripts...") 5423 # FIXME: pass debug option from opcode to backend 5424 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 5425 self.op.debug_level, 5426 osparams=self.os_inst) 5427 result.Raise("Could not install OS for instance %s on node %s" % 5428 (inst.name, inst.primary_node)) 5429 finally: 5430 _ShutdownInstanceDisks(self, inst)
5431
5432 5433 -class LUInstanceRecreateDisks(LogicalUnit):
5434 """Recreate an instance's missing disks. 5435 5436 """ 5437 HPATH = "instance-recreate-disks" 5438 HTYPE = constants.HTYPE_INSTANCE 5439 REQ_BGL = False 5440
5441 - def CheckArguments(self):
5442 # normalise the disk list 5443 self.op.disks = sorted(frozenset(self.op.disks))
5444
5445 - def ExpandNames(self):
5446 self._ExpandAndLockInstance() 5447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 5448 if self.op.nodes: 5449 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes] 5450 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes) 5451 else: 5452 self.needed_locks[locking.LEVEL_NODE] = []
5453
5454 - def DeclareLocks(self, level):
5455 if level == locking.LEVEL_NODE: 5456 # if we replace the nodes, we only need to lock the old primary, 5457 # otherwise we need to lock all nodes for disk re-creation 5458 primary_only = bool(self.op.nodes) 5459 self._LockInstancesNodes(primary_only=primary_only)
5460
5461 - def BuildHooksEnv(self):
5462 """Build hooks env. 5463 5464 This runs on master, primary and secondary nodes of the instance. 5465 5466 """ 5467 env = _BuildInstanceHookEnvByObject(self, self.instance) 5468 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5469 return env, nl, nl
5470
5471 - def CheckPrereq(self):
5472 """Check prerequisites. 5473 5474 This checks that the instance is in the cluster and is not running. 5475 5476 """ 5477 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5478 assert instance is not None, \ 5479 "Cannot retrieve locked instance %s" % self.op.instance_name 5480 if self.op.nodes: 5481 if len(self.op.nodes) != len(instance.all_nodes): 5482 raise errors.OpPrereqError("Instance %s currently has %d nodes, but" 5483 " %d replacement nodes were specified" % 5484 (instance.name, len(instance.all_nodes), 5485 len(self.op.nodes)), 5486 errors.ECODE_INVAL) 5487 assert instance.disk_template != constants.DT_DRBD8 or \ 5488 len(self.op.nodes) == 2 5489 assert instance.disk_template != constants.DT_PLAIN or \ 5490 len(self.op.nodes) == 1 5491 primary_node = self.op.nodes[0] 5492 else: 5493 primary_node = instance.primary_node 5494 _CheckNodeOnline(self, primary_node) 5495 5496 if instance.disk_template == constants.DT_DISKLESS: 5497 raise errors.OpPrereqError("Instance '%s' has no disks" % 5498 self.op.instance_name, errors.ECODE_INVAL) 5499 # if we replace nodes *and* the old primary is offline, we don't 5500 # check 5501 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE] 5502 old_pnode = self.cfg.GetNodeInfo(instance.primary_node) 5503 if not (self.op.nodes and old_pnode.offline): 5504 _CheckInstanceDown(self, instance, "cannot recreate disks") 5505 5506 if not self.op.disks: 5507 self.op.disks = range(len(instance.disks)) 5508 else: 5509 for idx in self.op.disks: 5510 if idx >= len(instance.disks): 5511 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx, 5512 errors.ECODE_INVAL) 5513 if self.op.disks != range(len(instance.disks)) and self.op.nodes: 5514 raise errors.OpPrereqError("Can't recreate disks partially and" 5515 " change the nodes at the same time", 5516 errors.ECODE_INVAL) 5517 self.instance = instance
5518
5519 - def Exec(self, feedback_fn):
5520 """Recreate the disks. 5521 5522 """ 5523 instance = self.instance 5524 5525 to_skip = [] 5526 mods = [] # keeps track of needed logical_id changes 5527 5528 for idx, disk in enumerate(instance.disks): 5529 if idx not in self.op.disks: # disk idx has not been passed in 5530 to_skip.append(idx) 5531 continue 5532 # update secondaries for disks, if needed 5533 if self.op.nodes: 5534 if disk.dev_type == constants.LD_DRBD8: 5535 # need to update the nodes and minors 5536 assert len(self.op.nodes) == 2 5537 assert len(disk.logical_id) == 6 # otherwise disk internals 5538 # have changed 5539 (_, _, old_port, _, _, old_secret) = disk.logical_id 5540 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name) 5541 new_id = (self.op.nodes[0], self.op.nodes[1], old_port, 5542 new_minors[0], new_minors[1], old_secret) 5543 assert len(disk.logical_id) == len(new_id) 5544 mods.append((idx, new_id)) 5545 5546 # now that we have passed all asserts above, we can apply the mods 5547 # in a single run (to avoid partial changes) 5548 for idx, new_id in mods: 5549 instance.disks[idx].logical_id = new_id 5550 5551 # change primary node, if needed 5552 if self.op.nodes: 5553 instance.primary_node = self.op.nodes[0] 5554 self.LogWarning("Changing the instance's nodes, you will have to" 5555 " remove any disks left on the older nodes manually") 5556 5557 if self.op.nodes: 5558 self.cfg.Update(instance, feedback_fn) 5559 5560 _CreateDisks(self, instance, to_skip=to_skip)
5561
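For reference, the 6-tuple unpacked and rebuilt in Exec above follows the DRBD8 logical_id layout used by _GenerateDRBD8Branch later in this module; an annotated sketch with made-up values:

  # (node_a, node_b, port, minor_a, minor_b, shared_secret)
  old_id = ("node1.example.com", "node2.example.com", 11000, 0, 1, "s3cr3t")
  (_, _, old_port, _, _, old_secret) = old_id
  new_id = ("node3.example.com", "node4.example.com", old_port,
            4, 7, old_secret)  # new nodes and minors, same port and secret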
5562 5563 -class LUInstanceRename(LogicalUnit):
5564 """Rename an instance. 5565 5566 """ 5567 HPATH = "instance-rename" 5568 HTYPE = constants.HTYPE_INSTANCE 5569
5570 - def CheckArguments(self):
5571 """Check arguments. 5572 5573 """ 5574 if self.op.ip_check and not self.op.name_check: 5575 # TODO: make the ip check more flexible and not depend on the name check 5576 raise errors.OpPrereqError("Cannot do ip check without a name check", 5577 errors.ECODE_INVAL)
5578
5579 - def BuildHooksEnv(self):
5580 """Build hooks env. 5581 5582 This runs on master, primary and secondary nodes of the instance. 5583 5584 """ 5585 env = _BuildInstanceHookEnvByObject(self, self.instance) 5586 env["INSTANCE_NEW_NAME"] = self.op.new_name 5587 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 5588 return env, nl, nl
5589
5590 - def CheckPrereq(self):
5591 """Check prerequisites. 5592 5593 This checks that the instance is in the cluster and is not running. 5594 5595 """ 5596 self.op.instance_name = _ExpandInstanceName(self.cfg, 5597 self.op.instance_name) 5598 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5599 assert instance is not None 5600 _CheckNodeOnline(self, instance.primary_node) 5601 _CheckInstanceDown(self, instance, "cannot rename") 5602 self.instance = instance 5603 5604 new_name = self.op.new_name 5605 if self.op.name_check: 5606 hostname = netutils.GetHostname(name=new_name) 5607 if hostname != new_name: 5608 self.LogInfo("Resolved given name '%s' to '%s'", new_name, 5609 hostname.name) 5610 new_name = self.op.new_name = hostname.name 5611 if (self.op.ip_check and 5612 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)): 5613 raise errors.OpPrereqError("IP %s of instance %s already in use" % 5614 (hostname.ip, new_name), 5615 errors.ECODE_NOTUNIQUE) 5616 5617 instance_list = self.cfg.GetInstanceList() 5618 if new_name in instance_list and new_name != instance.name: 5619 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 5620 new_name, errors.ECODE_EXISTS)
5621
5622 - def Exec(self, feedback_fn):
5623 """Rename the instance. 5624 5625 """ 5626 inst = self.instance 5627 old_name = inst.name 5628 5629 rename_file_storage = False 5630 if (inst.disk_template == constants.DT_FILE and 5631 self.op.new_name != inst.name): 5632 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5633 rename_file_storage = True 5634 5635 self.cfg.RenameInstance(inst.name, self.op.new_name) 5636 # Change the instance lock. This is definitely safe while we hold the BGL 5637 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name) 5638 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) 5639 5640 # re-read the instance from the configuration after rename 5641 inst = self.cfg.GetInstanceInfo(self.op.new_name) 5642 5643 if rename_file_storage: 5644 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 5645 result = self.rpc.call_file_storage_dir_rename(inst.primary_node, 5646 old_file_storage_dir, 5647 new_file_storage_dir) 5648 result.Raise("Could not rename on node %s directory '%s' to '%s'" 5649 " (but the instance has been renamed in Ganeti)" % 5650 (inst.primary_node, old_file_storage_dir, 5651 new_file_storage_dir)) 5652 5653 _StartInstanceDisks(self, inst, None) 5654 try: 5655 result = self.rpc.call_instance_run_rename(inst.primary_node, inst, 5656 old_name, self.op.debug_level) 5657 msg = result.fail_msg 5658 if msg: 5659 msg = ("Could not run OS rename script for instance %s on node %s" 5660 " (but the instance has been renamed in Ganeti): %s" % 5661 (inst.name, inst.primary_node, msg)) 5662 self.proc.LogWarning(msg) 5663 finally: 5664 _ShutdownInstanceDisks(self, inst) 5665 5666 return inst.name
5667
5668 5669 -class LUInstanceRemove(LogicalUnit):
5670 """Remove an instance. 5671 5672 """ 5673 HPATH = "instance-remove" 5674 HTYPE = constants.HTYPE_INSTANCE 5675 REQ_BGL = False 5676
5677 - def ExpandNames(self):
5678 self._ExpandAndLockInstance() 5679 self.needed_locks[locking.LEVEL_NODE] = [] 5680 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5681
5682 - def DeclareLocks(self, level):
5683 if level == locking.LEVEL_NODE: 5684 self._LockInstancesNodes()
5685
5686 - def BuildHooksEnv(self):
5687 """Build hooks env. 5688 5689 This runs on master, primary and secondary nodes of the instance. 5690 5691 """ 5692 env = _BuildInstanceHookEnvByObject(self, self.instance) 5693 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout 5694 nl = [self.cfg.GetMasterNode()] 5695 nl_post = list(self.instance.all_nodes) + nl 5696 return env, nl, nl_post
5697
5698 - def CheckPrereq(self):
5699 """Check prerequisites. 5700 5701 This checks that the instance is in the cluster. 5702 5703 """ 5704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5705 assert self.instance is not None, \ 5706 "Cannot retrieve locked instance %s" % self.op.instance_name
5707
5708 - def Exec(self, feedback_fn):
5709 """Remove the instance. 5710 5711 """ 5712 instance = self.instance 5713 logging.info("Shutting down instance %s on node %s", 5714 instance.name, instance.primary_node) 5715 5716 result = self.rpc.call_instance_shutdown(instance.primary_node, instance, 5717 self.op.shutdown_timeout) 5718 msg = result.fail_msg 5719 if msg: 5720 if self.op.ignore_failures: 5721 feedback_fn("Warning: can't shutdown instance: %s" % msg) 5722 else: 5723 raise errors.OpExecError("Could not shutdown instance %s on" 5724 " node %s: %s" % 5725 (instance.name, instance.primary_node, msg)) 5726 5727 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5728
5729 5730 -def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5731 """Utility function to remove an instance. 5732 5733 """ 5734 logging.info("Removing block devices for instance %s", instance.name) 5735 5736 if not _RemoveDisks(lu, instance): 5737 if not ignore_failures: 5738 raise errors.OpExecError("Can't remove instance's disks") 5739 feedback_fn("Warning: can't remove instance's disks") 5740 5741 logging.info("Removing instance %s out of cluster config", instance.name) 5742 5743 lu.cfg.RemoveInstance(instance.name) 5744 5745 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ 5746 "Instance lock removal conflict" 5747 5748 # Remove lock for the instance 5749 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5750
5751 5752 -class LUInstanceQuery(NoHooksLU):
5753 """Logical unit for querying instances. 5754 5755 """ 5756 # pylint: disable-msg=W0142 5757 REQ_BGL = False 5758
5759 - def CheckArguments(self):
5760 self.iq = _InstanceQuery(self.op.names, self.op.output_fields, 5761 self.op.use_locking)
5762
5763 - def ExpandNames(self):
5764 self.iq.ExpandNames(self)
5765
5766 - def DeclareLocks(self, level):
5767 self.iq.DeclareLocks(self, level)
5768
5769 - def Exec(self, feedback_fn):
5770 return self.iq.OldStyleQuery(self)
5771
5772 5773 -class LUInstanceFailover(LogicalUnit):
5774 """Failover an instance. 5775 5776 """ 5777 HPATH = "instance-failover" 5778 HTYPE = constants.HTYPE_INSTANCE 5779 REQ_BGL = False 5780
5781 - def ExpandNames(self):
5782 self._ExpandAndLockInstance() 5783 self.needed_locks[locking.LEVEL_NODE] = [] 5784 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5785
5786 - def DeclareLocks(self, level):
5787 if level == locking.LEVEL_NODE: 5788 self._LockInstancesNodes()
5789
5790 - def BuildHooksEnv(self):
5791 """Build hooks env. 5792 5793 This runs on master, primary and secondary nodes of the instance. 5794 5795 """ 5796 instance = self.instance 5797 source_node = instance.primary_node 5798 target_node = instance.secondary_nodes[0] 5799 env = { 5800 "IGNORE_CONSISTENCY": self.op.ignore_consistency, 5801 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5802 "OLD_PRIMARY": source_node, 5803 "OLD_SECONDARY": target_node, 5804 "NEW_PRIMARY": target_node, 5805 "NEW_SECONDARY": source_node, 5806 } 5807 env.update(_BuildInstanceHookEnvByObject(self, instance)) 5808 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5809 nl_post = list(nl) 5810 nl_post.append(source_node) 5811 return env, nl, nl_post
5812
5813 - def CheckPrereq(self):
5814    """Check prerequisites. 5815 5816     This checks that the instance is in the cluster. 5817 5818     """ 5819     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 5820     assert self.instance is not None, \ 5821       "Cannot retrieve locked instance %s" % self.op.instance_name 5822 5823     bep = self.cfg.GetClusterInfo().FillBE(instance) 5824     if instance.disk_template not in constants.DTS_NET_MIRROR: 5825       raise errors.OpPrereqError("Instance's disk layout is not" 5826                                  " network mirrored, cannot failover.", 5827                                  errors.ECODE_STATE) 5828 5829     secondary_nodes = instance.secondary_nodes 5830     if not secondary_nodes: 5831       raise errors.ProgrammerError("no secondary node but using " 5832                                    "a mirrored disk template") 5833 5834     target_node = secondary_nodes[0] 5835     _CheckNodeOnline(self, target_node) 5836     _CheckNodeNotDrained(self, target_node) 5837     if instance.admin_up: 5838       # check memory requirements on the secondary node 5839       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 5840                            instance.name, bep[constants.BE_MEMORY], 5841                            instance.hypervisor) 5842     else: 5843       self.LogInfo("Not checking memory on the secondary node as" 5844                    " instance will not be started") 5845 5846     # check bridge existence 5847     _CheckInstanceBridgesExist(self, instance, node=target_node)
5848
5849 - def Exec(self, feedback_fn):
5850 """Failover an instance. 5851 5852 The failover is done by shutting it down on its present node and 5853 starting it on the secondary. 5854 5855 """ 5856 instance = self.instance 5857 primary_node = self.cfg.GetNodeInfo(instance.primary_node) 5858 5859 source_node = instance.primary_node 5860 target_node = instance.secondary_nodes[0] 5861 5862 if instance.admin_up: 5863 feedback_fn("* checking disk consistency between source and target") 5864 for dev in instance.disks: 5865 # for drbd, these are drbd over lvm 5866 if not _CheckDiskConsistency(self, dev, target_node, False): 5867 if not self.op.ignore_consistency: 5868 raise errors.OpExecError("Disk %s is degraded on target node," 5869 " aborting failover." % dev.iv_name) 5870 else: 5871 feedback_fn("* not checking disk consistency as instance is not running") 5872 5873 feedback_fn("* shutting down instance on source node") 5874 logging.info("Shutting down instance %s on node %s", 5875 instance.name, source_node) 5876 5877 result = self.rpc.call_instance_shutdown(source_node, instance, 5878 self.op.shutdown_timeout) 5879 msg = result.fail_msg 5880 if msg: 5881 if self.op.ignore_consistency or primary_node.offline: 5882 self.proc.LogWarning("Could not shutdown instance %s on node %s." 5883 " Proceeding anyway. Please make sure node" 5884 " %s is down. Error details: %s", 5885 instance.name, source_node, source_node, msg) 5886 else: 5887 raise errors.OpExecError("Could not shutdown instance %s on" 5888 " node %s: %s" % 5889 (instance.name, source_node, msg)) 5890 5891 feedback_fn("* deactivating the instance's disks on source node") 5892 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True): 5893 raise errors.OpExecError("Can't shut down the instance's disks.") 5894 5895 instance.primary_node = target_node 5896 # distribute new instance config to the other nodes 5897 self.cfg.Update(instance, feedback_fn) 5898 5899 # Only start the instance if it's marked as up 5900 if instance.admin_up: 5901 feedback_fn("* activating the instance's disks on target node") 5902 logging.info("Starting instance %s on node %s", 5903 instance.name, target_node) 5904 5905 disks_ok, _ = _AssembleInstanceDisks(self, instance, 5906 ignore_secondaries=True) 5907 if not disks_ok: 5908 _ShutdownInstanceDisks(self, instance) 5909 raise errors.OpExecError("Can't activate the instance's disks") 5910 5911 feedback_fn("* starting the instance on the target node") 5912 result = self.rpc.call_instance_start(target_node, instance, None, None) 5913 msg = result.fail_msg 5914 if msg: 5915 _ShutdownInstanceDisks(self, instance) 5916 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 5917 (instance.name, target_node, msg))
5918
5919 5920 -class LUInstanceMigrate(LogicalUnit):
5921 """Migrate an instance. 5922 5923 This is migration without shutting down, compared to the failover, 5924 which is done with shutdown. 5925 5926 """ 5927 HPATH = "instance-migrate" 5928 HTYPE = constants.HTYPE_INSTANCE 5929 REQ_BGL = False 5930
5931 - def ExpandNames(self):
5932 self._ExpandAndLockInstance() 5933 5934 self.needed_locks[locking.LEVEL_NODE] = [] 5935 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 5936 5937 self._migrater = TLMigrateInstance(self, self.op.instance_name, 5938 self.op.cleanup) 5939 self.tasklets = [self._migrater]
5940
5941 - def DeclareLocks(self, level):
5942 if level == locking.LEVEL_NODE: 5943 self._LockInstancesNodes()
5944
5945 - def BuildHooksEnv(self):
5946 """Build hooks env. 5947 5948 This runs on master, primary and secondary nodes of the instance. 5949 5950 """ 5951 instance = self._migrater.instance 5952 source_node = instance.primary_node 5953 target_node = instance.secondary_nodes[0] 5954 env = _BuildInstanceHookEnvByObject(self, instance) 5955 env["MIGRATE_LIVE"] = self._migrater.live 5956 env["MIGRATE_CLEANUP"] = self.op.cleanup 5957 env.update({ 5958 "OLD_PRIMARY": source_node, 5959 "OLD_SECONDARY": target_node, 5960 "NEW_PRIMARY": target_node, 5961 "NEW_SECONDARY": source_node, 5962 }) 5963 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 5964 nl_post = list(nl) 5965 nl_post.append(source_node) 5966 return env, nl, nl_post
5967
5968 5969 -class LUInstanceMove(LogicalUnit):
5970 """Move an instance by data-copying. 5971 5972 """ 5973 HPATH = "instance-move" 5974 HTYPE = constants.HTYPE_INSTANCE 5975 REQ_BGL = False 5976
5977 - def ExpandNames(self):
5978 self._ExpandAndLockInstance() 5979 target_node = _ExpandNodeName(self.cfg, self.op.target_node) 5980 self.op.target_node = target_node 5981 self.needed_locks[locking.LEVEL_NODE] = [target_node] 5982 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5983
5984 - def DeclareLocks(self, level):
5985 if level == locking.LEVEL_NODE: 5986 self._LockInstancesNodes(primary_only=True)
5987
5988 - def BuildHooksEnv(self):
5989 """Build hooks env. 5990 5991 This runs on master, primary and secondary nodes of the instance. 5992 5993 """ 5994 env = { 5995 "TARGET_NODE": self.op.target_node, 5996 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 5997 } 5998 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 5999 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node, 6000 self.op.target_node] 6001 return env, nl, nl
6002
6003 - def CheckPrereq(self):
6004    """Check prerequisites. 6005 6006     This checks that the instance is in the cluster. 6007 6008     """ 6009     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6010     assert self.instance is not None, \ 6011       "Cannot retrieve locked instance %s" % self.op.instance_name 6012 6013     node = self.cfg.GetNodeInfo(self.op.target_node) 6014     assert node is not None, \ 6015       "Cannot retrieve locked node %s" % self.op.target_node 6016 6017     self.target_node = target_node = node.name 6018 6019     if target_node == instance.primary_node: 6020       raise errors.OpPrereqError("Instance %s is already on the node %s" % 6021                                  (instance.name, target_node), 6022                                  errors.ECODE_STATE) 6023 6024     bep = self.cfg.GetClusterInfo().FillBE(instance) 6025 6026     for idx, dsk in enumerate(instance.disks): 6027       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE): 6028         raise errors.OpPrereqError("Instance disk %d has a complex layout," 6029                                    " cannot copy" % idx, errors.ECODE_STATE) 6030 6031     _CheckNodeOnline(self, target_node) 6032     _CheckNodeNotDrained(self, target_node) 6033     _CheckNodeVmCapable(self, target_node) 6034 6035     if instance.admin_up: 6036       # check memory requirements on the secondary node 6037       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 6038                            instance.name, bep[constants.BE_MEMORY], 6039                            instance.hypervisor) 6040     else: 6041       self.LogInfo("Not checking memory on the secondary node as" 6042                    " instance will not be started") 6043 6044     # check bridge existence 6045     _CheckInstanceBridgesExist(self, instance, node=target_node)
6046
6047 - def Exec(self, feedback_fn):
6048 """Move an instance. 6049 6050 The move is done by shutting it down on its present node, copying 6051 the data over (slow) and starting it on the new node. 6052 6053 """ 6054 instance = self.instance 6055 6056 source_node = instance.primary_node 6057 target_node = self.target_node 6058 6059 self.LogInfo("Shutting down instance %s on source node %s", 6060 instance.name, source_node) 6061 6062 result = self.rpc.call_instance_shutdown(source_node, instance, 6063 self.op.shutdown_timeout) 6064 msg = result.fail_msg 6065 if msg: 6066 if self.op.ignore_consistency: 6067 self.proc.LogWarning("Could not shutdown instance %s on node %s." 6068 " Proceeding anyway. Please make sure node" 6069 " %s is down. Error details: %s", 6070 instance.name, source_node, source_node, msg) 6071 else: 6072 raise errors.OpExecError("Could not shutdown instance %s on" 6073 " node %s: %s" % 6074 (instance.name, source_node, msg)) 6075 6076 # create the target disks 6077 try: 6078 _CreateDisks(self, instance, target_node=target_node) 6079 except errors.OpExecError: 6080 self.LogWarning("Device creation failed, reverting...") 6081 try: 6082 _RemoveDisks(self, instance, target_node=target_node) 6083 finally: 6084 self.cfg.ReleaseDRBDMinors(instance.name) 6085 raise 6086 6087 cluster_name = self.cfg.GetClusterInfo().cluster_name 6088 6089 errs = [] 6090 # activate, get path, copy the data over 6091 for idx, disk in enumerate(instance.disks): 6092 self.LogInfo("Copying data for disk %d", idx) 6093 result = self.rpc.call_blockdev_assemble(target_node, disk, 6094 instance.name, True, idx) 6095 if result.fail_msg: 6096 self.LogWarning("Can't assemble newly created disk %d: %s", 6097 idx, result.fail_msg) 6098 errs.append(result.fail_msg) 6099 break 6100 dev_path = result.payload 6101 result = self.rpc.call_blockdev_export(source_node, disk, 6102 target_node, dev_path, 6103 cluster_name) 6104 if result.fail_msg: 6105 self.LogWarning("Can't copy data over for disk %d: %s", 6106 idx, result.fail_msg) 6107 errs.append(result.fail_msg) 6108 break 6109 6110 if errs: 6111 self.LogWarning("Some disks failed to copy, aborting") 6112 try: 6113 _RemoveDisks(self, instance, target_node=target_node) 6114 finally: 6115 self.cfg.ReleaseDRBDMinors(instance.name) 6116 raise errors.OpExecError("Errors during disk copy: %s" % 6117 (",".join(errs),)) 6118 6119 instance.primary_node = target_node 6120 self.cfg.Update(instance, feedback_fn) 6121 6122 self.LogInfo("Removing the disks on the original node") 6123 _RemoveDisks(self, instance, target_node=source_node) 6124 6125 # Only start the instance if it's marked as up 6126 if instance.admin_up: 6127 self.LogInfo("Starting instance %s on node %s", 6128 instance.name, target_node) 6129 6130 disks_ok, _ = _AssembleInstanceDisks(self, instance, 6131 ignore_secondaries=True) 6132 if not disks_ok: 6133 _ShutdownInstanceDisks(self, instance) 6134 raise errors.OpExecError("Can't activate the instance's disks") 6135 6136 result = self.rpc.call_instance_start(target_node, instance, None, None) 6137 msg = result.fail_msg 6138 if msg: 6139 _ShutdownInstanceDisks(self, instance) 6140 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 6141 (instance.name, target_node, msg))
6142
6143 6144 -class LUNodeMigrate(LogicalUnit):
6145 """Migrate all instances from a node. 6146 6147 """ 6148 HPATH = "node-migrate" 6149 HTYPE = constants.HTYPE_NODE 6150 REQ_BGL = False 6151
6152 - def ExpandNames(self):
6153 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 6154 6155 self.needed_locks = { 6156 locking.LEVEL_NODE: [self.op.node_name], 6157 } 6158 6159 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 6160 6161 # Create tasklets for migrating instances for all instances on this node 6162 names = [] 6163 tasklets = [] 6164 6165 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name): 6166 logging.debug("Migrating instance %s", inst.name) 6167 names.append(inst.name) 6168 6169 tasklets.append(TLMigrateInstance(self, inst.name, False)) 6170 6171 self.tasklets = tasklets 6172 6173 # Declare instance locks 6174 self.needed_locks[locking.LEVEL_INSTANCE] = names
6175
6176 - def DeclareLocks(self, level):
6177 if level == locking.LEVEL_NODE: 6178 self._LockInstancesNodes()
6179
6180 - def BuildHooksEnv(self):
6181 """Build hooks env. 6182 6183 This runs on the master, the primary and all the secondaries. 6184 6185 """ 6186 env = { 6187 "NODE_NAME": self.op.node_name, 6188 } 6189 6190 nl = [self.cfg.GetMasterNode()] 6191 6192 return (env, nl, nl)
6193
6194 6195 -class TLMigrateInstance(Tasklet):
6196    """Tasklet class for instance migration. 6197 6198   @type live: boolean 6199   @ivar live: whether the migration will be done live or non-live; 6200       this variable is initialized only after CheckPrereq has run 6201 6202   """
6203 - def __init__(self, lu, instance_name, cleanup):
6204 """Initializes this class. 6205 6206 """ 6207 Tasklet.__init__(self, lu) 6208 6209 # Parameters 6210 self.instance_name = instance_name 6211 self.cleanup = cleanup 6212 self.live = False # will be overridden later
6213
6214 - def CheckPrereq(self):
6215    """Check prerequisites. 6216 6217     This checks that the instance is in the cluster. 6218 6219     """ 6220     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name) 6221     instance = self.cfg.GetInstanceInfo(instance_name) 6222     assert instance is not None 6223 6224     if instance.disk_template != constants.DT_DRBD8: 6225       raise errors.OpPrereqError("Instance's disk layout is not" 6226                                  " drbd8, cannot migrate.", errors.ECODE_STATE) 6227 6228     secondary_nodes = instance.secondary_nodes 6229     if not secondary_nodes: 6230       raise errors.ConfigurationError("No secondary node but using" 6231                                       " drbd8 disk template") 6232 6233     i_be = self.cfg.GetClusterInfo().FillBE(instance) 6234 6235     target_node = secondary_nodes[0] 6236     # check memory requirements on the secondary node 6237     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" % 6238                          instance.name, i_be[constants.BE_MEMORY], 6239                          instance.hypervisor) 6240 6241     # check bridge existence 6242     _CheckInstanceBridgesExist(self.lu, instance, node=target_node) 6243 6244     if not self.cleanup: 6245       _CheckNodeNotDrained(self.lu, target_node) 6246       result = self.rpc.call_instance_migratable(instance.primary_node, 6247                                                  instance) 6248       result.Raise("Can't migrate, please use failover", 6249                    prereq=True, ecode=errors.ECODE_STATE) 6250 6251     self.instance = instance 6252 6253     if self.lu.op.live is not None and self.lu.op.mode is not None: 6254       raise errors.OpPrereqError("Only one of the 'live' and 'mode'" 6255                                  " parameters are accepted", 6256                                  errors.ECODE_INVAL) 6257     if self.lu.op.live is not None: 6258       if self.lu.op.live: 6259         self.lu.op.mode = constants.HT_MIGRATION_LIVE 6260       else: 6261         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE 6262       # reset the 'live' parameter to None so that repeated 6263       # invocations of CheckPrereq do not raise an exception 6264       self.lu.op.live = None 6265     elif self.lu.op.mode is None: 6266       # read the default value from the hypervisor 6267       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False) 6268       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] 6269 6270     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6271
6272 - def _WaitUntilSync(self):
6273 """Poll with custom rpc for disk sync. 6274 6275 This uses our own step-based rpc call. 6276 6277 """ 6278 self.feedback_fn("* wait until resync is done") 6279 all_done = False 6280 while not all_done: 6281 all_done = True 6282 result = self.rpc.call_drbd_wait_sync(self.all_nodes, 6283 self.nodes_ip, 6284 self.instance.disks) 6285 min_percent = 100 6286 for node, nres in result.items(): 6287 nres.Raise("Cannot resync disks on node %s" % node) 6288 node_done, node_percent = nres.payload 6289 all_done = all_done and node_done 6290 if node_percent is not None: 6291 min_percent = min(min_percent, node_percent) 6292 if not all_done: 6293 if min_percent < 100: 6294 self.feedback_fn(" - progress: %.1f%%" % min_percent) 6295 time.sleep(2)
6296
6297 - def _EnsureSecondary(self, node):
6298 """Demote a node to secondary. 6299 6300 """ 6301 self.feedback_fn("* switching node %s to secondary mode" % node) 6302 6303 for dev in self.instance.disks: 6304 self.cfg.SetDiskID(dev, node) 6305 6306 result = self.rpc.call_blockdev_close(node, self.instance.name, 6307 self.instance.disks) 6308 result.Raise("Cannot change disk to secondary on node %s" % node)
6309
6310 - def _GoStandalone(self):
6311 """Disconnect from the network. 6312 6313 """ 6314 self.feedback_fn("* changing into standalone mode") 6315 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, 6316 self.instance.disks) 6317 for node, nres in result.items(): 6318 nres.Raise("Cannot disconnect disks node %s" % node)
6319
6320 - def _GoReconnect(self, multimaster):
6321 """Reconnect to the network. 6322 6323 """ 6324 if multimaster: 6325 msg = "dual-master" 6326 else: 6327 msg = "single-master" 6328 self.feedback_fn("* changing disks into %s mode" % msg) 6329 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, 6330 self.instance.disks, 6331 self.instance.name, multimaster) 6332 for node, nres in result.items(): 6333 nres.Raise("Cannot change disks config on node %s" % node)
6334
6335 - def _ExecCleanup(self):
6336 """Try to cleanup after a failed migration. 6337 6338 The cleanup is done by: 6339 - check that the instance is running only on one node 6340 (and update the config if needed) 6341 - change disks on its secondary node to secondary 6342 - wait until disks are fully synchronized 6343 - disconnect from the network 6344 - change disks into single-master mode 6345 - wait again until disks are fully synchronized 6346 6347 """ 6348 instance = self.instance 6349 target_node = self.target_node 6350 source_node = self.source_node 6351 6352 # check running on only one node 6353 self.feedback_fn("* checking where the instance actually runs" 6354 " (if this hangs, the hypervisor might be in" 6355 " a bad state)") 6356 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor]) 6357 for node, result in ins_l.items(): 6358 result.Raise("Can't contact node %s" % node) 6359 6360 runningon_source = instance.name in ins_l[source_node].payload 6361 runningon_target = instance.name in ins_l[target_node].payload 6362 6363 if runningon_source and runningon_target: 6364 raise errors.OpExecError("Instance seems to be running on two nodes," 6365 " or the hypervisor is confused. You will have" 6366 " to ensure manually that it runs only on one" 6367 " and restart this operation.") 6368 6369 if not (runningon_source or runningon_target): 6370 raise errors.OpExecError("Instance does not seem to be running at all." 6371 " In this case, it's safer to repair by" 6372 " running 'gnt-instance stop' to ensure disk" 6373 " shutdown, and then restarting it.") 6374 6375 if runningon_target: 6376 # the migration has actually succeeded, we need to update the config 6377 self.feedback_fn("* instance running on secondary node (%s)," 6378 " updating config" % target_node) 6379 instance.primary_node = target_node 6380 self.cfg.Update(instance, self.feedback_fn) 6381 demoted_node = source_node 6382 else: 6383 self.feedback_fn("* instance confirmed to be running on its" 6384 " primary node (%s)" % source_node) 6385 demoted_node = target_node 6386 6387 self._EnsureSecondary(demoted_node) 6388 try: 6389 self._WaitUntilSync() 6390 except errors.OpExecError: 6391 # we ignore here errors, since if the device is standalone, it 6392 # won't be able to sync 6393 pass 6394 self._GoStandalone() 6395 self._GoReconnect(False) 6396 self._WaitUntilSync() 6397 6398 self.feedback_fn("* done")
6399
6400 - def _RevertDiskStatus(self):
6401 """Try to revert the disk status after a failed migration. 6402 6403 """ 6404 target_node = self.target_node 6405 try: 6406 self._EnsureSecondary(target_node) 6407 self._GoStandalone() 6408 self._GoReconnect(False) 6409 self._WaitUntilSync() 6410 except errors.OpExecError, err: 6411 self.lu.LogWarning("Migration failed and I can't reconnect the" 6412 " drives: error '%s'\n" 6413 "Please look and recover the instance status" % 6414 str(err))
6415
6416 - def _AbortMigration(self):
6417 """Call the hypervisor code to abort a started migration. 6418 6419 """ 6420 instance = self.instance 6421 target_node = self.target_node 6422 migration_info = self.migration_info 6423 6424 abort_result = self.rpc.call_finalize_migration(target_node, 6425 instance, 6426 migration_info, 6427 False) 6428 abort_msg = abort_result.fail_msg 6429 if abort_msg: 6430 logging.error("Aborting migration failed on target node %s: %s", 6431 target_node, abort_msg)
# Don't raise an exception here, as we still have to try to revert the 6433     # disk status, even if this step failed. 6434
6435 - def _ExecMigration(self):
6436 """Migrate an instance. 6437 6438 The migrate is done by: 6439 - change the disks into dual-master mode 6440 - wait until disks are fully synchronized again 6441 - migrate the instance 6442 - change disks on the new secondary node (the old primary) to secondary 6443 - wait until disks are fully synchronized 6444 - change disks into single-master mode 6445 6446 """ 6447 instance = self.instance 6448 target_node = self.target_node 6449 source_node = self.source_node 6450 6451 self.feedback_fn("* checking disk consistency between source and target") 6452 for dev in instance.disks: 6453 if not _CheckDiskConsistency(self.lu, dev, target_node, False): 6454 raise errors.OpExecError("Disk %s is degraded or not fully" 6455 " synchronized on target node," 6456 " aborting migrate." % dev.iv_name) 6457 6458 # First get the migration information from the remote node 6459 result = self.rpc.call_migration_info(source_node, instance) 6460 msg = result.fail_msg 6461 if msg: 6462 log_err = ("Failed fetching source migration information from %s: %s" % 6463 (source_node, msg)) 6464 logging.error(log_err) 6465 raise errors.OpExecError(log_err) 6466 6467 self.migration_info = migration_info = result.payload 6468 6469 # Then switch the disks to master/master mode 6470 self._EnsureSecondary(target_node) 6471 self._GoStandalone() 6472 self._GoReconnect(True) 6473 self._WaitUntilSync() 6474 6475 self.feedback_fn("* preparing %s to accept the instance" % target_node) 6476 result = self.rpc.call_accept_instance(target_node, 6477 instance, 6478 migration_info, 6479 self.nodes_ip[target_node]) 6480 6481 msg = result.fail_msg 6482 if msg: 6483 logging.error("Instance pre-migration failed, trying to revert" 6484 " disk status: %s", msg) 6485 self.feedback_fn("Pre-migration failed, aborting") 6486 self._AbortMigration() 6487 self._RevertDiskStatus() 6488 raise errors.OpExecError("Could not pre-migrate instance %s: %s" % 6489 (instance.name, msg)) 6490 6491 self.feedback_fn("* migrating instance to %s" % target_node) 6492 time.sleep(10) 6493 result = self.rpc.call_instance_migrate(source_node, instance, 6494 self.nodes_ip[target_node], 6495 self.live) 6496 msg = result.fail_msg 6497 if msg: 6498 logging.error("Instance migration failed, trying to revert" 6499 " disk status: %s", msg) 6500 self.feedback_fn("Migration failed, aborting") 6501 self._AbortMigration() 6502 self._RevertDiskStatus() 6503 raise errors.OpExecError("Could not migrate instance %s: %s" % 6504 (instance.name, msg)) 6505 time.sleep(10) 6506 6507 instance.primary_node = target_node 6508 # distribute new instance config to the other nodes 6509 self.cfg.Update(instance, self.feedback_fn) 6510 6511 result = self.rpc.call_finalize_migration(target_node, 6512 instance, 6513 migration_info, 6514 True) 6515 msg = result.fail_msg 6516 if msg: 6517 logging.error("Instance migration succeeded, but finalization failed:" 6518 " %s", msg) 6519 raise errors.OpExecError("Could not finalize instance migration: %s" % 6520 msg) 6521 6522 self._EnsureSecondary(source_node) 6523 self._WaitUntilSync() 6524 self._GoStandalone() 6525 self._GoReconnect(False) 6526 self._WaitUntilSync() 6527 6528 self.feedback_fn("* done")
6529
6530 - def Exec(self, feedback_fn):
6531 """Perform the migration. 6532 6533 """ 6534 feedback_fn("Migrating instance %s" % self.instance.name) 6535 6536 self.feedback_fn = feedback_fn 6537 6538 self.source_node = self.instance.primary_node 6539 self.target_node = self.instance.secondary_nodes[0] 6540 self.all_nodes = [self.source_node, self.target_node] 6541 self.nodes_ip = { 6542 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip, 6543 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip, 6544 } 6545 6546 if self.cleanup: 6547 return self._ExecCleanup() 6548 else: 6549 return self._ExecMigration()
6550
6551 6552 -def _CreateBlockDev(lu, node, instance, device, force_create, 6553 info, force_open):
6554   """Create a tree of block devices on a given node. 6555 6556   If this device type has to be created on secondaries, create it and 6557   all its children. 6558 6559   If not, just recurse to children keeping the same 'force' value. 6560 6561   @param lu: the lu on whose behalf we execute 6562   @param node: the node on which to create the device 6563   @type instance: L{objects.Instance} 6564   @param instance: the instance which owns the device 6565   @type device: L{objects.Disk} 6566   @param device: the device to create 6567   @type force_create: boolean 6568   @param force_create: whether to force creation of this device; this 6569       will be changed to True whenever we find a device whose 6570       CreateOnSecondary() method returns True 6571   @param info: the extra 'metadata' we should attach to the device 6572       (this will be represented as an LVM tag) 6573   @type force_open: boolean 6574   @param force_open: this parameter will be passed to the 6575       L{backend.BlockdevCreate} function where it specifies 6576       whether we run on primary or not, and it affects both 6577       the child assembly and the device's own Open() execution 6578 6579   """ 6580   if device.CreateOnSecondary(): 6581     force_create = True 6582 6583   if device.children: 6584     for child in device.children: 6585       _CreateBlockDev(lu, node, instance, child, force_create, 6586                       info, force_open) 6587 6588   if not force_create: 6589     return 6590 6591   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6592
6593 6594 -def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6595   """Create a single block device on a given node. 6596 6597   This will not recurse over children of the device, so they must be 6598   created in advance. 6599 6600   @param lu: the lu on whose behalf we execute 6601   @param node: the node on which to create the device 6602   @type instance: L{objects.Instance} 6603   @param instance: the instance which owns the device 6604   @type device: L{objects.Disk} 6605   @param device: the device to create 6606   @param info: the extra 'metadata' we should attach to the device 6607       (this will be represented as an LVM tag) 6608   @type force_open: boolean 6609   @param force_open: this parameter will be passed to the 6610       L{backend.BlockdevCreate} function where it specifies 6611       whether we run on primary or not, and it affects both 6612       the child assembly and the device's own Open() execution 6613 6614   """ 6615   lu.cfg.SetDiskID(device, node) 6616   result = lu.rpc.call_blockdev_create(node, device, device.size, 6617                                        instance.name, force_open, info) 6618   result.Raise("Can't create block device %s on" 6619                " node %s for instance %s" % (device, node, instance.name)) 6620   if device.physical_id is None: 6621     device.physical_id = result.payload
6622
6623 6624 -def _GenerateUniqueNames(lu, exts):
6625   """Generate suitable LV names. 6626 6627   This will generate a unique logical volume name for each given suffix. 6628 6629   """ 6630   results = [] 6631   for val in exts: 6632     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) 6633     results.append("%s%s" % (new_id, val)) 6634   return results
6635
6636 6637 -def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names, 6638 iv_name, p_minor, s_minor):
6639 """Generate a drbd8 device complete with its children. 6640 6641 """ 6642 assert len(vgnames) == len(names) == 2 6643 port = lu.cfg.AllocatePort() 6644 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) 6645 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, 6646 logical_id=(vgnames[0], names[0])) 6647 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 6648 logical_id=(vgnames[1], names[1])) 6649 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, 6650 logical_id=(primary, secondary, port, 6651 p_minor, s_minor, 6652 shared_secret), 6653 children=[dev_data, dev_meta], 6654 iv_name=iv_name) 6655 return drbd_dev
6656
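A usage sketch, with every argument value invented for illustration (`lu` stands for any logical unit): the call returns one LD_DRBD8 device whose two children are the data LV (full size) and the 128 MiB metadata LV built above:

  drbd = _GenerateDRBD8Branch(lu, "node1.example.com", "node2.example.com",
                              10240, ["xenvg", "xenvg"],
                              ["uuid0.disk0_data", "uuid0.disk0_meta"],
                              "disk/0", 12, 13)
  # drbd.children[0] -> 10240 MiB data LV, drbd.children[1] -> 128 MiB meta LV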
6657 6658 -def _GenerateDiskTemplate(lu, template_name, 6659 instance_name, primary_node, 6660 secondary_nodes, disk_info, 6661 file_storage_dir, file_driver, 6662 base_index, feedback_fn):
6663 """Generate the entire disk layout for a given template type. 6664 6665 """ 6666 #TODO: compute space requirements 6667 6668 vgname = lu.cfg.GetVGName() 6669 disk_count = len(disk_info) 6670 disks = [] 6671 if template_name == constants.DT_DISKLESS: 6672 pass 6673 elif template_name == constants.DT_PLAIN: 6674 if len(secondary_nodes) != 0: 6675 raise errors.ProgrammerError("Wrong template configuration") 6676 6677 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6678 for i in range(disk_count)]) 6679 for idx, disk in enumerate(disk_info): 6680 disk_index = idx + base_index 6681 vg = disk.get("vg", vgname) 6682 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx])) 6683 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"], 6684 logical_id=(vg, names[idx]), 6685 iv_name="disk/%d" % disk_index, 6686 mode=disk["mode"]) 6687 disks.append(disk_dev) 6688 elif template_name == constants.DT_DRBD8: 6689 if len(secondary_nodes) != 1: 6690 raise errors.ProgrammerError("Wrong template configuration") 6691 remote_node = secondary_nodes[0] 6692 minors = lu.cfg.AllocateDRBDMinor( 6693 [primary_node, remote_node] * len(disk_info), instance_name) 6694 6695 names = [] 6696 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 6697 for i in range(disk_count)]): 6698 names.append(lv_prefix + "_data") 6699 names.append(lv_prefix + "_meta") 6700 for idx, disk in enumerate(disk_info): 6701 disk_index = idx + base_index 6702 data_vg = disk.get("vg", vgname) 6703 meta_vg = disk.get("metavg", data_vg) 6704 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, 6705 disk["size"], [data_vg, meta_vg], 6706 names[idx*2:idx*2+2], 6707 "disk/%d" % disk_index, 6708 minors[idx*2], minors[idx*2+1]) 6709 disk_dev.mode = disk["mode"] 6710 disks.append(disk_dev) 6711 elif template_name == constants.DT_FILE: 6712 if len(secondary_nodes) != 0: 6713 raise errors.ProgrammerError("Wrong template configuration") 6714 6715 opcodes.RequireFileStorage() 6716 6717 for idx, disk in enumerate(disk_info): 6718 disk_index = idx + base_index 6719 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"], 6720 iv_name="disk/%d" % disk_index, 6721 logical_id=(file_driver, 6722 "%s/disk%d" % (file_storage_dir, 6723 disk_index)), 6724 mode=disk["mode"]) 6725 disks.append(disk_dev) 6726 else: 6727 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name) 6728 return disks
6729
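The disk_info argument is a list with one dict per disk; a sketch of the shape consumed by the branches above (the keys are taken from the code, the sizes and VG names are illustrative, and DISK_RDWR comes from ganeti.constants):

  disk_info = [
    {"size": 10240, "mode": constants.DISK_RDWR},             # plain/file disk
    {"size": 2048, "mode": constants.DISK_RDWR,
     "vg": "xenvg", "metavg": "xenvg"},                       # drbd data/meta VGs
  ]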
6730 6731 -def _GetInstanceInfoText(instance):
6732   """Compute the text that should be added to the disk's metadata. 6733 6734   """ 6735   return "originstname+%s" % instance.name
6736
6737 6738 -def _CalcEta(time_taken, written, total_size):
6739 """Calculates the ETA based on size written and total size. 6740 6741 @param time_taken: The time taken so far 6742 @param written: amount written so far 6743 @param total_size: The total size of data to be written 6744 @return: The remaining time in seconds 6745 6746 """ 6747 avg_time = time_taken / float(written) 6748 return (total_size - written) * avg_time
6749
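# Worked example (illustrative only): if 256 units out of 1024 were written
# in 30 seconds, the average time per unit is 30.0 / 256 and the remaining
# 768 units need (1024 - 256) * (30.0 / 256) = 90.0 seconds:
#
#   >>> _CalcEta(30.0, 256, 1024)
#   90.0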
6750 6751 -def _WipeDisks(lu, instance):
6752 """Wipes instance disks. 6753 6754 @type lu: L{LogicalUnit} 6755 @param lu: the logical unit on whose behalf we execute 6756 @type instance: L{objects.Instance} 6757 @param instance: the instance whose disks we should create 6758 @return: the success of the wipe 6759 6760 """ 6761 node = instance.primary_node 6762 6763 for device in instance.disks: 6764 lu.cfg.SetDiskID(device, node) 6765 6766 logging.info("Pause sync of instance %s disks", instance.name) 6767 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True) 6768 6769 for idx, success in enumerate(result.payload): 6770 if not success: 6771 logging.warn("pause-sync of instance %s for disks %d failed", 6772 instance.name, idx) 6773 6774 try: 6775 for idx, device in enumerate(instance.disks): 6776 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but 6777 # MAX_WIPE_CHUNK at max 6778 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 * 6779 constants.MIN_WIPE_CHUNK_PERCENT) 6780 # we _must_ make this an int, otherwise rounding errors will 6781 # occur 6782 wipe_chunk_size = int(wipe_chunk_size) 6783 6784 lu.LogInfo("* Wiping disk %d", idx) 6785 logging.info("Wiping disk %d for instance %s, node %s using" 6786 " chunk size %s", idx, instance.name, node, wipe_chunk_size) 6787 6788 offset = 0 6789 size = device.size 6790 last_output = 0 6791 start_time = time.time() 6792 6793 while offset < size: 6794 wipe_size = min(wipe_chunk_size, size - offset) 6795 logging.debug("Wiping disk %d, offset %s, chunk %s", 6796 idx, offset, wipe_size) 6797 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size) 6798 result.Raise("Could not wipe disk %d at offset %d for size %d" % 6799 (idx, offset, wipe_size)) 6800 now = time.time() 6801 offset += wipe_size 6802 if now - last_output >= 60: 6803 eta = _CalcEta(now - start_time, offset, size) 6804 lu.LogInfo(" - done: %.1f%% ETA: %s" % 6805 (offset / float(size) * 100, utils.FormatSeconds(eta))) 6806 last_output = now 6807 finally: 6808 logging.info("Resume sync of instance %s disks", instance.name) 6809 6810 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False) 6811 6812 for idx, success in enumerate(result.payload): 6813 if not success: 6814 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a" 6815 " look at the status and troubleshoot the issue.", idx) 6816 logging.warn("resume-sync of instance %s for disks %d failed", 6817 instance.name, idx)
6818
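# Sizing sketch for the wipe loop above (the concrete values of
# MAX_WIPE_CHUNK and MIN_WIPE_CHUNK_PERCENT are assumptions; see
# ganeti.constants for the real ones): with a 102400 MiB disk,
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024, the chunk size is
# min(1024, 102400 / 100.0 * 10) = 1024 MiB, i.e. the disk is wiped in 100
# calls to call_blockdev_wipe, with a progress line logged at most once a
# minute.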
6819 6820 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6821 """Create all disks for an instance. 6822 6823 This abstracts away some work from AddInstance. 6824 6825 @type lu: L{LogicalUnit} 6826 @param lu: the logical unit on whose behalf we execute 6827 @type instance: L{objects.Instance} 6828 @param instance: the instance whose disks we should create 6829 @type to_skip: list 6830 @param to_skip: list of indices to skip 6831 @type target_node: string 6832 @param target_node: if passed, overrides the target node for creation 6833 @rtype: boolean 6834 @return: the success of the creation 6835 6836 """ 6837 info = _GetInstanceInfoText(instance) 6838 if target_node is None: 6839 pnode = instance.primary_node 6840 all_nodes = instance.all_nodes 6841 else: 6842 pnode = target_node 6843 all_nodes = [pnode] 6844 6845 if instance.disk_template == constants.DT_FILE: 6846 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 6847 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir) 6848 6849 result.Raise("Failed to create directory '%s' on" 6850 " node %s" % (file_storage_dir, pnode)) 6851 6852 # Note: this needs to be kept in sync with adding of disks in 6853 # LUInstanceSetParams 6854 for idx, device in enumerate(instance.disks): 6855 if to_skip and idx in to_skip: 6856 continue 6857 logging.info("Creating volume %s for instance %s", 6858 device.iv_name, instance.name) 6859 #HARDCODE 6860 for node in all_nodes: 6861 f_create = node == pnode 6862 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6863
6864 6865 -def _RemoveDisks(lu, instance, target_node=None):
6866 """Remove all disks for an instance. 6867 6868 This abstracts away some work from `AddInstance()` and 6869 `RemoveInstance()`. Note that in case some of the devices couldn't 6870 be removed, the removal will continue with the other ones (compare 6871 with `_CreateDisks()`). 6872 6873 @type lu: L{LogicalUnit} 6874 @param lu: the logical unit on whose behalf we execute 6875 @type instance: L{objects.Instance} 6876 @param instance: the instance whose disks we should remove 6877 @type target_node: string 6878 @param target_node: used to override the node on which to remove the disks 6879 @rtype: boolean 6880 @return: the success of the removal 6881 6882 """ 6883 logging.info("Removing block devices for instance %s", instance.name) 6884 6885 all_result = True 6886 for device in instance.disks: 6887 if target_node: 6888 edata = [(target_node, device)] 6889 else: 6890 edata = device.ComputeNodeTree(instance.primary_node) 6891 for node, disk in edata: 6892 lu.cfg.SetDiskID(disk, node) 6893 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg 6894 if msg: 6895 lu.LogWarning("Could not remove block device %s on node %s," 6896 " continuing anyway: %s", device.iv_name, node, msg) 6897 all_result = False 6898 6899 if instance.disk_template == constants.DT_FILE: 6900 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 6901 if target_node: 6902 tgt = target_node 6903 else: 6904 tgt = instance.primary_node 6905 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir) 6906 if result.fail_msg: 6907 lu.LogWarning("Could not remove directory '%s' on node %s: %s", 6908 file_storage_dir, instance.primary_node, result.fail_msg) 6909 all_result = False 6910 6911 return all_result
6912
6913 6914 -def _ComputeDiskSizePerVG(disk_template, disks):
6915 """Compute disk size requirements in the volume group 6916 6917 """ 6918 def _compute(disks, payload): 6919 """Universal algorithm 6920 6921 """ 6922 vgs = {} 6923 for disk in disks: 6924 vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload 6925 6926 return vgs
6927 6928 # Required free disk space as a function of disk and swap space 6929 req_size_dict = { 6930 constants.DT_DISKLESS: {}, 6931 constants.DT_PLAIN: _compute(disks, 0), 6932 # 128 MB are added for drbd metadata for each disk 6933 constants.DT_DRBD8: _compute(disks, 128), 6934 constants.DT_FILE: {}, 6935 } 6936 6937 if disk_template not in req_size_dict: 6938 raise errors.ProgrammerError("Disk template '%s' size requirement" 6939 " is unknown" % disk_template) 6940 6941 return req_size_dict[disk_template] 6942
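# Worked example (illustrative only), using the per-VG accumulation above:
#
#   >>> _ComputeDiskSizePerVG(constants.DT_DRBD8,
#   ...                       [{"vg": "xenvg", "size": 1024},
#   ...                        {"vg": "xenvg", "size": 2048}])
#   {'xenvg': 3328}
#
# i.e. every disk contributes its own size plus 128 MB of DRBD metadata to
# its volume group; DT_PLAIN would give {'xenvg': 3072}.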
6943 6944 -def _ComputeDiskSize(disk_template, disks):
6945 """Compute disk size requirements in the volume group 6946 6947 """ 6948 # Required free disk space as a function of disk and swap space 6949 req_size_dict = { 6950 constants.DT_DISKLESS: None, 6951 constants.DT_PLAIN: sum(d["size"] for d in disks), 6952 # 128 MB are added for drbd metadata for each disk 6953 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks), 6954 constants.DT_FILE: None, 6955 } 6956 6957 if disk_template not in req_size_dict: 6958 raise errors.ProgrammerError("Disk template '%s' size requirement" 6959 " is unknown" % disk_template) 6960 6961 return req_size_dict[disk_template]
6962
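# Worked example (illustrative only): for the same two disks, ignoring volume
# groups, _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
# returns (1024 + 128) + (2048 + 128) = 3328, while
# _ComputeDiskSize(constants.DT_PLAIN, ...) returns 3072, and the diskless and
# file templates need no volume group space at all (None).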
6963 6964 -def _FilterVmNodes(lu, nodenames):
6965 """Filters out non-vm_capable nodes from a list. 6966 6967 @type lu: L{LogicalUnit} 6968 @param lu: the logical unit for which we check 6969 @type nodenames: list 6970 @param nodenames: the list of nodes on which we should check 6971 @rtype: list 6972 @return: the list of vm-capable nodes 6973 6974 """ 6975 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList()) 6976 return [name for name in nodenames if name not in vm_nodes]
6977
6978 6979 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
6980 """Hypervisor parameter validation. 6981 6982 This function abstracts the hypervisor parameter validation to be 6983 used in both instance create and instance modify. 6984 6985 @type lu: L{LogicalUnit} 6986 @param lu: the logical unit for which we check 6987 @type nodenames: list 6988 @param nodenames: the list of nodes on which we should check 6989 @type hvname: string 6990 @param hvname: the name of the hypervisor we should use 6991 @type hvparams: dict 6992 @param hvparams: the parameters which we need to check 6993 @raise errors.OpPrereqError: if the parameters are not valid 6994 6995 """ 6996 nodenames = _FilterVmNodes(lu, nodenames) 6997 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, 6998 hvname, 6999 hvparams) 7000 for node in nodenames: 7001 info = hvinfo[node] 7002 if info.offline: 7003 continue 7004 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7005
7006 7007 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
7008 """OS parameters validation. 7009 7010 @type lu: L{LogicalUnit} 7011 @param lu: the logical unit for which we check 7012 @type required: boolean 7013 @param required: whether the validation should fail if the OS is not 7014 found 7015 @type nodenames: list 7016 @param nodenames: the list of nodes on which we should check 7017 @type osname: string 7018 @param osname: the name of the OS we should check 7019 @type osparams: dict 7020 @param osparams: the parameters which we need to check 7021 @raise errors.OpPrereqError: if the parameters are not valid 7022 7023 """ 7024 nodenames = _FilterVmNodes(lu, nodenames) 7025 result = lu.rpc.call_os_validate(required, nodenames, osname, 7026 [constants.OS_VALIDATE_PARAMETERS], 7027 osparams) 7028 for node, nres in result.items(): 7029 # we don't check for offline cases since this should be run only 7030 # against the master node and/or an instance's nodes 7031 nres.Raise("OS Parameters validation failed on node %s" % node) 7032 if not nres.payload: 7033 lu.LogInfo("OS %s not found on node %s, validation skipped", 7034 osname, node)
7035
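# Typical call sites (these exact calls appear in LUInstanceCreate.CheckPrereq
# below; shown here only to illustrate how the two validators are used
# together):
#
#   nodenames = [pnode.name] + self.secondaries
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)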
7036 7037 -class LUInstanceCreate(LogicalUnit):
7038 """Create an instance. 7039 7040 """ 7041 HPATH = "instance-add" 7042 HTYPE = constants.HTYPE_INSTANCE 7043 REQ_BGL = False 7044
7045 - def CheckArguments(self):
7046 """Check arguments. 7047 7048 """ 7049 # do not require name_check to ease forward/backward compatibility 7050 # for tools 7051 if self.op.no_install and self.op.start: 7052 self.LogInfo("No-installation mode selected, disabling startup") 7053 self.op.start = False 7054 # validate/normalize the instance name 7055 self.op.instance_name = \ 7056 netutils.Hostname.GetNormalizedName(self.op.instance_name) 7057 7058 if self.op.ip_check and not self.op.name_check: 7059 # TODO: make the ip check more flexible and not depend on the name check 7060 raise errors.OpPrereqError("Cannot do ip check without a name check", 7061 errors.ECODE_INVAL) 7062 7063 # check nics' parameter names 7064 for nic in self.op.nics: 7065 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) 7066 7067 # check disks. parameter names and consistent adopt/no-adopt strategy 7068 has_adopt = has_no_adopt = False 7069 for disk in self.op.disks: 7070 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) 7071 if "adopt" in disk: 7072 has_adopt = True 7073 else: 7074 has_no_adopt = True 7075 if has_adopt and has_no_adopt: 7076 raise errors.OpPrereqError("Either all disks are adopted or none is", 7077 errors.ECODE_INVAL) 7078 if has_adopt: 7079 if self.op.disk_template not in constants.DTS_MAY_ADOPT: 7080 raise errors.OpPrereqError("Disk adoption is not supported for the" 7081 " '%s' disk template" % 7082 self.op.disk_template, 7083 errors.ECODE_INVAL) 7084 if self.op.iallocator is not None: 7085 raise errors.OpPrereqError("Disk adoption not allowed with an" 7086 " iallocator script", errors.ECODE_INVAL) 7087 if self.op.mode == constants.INSTANCE_IMPORT: 7088 raise errors.OpPrereqError("Disk adoption not allowed for" 7089 " instance import", errors.ECODE_INVAL) 7090 7091 self.adopt_disks = has_adopt 7092 7093 # instance name verification 7094 if self.op.name_check: 7095 self.hostname1 = netutils.GetHostname(name=self.op.instance_name) 7096 self.op.instance_name = self.hostname1.name 7097 # used in CheckPrereq for ip ping check 7098 self.check_ip = self.hostname1.ip 7099 else: 7100 self.check_ip = None 7101 7102 # file storage checks 7103 if (self.op.file_driver and 7104 not self.op.file_driver in constants.FILE_DRIVER): 7105 raise errors.OpPrereqError("Invalid file driver name '%s'" % 7106 self.op.file_driver, errors.ECODE_INVAL) 7107 7108 if self.op.disk_template == constants.DT_FILE: 7109 opcodes.RequireFileStorage() 7110 7111 ### Node/iallocator related checks 7112 _CheckIAllocatorOrNode(self, "iallocator", "pnode") 7113 7114 if self.op.pnode is not None: 7115 if self.op.disk_template in constants.DTS_NET_MIRROR: 7116 if self.op.snode is None: 7117 raise errors.OpPrereqError("The networked disk templates need" 7118 " a mirror node", errors.ECODE_INVAL) 7119 elif self.op.snode: 7120 self.LogWarning("Secondary node will be ignored on non-mirrored disk" 7121 " template") 7122 self.op.snode = None 7123 7124 self._cds = _GetClusterDomainSecret() 7125 7126 if self.op.mode == constants.INSTANCE_IMPORT: 7127 # On import force_variant must be True, because if we forced it at 7128 # initial install, our only chance when importing it back is that it 7129 # works again! 
7130 self.op.force_variant = True 7131 7132 if self.op.no_install: 7133 self.LogInfo("No-installation mode has no effect during import") 7134 7135 elif self.op.mode == constants.INSTANCE_CREATE: 7136 if self.op.os_type is None: 7137 raise errors.OpPrereqError("No guest OS specified", 7138 errors.ECODE_INVAL) 7139 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: 7140 raise errors.OpPrereqError("Guest OS '%s' is not allowed for" 7141 " installation" % self.op.os_type, 7142 errors.ECODE_STATE) 7143 if self.op.disk_template is None: 7144 raise errors.OpPrereqError("No disk template specified", 7145 errors.ECODE_INVAL) 7146 7147 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 7148 # Check handshake to ensure both clusters have the same domain secret 7149 src_handshake = self.op.source_handshake 7150 if not src_handshake: 7151 raise errors.OpPrereqError("Missing source handshake", 7152 errors.ECODE_INVAL) 7153 7154 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, 7155 src_handshake) 7156 if errmsg: 7157 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, 7158 errors.ECODE_INVAL) 7159 7160 # Load and check source CA 7161 self.source_x509_ca_pem = self.op.source_x509_ca 7162 if not self.source_x509_ca_pem: 7163 raise errors.OpPrereqError("Missing source X509 CA", 7164 errors.ECODE_INVAL) 7165 7166 try: 7167 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, 7168 self._cds) 7169 except OpenSSL.crypto.Error, err: 7170 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % 7171 (err, ), errors.ECODE_INVAL) 7172 7173 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 7174 if errcode is not None: 7175 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), 7176 errors.ECODE_INVAL) 7177 7178 self.source_x509_ca = cert 7179 7180 src_instance_name = self.op.source_instance_name 7181 if not src_instance_name: 7182 raise errors.OpPrereqError("Missing source instance name", 7183 errors.ECODE_INVAL) 7184 7185 self.source_instance_name = \ 7186 netutils.GetHostname(name=src_instance_name).name 7187 7188 else: 7189 raise errors.OpPrereqError("Invalid instance creation mode %r" % 7190 self.op.mode, errors.ECODE_INVAL)
7191
7192 - def ExpandNames(self):
7193 """ExpandNames for CreateInstance. 7194 7195 Figure out the right locks for instance creation. 7196 7197 """ 7198 self.needed_locks = {} 7199 7200 instance_name = self.op.instance_name 7201 # this is just a preventive check, but someone might still add this 7202 # instance in the meantime, and creation will fail at lock-add time 7203 if instance_name in self.cfg.GetInstanceList(): 7204 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 7205 instance_name, errors.ECODE_EXISTS) 7206 7207 self.add_locks[locking.LEVEL_INSTANCE] = instance_name 7208 7209 if self.op.iallocator: 7210 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7211 else: 7212 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode) 7213 nodelist = [self.op.pnode] 7214 if self.op.snode is not None: 7215 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode) 7216 nodelist.append(self.op.snode) 7217 self.needed_locks[locking.LEVEL_NODE] = nodelist 7218 7219 # in case of import lock the source node too 7220 if self.op.mode == constants.INSTANCE_IMPORT: 7221 src_node = self.op.src_node 7222 src_path = self.op.src_path 7223 7224 if src_path is None: 7225 self.op.src_path = src_path = self.op.instance_name 7226 7227 if src_node is None: 7228 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7229 self.op.src_node = None 7230 if os.path.isabs(src_path): 7231 raise errors.OpPrereqError("Importing an instance from a path" 7232 " requires a source node option", 7233 errors.ECODE_INVAL) 7234 else: 7235 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node) 7236 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 7237 self.needed_locks[locking.LEVEL_NODE].append(src_node) 7238 if not os.path.isabs(src_path): 7239 self.op.src_path = src_path = \ 7240 utils.PathJoin(constants.EXPORT_DIR, src_path)
7241
7242 - def _RunAllocator(self):
7243 """Run the allocator based on input opcode. 7244 7245 """ 7246 nics = [n.ToDict() for n in self.nics] 7247 ial = IAllocator(self.cfg, self.rpc, 7248 mode=constants.IALLOCATOR_MODE_ALLOC, 7249 name=self.op.instance_name, 7250 disk_template=self.op.disk_template, 7251 tags=[], 7252 os=self.op.os_type, 7253 vcpus=self.be_full[constants.BE_VCPUS], 7254 mem_size=self.be_full[constants.BE_MEMORY], 7255 disks=self.disks, 7256 nics=nics, 7257 hypervisor=self.op.hypervisor, 7258 ) 7259 7260 ial.Run(self.op.iallocator) 7261 7262 if not ial.success: 7263 raise errors.OpPrereqError("Can't compute nodes using" 7264 " iallocator '%s': %s" % 7265 (self.op.iallocator, ial.info), 7266 errors.ECODE_NORES) 7267 if len(ial.result) != ial.required_nodes: 7268 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 7269 " of nodes (%s), required %s" % 7270 (self.op.iallocator, len(ial.result), 7271 ial.required_nodes), errors.ECODE_FAULT) 7272 self.op.pnode = ial.result[0] 7273 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 7274 self.op.instance_name, self.op.iallocator, 7275 utils.CommaJoin(ial.result)) 7276 if ial.required_nodes == 2: 7277 self.op.snode = ial.result[1]
7278
7279 - def BuildHooksEnv(self):
7280 """Build hooks env. 7281 7282 This runs on master, primary and secondary nodes of the instance. 7283 7284 """ 7285 env = { 7286 "ADD_MODE": self.op.mode, 7287 } 7288 if self.op.mode == constants.INSTANCE_IMPORT: 7289 env["SRC_NODE"] = self.op.src_node 7290 env["SRC_PATH"] = self.op.src_path 7291 env["SRC_IMAGES"] = self.src_images 7292 7293 env.update(_BuildInstanceHookEnv( 7294 name=self.op.instance_name, 7295 primary_node=self.op.pnode, 7296 secondary_nodes=self.secondaries, 7297 status=self.op.start, 7298 os_type=self.op.os_type, 7299 memory=self.be_full[constants.BE_MEMORY], 7300 vcpus=self.be_full[constants.BE_VCPUS], 7301 nics=_NICListToTuple(self, self.nics), 7302 disk_template=self.op.disk_template, 7303 disks=[(d["size"], d["mode"]) for d in self.disks], 7304 bep=self.be_full, 7305 hvp=self.hv_full, 7306 hypervisor_name=self.op.hypervisor, 7307 )) 7308 7309 nl = ([self.cfg.GetMasterNode(), self.op.pnode] + 7310 self.secondaries) 7311 return env, nl, nl
7312
7313 - def _ReadExportInfo(self):
7314 """Reads the export information from disk. 7315 7316 It will override the opcode source node and path with the actual 7317 information, if these two were not specified before. 7318 7319 @return: the export information 7320 7321 """ 7322 assert self.op.mode == constants.INSTANCE_IMPORT 7323 7324 src_node = self.op.src_node 7325 src_path = self.op.src_path 7326 7327 if src_node is None: 7328 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 7329 exp_list = self.rpc.call_export_list(locked_nodes) 7330 found = False 7331 for node in exp_list: 7332 if exp_list[node].fail_msg: 7333 continue 7334 if src_path in exp_list[node].payload: 7335 found = True 7336 self.op.src_node = src_node = node 7337 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR, 7338 src_path) 7339 break 7340 if not found: 7341 raise errors.OpPrereqError("No export found for relative path %s" % 7342 src_path, errors.ECODE_INVAL) 7343 7344 _CheckNodeOnline(self, src_node) 7345 result = self.rpc.call_export_info(src_node, src_path) 7346 result.Raise("No export or invalid export found in dir %s" % src_path) 7347 7348 export_info = objects.SerializableConfigParser.Loads(str(result.payload)) 7349 if not export_info.has_section(constants.INISECT_EXP): 7350 raise errors.ProgrammerError("Corrupted export config", 7351 errors.ECODE_ENVIRON) 7352 7353 ei_version = export_info.get(constants.INISECT_EXP, "version") 7354 if (int(ei_version) != constants.EXPORT_VERSION): 7355 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % 7356 (ei_version, constants.EXPORT_VERSION), 7357 errors.ECODE_ENVIRON) 7358 return export_info
7359
7360 - def _ReadExportParams(self, einfo):
7361 """Use export parameters as defaults. 7362 7363 In case the opcode doesn't specify (as in override) some instance 7364 parameters, then try to use them from the export information, if 7365 that declares them. 7366 7367 """ 7368 self.op.os_type = einfo.get(constants.INISECT_EXP, "os") 7369 7370 if self.op.disk_template is None: 7371 if einfo.has_option(constants.INISECT_INS, "disk_template"): 7372 self.op.disk_template = einfo.get(constants.INISECT_INS, 7373 "disk_template") 7374 else: 7375 raise errors.OpPrereqError("No disk template specified and the export" 7376 " is missing the disk_template information", 7377 errors.ECODE_INVAL) 7378 7379 if not self.op.disks: 7380 if einfo.has_option(constants.INISECT_INS, "disk_count"): 7381 disks = [] 7382 # TODO: import the disk iv_name too 7383 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")): 7384 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) 7385 disks.append({"size": disk_sz}) 7386 self.op.disks = disks 7387 else: 7388 raise errors.OpPrereqError("No disk info specified and the export" 7389 " is missing the disk information", 7390 errors.ECODE_INVAL) 7391 7392 if (not self.op.nics and 7393 einfo.has_option(constants.INISECT_INS, "nic_count")): 7394 nics = [] 7395 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")): 7396 ndict = {} 7397 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: 7398 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name)) 7399 ndict[name] = v 7400 nics.append(ndict) 7401 self.op.nics = nics 7402 7403 if (self.op.hypervisor is None and 7404 einfo.has_option(constants.INISECT_INS, "hypervisor")): 7405 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") 7406 if einfo.has_section(constants.INISECT_HYP): 7407 # use the export parameters but do not override the ones 7408 # specified by the user 7409 for name, value in einfo.items(constants.INISECT_HYP): 7410 if name not in self.op.hvparams: 7411 self.op.hvparams[name] = value 7412 7413 if einfo.has_section(constants.INISECT_BEP): 7414 # use the parameters, without overriding 7415 for name, value in einfo.items(constants.INISECT_BEP): 7416 if name not in self.op.beparams: 7417 self.op.beparams[name] = value 7418 else: 7419 # try to read the parameters old style, from the main section 7420 for name in constants.BES_PARAMETERS: 7421 if (name not in self.op.beparams and 7422 einfo.has_option(constants.INISECT_INS, name)): 7423 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) 7424 7425 if einfo.has_section(constants.INISECT_OSP): 7426 # use the parameters, without overriding 7427 for name, value in einfo.items(constants.INISECT_OSP): 7428 if name not in self.op.osparams: 7429 self.op.osparams[name] = value
7430
7431 - def _RevertToDefaults(self, cluster):
7432 """Revert the instance parameters to the default values. 7433 7434 """ 7435 # hvparams 7436 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) 7437 for name in self.op.hvparams.keys(): 7438 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: 7439 del self.op.hvparams[name] 7440 # beparams 7441 be_defs = cluster.SimpleFillBE({}) 7442 for name in self.op.beparams.keys(): 7443 if name in be_defs and be_defs[name] == self.op.beparams[name]: 7444 del self.op.beparams[name] 7445 # nic params 7446 nic_defs = cluster.SimpleFillNIC({}) 7447 for nic in self.op.nics: 7448 for name in constants.NICS_PARAMETERS: 7449 if name in nic and name in nic_defs and nic[name] == nic_defs[name]: 7450 del nic[name] 7451 # osparams 7452 os_defs = cluster.SimpleFillOS(self.op.os_type, {}) 7453 for name in self.op.osparams.keys(): 7454 if name in os_defs and os_defs[name] == self.op.osparams[name]: 7455 del self.op.osparams[name]
7456
7457 - def _CalculateFileStorageDir(self):
7458 """Calculate final instance file storage dir. 7459 7460 """ 7461 # file storage dir calculation/check 7462 self.instance_file_storage_dir = None 7463 if self.op.disk_template == constants.DT_FILE: 7464 # build the full file storage dir path 7465 joinargs = [] 7466 7467 cfg_storagedir = self.cfg.GetFileStorageDir() 7468 if not cfg_storagedir: 7469 raise errors.OpPrereqError("Cluster file storage dir not defined") 7470 joinargs.append(cfg_storagedir) 7471 7472 if self.op.file_storage_dir is not None: 7473 joinargs.append(self.op.file_storage_dir) 7474 7475 joinargs.append(self.op.instance_name) 7476 7477 # pylint: disable-msg=W0142 7478 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
7479
7480 - def CheckPrereq(self):
7481 """Check prerequisites. 7482 7483 """ 7484 self._CalculateFileStorageDir() 7485 7486 if self.op.mode == constants.INSTANCE_IMPORT: 7487 export_info = self._ReadExportInfo() 7488 self._ReadExportParams(export_info) 7489 7490 if (not self.cfg.GetVGName() and 7491 self.op.disk_template not in constants.DTS_NOT_LVM): 7492 raise errors.OpPrereqError("Cluster does not support lvm-based" 7493 " instances", errors.ECODE_STATE) 7494 7495 if self.op.hypervisor is None: 7496 self.op.hypervisor = self.cfg.GetHypervisorType() 7497 7498 cluster = self.cfg.GetClusterInfo() 7499 enabled_hvs = cluster.enabled_hypervisors 7500 if self.op.hypervisor not in enabled_hvs: 7501 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" 7502 " cluster (%s)" % (self.op.hypervisor, 7503 ",".join(enabled_hvs)), 7504 errors.ECODE_STATE) 7505 7506 # check hypervisor parameter syntax (locally) 7507 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 7508 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, 7509 self.op.hvparams) 7510 hv_type = hypervisor.GetHypervisor(self.op.hypervisor) 7511 hv_type.CheckParameterSyntax(filled_hvp) 7512 self.hv_full = filled_hvp 7513 # check that we don't specify global parameters on an instance 7514 _CheckGlobalHvParams(self.op.hvparams) 7515 7516 # fill and remember the beparams dict 7517 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 7518 self.be_full = cluster.SimpleFillBE(self.op.beparams) 7519 7520 # build os parameters 7521 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) 7522 7523 # now that hvp/bep are in final format, let's reset to defaults, 7524 # if told to do so 7525 if self.op.identify_defaults: 7526 self._RevertToDefaults(cluster) 7527 7528 # NIC buildup 7529 self.nics = [] 7530 for idx, nic in enumerate(self.op.nics): 7531 nic_mode_req = nic.get("mode", None) 7532 nic_mode = nic_mode_req 7533 if nic_mode is None: 7534 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] 7535 7536 # in routed mode, for the first nic, the default ip is 'auto' 7537 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0: 7538 default_ip_mode = constants.VALUE_AUTO 7539 else: 7540 default_ip_mode = constants.VALUE_NONE 7541 7542 # ip validity checks 7543 ip = nic.get("ip", default_ip_mode) 7544 if ip is None or ip.lower() == constants.VALUE_NONE: 7545 nic_ip = None 7546 elif ip.lower() == constants.VALUE_AUTO: 7547 if not self.op.name_check: 7548 raise errors.OpPrereqError("IP address set to auto but name checks" 7549 " have been skipped", 7550 errors.ECODE_INVAL) 7551 nic_ip = self.hostname1.ip 7552 else: 7553 if not netutils.IPAddress.IsValid(ip): 7554 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 7555 errors.ECODE_INVAL) 7556 nic_ip = ip 7557 7558 # TODO: check the ip address for uniqueness 7559 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: 7560 raise errors.OpPrereqError("Routed nic mode requires an ip address", 7561 errors.ECODE_INVAL) 7562 7563 # MAC address verification 7564 mac = nic.get("mac", constants.VALUE_AUTO) 7565 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7566 mac = utils.NormalizeAndValidateMac(mac) 7567 7568 try: 7569 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 7570 except errors.ReservationError: 7571 raise errors.OpPrereqError("MAC address %s already in use" 7572 " in cluster" % mac, 7573 errors.ECODE_NOTUNIQUE) 7574 7575 # bridge verification 7576 bridge = nic.get("bridge", None) 7577 link = nic.get("link", 
None) 7578 if bridge and link: 7579 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 7580 " at the same time", errors.ECODE_INVAL) 7581 elif bridge and nic_mode == constants.NIC_MODE_ROUTED: 7582 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic", 7583 errors.ECODE_INVAL) 7584 elif bridge: 7585 link = bridge 7586 7587 nicparams = {} 7588 if nic_mode_req: 7589 nicparams[constants.NIC_MODE] = nic_mode_req 7590 if link: 7591 nicparams[constants.NIC_LINK] = link 7592 7593 check_params = cluster.SimpleFillNIC(nicparams) 7594 objects.NIC.CheckParameterSyntax(check_params) 7595 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams)) 7596 7597 # disk checks/pre-build 7598 self.disks = [] 7599 for disk in self.op.disks: 7600 mode = disk.get("mode", constants.DISK_RDWR) 7601 if mode not in constants.DISK_ACCESS_SET: 7602 raise errors.OpPrereqError("Invalid disk access mode '%s'" % 7603 mode, errors.ECODE_INVAL) 7604 size = disk.get("size", None) 7605 if size is None: 7606 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) 7607 try: 7608 size = int(size) 7609 except (TypeError, ValueError): 7610 raise errors.OpPrereqError("Invalid disk size '%s'" % size, 7611 errors.ECODE_INVAL) 7612 data_vg = disk.get("vg", self.cfg.GetVGName()) 7613 meta_vg = disk.get("metavg", data_vg) 7614 new_disk = {"size": size, "mode": mode, "vg": data_vg, "metavg": meta_vg} 7615 if "adopt" in disk: 7616 new_disk["adopt"] = disk["adopt"] 7617 self.disks.append(new_disk) 7618 7619 if self.op.mode == constants.INSTANCE_IMPORT: 7620 7621 # Check that the new instance doesn't have less disks than the export 7622 instance_disks = len(self.disks) 7623 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count') 7624 if instance_disks < export_disks: 7625 raise errors.OpPrereqError("Not enough disks to import." 7626 " (instance: %d, export: %d)" % 7627 (instance_disks, export_disks), 7628 errors.ECODE_INVAL) 7629 7630 disk_images = [] 7631 for idx in range(export_disks): 7632 option = 'disk%d_dump' % idx 7633 if export_info.has_option(constants.INISECT_INS, option): 7634 # FIXME: are the old os-es, disk sizes, etc. useful? 
7635 export_name = export_info.get(constants.INISECT_INS, option) 7636 image = utils.PathJoin(self.op.src_path, export_name) 7637 disk_images.append(image) 7638 else: 7639 disk_images.append(False) 7640 7641 self.src_images = disk_images 7642 7643 old_name = export_info.get(constants.INISECT_INS, 'name') 7644 try: 7645 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count') 7646 except (TypeError, ValueError), err: 7647 raise errors.OpPrereqError("Invalid export file, nic_count is not" 7648 " an integer: %s" % str(err), 7649 errors.ECODE_STATE) 7650 if self.op.instance_name == old_name: 7651 for idx, nic in enumerate(self.nics): 7652 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx: 7653 nic_mac_ini = 'nic%d_mac' % idx 7654 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) 7655 7656 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT 7657 7658 # ip ping checks (we use the same ip that was resolved in ExpandNames) 7659 if self.op.ip_check: 7660 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): 7661 raise errors.OpPrereqError("IP %s of instance %s already in use" % 7662 (self.check_ip, self.op.instance_name), 7663 errors.ECODE_NOTUNIQUE) 7664 7665 #### mac address generation 7666 # By generating here the mac address both the allocator and the hooks get 7667 # the real final mac address rather than the 'auto' or 'generate' value. 7668 # There is a race condition between the generation and the instance object 7669 # creation, which means that we know the mac is valid now, but we're not 7670 # sure it will be when we actually add the instance. If things go bad 7671 # adding the instance will abort because of a duplicate mac, and the 7672 # creation job will fail. 7673 for nic in self.nics: 7674 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 7675 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId()) 7676 7677 #### allocator run 7678 7679 if self.op.iallocator is not None: 7680 self._RunAllocator() 7681 7682 #### node related checks 7683 7684 # check primary node 7685 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) 7686 assert self.pnode is not None, \ 7687 "Cannot retrieve locked node %s" % self.op.pnode 7688 if pnode.offline: 7689 raise errors.OpPrereqError("Cannot use offline primary node '%s'" % 7690 pnode.name, errors.ECODE_STATE) 7691 if pnode.drained: 7692 raise errors.OpPrereqError("Cannot use drained primary node '%s'" % 7693 pnode.name, errors.ECODE_STATE) 7694 if not pnode.vm_capable: 7695 raise errors.OpPrereqError("Cannot use non-vm_capable primary node" 7696 " '%s'" % pnode.name, errors.ECODE_STATE) 7697 7698 self.secondaries = [] 7699 7700 # mirror node verification 7701 if self.op.disk_template in constants.DTS_NET_MIRROR: 7702 if self.op.snode == pnode.name: 7703 raise errors.OpPrereqError("The secondary node cannot be the" 7704 " primary node.", errors.ECODE_INVAL) 7705 _CheckNodeOnline(self, self.op.snode) 7706 _CheckNodeNotDrained(self, self.op.snode) 7707 _CheckNodeVmCapable(self, self.op.snode) 7708 self.secondaries.append(self.op.snode) 7709 7710 nodenames = [pnode.name] + self.secondaries 7711 7712 if not self.adopt_disks: 7713 # Check lv size requirements, if not adopting 7714 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks) 7715 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes) 7716 7717 else: # instead, we must check the adoption data 7718 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks]) 7719 if len(all_lvs) != len(self.disks): 7720 raise 
errors.OpPrereqError("Duplicate volume names given for adoption", 7721 errors.ECODE_INVAL) 7722 for lv_name in all_lvs: 7723 try: 7724 # FIXME: lv_name here is "vg/lv" need to ensure that other calls 7725 # to ReserveLV uses the same syntax 7726 self.cfg.ReserveLV(lv_name, self.proc.GetECId()) 7727 except errors.ReservationError: 7728 raise errors.OpPrereqError("LV named %s used by another instance" % 7729 lv_name, errors.ECODE_NOTUNIQUE) 7730 7731 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name] 7732 vg_names.Raise("Cannot get VG information from node %s" % pnode.name) 7733 7734 node_lvs = self.rpc.call_lv_list([pnode.name], 7735 vg_names.payload.keys())[pnode.name] 7736 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) 7737 node_lvs = node_lvs.payload 7738 7739 delta = all_lvs.difference(node_lvs.keys()) 7740 if delta: 7741 raise errors.OpPrereqError("Missing logical volume(s): %s" % 7742 utils.CommaJoin(delta), 7743 errors.ECODE_INVAL) 7744 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] 7745 if online_lvs: 7746 raise errors.OpPrereqError("Online logical volumes found, cannot" 7747 " adopt: %s" % utils.CommaJoin(online_lvs), 7748 errors.ECODE_STATE) 7749 # update the size of disk based on what is found 7750 for dsk in self.disks: 7751 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0])) 7752 7753 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) 7754 7755 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) 7756 # check OS parameters (remotely) 7757 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full) 7758 7759 _CheckNicsBridgesExist(self, self.nics, self.pnode.name) 7760 7761 # memory check on primary node 7762 if self.op.start: 7763 _CheckNodeFreeMemory(self, self.pnode.name, 7764 "creating instance %s" % self.op.instance_name, 7765 self.be_full[constants.BE_MEMORY], 7766 self.op.hypervisor) 7767 7768 self.dry_run_result = list(nodenames)
7769
7770 - def Exec(self, feedback_fn):
7771 """Create and add the instance to the cluster. 7772 7773 """ 7774 instance = self.op.instance_name 7775 pnode_name = self.pnode.name 7776 7777 ht_kind = self.op.hypervisor 7778 if ht_kind in constants.HTS_REQ_PORT: 7779 network_port = self.cfg.AllocatePort() 7780 else: 7781 network_port = None 7782 7783 disks = _GenerateDiskTemplate(self, 7784 self.op.disk_template, 7785 instance, pnode_name, 7786 self.secondaries, 7787 self.disks, 7788 self.instance_file_storage_dir, 7789 self.op.file_driver, 7790 0, 7791 feedback_fn) 7792 7793 iobj = objects.Instance(name=instance, os=self.op.os_type, 7794 primary_node=pnode_name, 7795 nics=self.nics, disks=disks, 7796 disk_template=self.op.disk_template, 7797 admin_up=False, 7798 network_port=network_port, 7799 beparams=self.op.beparams, 7800 hvparams=self.op.hvparams, 7801 hypervisor=self.op.hypervisor, 7802 osparams=self.op.osparams, 7803 ) 7804 7805 if self.adopt_disks: 7806 # rename LVs to the newly-generated names; we need to construct 7807 # 'fake' LV disks with the old data, plus the new unique_id 7808 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] 7809 rename_to = [] 7810 for t_dsk, a_dsk in zip (tmp_disks, self.disks): 7811 rename_to.append(t_dsk.logical_id) 7812 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"]) 7813 self.cfg.SetDiskID(t_dsk, pnode_name) 7814 result = self.rpc.call_blockdev_rename(pnode_name, 7815 zip(tmp_disks, rename_to)) 7816 result.Raise("Failed to rename adoped LVs") 7817 else: 7818 feedback_fn("* creating instance disks...") 7819 try: 7820 _CreateDisks(self, iobj) 7821 except errors.OpExecError: 7822 self.LogWarning("Device creation failed, reverting...") 7823 try: 7824 _RemoveDisks(self, iobj) 7825 finally: 7826 self.cfg.ReleaseDRBDMinors(instance) 7827 raise 7828 7829 feedback_fn("adding instance %s to cluster config" % instance) 7830 7831 self.cfg.AddInstance(iobj, self.proc.GetECId()) 7832 7833 # Declare that we don't want to remove the instance lock anymore, as we've 7834 # added the instance to the config 7835 del self.remove_locks[locking.LEVEL_INSTANCE] 7836 # Unlock all the nodes 7837 if self.op.mode == constants.INSTANCE_IMPORT: 7838 nodes_keep = [self.op.src_node] 7839 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE] 7840 if node != self.op.src_node] 7841 self.context.glm.release(locking.LEVEL_NODE, nodes_release) 7842 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep 7843 else: 7844 self.context.glm.release(locking.LEVEL_NODE) 7845 del self.acquired_locks[locking.LEVEL_NODE] 7846 7847 disk_abort = False 7848 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks: 7849 feedback_fn("* wiping instance disks...") 7850 try: 7851 _WipeDisks(self, iobj) 7852 except errors.OpExecError, err: 7853 logging.exception("Wiping disks failed") 7854 self.LogWarning("Wiping instance disks failed (%s)", err) 7855 disk_abort = True 7856 7857 if disk_abort: 7858 # Something is already wrong with the disks, don't do anything else 7859 pass 7860 elif self.op.wait_for_sync: 7861 disk_abort = not _WaitForSync(self, iobj) 7862 elif iobj.disk_template in constants.DTS_NET_MIRROR: 7863 # make sure the disks are not degraded (still sync-ing is ok) 7864 time.sleep(15) 7865 feedback_fn("* checking mirrors status") 7866 disk_abort = not _WaitForSync(self, iobj, oneshot=True) 7867 else: 7868 disk_abort = False 7869 7870 if disk_abort: 7871 _RemoveDisks(self, iobj) 7872 self.cfg.RemoveInstance(iobj.name) 7873 # Make sure the instance lock gets removed 7874 
self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name 7875 raise errors.OpExecError("There are some degraded disks for" 7876 " this instance") 7877 7878 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: 7879 if self.op.mode == constants.INSTANCE_CREATE: 7880 if not self.op.no_install: 7881 feedback_fn("* running the instance OS create scripts...") 7882 # FIXME: pass debug option from opcode to backend 7883 result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 7884 self.op.debug_level) 7885 result.Raise("Could not add os for instance %s" 7886 " on node %s" % (instance, pnode_name)) 7887 7888 elif self.op.mode == constants.INSTANCE_IMPORT: 7889 feedback_fn("* running the instance OS import scripts...") 7890 7891 transfers = [] 7892 7893 for idx, image in enumerate(self.src_images): 7894 if not image: 7895 continue 7896 7897 # FIXME: pass debug option from opcode to backend 7898 dt = masterd.instance.DiskTransfer("disk/%s" % idx, 7899 constants.IEIO_FILE, (image, ), 7900 constants.IEIO_SCRIPT, 7901 (iobj.disks[idx], idx), 7902 None) 7903 transfers.append(dt) 7904 7905 import_result = \ 7906 masterd.instance.TransferInstanceData(self, feedback_fn, 7907 self.op.src_node, pnode_name, 7908 self.pnode.secondary_ip, 7909 iobj, transfers) 7910 if not compat.all(import_result): 7911 self.LogWarning("Some disks for instance %s on node %s were not" 7912 " imported successfully" % (instance, pnode_name)) 7913 7914 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 7915 feedback_fn("* preparing remote import...") 7916 # The source cluster will stop the instance before attempting to make a 7917 # connection. In some cases stopping an instance can take a long time, 7918 # hence the shutdown timeout is added to the connection timeout. 7919 connect_timeout = (constants.RIE_CONNECT_TIMEOUT + 7920 self.op.source_shutdown_timeout) 7921 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 7922 7923 assert iobj.primary_node == self.pnode.name 7924 disk_results = \ 7925 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode, 7926 self.source_x509_ca, 7927 self._cds, timeouts) 7928 if not compat.all(disk_results): 7929 # TODO: Should the instance still be started, even if some disks 7930 # failed to import (valid for local imports, too)? 7931 self.LogWarning("Some disks for instance %s on node %s were not" 7932 " imported successfully" % (instance, pnode_name)) 7933 7934 # Run rename script on newly imported instance 7935 assert iobj.name == instance 7936 feedback_fn("Running rename script for %s" % instance) 7937 result = self.rpc.call_instance_run_rename(pnode_name, iobj, 7938 self.source_instance_name, 7939 self.op.debug_level) 7940 if result.fail_msg: 7941 self.LogWarning("Failed to run rename script for %s on node" 7942 " %s: %s" % (instance, pnode_name, result.fail_msg)) 7943 7944 else: 7945 # also checked in the prereq part 7946 raise errors.ProgrammerError("Unknown OS initialization mode '%s'" 7947 % self.op.mode) 7948 7949 if self.op.start: 7950 iobj.admin_up = True 7951 self.cfg.Update(iobj, feedback_fn) 7952 logging.info("Starting instance %s on node %s", instance, pnode_name) 7953 feedback_fn("* starting instance...") 7954 result = self.rpc.call_instance_start(pnode_name, iobj, None, None) 7955 result.Raise("Could not start instance") 7956 7957 return list(iobj.all_nodes)
7958
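# Illustrative sketch (not part of the original module): LUInstanceCreate is
# driven by an opcode whose fields correspond to the self.op attributes used
# above; the opcode class name and the exact set of defaulted fields are
# assumptions here.
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 os_type="debootstrap+default",
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{"size": 10240}],
#                                 nics=[{}],
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com",
#                                 start=True, ip_check=True, name_check=True,
#                                 wait_for_sync=True)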
7959 7960 -class LUInstanceConsole(NoHooksLU):
7961 """Connect to an instance's console. 7962 7963 This is somewhat special in that it returns the command line that 7964 you need to run on the master node in order to connect to the 7965 console. 7966 7967 """ 7968 REQ_BGL = False 7969
7970 - def ExpandNames(self):
7971 self._ExpandAndLockInstance() 7972
7973 - def CheckPrereq(self):
7974 """Check prerequisites. 7975 7976 This checks that the instance is in the cluster. 7977 7978 """ 7979 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7980 assert self.instance is not None, \ 7981 "Cannot retrieve locked instance %s" % self.op.instance_name 7982 _CheckNodeOnline(self, self.instance.primary_node)
7983
7984 - def Exec(self, feedback_fn):
7985 """Connect to the console of an instance 7986 7987 """ 7988 instance = self.instance 7989 node = instance.primary_node 7990 7991 node_insts = self.rpc.call_instance_list([node], 7992 [instance.hypervisor])[node] 7993 node_insts.Raise("Can't get node information from %s" % node) 7994 7995 if instance.name not in node_insts.payload: 7996 if instance.admin_up: 7997 state = "ERROR_down" 7998 else: 7999 state = "ADMIN_down" 8000 raise errors.OpExecError("Instance %s is not running (state %s)" % 8001 (instance.name, state)) 8002 8003 logging.debug("Connecting to console of %s on %s", instance.name, node) 8004 8005 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8006
8007 8008 -def _GetInstanceConsole(cluster, instance):
8009 """Returns console information for an instance. 8010 8011 @type cluster: L{objects.Cluster} 8012 @type instance: L{objects.Instance} 8013 @rtype: dict 8014 8015 """ 8016 hyper = hypervisor.GetHypervisor(instance.hypervisor) 8017 # beparams and hvparams are passed separately, to avoid editing the 8018 # instance and then saving the defaults in the instance itself. 8019 hvparams = cluster.FillHV(instance) 8020 beparams = cluster.FillBE(instance) 8021 console = hyper.GetInstanceConsole(instance, hvparams, beparams) 8022 8023 assert console.instance == instance.name 8024 assert console.Validate() 8025 8026 return console.ToDict()
8027
8028 8029 -class LUInstanceReplaceDisks(LogicalUnit):
8030 """Replace the disks of an instance. 8031 8032 """ 8033 HPATH = "mirrors-replace" 8034 HTYPE = constants.HTYPE_INSTANCE 8035 REQ_BGL = False 8036
8037 - def CheckArguments(self):
8038 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node, 8039 self.op.iallocator)
8040
8041 - def ExpandNames(self):
8042 self._ExpandAndLockInstance() 8043 8044 if self.op.iallocator is not None: 8045 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 8046 8047 elif self.op.remote_node is not None: 8048 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 8049 self.op.remote_node = remote_node 8050 8051 # Warning: do not remove the locking of the new secondary here 8052 # unless DRBD8.AddChildren is changed to work in parallel; 8053 # currently it doesn't since parallel invocations of 8054 # FindUnusedMinor will conflict 8055 self.needed_locks[locking.LEVEL_NODE] = [remote_node] 8056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 8057 8058 else: 8059 self.needed_locks[locking.LEVEL_NODE] = [] 8060 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 8061 8062 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, 8063 self.op.iallocator, self.op.remote_node, 8064 self.op.disks, False, self.op.early_release) 8065 8066 self.tasklets = [self.replacer]
8067
8068 - def DeclareLocks(self, level):
8069 # If we're not already locking all nodes in the set we have to declare the 8070 # instance's primary/secondary nodes. 8071 if (level == locking.LEVEL_NODE and 8072 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET): 8073 self._LockInstancesNodes()
8074
8075 - def BuildHooksEnv(self):
8076 """Build hooks env. 8077 8078 This runs on the master, the primary and all the secondaries. 8079 8080 """ 8081 instance = self.replacer.instance 8082 env = { 8083 "MODE": self.op.mode, 8084 "NEW_SECONDARY": self.op.remote_node, 8085 "OLD_SECONDARY": instance.secondary_nodes[0], 8086 } 8087 env.update(_BuildInstanceHookEnvByObject(self, instance)) 8088 nl = [ 8089 self.cfg.GetMasterNode(), 8090 instance.primary_node, 8091 ] 8092 if self.op.remote_node is not None: 8093 nl.append(self.op.remote_node) 8094 return env, nl, nl
8095
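# Illustrative sketch (assumption: the opcode class name is inferred from the
# LU name): a secondary-change request that the LU above would dispatch to
# TLReplaceDisks.
#
#   op = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                       mode=constants.REPLACE_DISK_CHG,
#                                       remote_node="node3.example.com",
#                                       disks=[], early_release=False)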
8096 8097 -class TLReplaceDisks(Tasklet):
8098 """Replaces disks for an instance. 8099 8100 Note: Locking is not within the scope of this class. 8101 8102 """
8103 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, 8104 disks, delay_iallocator, early_release):
8105 """Initializes this class. 8106 8107 """ 8108 Tasklet.__init__(self, lu) 8109 8110 # Parameters 8111 self.instance_name = instance_name 8112 self.mode = mode 8113 self.iallocator_name = iallocator_name 8114 self.remote_node = remote_node 8115 self.disks = disks 8116 self.delay_iallocator = delay_iallocator 8117 self.early_release = early_release 8118 8119 # Runtime data 8120 self.instance = None 8121 self.new_node = None 8122 self.target_node = None 8123 self.other_node = None 8124 self.remote_node_info = None 8125 self.node_secondary_ip = None
8126 8127 @staticmethod
8128 - def CheckArguments(mode, remote_node, iallocator):
8129 """Helper function for users of this class. 8130 8131 """ 8132 # check for valid parameter combination 8133 if mode == constants.REPLACE_DISK_CHG: 8134 if remote_node is None and iallocator is None: 8135 raise errors.OpPrereqError("When changing the secondary either an" 8136 " iallocator script must be used or the" 8137 " new node given", errors.ECODE_INVAL) 8138 8139 if remote_node is not None and iallocator is not None: 8140 raise errors.OpPrereqError("Give either the iallocator or the new" 8141 " secondary, not both", errors.ECODE_INVAL) 8142 8143 elif remote_node is not None or iallocator is not None: 8144 # Not replacing the secondary 8145 raise errors.OpPrereqError("The iallocator and new node options can" 8146 " only be used when changing the" 8147 " secondary node", errors.ECODE_INVAL)
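# Parameter combinations accepted/rejected by CheckArguments above
# (illustrative; "hail" stands for any configured iallocator script):
#   (REPLACE_DISK_CHG, "node3", None)   -> ok, new secondary given explicitly
#   (REPLACE_DISK_CHG, None, "hail")    -> ok, secondary chosen by iallocator
#   (REPLACE_DISK_CHG, None, None)      -> OpPrereqError (need one of the two)
#   (REPLACE_DISK_CHG, "node3", "hail") -> OpPrereqError (not both)
#   (REPLACE_DISK_PRI, "node3", None)   -> OpPrereqError (only valid for CHG)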
8148 8149 @staticmethod
8150 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8151 """Compute a new secondary node using an IAllocator. 8152 8153 """ 8154 ial = IAllocator(lu.cfg, lu.rpc, 8155 mode=constants.IALLOCATOR_MODE_RELOC, 8156 name=instance_name, 8157 relocate_from=relocate_from) 8158 8159 ial.Run(iallocator_name) 8160 8161 if not ial.success: 8162 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 8163 " %s" % (iallocator_name, ial.info), 8164 errors.ECODE_NORES) 8165 8166 if len(ial.result) != ial.required_nodes: 8167 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 8168 " of nodes (%s), required %s" % 8169 (iallocator_name, 8170 len(ial.result), ial.required_nodes), 8171 errors.ECODE_FAULT) 8172 8173 remote_node_name = ial.result[0] 8174 8175 lu.LogInfo("Selected new secondary for instance '%s': %s", 8176 instance_name, remote_node_name) 8177 8178 return remote_node_name
8179
8180 - def _FindFaultyDisks(self, node_name):
8181 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, 8182 node_name, True)
8183
8184 - def CheckPrereq(self):
8185 """Check prerequisites. 8186 8187 This checks that the instance is in the cluster. 8188 8189 """ 8190 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name) 8191 assert instance is not None, \ 8192 "Cannot retrieve locked instance %s" % self.instance_name 8193 8194 if instance.disk_template != constants.DT_DRBD8: 8195 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" 8196 " instances", errors.ECODE_INVAL) 8197 8198 if len(instance.secondary_nodes) != 1: 8199 raise errors.OpPrereqError("The instance has a strange layout," 8200 " expected one secondary but found %d" % 8201 len(instance.secondary_nodes), 8202 errors.ECODE_FAULT) 8203 8204 if not self.delay_iallocator: 8205 self._CheckPrereq2()
8206
8207 - def _CheckPrereq2(self):
8208 """Check prerequisites, second part. 8209 8210 This function should always be part of CheckPrereq. It was separated and is 8211 now called from Exec because during node evacuation iallocator was only 8212 called with an unmodified cluster model, not taking planned changes into 8213 account. 8214 8215 """ 8216 instance = self.instance 8217 secondary_node = instance.secondary_nodes[0] 8218 8219 if self.iallocator_name is None: 8220 remote_node = self.remote_node 8221 else: 8222 remote_node = self._RunAllocator(self.lu, self.iallocator_name, 8223 instance.name, instance.secondary_nodes) 8224 8225 if remote_node is not None: 8226 self.remote_node_info = self.cfg.GetNodeInfo(remote_node) 8227 assert self.remote_node_info is not None, \ 8228 "Cannot retrieve locked node %s" % remote_node 8229 else: 8230 self.remote_node_info = None 8231 8232 if remote_node == self.instance.primary_node: 8233 raise errors.OpPrereqError("The specified node is the primary node of" 8234 " the instance.", errors.ECODE_INVAL) 8235 8236 if remote_node == secondary_node: 8237 raise errors.OpPrereqError("The specified node is already the" 8238 " secondary node of the instance.", 8239 errors.ECODE_INVAL) 8240 8241 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, 8242 constants.REPLACE_DISK_CHG): 8243 raise errors.OpPrereqError("Cannot specify disks to be replaced", 8244 errors.ECODE_INVAL) 8245 8246 if self.mode == constants.REPLACE_DISK_AUTO: 8247 faulty_primary = self._FindFaultyDisks(instance.primary_node) 8248 faulty_secondary = self._FindFaultyDisks(secondary_node) 8249 8250 if faulty_primary and faulty_secondary: 8251 raise errors.OpPrereqError("Instance %s has faulty disks on more than" 8252 " one node and can not be repaired" 8253 " automatically" % self.instance_name, 8254 errors.ECODE_STATE) 8255 8256 if faulty_primary: 8257 self.disks = faulty_primary 8258 self.target_node = instance.primary_node 8259 self.other_node = secondary_node 8260 check_nodes = [self.target_node, self.other_node] 8261 elif faulty_secondary: 8262 self.disks = faulty_secondary 8263 self.target_node = secondary_node 8264 self.other_node = instance.primary_node 8265 check_nodes = [self.target_node, self.other_node] 8266 else: 8267 self.disks = [] 8268 check_nodes = [] 8269 8270 else: 8271 # Non-automatic modes 8272 if self.mode == constants.REPLACE_DISK_PRI: 8273 self.target_node = instance.primary_node 8274 self.other_node = secondary_node 8275 check_nodes = [self.target_node, self.other_node] 8276 8277 elif self.mode == constants.REPLACE_DISK_SEC: 8278 self.target_node = secondary_node 8279 self.other_node = instance.primary_node 8280 check_nodes = [self.target_node, self.other_node] 8281 8282 elif self.mode == constants.REPLACE_DISK_CHG: 8283 self.new_node = remote_node 8284 self.other_node = instance.primary_node 8285 self.target_node = secondary_node 8286 check_nodes = [self.new_node, self.other_node] 8287 8288 _CheckNodeNotDrained(self.lu, remote_node) 8289 _CheckNodeVmCapable(self.lu, remote_node) 8290 8291 old_node_info = self.cfg.GetNodeInfo(secondary_node) 8292 assert old_node_info is not None 8293 if old_node_info.offline and not self.early_release: 8294 # doesn't make sense to delay the release 8295 self.early_release = True 8296 self.lu.LogInfo("Old secondary %s is offline, automatically enabling" 8297 " early-release mode", secondary_node) 8298 8299 else: 8300 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % 8301 self.mode) 8302 8303 # If not specified all disks should be replaced 8304 if not 
self.disks: 8305 self.disks = range(len(self.instance.disks)) 8306 8307 for node in check_nodes: 8308 _CheckNodeOnline(self.lu, node) 8309 8310 touched_nodes = frozenset([self.new_node, self.other_node, 8311 self.target_node]) 8312 8313 if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET: 8314 # Release unneeded node locks 8315 for name in self.lu.acquired_locks[locking.LEVEL_NODE]: 8316 if name not in touched_nodes: 8317 self._ReleaseNodeLock(name) 8318 8319 # Check whether disks are valid 8320 for disk_idx in self.disks: 8321 instance.FindDisk(disk_idx) 8322 8323 # Get secondary node IP addresses 8324 self.node_secondary_ip = \ 8325 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip) 8326 for node_name in touched_nodes 8327 if node_name is not None)
8328
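# Summary of the node roles computed in _CheckPrereq2 above (illustrative):
#   REPLACE_DISK_PRI:  target_node=primary,   other_node=secondary, new_node=None
#   REPLACE_DISK_SEC:  target_node=secondary, other_node=primary,   new_node=None
#   REPLACE_DISK_CHG:  target_node=secondary, other_node=primary,   new_node=remote
#   REPLACE_DISK_AUTO: target/other chosen from whichever side reports faulty
#                      disks; if both sides are faulty the operation aborts.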
8329 - def Exec(self, feedback_fn):
8330 """Execute disk replacement. 8331 8332 This dispatches the disk replacement to the appropriate handler. 8333 8334 """ 8335 if self.delay_iallocator: 8336 self._CheckPrereq2() 8337 8338 if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and 8339 __debug__): 8340 # Verify owned locks before starting operation 8341 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE) 8342 assert set(owned_locks) == set(self.node_secondary_ip), \ 8343 "Not owning the correct locks: %s" % (owned_locks, ) 8344 8345 if not self.disks: 8346 feedback_fn("No disks need replacement") 8347 return 8348 8349 feedback_fn("Replacing disk(s) %s for %s" % 8350 (utils.CommaJoin(self.disks), self.instance.name)) 8351 8352 activate_disks = (not self.instance.admin_up) 8353 8354 # Activate the instance disks if we're replacing them on a down instance 8355 if activate_disks: 8356 _StartInstanceDisks(self.lu, self.instance, True) 8357 8358 try: 8359 # Should we replace the secondary node? 8360 if self.new_node is not None: 8361 fn = self._ExecDrbd8Secondary 8362 else: 8363 fn = self._ExecDrbd8DiskOnly 8364 8365 result = fn(feedback_fn) 8366 finally: 8367 # Deactivate the instance disks if we're replacing them on a 8368 # down instance 8369 if activate_disks: 8370 _SafeShutdownInstanceDisks(self.lu, self.instance) 8371 8372 if __debug__: 8373 # Verify owned locks 8374 owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE) 8375 assert ((self.early_release and not owned_locks) or 8376 (not self.early_release and 8377 set(owned_locks) == set(self.node_secondary_ip))), \ 8378 ("Not owning the correct locks, early_release=%s, owned=%r" % 8379 (self.early_release, owned_locks)) 8380 8381 return result
8382
8383 - def _CheckVolumeGroup(self, nodes):
8384 self.lu.LogInfo("Checking volume groups") 8385 8386 vgname = self.cfg.GetVGName() 8387 8388 # Make sure volume group exists on all involved nodes 8389 results = self.rpc.call_vg_list(nodes) 8390 if not results: 8391 raise errors.OpExecError("Can't list volume groups on the nodes") 8392 8393 for node in nodes: 8394 res = results[node] 8395 res.Raise("Error checking node %s" % node) 8396 if vgname not in res.payload: 8397 raise errors.OpExecError("Volume group '%s' not found on node %s" % 8398 (vgname, node))
8399
8400 - def _CheckDisksExistence(self, nodes):
8401 # Check disk existence 8402 for idx, dev in enumerate(self.instance.disks): 8403 if idx not in self.disks: 8404 continue 8405 8406 for node in nodes: 8407 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node)) 8408 self.cfg.SetDiskID(dev, node) 8409 8410 result = self.rpc.call_blockdev_find(node, dev) 8411 8412 msg = result.fail_msg 8413 if msg or not result.payload: 8414 if not msg: 8415 msg = "disk not found" 8416 raise errors.OpExecError("Can't find disk/%d on node %s: %s" % 8417 (idx, node, msg))
8418
8419 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8420 for idx, dev in enumerate(self.instance.disks): 8421 if idx not in self.disks: 8422 continue 8423 8424 self.lu.LogInfo("Checking disk/%d consistency on node %s" % 8425 (idx, node_name)) 8426 8427 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary, 8428 ldisk=ldisk): 8429 raise errors.OpExecError("Node %s has degraded storage, unsafe to" 8430 " replace disks for instance %s" % 8431 (node_name, self.instance.name))
8432
8433 - def _CreateNewStorage(self, node_name):
8434 """Create new storage on the primary or secondary node. 8435 8436 This is only used for same-node replaces, not for changing the 8437 secondary node, hence we don't want to modify the existing disk. 8438 8439 """ 8440 iv_names = {} 8441 8442 for idx, dev in enumerate(self.instance.disks): 8443 if idx not in self.disks: 8444 continue 8445 8446 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx)) 8447 8448 self.cfg.SetDiskID(dev, node_name) 8449 8450 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] 8451 names = _GenerateUniqueNames(self.lu, lv_names) 8452 8453 vg_data = dev.children[0].logical_id[0] 8454 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size, 8455 logical_id=(vg_data, names[0])) 8456 vg_meta = dev.children[1].logical_id[0] 8457 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128, 8458 logical_id=(vg_meta, names[1])) 8459 8460 new_lvs = [lv_data, lv_meta] 8461 old_lvs = [child.Copy() for child in dev.children] 8462 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) 8463 8464 # we pass force_create=True to force the LVM creation 8465 for new_lv in new_lvs: 8466 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True, 8467 _GetInstanceInfoText(self.instance), False) 8468 8469 return iv_names
8470
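# Illustrative sketch (not part of the module): _CreateNewStorage above pairs
# every replaced disk with two fresh LVs, a data volume of the disk's size and
# a 128 MB metadata volume; _GenerateUniqueNames prepends a unique identifier
# to the ".diskN_data"/".diskN_meta" suffixes. uuid4 stands in for that
# identifier here purely for illustration.
import uuid

def _SketchNewLvNames(disk_index, disk_size):
  """Return (lv_name, size_in_mb) pairs for the replacement data/meta LVs."""
  unique = uuid.uuid4().hex
  return [("%s.disk%d_data" % (unique, disk_index), disk_size),
          ("%s.disk%d_meta" % (unique, disk_index), 128)]

# disk/0 of 10240 MB gets a 10240 MB data LV plus a small metadata LV
for name, size in _SketchNewLvNames(0, 10240):
  print("%s: %d MB" % (name, size))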
8471 - def _CheckDevices(self, node_name, iv_names):
8472 for name, (dev, _, _) in iv_names.iteritems(): 8473 self.cfg.SetDiskID(dev, node_name) 8474 8475 result = self.rpc.call_blockdev_find(node_name, dev) 8476 8477 msg = result.fail_msg 8478 if msg or not result.payload: 8479 if not msg: 8480 msg = "disk not found" 8481 raise errors.OpExecError("Can't find DRBD device %s: %s" % 8482 (name, msg)) 8483 8484 if result.payload.is_degraded: 8485 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8486
8487 - def _RemoveOldStorage(self, node_name, iv_names):
8488 for name, (_, old_lvs, _) in iv_names.iteritems(): 8489 self.lu.LogInfo("Remove logical volumes for %s" % name) 8490 8491 for lv in old_lvs: 8492 self.cfg.SetDiskID(lv, node_name) 8493 8494 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg 8495 if msg: 8496 self.lu.LogWarning("Can't remove old LV: %s" % msg, 8497 hint="remove unused LVs manually")
8498
8499 - def _ReleaseNodeLock(self, node_name):
8500 """Releases the lock for a given node.""" 8501 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8502
8503 - def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
8504 """Replace a disk on the primary or secondary for DRBD 8. 8505 8506 The algorithm for replace is quite complicated: 8507 8508 1. for each disk to be replaced: 8509 8510 1. create new LVs on the target node with unique names 8511 1. detach old LVs from the drbd device 8512 1. rename old LVs to name_replaced.<time_t> 8513 1. rename new LVs to old LVs 8514 1. attach the new LVs (with the old names now) to the drbd device 8515 8516 1. wait for sync across all devices 8517 8518 1. for each modified disk: 8519 8520 1. remove old LVs (which have the name name_replaces.<time_t>) 8521 8522 Failures are not very well handled. 8523 8524 """ 8525 steps_total = 6 8526 8527 # Step: check device activation 8528 self.lu.LogStep(1, steps_total, "Check device existence") 8529 self._CheckDisksExistence([self.other_node, self.target_node]) 8530 self._CheckVolumeGroup([self.target_node, self.other_node]) 8531 8532 # Step: check other node consistency 8533 self.lu.LogStep(2, steps_total, "Check peer consistency") 8534 self._CheckDisksConsistency(self.other_node, 8535 self.other_node == self.instance.primary_node, 8536 False) 8537 8538 # Step: create new storage 8539 self.lu.LogStep(3, steps_total, "Allocate new storage") 8540 iv_names = self._CreateNewStorage(self.target_node) 8541 8542 # Step: for each lv, detach+rename*2+attach 8543 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8544 for dev, old_lvs, new_lvs in iv_names.itervalues(): 8545 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name) 8546 8547 result = self.rpc.call_blockdev_removechildren(self.target_node, dev, 8548 old_lvs) 8549 result.Raise("Can't detach drbd from local storage on node" 8550 " %s for device %s" % (self.target_node, dev.iv_name)) 8551 #dev.children = [] 8552 #cfg.Update(instance) 8553 8554 # ok, we created the new LVs, so now we know we have the needed 8555 # storage; as such, we proceed on the target node to rename 8556 # old_lv to _old, and new_lv to old_lv; note that we rename LVs 8557 # using the assumption that logical_id == physical_id (which in 8558 # turn is the unique_id on that node) 8559 8560 # FIXME(iustin): use a better name for the replaced LVs 8561 temp_suffix = int(time.time()) 8562 ren_fn = lambda d, suff: (d.physical_id[0], 8563 d.physical_id[1] + "_replaced-%s" % suff) 8564 8565 # Build the rename list based on what LVs exist on the node 8566 rename_old_to_new = [] 8567 for to_ren in old_lvs: 8568 result = self.rpc.call_blockdev_find(self.target_node, to_ren) 8569 if not result.fail_msg and result.payload: 8570 # device exists 8571 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) 8572 8573 self.lu.LogInfo("Renaming the old LVs on the target node") 8574 result = self.rpc.call_blockdev_rename(self.target_node, 8575 rename_old_to_new) 8576 result.Raise("Can't rename old LVs on node %s" % self.target_node) 8577 8578 # Now we rename the new LVs to the old LVs 8579 self.lu.LogInfo("Renaming the new LVs on the target node") 8580 rename_new_to_old = [(new, old.physical_id) 8581 for old, new in zip(old_lvs, new_lvs)] 8582 result = self.rpc.call_blockdev_rename(self.target_node, 8583 rename_new_to_old) 8584 result.Raise("Can't rename new LVs on node %s" % self.target_node) 8585 8586 # Intermediate steps of in memory modifications 8587 for old, new in zip(old_lvs, new_lvs): 8588 new.logical_id = old.logical_id 8589 self.cfg.SetDiskID(new, self.target_node) 8590 8591 # We need to modify old_lvs so that removal later removes the 8592 # right LVs, not the newly added ones; 
note that old_lvs is a 8593 # copy here 8594 for disk in old_lvs: 8595 disk.logical_id = ren_fn(disk, temp_suffix) 8596 self.cfg.SetDiskID(disk, self.target_node) 8597 8598 # Now that the new lvs have the old name, we can add them to the device 8599 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node) 8600 result = self.rpc.call_blockdev_addchildren(self.target_node, dev, 8601 new_lvs) 8602 msg = result.fail_msg 8603 if msg: 8604 for new_lv in new_lvs: 8605 msg2 = self.rpc.call_blockdev_remove(self.target_node, 8606 new_lv).fail_msg 8607 if msg2: 8608 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, 8609 hint=("cleanup manually the unused logical" 8610 "volumes")) 8611 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) 8612 8613 cstep = 5 8614 if self.early_release: 8615 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8616 cstep += 1 8617 self._RemoveOldStorage(self.target_node, iv_names) 8618 # WARNING: we release both node locks here, do not do other RPCs 8619 # than WaitForSync to the primary node 8620 self._ReleaseNodeLock([self.target_node, self.other_node]) 8621 8622 # Wait for sync 8623 # This can fail as the old devices are degraded and _WaitForSync 8624 # does a combined result over all disks, so we don't check its return value 8625 self.lu.LogStep(cstep, steps_total, "Sync devices") 8626 cstep += 1 8627 _WaitForSync(self.lu, self.instance) 8628 8629 # Check all devices manually 8630 self._CheckDevices(self.instance.primary_node, iv_names) 8631 8632 # Step: remove old storage 8633 if not self.early_release: 8634 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8635 cstep += 1 8636 self._RemoveOldStorage(self.target_node, iv_names)
8637
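# Illustrative sketch (not part of the module): the heart of the disk-only
# replace above is a detach/rename/attach dance, so that the freshly created
# LVs end up carrying the names the DRBD device already references. The helper
# below reproduces only the name bookkeeping on plain (vg, lv_name) tuples;
# the real code drives it through blockdev RPCs on objects.Disk instances.
import time

def _SketchRenamePlan(old_lvs, new_lvs, now=None):
  """Return the (old -> temporary, new -> old) rename lists."""
  suffix = int(now if now is not None else time.time())
  old_to_temp = [((vg, lv), (vg, "%s_replaced-%s" % (lv, suffix)))
                 for (vg, lv) in old_lvs]
  new_to_old = [(new, old) for (old, new) in zip(old_lvs, new_lvs)]
  return (old_to_temp, new_to_old)

old = [("xenvg", "disk0_data"), ("xenvg", "disk0_meta")]
new = [("xenvg", "uuid0.disk0_data"), ("xenvg", "uuid0.disk0_meta")]
old_to_temp, new_to_old = _SketchRenamePlan(old, new, now=1300000000)
# After both rename passes the new LVs carry the old names and can be attached
# to the DRBD device; the *_replaced-<time_t> LVs are what "Removing old
# storage" deletes later.
assert new_to_old[0] == (("xenvg", "uuid0.disk0_data"), ("xenvg", "disk0_data"))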
8638 - def _ExecDrbd8Secondary(self, feedback_fn):
8639 """Replace the secondary node for DRBD 8. 8640 8641 The algorithm for replace is quite complicated: 8642 - for all disks of the instance: 8643 - create new LVs on the new node with same names 8644 - shutdown the drbd device on the old secondary 8645 - disconnect the drbd network on the primary 8646 - create the drbd device on the new secondary 8647 - network attach the drbd on the primary, using an artifice: 8648 the drbd code for Attach() will connect to the network if it 8649 finds a device which is connected to the good local disks but 8650 not network enabled 8651 - wait for sync across all devices 8652 - remove all disks from the old secondary 8653 8654 Failures are not very well handled. 8655 8656 """ 8657 steps_total = 6 8658 8659 # Step: check device activation 8660 self.lu.LogStep(1, steps_total, "Check device existence") 8661 self._CheckDisksExistence([self.instance.primary_node]) 8662 self._CheckVolumeGroup([self.instance.primary_node]) 8663 8664 # Step: check other node consistency 8665 self.lu.LogStep(2, steps_total, "Check peer consistency") 8666 self._CheckDisksConsistency(self.instance.primary_node, True, True) 8667 8668 # Step: create new storage 8669 self.lu.LogStep(3, steps_total, "Allocate new storage") 8670 for idx, dev in enumerate(self.instance.disks): 8671 self.lu.LogInfo("Adding new local storage on %s for disk/%d" % 8672 (self.new_node, idx)) 8673 # we pass force_create=True to force LVM creation 8674 for new_lv in dev.children: 8675 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True, 8676 _GetInstanceInfoText(self.instance), False) 8677 8678 # Step 4: dbrd minors and drbd setups changes 8679 # after this, we must manually remove the drbd minors on both the 8680 # error and the success paths 8681 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 8682 minors = self.cfg.AllocateDRBDMinor([self.new_node 8683 for dev in self.instance.disks], 8684 self.instance.name) 8685 logging.debug("Allocated minors %r", minors) 8686 8687 iv_names = {} 8688 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): 8689 self.lu.LogInfo("activating a new drbd on %s for disk/%d" % 8690 (self.new_node, idx)) 8691 # create new devices on new_node; note that we create two IDs: 8692 # one without port, so the drbd will be activated without 8693 # networking information on the new node at this stage, and one 8694 # with network, for the latter activation in step 4 8695 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id 8696 if self.instance.primary_node == o_node1: 8697 p_minor = o_minor1 8698 else: 8699 assert self.instance.primary_node == o_node2, "Three-node instance?" 
8700 p_minor = o_minor2 8701 8702 new_alone_id = (self.instance.primary_node, self.new_node, None, 8703 p_minor, new_minor, o_secret) 8704 new_net_id = (self.instance.primary_node, self.new_node, o_port, 8705 p_minor, new_minor, o_secret) 8706 8707 iv_names[idx] = (dev, dev.children, new_net_id) 8708 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, 8709 new_net_id) 8710 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, 8711 logical_id=new_alone_id, 8712 children=dev.children, 8713 size=dev.size) 8714 try: 8715 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd, 8716 _GetInstanceInfoText(self.instance), False) 8717 except errors.GenericError: 8718 self.cfg.ReleaseDRBDMinors(self.instance.name) 8719 raise 8720 8721 # We have new devices, shutdown the drbd on the old secondary 8722 for idx, dev in enumerate(self.instance.disks): 8723 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx) 8724 self.cfg.SetDiskID(dev, self.target_node) 8725 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg 8726 if msg: 8727 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" 8728 "node: %s" % (idx, msg), 8729 hint=("Please cleanup this device manually as" 8730 " soon as possible")) 8731 8732 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") 8733 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], 8734 self.node_secondary_ip, 8735 self.instance.disks)\ 8736 [self.instance.primary_node] 8737 8738 msg = result.fail_msg 8739 if msg: 8740 # detaches didn't succeed (unlikely) 8741 self.cfg.ReleaseDRBDMinors(self.instance.name) 8742 raise errors.OpExecError("Can't detach the disks from the network on" 8743 " old node: %s" % (msg,)) 8744 8745 # if we managed to detach at least one, we update all the disks of 8746 # the instance to point to the new secondary 8747 self.lu.LogInfo("Updating instance configuration") 8748 for dev, _, new_logical_id in iv_names.itervalues(): 8749 dev.logical_id = new_logical_id 8750 self.cfg.SetDiskID(dev, self.instance.primary_node) 8751 8752 self.cfg.Update(self.instance, feedback_fn) 8753 8754 # and now perform the drbd attach 8755 self.lu.LogInfo("Attaching primary drbds to new secondary" 8756 " (standalone => connected)") 8757 result = self.rpc.call_drbd_attach_net([self.instance.primary_node, 8758 self.new_node], 8759 self.node_secondary_ip, 8760 self.instance.disks, 8761 self.instance.name, 8762 False) 8763 for to_node, to_result in result.items(): 8764 msg = to_result.fail_msg 8765 if msg: 8766 self.lu.LogWarning("Can't attach drbd disks on node %s: %s", 8767 to_node, msg, 8768 hint=("please do a gnt-instance info to see the" 8769 " status of disks")) 8770 cstep = 5 8771 if self.early_release: 8772 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8773 cstep += 1 8774 self._RemoveOldStorage(self.target_node, iv_names) 8775 # WARNING: we release all node locks here, do not do other RPCs 8776 # than WaitForSync to the primary node 8777 self._ReleaseNodeLock([self.instance.primary_node, 8778 self.target_node, 8779 self.new_node]) 8780 8781 # Wait for sync 8782 # This can fail as the old devices are degraded and _WaitForSync 8783 # does a combined result over all disks, so we don't check its return value 8784 self.lu.LogStep(cstep, steps_total, "Sync devices") 8785 cstep += 1 8786 _WaitForSync(self.lu, self.instance) 8787 8788 # Check all devices manually 8789 self._CheckDevices(self.instance.primary_node, iv_names) 8790 8791 # Step: remove 
old storage 8792 if not self.early_release: 8793 self.lu.LogStep(cstep, steps_total, "Removing old storage") 8794 self._RemoveOldStorage(self.target_node, iv_names)
8795
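# Illustrative sketch (not part of the module): when the secondary changes,
# each DRBD8 disk carries a logical_id of the form
# (node_a, node_b, port, minor_a, minor_b, secret). The device is first
# created on the new node with port=None (standalone) and only switched to the
# networked id once the primary reattaches, exactly as the step 4 comments
# above describe.
def _SketchNewDrbdIds(old_id, primary_node, new_secondary, new_minor):
  """Return the (standalone_id, networked_id) pair for the new secondary."""
  (node_a, node_b, port, minor_a, minor_b, secret) = old_id
  p_minor = minor_a if primary_node == node_a else minor_b
  alone_id = (primary_node, new_secondary, None, p_minor, new_minor, secret)
  net_id = (primary_node, new_secondary, port, p_minor, new_minor, secret)
  return (alone_id, net_id)

old_id = ("node1", "node2", 11000, 0, 1, "shared-secret")
alone_id, net_id = _SketchNewDrbdIds(old_id, "node1", "node3", 7)
assert alone_id == ("node1", "node3", None, 0, 7, "shared-secret")
assert net_id == ("node1", "node3", 11000, 0, 7, "shared-secret")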
8796 8797 -class LURepairNodeStorage(NoHooksLU):
8798 """Repairs the volume group on a node. 8799 8800 """ 8801 REQ_BGL = False 8802
8803 - def CheckArguments(self):
8804 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 8805 8806 storage_type = self.op.storage_type 8807 8808 if (constants.SO_FIX_CONSISTENCY not in 8809 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): 8810 raise errors.OpPrereqError("Storage units of type '%s' can not be" 8811 " repaired" % storage_type, 8812 errors.ECODE_INVAL)
8813
8814 - def ExpandNames(self):
8815 self.needed_locks = { 8816 locking.LEVEL_NODE: [self.op.node_name], 8817 }
8818
8819 - def _CheckFaultyDisks(self, instance, node_name):
8820 """Ensure faulty disks abort the opcode or at least warn.""" 8821 try: 8822 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, 8823 node_name, True): 8824 raise errors.OpPrereqError("Instance '%s' has faulty disks on" 8825 " node '%s'" % (instance.name, node_name), 8826 errors.ECODE_STATE) 8827 except errors.OpPrereqError, err: 8828 if self.op.ignore_consistency: 8829 self.proc.LogWarning(str(err.args[0])) 8830 else: 8831 raise
8832
8833 - def CheckPrereq(self):
8834 """Check prerequisites. 8835 8836 """ 8837 # Check whether any instance on this node has faulty disks 8838 for inst in _GetNodeInstances(self.cfg, self.op.node_name): 8839 if not inst.admin_up: 8840 continue 8841 check_nodes = set(inst.all_nodes) 8842 check_nodes.discard(self.op.node_name) 8843 for inst_node_name in check_nodes: 8844 self._CheckFaultyDisks(inst, inst_node_name)
8845
8846 - def Exec(self, feedback_fn):
8847 feedback_fn("Repairing storage unit '%s' on %s ..." % 8848 (self.op.name, self.op.node_name)) 8849 8850 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 8851 result = self.rpc.call_storage_execute(self.op.node_name, 8852 self.op.storage_type, st_args, 8853 self.op.name, 8854 constants.SO_FIX_CONSISTENCY) 8855 result.Raise("Failed to repair storage unit '%s' on %s" % 8856 (self.op.name, self.op.node_name))
8857
8858 8859 -class LUNodeEvacStrategy(NoHooksLU):
8860 """Computes the node evacuation strategy. 8861 8862 """ 8863 REQ_BGL = False 8864
8865 - def CheckArguments(self):
8866 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8867
8868 - def ExpandNames(self):
8869 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 8870 self.needed_locks = locks = {} 8871 if self.op.remote_node is None: 8872 locks[locking.LEVEL_NODE] = locking.ALL_SET 8873 else: 8874 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 8875 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8876
8877 - def Exec(self, feedback_fn):
8878 instances = [] 8879 for node in self.op.nodes: 8880 instances.extend(_GetNodeSecondaryInstances(self.cfg, node)) 8881 if not instances: 8882 return [] 8883 8884 if self.op.remote_node is not None: 8885 result = [] 8886 for i in instances: 8887 if i.primary_node == self.op.remote_node: 8888 raise errors.OpPrereqError("Node %s is the primary node of" 8889 " instance %s, cannot use it as" 8890 " secondary" % 8891 (self.op.remote_node, i.name), 8892 errors.ECODE_INVAL) 8893 result.append([i.name, self.op.remote_node]) 8894 else: 8895 ial = IAllocator(self.cfg, self.rpc, 8896 mode=constants.IALLOCATOR_MODE_MEVAC, 8897 evac_nodes=self.op.nodes) 8898 ial.Run(self.op.iallocator, validate=True) 8899 if not ial.success: 8900 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info, 8901 errors.ECODE_NORES) 8902 result = ial.result 8903 return result
8904
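# Illustrative sketch (not part of the module): the opcode returns a list of
# [instance_name, new_secondary] pairs, built either directly from the
# explicit remote node or from the MEVAC iallocator result. A minimal
# reproduction of the explicit-node branch, with dicts standing in for the
# instance objects:
def _SketchEvacPairs(secondary_instances, remote_node):
  result = []
  for inst in secondary_instances:
    if inst["primary_node"] == remote_node:
      raise ValueError("%s is the primary node of %s" %
                       (remote_node, inst["name"]))
    result.append([inst["name"], remote_node])
  return result

insts = [{"name": "web1", "primary_node": "node1"},
         {"name": "db1", "primary_node": "node2"}]
assert _SketchEvacPairs(insts, "node3") == [["web1", "node3"],
                                            ["db1", "node3"]]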
8905 8906 -class LUInstanceGrowDisk(LogicalUnit):
8907 """Grow a disk of an instance. 8908 8909 """ 8910 HPATH = "disk-grow" 8911 HTYPE = constants.HTYPE_INSTANCE 8912 REQ_BGL = False 8913
8914 - def ExpandNames(self):
8915 self._ExpandAndLockInstance() 8916 self.needed_locks[locking.LEVEL_NODE] = [] 8917 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8918
8919 - def DeclareLocks(self, level):
8920 if level == locking.LEVEL_NODE: 8921 self._LockInstancesNodes()
8922
8923 - def BuildHooksEnv(self):
8924 """Build hooks env. 8925 8926 This runs on the master, the primary and all the secondaries. 8927 8928 """ 8929 env = { 8930 "DISK": self.op.disk, 8931 "AMOUNT": self.op.amount, 8932 } 8933 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 8934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 8935 return env, nl, nl
8936
8937 - def CheckPrereq(self):
8938 """Check prerequisites. 8939 8940 This checks that the instance is in the cluster. 8941 8942 """ 8943 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8944 assert instance is not None, \ 8945 "Cannot retrieve locked instance %s" % self.op.instance_name 8946 nodenames = list(instance.all_nodes) 8947 for node in nodenames: 8948 _CheckNodeOnline(self, node) 8949 8950 self.instance = instance 8951 8952 if instance.disk_template not in constants.DTS_GROWABLE: 8953 raise errors.OpPrereqError("Instance's disk layout does not support" 8954 " growing.", errors.ECODE_INVAL) 8955 8956 self.disk = instance.FindDisk(self.op.disk) 8957 8958 if instance.disk_template != constants.DT_FILE: 8959 # TODO: check the free disk space for file, when that feature 8960 # will be supported 8961 _CheckNodesFreeDiskPerVG(self, nodenames, 8962 self.disk.ComputeGrowth(self.op.amount))
8963
8964 - def Exec(self, feedback_fn):
8965 """Execute disk grow. 8966 8967 """ 8968 instance = self.instance 8969 disk = self.disk 8970 8971 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) 8972 if not disks_ok: 8973 raise errors.OpExecError("Cannot activate block device to grow") 8974 8975 for node in instance.all_nodes: 8976 self.cfg.SetDiskID(disk, node) 8977 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount) 8978 result.Raise("Grow request failed to node %s" % node) 8979 8980 # TODO: Rewrite code to work properly 8981 # DRBD goes into sync mode for a short amount of time after executing the 8982 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby 8983 # calling "resize" in sync mode fails. Sleeping for a short amount of 8984 # time is a work-around. 8985 time.sleep(5) 8986 8987 disk.RecordGrow(self.op.amount) 8988 self.cfg.Update(instance, feedback_fn) 8989 if self.op.wait_for_sync: 8990 disk_abort = not _WaitForSync(self, instance, disks=[disk]) 8991 if disk_abort: 8992 self.proc.LogWarning("Warning: disk sync-ing has not returned a good" 8993 " status.\nPlease check the instance.") 8994 if not instance.admin_up: 8995 _SafeShutdownInstanceDisks(self, instance, disks=[disk]) 8996 elif not instance.admin_up: 8997 self.proc.LogWarning("Not shutting down the disk even if the instance is" 8998 " not supposed to be running because no wait for" 8999 " sync mode was requested.")
9000
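# Illustrative sketch (not part of the module): the grow operation applies the
# same delta on every node holding the disk before the new size is recorded in
# the configuration, and waiting for resync is optional. The callables below
# are hypothetical stand-ins for the RPC and config-update calls used above.
def _SketchGrowDisk(nodes, grow_on_node, record_growth, amount,
                    wait_for_sync=True, wait_fn=None):
  for node in nodes:              # grow the backing device everywhere first
    grow_on_node(node, amount)
  # the real code sleeps ~5s here to work around a DRBD <8.0.13 resize bug
  record_growth(amount)           # only then record the new size
  if wait_for_sync and wait_fn is not None:
    return wait_fn()              # whether the resync completed cleanly
  return None

calls = []
_SketchGrowDisk(["node1", "node2"],
                lambda node, amt: calls.append((node, amt)),
                lambda amt: calls.append(("config", amt)),
                1024, wait_for_sync=False)
assert calls == [("node1", 1024), ("node2", 1024), ("config", 1024)]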
9001 9002 -class LUInstanceQueryData(NoHooksLU):
9003 """Query runtime instance data. 9004 9005 """ 9006 REQ_BGL = False 9007
9008 - def ExpandNames(self):
9009 self.needed_locks = {} 9010 9011 # Use locking if requested or when non-static information is wanted 9012 if not (self.op.static or self.op.use_locking): 9013 self.LogWarning("Non-static data requested, locks need to be acquired") 9014 self.op.use_locking = True 9015 9016 if self.op.instances or not self.op.use_locking: 9017 # Expand instance names right here 9018 self.wanted_names = _GetWantedInstances(self, self.op.instances) 9019 else: 9020 # Will use acquired locks 9021 self.wanted_names = None 9022 9023 if self.op.use_locking: 9024 self.share_locks = dict.fromkeys(locking.LEVELS, 1) 9025 9026 if self.wanted_names is None: 9027 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 9028 else: 9029 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names 9030 9031 self.needed_locks[locking.LEVEL_NODE] = [] 9032 self.share_locks = dict.fromkeys(locking.LEVELS, 1) 9033 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9034
9035 - def DeclareLocks(self, level):
9036 if self.op.use_locking and level == locking.LEVEL_NODE: 9037 self._LockInstancesNodes()
9038
9039 - def CheckPrereq(self):
9040 """Check prerequisites. 9041 9042 This only checks the optional instance list against the existing names. 9043 9044 """ 9045 if self.wanted_names is None: 9046 assert self.op.use_locking, "Locking was not used" 9047 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 9048 9049 self.wanted_instances = [self.cfg.GetInstanceInfo(name) 9050 for name in self.wanted_names]
9051
9052 - def _ComputeBlockdevStatus(self, node, instance_name, dev):
9053 """Returns the status of a block device 9054 9055 """ 9056 if self.op.static or not node: 9057 return None 9058 9059 self.cfg.SetDiskID(dev, node) 9060 9061 result = self.rpc.call_blockdev_find(node, dev) 9062 if result.offline: 9063 return None 9064 9065 result.Raise("Can't compute disk status for %s" % instance_name) 9066 9067 status = result.payload 9068 if status is None: 9069 return None 9070 9071 return (status.dev_path, status.major, status.minor, 9072 status.sync_percent, status.estimated_time, 9073 status.is_degraded, status.ldisk_status)
9074
9075 - def _ComputeDiskStatus(self, instance, snode, dev):
9076 """Compute block device status. 9077 9078 """ 9079 if dev.dev_type in constants.LDS_DRBD: 9080 # we change the snode then (otherwise we use the one passed in) 9081 if dev.logical_id[0] == instance.primary_node: 9082 snode = dev.logical_id[1] 9083 else: 9084 snode = dev.logical_id[0] 9085 9086 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, 9087 instance.name, dev) 9088 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev) 9089 9090 if dev.children: 9091 dev_children = [self._ComputeDiskStatus(instance, snode, child) 9092 for child in dev.children] 9093 else: 9094 dev_children = [] 9095 9096 return { 9097 "iv_name": dev.iv_name, 9098 "dev_type": dev.dev_type, 9099 "logical_id": dev.logical_id, 9100 "physical_id": dev.physical_id, 9101 "pstatus": dev_pstatus, 9102 "sstatus": dev_sstatus, 9103 "children": dev_children, 9104 "mode": dev.mode, 9105 "size": dev.size, 9106 }
9107
9108 - def Exec(self, feedback_fn):
9109 """Gather and return data""" 9110 result = {} 9111 9112 cluster = self.cfg.GetClusterInfo() 9113 9114 for instance in self.wanted_instances: 9115 if not self.op.static: 9116 remote_info = self.rpc.call_instance_info(instance.primary_node, 9117 instance.name, 9118 instance.hypervisor) 9119 remote_info.Raise("Error checking node %s" % instance.primary_node) 9120 remote_info = remote_info.payload 9121 if remote_info and "state" in remote_info: 9122 remote_state = "up" 9123 else: 9124 remote_state = "down" 9125 else: 9126 remote_state = None 9127 if instance.admin_up: 9128 config_state = "up" 9129 else: 9130 config_state = "down" 9131 9132 disks = [self._ComputeDiskStatus(instance, None, device) 9133 for device in instance.disks] 9134 9135 result[instance.name] = { 9136 "name": instance.name, 9137 "config_state": config_state, 9138 "run_state": remote_state, 9139 "pnode": instance.primary_node, 9140 "snodes": instance.secondary_nodes, 9141 "os": instance.os, 9142 # this happens to be the same format used for hooks 9143 "nics": _NICListToTuple(self, instance.nics), 9144 "disk_template": instance.disk_template, 9145 "disks": disks, 9146 "hypervisor": instance.hypervisor, 9147 "network_port": instance.network_port, 9148 "hv_instance": instance.hvparams, 9149 "hv_actual": cluster.FillHV(instance, skip_globals=True), 9150 "be_instance": instance.beparams, 9151 "be_actual": cluster.FillBE(instance), 9152 "os_instance": instance.osparams, 9153 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), 9154 "serial_no": instance.serial_no, 9155 "mtime": instance.mtime, 9156 "ctime": instance.ctime, 9157 "uuid": instance.uuid, 9158 } 9159 9160 return result
9161
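# Illustrative sketch (not part of the module): the mapping returned by Exec
# above is keyed by instance name; all values below are invented examples, but
# the keys match the ones assembled in the loop.
_EXAMPLE_QUERY_DATA = {
  "web1.example.com": {
    "name": "web1.example.com",
    "config_state": "up",           # from instance.admin_up
    "run_state": "up",              # None when only static data was requested
    "pnode": "node1.example.com",
    "snodes": ["node2.example.com"],
    "os": "debootstrap+default",
    "nics": [("198.51.100.10", "aa:00:00:12:34:56", "bridged", "xen-br0")],
    "disk_template": "drbd",
    "disks": [],                    # per-disk dicts from _ComputeDiskStatus
    "hypervisor": "xen-pvm",
    "network_port": None,
    "hv_instance": {}, "hv_actual": {},
    "be_instance": {}, "be_actual": {},
    "os_instance": {}, "os_actual": {},
    "serial_no": 5,
    "mtime": 1300000000.0, "ctime": 1290000000.0,
    "uuid": "00000000-0000-0000-0000-000000000000",
  },
}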
9162 9163 -class LUInstanceSetParams(LogicalUnit):
9164 """Modifies an instances's parameters. 9165 9166 """ 9167 HPATH = "instance-modify" 9168 HTYPE = constants.HTYPE_INSTANCE 9169 REQ_BGL = False 9170
9171 - def CheckArguments(self):
9172 if not (self.op.nics or self.op.disks or self.op.disk_template or 9173 self.op.hvparams or self.op.beparams or self.op.os_name): 9174 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) 9175 9176 if self.op.hvparams: 9177 _CheckGlobalHvParams(self.op.hvparams) 9178 9179 # Disk validation 9180 disk_addremove = 0 9181 for disk_op, disk_dict in self.op.disks: 9182 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES) 9183 if disk_op == constants.DDM_REMOVE: 9184 disk_addremove += 1 9185 continue 9186 elif disk_op == constants.DDM_ADD: 9187 disk_addremove += 1 9188 else: 9189 if not isinstance(disk_op, int): 9190 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL) 9191 if not isinstance(disk_dict, dict): 9192 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict 9193 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 9194 9195 if disk_op == constants.DDM_ADD: 9196 mode = disk_dict.setdefault('mode', constants.DISK_RDWR) 9197 if mode not in constants.DISK_ACCESS_SET: 9198 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, 9199 errors.ECODE_INVAL) 9200 size = disk_dict.get('size', None) 9201 if size is None: 9202 raise errors.OpPrereqError("Required disk parameter size missing", 9203 errors.ECODE_INVAL) 9204 try: 9205 size = int(size) 9206 except (TypeError, ValueError), err: 9207 raise errors.OpPrereqError("Invalid disk size parameter: %s" % 9208 str(err), errors.ECODE_INVAL) 9209 disk_dict['size'] = size 9210 else: 9211 # modification of disk 9212 if 'size' in disk_dict: 9213 raise errors.OpPrereqError("Disk size change not possible, use" 9214 " grow-disk", errors.ECODE_INVAL) 9215 9216 if disk_addremove > 1: 9217 raise errors.OpPrereqError("Only one disk add or remove operation" 9218 " supported at a time", errors.ECODE_INVAL) 9219 9220 if self.op.disks and self.op.disk_template is not None: 9221 raise errors.OpPrereqError("Disk template conversion and other disk" 9222 " changes not supported at the same time", 9223 errors.ECODE_INVAL) 9224 9225 if (self.op.disk_template and 9226 self.op.disk_template in constants.DTS_NET_MIRROR and 9227 self.op.remote_node is None): 9228 raise errors.OpPrereqError("Changing the disk template to a mirrored" 9229 " one requires specifying a secondary node", 9230 errors.ECODE_INVAL) 9231 9232 # NIC validation 9233 nic_addremove = 0 9234 for nic_op, nic_dict in self.op.nics: 9235 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES) 9236 if nic_op == constants.DDM_REMOVE: 9237 nic_addremove += 1 9238 continue 9239 elif nic_op == constants.DDM_ADD: 9240 nic_addremove += 1 9241 else: 9242 if not isinstance(nic_op, int): 9243 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL) 9244 if not isinstance(nic_dict, dict): 9245 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict 9246 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 9247 9248 # nic_dict should be a dict 9249 nic_ip = nic_dict.get('ip', None) 9250 if nic_ip is not None: 9251 if nic_ip.lower() == constants.VALUE_NONE: 9252 nic_dict['ip'] = None 9253 else: 9254 if not netutils.IPAddress.IsValid(nic_ip): 9255 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip, 9256 errors.ECODE_INVAL) 9257 9258 nic_bridge = nic_dict.get('bridge', None) 9259 nic_link = nic_dict.get('link', None) 9260 if nic_bridge and nic_link: 9261 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 9262 " at the same time", errors.ECODE_INVAL) 9263 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE: 9264 
nic_dict['bridge'] = None 9265 elif nic_link and nic_link.lower() == constants.VALUE_NONE: 9266 nic_dict['link'] = None 9267 9268 if nic_op == constants.DDM_ADD: 9269 nic_mac = nic_dict.get('mac', None) 9270 if nic_mac is None: 9271 nic_dict['mac'] = constants.VALUE_AUTO 9272 9273 if 'mac' in nic_dict: 9274 nic_mac = nic_dict['mac'] 9275 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9276 nic_mac = utils.NormalizeAndValidateMac(nic_mac) 9277 9278 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO: 9279 raise errors.OpPrereqError("'auto' is not a valid MAC address when" 9280 " modifying an existing nic", 9281 errors.ECODE_INVAL) 9282 9283 if nic_addremove > 1: 9284 raise errors.OpPrereqError("Only one NIC add or remove operation" 9285 " supported at a time", errors.ECODE_INVAL)
9286
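# Illustrative sketch (not part of the module): CheckArguments above expects
# self.op.disks and self.op.nics as lists of (operation, parameters) pairs,
# where the operation is either an add/remove marker or the index of an
# existing device. The literals below use the plain strings "add" and "remove"
# as stand-ins for the constants.DDM_* values.
_EXAMPLE_MODIFICATIONS = {
  # add one 2048 MB disk; "size" is mandatory, "mode" defaults to read-write
  "disks": [("add", {"size": 2048})],
  # change the IP of NIC 0 and add one bridged NIC; at most one add or remove
  # per device kind is accepted in a single call
  "nics": [(0, {"ip": "198.51.100.11"}),
           ("add", {"mac": "auto", "link": "xen-br0"})],
}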
9287 - def ExpandNames(self):
9288 self._ExpandAndLockInstance() 9289 self.needed_locks[locking.LEVEL_NODE] = [] 9290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9291
9292 - def DeclareLocks(self, level):
9293 if level == locking.LEVEL_NODE: 9294 self._LockInstancesNodes() 9295 if self.op.disk_template and self.op.remote_node: 9296 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 9297 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9298
9299 - def BuildHooksEnv(self):
9300 """Build hooks env. 9301 9302 This runs on the master, primary and secondaries. 9303 9304 """ 9305 args = dict() 9306 if constants.BE_MEMORY in self.be_new: 9307 args['memory'] = self.be_new[constants.BE_MEMORY] 9308 if constants.BE_VCPUS in self.be_new: 9309 args['vcpus'] = self.be_new[constants.BE_VCPUS] 9310 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk 9311 # information at all. 9312 if self.op.nics: 9313 args['nics'] = [] 9314 nic_override = dict(self.op.nics) 9315 for idx, nic in enumerate(self.instance.nics): 9316 if idx in nic_override: 9317 this_nic_override = nic_override[idx] 9318 else: 9319 this_nic_override = {} 9320 if 'ip' in this_nic_override: 9321 ip = this_nic_override['ip'] 9322 else: 9323 ip = nic.ip 9324 if 'mac' in this_nic_override: 9325 mac = this_nic_override['mac'] 9326 else: 9327 mac = nic.mac 9328 if idx in self.nic_pnew: 9329 nicparams = self.nic_pnew[idx] 9330 else: 9331 nicparams = self.cluster.SimpleFillNIC(nic.nicparams) 9332 mode = nicparams[constants.NIC_MODE] 9333 link = nicparams[constants.NIC_LINK] 9334 args['nics'].append((ip, mac, mode, link)) 9335 if constants.DDM_ADD in nic_override: 9336 ip = nic_override[constants.DDM_ADD].get('ip', None) 9337 mac = nic_override[constants.DDM_ADD]['mac'] 9338 nicparams = self.nic_pnew[constants.DDM_ADD] 9339 mode = nicparams[constants.NIC_MODE] 9340 link = nicparams[constants.NIC_LINK] 9341 args['nics'].append((ip, mac, mode, link)) 9342 elif constants.DDM_REMOVE in nic_override: 9343 del args['nics'][-1] 9344 9345 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) 9346 if self.op.disk_template: 9347 env["NEW_DISK_TEMPLATE"] = self.op.disk_template 9348 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 9349 return env, nl, nl
9350
9351 - def CheckPrereq(self):
9352 """Check prerequisites. 9353 9354 This only checks the instance list against the existing names. 9355 9356 """ 9357 # checking the new params on the primary/secondary nodes 9358 9359 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 9360 cluster = self.cluster = self.cfg.GetClusterInfo() 9361 assert self.instance is not None, \ 9362 "Cannot retrieve locked instance %s" % self.op.instance_name 9363 pnode = instance.primary_node 9364 nodelist = list(instance.all_nodes) 9365 9366 # OS change 9367 if self.op.os_name and not self.op.force: 9368 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name, 9369 self.op.force_variant) 9370 instance_os = self.op.os_name 9371 else: 9372 instance_os = instance.os 9373 9374 if self.op.disk_template: 9375 if instance.disk_template == self.op.disk_template: 9376 raise errors.OpPrereqError("Instance already has disk template %s" % 9377 instance.disk_template, errors.ECODE_INVAL) 9378 9379 if (instance.disk_template, 9380 self.op.disk_template) not in self._DISK_CONVERSIONS: 9381 raise errors.OpPrereqError("Unsupported disk template conversion from" 9382 " %s to %s" % (instance.disk_template, 9383 self.op.disk_template), 9384 errors.ECODE_INVAL) 9385 _CheckInstanceDown(self, instance, "cannot change disk template") 9386 if self.op.disk_template in constants.DTS_NET_MIRROR: 9387 if self.op.remote_node == pnode: 9388 raise errors.OpPrereqError("Given new secondary node %s is the same" 9389 " as the primary node of the instance" % 9390 self.op.remote_node, errors.ECODE_STATE) 9391 _CheckNodeOnline(self, self.op.remote_node) 9392 _CheckNodeNotDrained(self, self.op.remote_node) 9393 # FIXME: here we assume that the old instance type is DT_PLAIN 9394 assert instance.disk_template == constants.DT_PLAIN 9395 disks = [{"size": d.size, "vg": d.logical_id[0]} 9396 for d in instance.disks] 9397 required = _ComputeDiskSizePerVG(self.op.disk_template, disks) 9398 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required) 9399 9400 # hvparams processing 9401 if self.op.hvparams: 9402 hv_type = instance.hypervisor 9403 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams) 9404 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) 9405 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict) 9406 9407 # local check 9408 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new) 9409 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new) 9410 self.hv_new = hv_new # the new actual values 9411 self.hv_inst = i_hvdict # the new dict (without defaults) 9412 else: 9413 self.hv_new = self.hv_inst = {} 9414 9415 # beparams processing 9416 if self.op.beparams: 9417 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams, 9418 use_none=True) 9419 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) 9420 be_new = cluster.SimpleFillBE(i_bedict) 9421 self.be_new = be_new # the new actual values 9422 self.be_inst = i_bedict # the new dict (without defaults) 9423 else: 9424 self.be_new = self.be_inst = {} 9425 be_old = cluster.FillBE(instance) 9426 9427 # osparams processing 9428 if self.op.osparams: 9429 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 9430 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 9431 self.os_inst = i_osdict # the new dict (without defaults) 9432 else: 9433 self.os_inst = {} 9434 9435 self.warn = [] 9436 9437 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and 9438 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]): 9439 
mem_check_list = [pnode] 9440 if be_new[constants.BE_AUTO_BALANCE]: 9441 # either we changed auto_balance to yes or it was from before 9442 mem_check_list.extend(instance.secondary_nodes) 9443 instance_info = self.rpc.call_instance_info(pnode, instance.name, 9444 instance.hypervisor) 9445 nodeinfo = self.rpc.call_node_info(mem_check_list, None, 9446 instance.hypervisor) 9447 pninfo = nodeinfo[pnode] 9448 msg = pninfo.fail_msg 9449 if msg: 9450 # Assume the primary node is unreachable and go ahead 9451 self.warn.append("Can't get info from primary node %s: %s" % 9452 (pnode, msg)) 9453 elif not isinstance(pninfo.payload.get('memory_free', None), int): 9454 self.warn.append("Node data from primary node %s doesn't contain" 9455 " free memory information" % pnode) 9456 elif instance_info.fail_msg: 9457 self.warn.append("Can't get instance runtime information: %s" % 9458 instance_info.fail_msg) 9459 else: 9460 if instance_info.payload: 9461 current_mem = int(instance_info.payload['memory']) 9462 else: 9463 # Assume instance not running 9464 # (there is a slight race condition here, but it's not very probable, 9465 # and we have no other way to check) 9466 current_mem = 0 9467 miss_mem = (be_new[constants.BE_MEMORY] - current_mem - 9468 pninfo.payload['memory_free']) 9469 if miss_mem > 0: 9470 raise errors.OpPrereqError("This change will prevent the instance" 9471 " from starting, due to %d MB of memory" 9472 " missing on its primary node" % miss_mem, 9473 errors.ECODE_NORES) 9474 9475 if be_new[constants.BE_AUTO_BALANCE]: 9476 for node, nres in nodeinfo.items(): 9477 if node not in instance.secondary_nodes: 9478 continue 9479 nres.Raise("Can't get info from secondary node %s" % node, 9480 prereq=True, ecode=errors.ECODE_STATE) 9481 if not isinstance(nres.payload.get('memory_free', None), int): 9482 raise errors.OpPrereqError("Secondary node %s didn't return free" 9483 " memory information" % node, 9484 errors.ECODE_STATE) 9485 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']: 9486 raise errors.OpPrereqError("This change will prevent the instance" 9487 " from failover to its secondary node" 9488 " %s, due to not enough memory" % node, 9489 errors.ECODE_STATE) 9490 9491 # NIC processing 9492 self.nic_pnew = {} 9493 self.nic_pinst = {} 9494 for nic_op, nic_dict in self.op.nics: 9495 if nic_op == constants.DDM_REMOVE: 9496 if not instance.nics: 9497 raise errors.OpPrereqError("Instance has no NICs, cannot remove", 9498 errors.ECODE_INVAL) 9499 continue 9500 if nic_op != constants.DDM_ADD: 9501 # an existing nic 9502 if not instance.nics: 9503 raise errors.OpPrereqError("Invalid NIC index %s, instance has" 9504 " no NICs" % nic_op, 9505 errors.ECODE_INVAL) 9506 if nic_op < 0 or nic_op >= len(instance.nics): 9507 raise errors.OpPrereqError("Invalid NIC index %s, valid values" 9508 " are 0 to %d" % 9509 (nic_op, len(instance.nics) - 1), 9510 errors.ECODE_INVAL) 9511 old_nic_params = instance.nics[nic_op].nicparams 9512 old_nic_ip = instance.nics[nic_op].ip 9513 else: 9514 old_nic_params = {} 9515 old_nic_ip = None 9516 9517 update_params_dict = dict([(key, nic_dict[key]) 9518 for key in constants.NICS_PARAMETERS 9519 if key in nic_dict]) 9520 9521 if 'bridge' in nic_dict: 9522 update_params_dict[constants.NIC_LINK] = nic_dict['bridge'] 9523 9524 new_nic_params = _GetUpdatedParams(old_nic_params, 9525 update_params_dict) 9526 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES) 9527 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params) 9528 
objects.NIC.CheckParameterSyntax(new_filled_nic_params) 9529 self.nic_pinst[nic_op] = new_nic_params 9530 self.nic_pnew[nic_op] = new_filled_nic_params 9531 new_nic_mode = new_filled_nic_params[constants.NIC_MODE] 9532 9533 if new_nic_mode == constants.NIC_MODE_BRIDGED: 9534 nic_bridge = new_filled_nic_params[constants.NIC_LINK] 9535 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg 9536 if msg: 9537 msg = "Error checking bridges on node %s: %s" % (pnode, msg) 9538 if self.op.force: 9539 self.warn.append(msg) 9540 else: 9541 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 9542 if new_nic_mode == constants.NIC_MODE_ROUTED: 9543 if 'ip' in nic_dict: 9544 nic_ip = nic_dict['ip'] 9545 else: 9546 nic_ip = old_nic_ip 9547 if nic_ip is None: 9548 raise errors.OpPrereqError('Cannot set the nic ip to None' 9549 ' on a routed nic', errors.ECODE_INVAL) 9550 if 'mac' in nic_dict: 9551 nic_mac = nic_dict['mac'] 9552 if nic_mac is None: 9553 raise errors.OpPrereqError('Cannot set the nic mac to None', 9554 errors.ECODE_INVAL) 9555 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9556 # otherwise generate the mac 9557 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId()) 9558 else: 9559 # or validate/reserve the current one 9560 try: 9561 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId()) 9562 except errors.ReservationError: 9563 raise errors.OpPrereqError("MAC address %s already in use" 9564 " in cluster" % nic_mac, 9565 errors.ECODE_NOTUNIQUE) 9566 9567 # DISK processing 9568 if self.op.disks and instance.disk_template == constants.DT_DISKLESS: 9569 raise errors.OpPrereqError("Disk operations not supported for" 9570 " diskless instances", 9571 errors.ECODE_INVAL) 9572 for disk_op, _ in self.op.disks: 9573 if disk_op == constants.DDM_REMOVE: 9574 if len(instance.disks) == 1: 9575 raise errors.OpPrereqError("Cannot remove the last disk of" 9576 " an instance", errors.ECODE_INVAL) 9577 _CheckInstanceDown(self, instance, "cannot remove disks") 9578 9579 if (disk_op == constants.DDM_ADD and 9580 len(instance.disks) >= constants.MAX_DISKS): 9581 raise errors.OpPrereqError("Instance has too many disks (%d), cannot" 9582 " add more" % constants.MAX_DISKS, 9583 errors.ECODE_STATE) 9584 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE): 9585 # an existing disk 9586 if disk_op < 0 or disk_op >= len(instance.disks): 9587 raise errors.OpPrereqError("Invalid disk index %s, valid values" 9588 " are 0 to %d" % 9589 (disk_op, len(instance.disks)), 9590 errors.ECODE_INVAL) 9591 9592 return
9593
9594 - def _ConvertPlainToDrbd(self, feedback_fn):
9595 """Converts an instance from plain to drbd. 9596 9597 """ 9598 feedback_fn("Converting template to drbd") 9599 instance = self.instance 9600 pnode = instance.primary_node 9601 snode = self.op.remote_node 9602 9603 # create a fake disk info for _GenerateDiskTemplate 9604 disk_info = [{"size": d.size, "mode": d.mode, 9605 "vg": d.logical_id[0]} for d in instance.disks] 9606 new_disks = _GenerateDiskTemplate(self, self.op.disk_template, 9607 instance.name, pnode, [snode], 9608 disk_info, None, None, 0, feedback_fn) 9609 info = _GetInstanceInfoText(instance) 9610 feedback_fn("Creating aditional volumes...") 9611 # first, create the missing data and meta devices 9612 for disk in new_disks: 9613 # unfortunately this is... not too nice 9614 _CreateSingleBlockDev(self, pnode, instance, disk.children[1], 9615 info, True) 9616 for child in disk.children: 9617 _CreateSingleBlockDev(self, snode, instance, child, info, True) 9618 # at this stage, all new LVs have been created, we can rename the 9619 # old ones 9620 feedback_fn("Renaming original volumes...") 9621 rename_list = [(o, n.children[0].logical_id) 9622 for (o, n) in zip(instance.disks, new_disks)] 9623 result = self.rpc.call_blockdev_rename(pnode, rename_list) 9624 result.Raise("Failed to rename original LVs") 9625 9626 feedback_fn("Initializing DRBD devices...") 9627 # all child devices are in place, we can now create the DRBD devices 9628 for disk in new_disks: 9629 for node in [pnode, snode]: 9630 f_create = node == pnode 9631 _CreateSingleBlockDev(self, node, instance, disk, info, f_create) 9632 9633 # at this point, the instance has been modified 9634 instance.disk_template = constants.DT_DRBD8 9635 instance.disks = new_disks 9636 self.cfg.Update(instance, feedback_fn) 9637 9638 # disks are created, waiting for sync 9639 disk_abort = not _WaitForSync(self, instance, 9640 oneshot=not self.op.wait_for_sync) 9641 if disk_abort: 9642 raise errors.OpExecError("There are some degraded disks for" 9643 " this instance, please cleanup manually")
9644
9645 - def _ConvertDrbdToPlain(self, feedback_fn):
9646 """Converts an instance from drbd to plain. 9647 9648 """ 9649 instance = self.instance 9650 assert len(instance.secondary_nodes) == 1 9651 pnode = instance.primary_node 9652 snode = instance.secondary_nodes[0] 9653 feedback_fn("Converting template to plain") 9654 9655 old_disks = instance.disks 9656 new_disks = [d.children[0] for d in old_disks] 9657 9658 # copy over size and mode 9659 for parent, child in zip(old_disks, new_disks): 9660 child.size = parent.size 9661 child.mode = parent.mode 9662 9663 # update instance structure 9664 instance.disks = new_disks 9665 instance.disk_template = constants.DT_PLAIN 9666 self.cfg.Update(instance, feedback_fn) 9667 9668 feedback_fn("Removing volumes on the secondary node...") 9669 for disk in old_disks: 9670 self.cfg.SetDiskID(disk, snode) 9671 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg 9672 if msg: 9673 self.LogWarning("Could not remove block device %s on node %s," 9674 " continuing anyway: %s", disk.iv_name, snode, msg) 9675 9676 feedback_fn("Removing unneeded volumes on the primary node...") 9677 for idx, disk in enumerate(old_disks): 9678 meta = disk.children[1] 9679 self.cfg.SetDiskID(meta, pnode) 9680 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg 9681 if msg: 9682 self.LogWarning("Could not remove metadata for disk %d on node %s," 9683 " continuing anyway: %s", idx, pnode, msg)
9684
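# Illustrative sketch (not part of the module): the two converters above only
# reshape the disk tree. Going drbd -> plain keeps each disk's first (data)
# child and copies size/mode from the parent; plain -> drbd wraps each LV in a
# new DRBD8 device with data and metadata children. Plain dicts stand in for
# objects.Disk here.
def _SketchDrbdToPlain(drbd_disks):
  plain = []
  for disk in drbd_disks:
    child = dict(disk["children"][0])   # the data LV becomes the disk itself
    child["size"] = disk["size"]
    child["mode"] = disk["mode"]
    plain.append(child)
  return plain

drbd = [{"size": 10240, "mode": "rw",
         "children": [{"dev_type": "lvm", "role": "data"},
                      {"dev_type": "lvm", "role": "meta"}]}]
assert _SketchDrbdToPlain(drbd)[0]["role"] == "data"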
9685 - def Exec(self, feedback_fn):
9686 """Modifies an instance. 9687 9688 All parameters take effect only at the next restart of the instance. 9689 9690 """ 9691 # Process here the warnings from CheckPrereq, as we don't have a 9692 # feedback_fn there. 9693 for warn in self.warn: 9694 feedback_fn("WARNING: %s" % warn) 9695 9696 result = [] 9697 instance = self.instance 9698 # disk changes 9699 for disk_op, disk_dict in self.op.disks: 9700 if disk_op == constants.DDM_REMOVE: 9701 # remove the last disk 9702 device = instance.disks.pop() 9703 device_idx = len(instance.disks) 9704 for node, disk in device.ComputeNodeTree(instance.primary_node): 9705 self.cfg.SetDiskID(disk, node) 9706 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg 9707 if msg: 9708 self.LogWarning("Could not remove disk/%d on node %s: %s," 9709 " continuing anyway", device_idx, node, msg) 9710 result.append(("disk/%d" % device_idx, "remove")) 9711 elif disk_op == constants.DDM_ADD: 9712 # add a new disk 9713 if instance.disk_template == constants.DT_FILE: 9714 file_driver, file_path = instance.disks[0].logical_id 9715 file_path = os.path.dirname(file_path) 9716 else: 9717 file_driver = file_path = None 9718 disk_idx_base = len(instance.disks) 9719 new_disk = _GenerateDiskTemplate(self, 9720 instance.disk_template, 9721 instance.name, instance.primary_node, 9722 instance.secondary_nodes, 9723 [disk_dict], 9724 file_path, 9725 file_driver, 9726 disk_idx_base, feedback_fn)[0] 9727 instance.disks.append(new_disk) 9728 info = _GetInstanceInfoText(instance) 9729 9730 logging.info("Creating volume %s for instance %s", 9731 new_disk.iv_name, instance.name) 9732 # Note: this needs to be kept in sync with _CreateDisks 9733 #HARDCODE 9734 for node in instance.all_nodes: 9735 f_create = node == instance.primary_node 9736 try: 9737 _CreateBlockDev(self, node, instance, new_disk, 9738 f_create, info, f_create) 9739 except errors.OpExecError, err: 9740 self.LogWarning("Failed to create volume %s (%s) on" 9741 " node %s: %s", 9742 new_disk.iv_name, new_disk, node, err) 9743 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" % 9744 (new_disk.size, new_disk.mode))) 9745 else: 9746 # change a given disk 9747 instance.disks[disk_op].mode = disk_dict['mode'] 9748 result.append(("disk.mode/%d" % disk_op, disk_dict['mode'])) 9749 9750 if self.op.disk_template: 9751 r_shut = _ShutdownInstanceDisks(self, instance) 9752 if not r_shut: 9753 raise errors.OpExecError("Cannot shutdown instance disks, unable to" 9754 " proceed with disk template conversion") 9755 mode = (instance.disk_template, self.op.disk_template) 9756 try: 9757 self._DISK_CONVERSIONS[mode](self, feedback_fn) 9758 except: 9759 self.cfg.ReleaseDRBDMinors(instance.name) 9760 raise 9761 result.append(("disk_template", self.op.disk_template)) 9762 9763 # NIC changes 9764 for nic_op, nic_dict in self.op.nics: 9765 if nic_op == constants.DDM_REMOVE: 9766 # remove the last nic 9767 del instance.nics[-1] 9768 result.append(("nic.%d" % len(instance.nics), "remove")) 9769 elif nic_op == constants.DDM_ADD: 9770 # mac and bridge should be set, by now 9771 mac = nic_dict['mac'] 9772 ip = nic_dict.get('ip', None) 9773 nicparams = self.nic_pinst[constants.DDM_ADD] 9774 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams) 9775 instance.nics.append(new_nic) 9776 result.append(("nic.%d" % (len(instance.nics) - 1), 9777 "add:mac=%s,ip=%s,mode=%s,link=%s" % 9778 (new_nic.mac, new_nic.ip, 9779 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE], 9780 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK] 9781 ))) 
9782 else: 9783 for key in 'mac', 'ip': 9784 if key in nic_dict: 9785 setattr(instance.nics[nic_op], key, nic_dict[key]) 9786 if nic_op in self.nic_pinst: 9787 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op] 9788 for key, val in nic_dict.iteritems(): 9789 result.append(("nic.%s/%d" % (key, nic_op), val)) 9790 9791 # hvparams changes 9792 if self.op.hvparams: 9793 instance.hvparams = self.hv_inst 9794 for key, val in self.op.hvparams.iteritems(): 9795 result.append(("hv/%s" % key, val)) 9796 9797 # beparams changes 9798 if self.op.beparams: 9799 instance.beparams = self.be_inst 9800 for key, val in self.op.beparams.iteritems(): 9801 result.append(("be/%s" % key, val)) 9802 9803 # OS change 9804 if self.op.os_name: 9805 instance.os = self.op.os_name 9806 9807 # osparams changes 9808 if self.op.osparams: 9809 instance.osparams = self.os_inst 9810 for key, val in self.op.osparams.iteritems(): 9811 result.append(("os/%s" % key, val)) 9812 9813 self.cfg.Update(instance, feedback_fn) 9814 9815 return result
9816 9817 _DISK_CONVERSIONS = { 9818 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, 9819 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, 9820 }
9821
9822 9823 -class LUBackupQuery(NoHooksLU):
9824 """Query the exports list 9825 9826 """ 9827 REQ_BGL = False 9828
9829 - def ExpandNames(self):
9830 self.needed_locks = {} 9831 self.share_locks[locking.LEVEL_NODE] = 1 9832 if not self.op.nodes: 9833 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 9834 else: 9835 self.needed_locks[locking.LEVEL_NODE] = \ 9836 _GetWantedNodes(self, self.op.nodes)
9837
9838 - def Exec(self, feedback_fn):
9839 """Compute the list of all the exported system images. 9840 9841 @rtype: dict 9842 @return: a dictionary with the structure node->(export-list) 9843 where export-list is a list of the instances exported on 9844 that node. 9845 9846 """ 9847 self.nodes = self.acquired_locks[locking.LEVEL_NODE] 9848 rpcresult = self.rpc.call_export_list(self.nodes) 9849 result = {} 9850 for node in rpcresult: 9851 if rpcresult[node].fail_msg: 9852 result[node] = False 9853 else: 9854 result[node] = rpcresult[node].payload 9855 9856 return result
9857
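# Illustrative sketch (not part of the module): the returned dictionary maps
# every queried node either to the list of instance exports found there or to
# False when the node could not be contacted. Example values are invented.
_EXAMPLE_EXPORT_LIST = {
  "node1.example.com": ["web1.example.com", "db1.example.com"],
  "node2.example.com": [],
  "node3.example.com": False,   # the export_list RPC to this node failed
}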
9858 9859 -class LUBackupPrepare(NoHooksLU):
9860 """Prepares an instance for an export and returns useful information. 9861 9862 """ 9863 REQ_BGL = False 9864
9865 - def ExpandNames(self):
9866 self._ExpandAndLockInstance()
9867
9868 - def CheckPrereq(self):
9869 """Check prerequisites. 9870 9871 """ 9872 instance_name = self.op.instance_name 9873 9874 self.instance = self.cfg.GetInstanceInfo(instance_name) 9875 assert self.instance is not None, \ 9876 "Cannot retrieve locked instance %s" % self.op.instance_name 9877 _CheckNodeOnline(self, self.instance.primary_node) 9878 9879 self._cds = _GetClusterDomainSecret()
9880
9881 - def Exec(self, feedback_fn):
9882 """Prepares an instance for an export. 9883 9884 """ 9885 instance = self.instance 9886 9887 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9888 salt = utils.GenerateSecret(8) 9889 9890 feedback_fn("Generating X509 certificate on %s" % instance.primary_node) 9891 result = self.rpc.call_x509_cert_create(instance.primary_node, 9892 constants.RIE_CERT_VALIDITY) 9893 result.Raise("Can't create X509 key and certificate on %s" % result.node) 9894 9895 (name, cert_pem) = result.payload 9896 9897 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 9898 cert_pem) 9899 9900 return { 9901 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), 9902 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), 9903 salt), 9904 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), 9905 } 9906 9907 return None
9908
9909 9910 -class LUBackupExport(LogicalUnit):
9911 """Export an instance to an image in the cluster. 9912 9913 """ 9914 HPATH = "instance-export" 9915 HTYPE = constants.HTYPE_INSTANCE 9916 REQ_BGL = False 9917
9918 - def CheckArguments(self):
9919 """Check the arguments. 9920 9921 """ 9922 self.x509_key_name = self.op.x509_key_name 9923 self.dest_x509_ca_pem = self.op.destination_x509_ca 9924 9925 if self.op.mode == constants.EXPORT_MODE_REMOTE: 9926 if not self.x509_key_name: 9927 raise errors.OpPrereqError("Missing X509 key name for encryption", 9928 errors.ECODE_INVAL) 9929 9930 if not self.dest_x509_ca_pem: 9931 raise errors.OpPrereqError("Missing destination X509 CA", 9932 errors.ECODE_INVAL)
9933
9934 - def ExpandNames(self):
9935 self._ExpandAndLockInstance() 9936 9937 # Lock all nodes for local exports 9938 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9939 # FIXME: lock only instance primary and destination node 9940 # 9941 # Sad but true, for now we have do lock all nodes, as we don't know where 9942 # the previous export might be, and in this LU we search for it and 9943 # remove it from its current node. In the future we could fix this by: 9944 # - making a tasklet to search (share-lock all), then create the 9945 # new one, then one to remove, after 9946 # - removing the removal operation altogether 9947 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9948
9949 - def DeclareLocks(self, level):
9950 """Last minute lock declaration."""
9951 # All nodes are locked anyway, so nothing to do here. 9952
9953 - def BuildHooksEnv(self):
9954 """Build hooks env. 9955 9956 This will run on the master, primary node and target node. 9957 9958 """ 9959 env = { 9960 "EXPORT_MODE": self.op.mode, 9961 "EXPORT_NODE": self.op.target_node, 9962 "EXPORT_DO_SHUTDOWN": self.op.shutdown, 9963 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 9964 # TODO: Generic function for boolean env variables 9965 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), 9966 } 9967 9968 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 9969 9970 nl = [self.cfg.GetMasterNode(), self.instance.primary_node] 9971 9972 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9973 nl.append(self.op.target_node) 9974 9975 return env, nl, nl
9976
9977 - def CheckPrereq(self):
9978 """Check prerequisites. 9979 9980 This checks that the instance and node names are valid. 9981 9982 """ 9983 instance_name = self.op.instance_name 9984 9985 self.instance = self.cfg.GetInstanceInfo(instance_name) 9986 assert self.instance is not None, \ 9987 "Cannot retrieve locked instance %s" % self.op.instance_name 9988 _CheckNodeOnline(self, self.instance.primary_node) 9989 9990 if (self.op.remove_instance and self.instance.admin_up and 9991 not self.op.shutdown): 9992 raise errors.OpPrereqError("Can not remove instance without shutting it" 9993 " down before") 9994 9995 if self.op.mode == constants.EXPORT_MODE_LOCAL: 9996 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 9997 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) 9998 assert self.dst_node is not None 9999 10000 _CheckNodeOnline(self, self.dst_node.name) 10001 _CheckNodeNotDrained(self, self.dst_node.name) 10002 10003 self._cds = None 10004 self.dest_disk_info = None 10005 self.dest_x509_ca = None 10006 10007 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 10008 self.dst_node = None 10009 10010 if len(self.op.target_node) != len(self.instance.disks): 10011 raise errors.OpPrereqError(("Received destination information for %s" 10012 " disks, but instance %s has %s disks") % 10013 (len(self.op.target_node), instance_name, 10014 len(self.instance.disks)), 10015 errors.ECODE_INVAL) 10016 10017 cds = _GetClusterDomainSecret() 10018 10019 # Check X509 key name 10020 try: 10021 (key_name, hmac_digest, hmac_salt) = self.x509_key_name 10022 except (TypeError, ValueError), err: 10023 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err) 10024 10025 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): 10026 raise errors.OpPrereqError("HMAC for X509 key name is wrong", 10027 errors.ECODE_INVAL) 10028 10029 # Load and verify CA 10030 try: 10031 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) 10032 except OpenSSL.crypto.Error, err: 10033 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % 10034 (err, ), errors.ECODE_INVAL) 10035 10036 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 10037 if errcode is not None: 10038 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % 10039 (msg, ), errors.ECODE_INVAL) 10040 10041 self.dest_x509_ca = cert 10042 10043 # Verify target information 10044 disk_info = [] 10045 for idx, disk_data in enumerate(self.op.target_node): 10046 try: 10047 (host, port, magic) = \ 10048 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) 10049 except errors.GenericError, err: 10050 raise errors.OpPrereqError("Target info for disk %s: %s" % 10051 (idx, err), errors.ECODE_INVAL) 10052 10053 disk_info.append((host, port, magic)) 10054 10055 assert len(disk_info) == len(self.op.target_node) 10056 self.dest_disk_info = disk_info 10057 10058 else: 10059 raise errors.ProgrammerError("Unhandled export mode %r" % 10060 self.op.mode) 10061 10062 # instance disk type verification 10063 # TODO: Implement export support for file-based disks 10064 for disk in self.instance.disks: 10065 if disk.dev_type == constants.LD_FILE: 10066 raise errors.OpPrereqError("Export not supported for instances with" 10067 " file-based disks", errors.ECODE_INVAL)
10068
10069 - def _CleanupExports(self, feedback_fn):
10070 """Removes exports of current instance from all other nodes. 10071 10072 If an instance in a cluster with nodes A..D was exported to node C, its 10073 exports will be removed from the nodes A, B and D. 10074 10075 """ 10076 assert self.op.mode != constants.EXPORT_MODE_REMOTE 10077 10078 nodelist = self.cfg.GetNodeList() 10079 nodelist.remove(self.dst_node.name) 10080 10081 # on one-node clusters nodelist will be empty after the removal 10082 # if we proceed the backup would be removed because OpBackupQuery 10083 # substitutes an empty list with the full cluster node list. 10084 iname = self.instance.name 10085 if nodelist: 10086 feedback_fn("Removing old exports for instance %s" % iname) 10087 exportlist = self.rpc.call_export_list(nodelist) 10088 for node in exportlist: 10089 if exportlist[node].fail_msg: 10090 continue 10091 if iname in exportlist[node].payload: 10092 msg = self.rpc.call_export_remove(node, iname).fail_msg 10093 if msg: 10094 self.LogWarning("Could not remove older export for instance %s" 10095 " on node %s: %s", iname, node, msg)
10096
10097 - def Exec(self, feedback_fn):
10098     """Export an instance to an image in the cluster.
10099 
10100     """
10101     assert self.op.mode in constants.EXPORT_MODES
10102 
10103     instance = self.instance
10104     src_node = instance.primary_node
10105 
10106     if self.op.shutdown:
10107       # shutdown the instance, but not the disks
10108       feedback_fn("Shutting down instance %s" % instance.name)
10109       result = self.rpc.call_instance_shutdown(src_node, instance,
10110                                                self.op.shutdown_timeout)
10111       # TODO: Maybe ignore failures if ignore_remove_failures is set
10112       result.Raise("Could not shutdown instance %s on"
10113                    " node %s" % (instance.name, src_node))
10114 
10115     # set the disks ID correctly since call_instance_start needs the
10116     # correct drbd minor to create the symlinks
10117     for disk in instance.disks:
10118       self.cfg.SetDiskID(disk, src_node)
10119 
10120     activate_disks = (not instance.admin_up)
10121 
10122     if activate_disks:
10123       # Activate the instance disks if we're exporting a stopped instance
10124       feedback_fn("Activating disks for %s" % instance.name)
10125       _StartInstanceDisks(self, instance, None)
10126 
10127     try:
10128       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10129                                                      instance)
10130 
10131       helper.CreateSnapshots()
10132       try:
10133         if (self.op.shutdown and instance.admin_up and
10134             not self.op.remove_instance):
10135           assert not activate_disks
10136           feedback_fn("Starting instance %s" % instance.name)
10137           result = self.rpc.call_instance_start(src_node, instance, None, None)
10138           msg = result.fail_msg
10139           if msg:
10140             feedback_fn("Failed to start instance: %s" % msg)
10141             _ShutdownInstanceDisks(self, instance)
10142             raise errors.OpExecError("Could not start instance: %s" % msg)
10143 
10144         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10145           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10146         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10147           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10148           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10149 
10150           (key_name, _, _) = self.x509_key_name
10151 
10152           dest_ca_pem = \
10153             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10154                                             self.dest_x509_ca)
10155 
10156           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10157                                                      key_name, dest_ca_pem,
10158                                                      timeouts)
10159       finally:
10160         helper.Cleanup()
10161 
10162       # Check for backwards compatibility
10163       assert len(dresults) == len(instance.disks)
10164       assert compat.all(isinstance(i, bool) for i in dresults), \
10165              "Not all results are boolean: %r" % dresults
10166 
10167     finally:
10168       if activate_disks:
10169         feedback_fn("Deactivating disks for %s" % instance.name)
10170         _ShutdownInstanceDisks(self, instance)
10171 
10172     if not (compat.all(dresults) and fin_resu):
10173       failures = []
10174       if not fin_resu:
10175         failures.append("export finalization")
10176       if not compat.all(dresults):
10177         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10178                                if not dsk)
10179         failures.append("disk export: disk(s) %s" % fdsk)
10180 
10181       raise errors.OpExecError("Export failed, errors in %s" %
10182                                utils.CommaJoin(failures))
10183 
10184     # At this point, the export was successful, we can cleanup/finish
10185 
10186     # Remove instance if requested
10187     if self.op.remove_instance:
10188       feedback_fn("Removing instance %s" % instance.name)
10189       _RemoveInstance(self, feedback_fn, instance,
10190                       self.op.ignore_remove_failures)
10191 
10192     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10193       self._CleanupExports(feedback_fn)
10194 
10195     return fin_resu, dresults
10196
10197 10198 -class LUBackupRemove(NoHooksLU):
10199 """Remove exports related to the named instance. 10200 10201 """ 10202 REQ_BGL = False 10203
10204 - def ExpandNames(self):
10205 self.needed_locks = {} 10206 # We need all nodes to be locked in order for RemoveExport to work, but we 10207 # don't need to lock the instance itself, as nothing will happen to it (and 10208 # we can remove exports also for a removed instance) 10209 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10210
10211 - def Exec(self, feedback_fn):
10212 """Remove any export. 10213 10214 """ 10215 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) 10216 # If the instance was not found we'll try with the name that was passed in. 10217 # This will only work if it was an FQDN, though. 10218 fqdn_warn = False 10219 if not instance_name: 10220 fqdn_warn = True 10221 instance_name = self.op.instance_name 10222 10223 locked_nodes = self.acquired_locks[locking.LEVEL_NODE] 10224 exportlist = self.rpc.call_export_list(locked_nodes) 10225 found = False 10226 for node in exportlist: 10227 msg = exportlist[node].fail_msg 10228 if msg: 10229 self.LogWarning("Failed to query node %s (continuing): %s", node, msg) 10230 continue 10231 if instance_name in exportlist[node].payload: 10232 found = True 10233 result = self.rpc.call_export_remove(node, instance_name) 10234 msg = result.fail_msg 10235 if msg: 10236 logging.error("Could not remove export for instance %s" 10237 " on node %s: %s", instance_name, node, msg) 10238 10239 if fqdn_warn and not found: 10240 feedback_fn("Export not found. If trying to remove an export belonging" 10241 " to a deleted instance please use its Fully Qualified" 10242 " Domain Name.")
10243
10244 10245 -class LUGroupAdd(LogicalUnit):
10246 """Logical unit for creating node groups. 10247 10248 """ 10249 HPATH = "group-add" 10250 HTYPE = constants.HTYPE_GROUP 10251 REQ_BGL = False 10252
10253 - def ExpandNames(self):
10254 # We need the new group's UUID here so that we can create and acquire the 10255 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup 10256 # that it should not check whether the UUID exists in the configuration. 10257 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) 10258 self.needed_locks = {} 10259 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10260
10261 - def CheckPrereq(self):
10262 """Check prerequisites. 10263 10264 This checks that the given group name is not an existing node group 10265 already. 10266 10267 """ 10268 try: 10269 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 10270 except errors.OpPrereqError: 10271 pass 10272 else: 10273 raise errors.OpPrereqError("Desired group name '%s' already exists as a" 10274 " node group (UUID: %s)" % 10275 (self.op.group_name, existing_uuid), 10276 errors.ECODE_EXISTS) 10277 10278 if self.op.ndparams: 10279 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10280
10281 - def BuildHooksEnv(self):
10282 """Build hooks env. 10283 10284 """ 10285 env = { 10286 "GROUP_NAME": self.op.group_name, 10287 } 10288 mn = self.cfg.GetMasterNode() 10289 return env, [mn], [mn]
10290
10291 - def Exec(self, feedback_fn):
10292 """Add the node group to the cluster. 10293 10294 """ 10295 group_obj = objects.NodeGroup(name=self.op.group_name, members=[], 10296 uuid=self.group_uuid, 10297 alloc_policy=self.op.alloc_policy, 10298 ndparams=self.op.ndparams) 10299 10300 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False) 10301 del self.remove_locks[locking.LEVEL_NODEGROUP]
10302
10303 10304 -class LUGroupAssignNodes(NoHooksLU):
10305 """Logical unit for assigning nodes to groups. 10306 10307 """ 10308 REQ_BGL = False 10309
10310 - def ExpandNames(self):
10311 # These raise errors.OpPrereqError on their own: 10312 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 10313 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 10314 10315 # We want to lock all the affected nodes and groups. We have readily 10316 # available the list of nodes, and the *destination* group. To gather the 10317 # list of "source" groups, we need to fetch node information later on. 10318 self.needed_locks = { 10319 locking.LEVEL_NODEGROUP: set([self.group_uuid]), 10320 locking.LEVEL_NODE: self.op.nodes, 10321 }
10322
10323 - def DeclareLocks(self, level):
10324 if level == locking.LEVEL_NODEGROUP: 10325 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1 10326 10327 # Try to get all affected nodes' groups without having the group or node 10328 # lock yet. Needs verification later in the code flow. 10329 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes) 10330 10331 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
10332
10333 - def CheckPrereq(self):
10334 """Check prerequisites. 10335 10336 """ 10337 assert self.needed_locks[locking.LEVEL_NODEGROUP] 10338 assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) == 10339 frozenset(self.op.nodes)) 10340 10341 expected_locks = (set([self.group_uuid]) | 10342 self.cfg.GetNodeGroupsFromNodes(self.op.nodes)) 10343 actual_locks = self.acquired_locks[locking.LEVEL_NODEGROUP] 10344 if actual_locks != expected_locks: 10345 raise errors.OpExecError("Nodes changed groups since locks were acquired," 10346 " current groups are '%s', used to be '%s'" % 10347 (utils.CommaJoin(expected_locks), 10348 utils.CommaJoin(actual_locks))) 10349 10350 self.node_data = self.cfg.GetAllNodesInfo() 10351 self.group = self.cfg.GetNodeGroup(self.group_uuid) 10352 instance_data = self.cfg.GetAllInstancesInfo() 10353 10354 if self.group is None: 10355 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 10356 (self.op.group_name, self.group_uuid)) 10357 10358 (new_splits, previous_splits) = \ 10359 self.CheckAssignmentForSplitInstances([(node, self.group_uuid) 10360 for node in self.op.nodes], 10361 self.node_data, instance_data) 10362 10363 if new_splits: 10364 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits)) 10365 10366 if not self.op.force: 10367 raise errors.OpExecError("The following instances get split by this" 10368 " change and --force was not given: %s" % 10369 fmt_new_splits) 10370 else: 10371 self.LogWarning("This operation will split the following instances: %s", 10372 fmt_new_splits) 10373 10374 if previous_splits: 10375 self.LogWarning("In addition, these already-split instances continue" 10376 " to be split across groups: %s", 10377 utils.CommaJoin(utils.NiceSort(previous_splits)))
10378
10379 - def Exec(self, feedback_fn):
10380 """Assign nodes to a new group. 10381 10382 """ 10383 for node in self.op.nodes: 10384 self.node_data[node].group = self.group_uuid 10385 10386 # FIXME: Depends on side-effects of modifying the result of 10387 # C{cfg.GetAllNodesInfo} 10388 10389 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10390 10391 @staticmethod
10392 - def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10393 """Check for split instances after a node assignment. 10394 10395 This method considers a series of node assignments as an atomic operation, 10396 and returns information about split instances after applying the set of 10397 changes. 10398 10399 In particular, it returns information about newly split instances, and 10400 instances that were already split, and remain so after the change. 10401 10402 Only instances whose disk template is listed in constants.DTS_NET_MIRROR are 10403 considered. 10404 10405 @type changes: list of (node_name, new_group_uuid) pairs. 10406 @param changes: list of node assignments to consider. 10407 @param node_data: a dict with data for all nodes 10408 @param instance_data: a dict with all instances to consider 10409 @rtype: a two-tuple 10410 @return: a list of instances that were previously okay and result split as a 10411 consequence of this change, and a list of instances that were previously 10412 split and this change does not fix. 10413 10414 """ 10415 changed_nodes = dict((node, group) for node, group in changes 10416 if node_data[node].group != group) 10417 10418 all_split_instances = set() 10419 previously_split_instances = set() 10420 10421 def InstanceNodes(instance): 10422 return [instance.primary_node] + list(instance.secondary_nodes)
10423 10424 for inst in instance_data.values(): 10425 if inst.disk_template not in constants.DTS_NET_MIRROR: 10426 continue 10427 10428 instance_nodes = InstanceNodes(inst) 10429 10430 if len(set(node_data[node].group for node in instance_nodes)) > 1: 10431 previously_split_instances.add(inst.name) 10432 10433 if len(set(changed_nodes.get(node, node_data[node].group) 10434 for node in instance_nodes)) > 1: 10435 all_split_instances.add(inst.name) 10436 10437 return (list(all_split_instances - previously_split_instances), 10438 list(previously_split_instances & all_split_instances))
10439
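CheckAssignmentForSplitInstances above separates instances that would become split by the proposed node moves from instances that were already split beforehand. The following self-contained toy version shows the same bookkeeping over plain dicts (node, group and instance names are made up, and the disk-template filter on DTS_NET_MIRROR is omitted here):

  def split_instances(changes, node_group, instance_nodes):
    # changes: list of (node, new_group); node_group: node -> current group;
    # instance_nodes: instance -> [primary] + secondaries.
    changed = dict((n, g) for (n, g) in changes if node_group[n] != g)
    split_after, split_before = set(), set()
    for inst, nodes in instance_nodes.items():
      if len(set(node_group[n] for n in nodes)) > 1:
        split_before.add(inst)
      if len(set(changed.get(n, node_group[n]) for n in nodes)) > 1:
        split_after.add(inst)
    # (newly split, still split) -- same shape as the method's return value
    return sorted(split_after - split_before), sorted(split_after & split_before)

  node_group = {"node1": "g1", "node2": "g1", "node3": "g2", "node4": "g3"}
  instance_nodes = {"inst-a": ["node1", "node2"],   # both nodes in g1 today
                    "inst-b": ["node3", "node4"]}   # already split over g2/g3
  # Moving node2 into g2 newly splits inst-a and leaves inst-b split.
  print(split_instances([("node2", "g2")], node_group, instance_nodes))
  # -> (['inst-a'], ['inst-b'])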
10440 10441 -class _GroupQuery(_QueryBase):
10442 10443 FIELDS = query.GROUP_FIELDS 10444
10445 - def ExpandNames(self, lu):
10446 lu.needed_locks = {} 10447 10448 self._all_groups = lu.cfg.GetAllNodeGroupsInfo() 10449 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values()) 10450 10451 if not self.names: 10452 self.wanted = [name_to_uuid[name] 10453 for name in utils.NiceSort(name_to_uuid.keys())] 10454 else: 10455 # Accept names to be either names or UUIDs. 10456 missing = [] 10457 self.wanted = [] 10458 all_uuid = frozenset(self._all_groups.keys()) 10459 10460 for name in self.names: 10461 if name in all_uuid: 10462 self.wanted.append(name) 10463 elif name in name_to_uuid: 10464 self.wanted.append(name_to_uuid[name]) 10465 else: 10466 missing.append(name) 10467 10468 if missing: 10469 raise errors.OpPrereqError("Some groups do not exist: %s" % 10470 utils.CommaJoin(missing), 10471 errors.ECODE_NOENT)
10472
10473 - def DeclareLocks(self, lu, level):
10474 pass
10475
10476 - def _GetQueryData(self, lu):
10477 """Computes the list of node groups and their attributes. 10478 10479 """ 10480 do_nodes = query.GQ_NODE in self.requested_data 10481 do_instances = query.GQ_INST in self.requested_data 10482 10483 group_to_nodes = None 10484 group_to_instances = None 10485 10486 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for 10487 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the 10488 # latter GetAllInstancesInfo() is not enough, for we have to go through 10489 # instance->node. Hence, we will need to process nodes even if we only need 10490 # instance information. 10491 if do_nodes or do_instances: 10492 all_nodes = lu.cfg.GetAllNodesInfo() 10493 group_to_nodes = dict((uuid, []) for uuid in self.wanted) 10494 node_to_group = {} 10495 10496 for node in all_nodes.values(): 10497 if node.group in group_to_nodes: 10498 group_to_nodes[node.group].append(node.name) 10499 node_to_group[node.name] = node.group 10500 10501 if do_instances: 10502 all_instances = lu.cfg.GetAllInstancesInfo() 10503 group_to_instances = dict((uuid, []) for uuid in self.wanted) 10504 10505 for instance in all_instances.values(): 10506 node = instance.primary_node 10507 if node in node_to_group: 10508 group_to_instances[node_to_group[node]].append(instance.name) 10509 10510 if not do_nodes: 10511 # Do not pass on node information if it was not requested. 10512 group_to_nodes = None 10513 10514 return query.GroupQueryData([self._all_groups[uuid] 10515 for uuid in self.wanted], 10516 group_to_nodes, group_to_instances)
10517
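_GetQueryData above derives two reverse mappings: group-to-nodes straight from the node objects, and group-to-instances indirectly through each instance's primary node. A compact sketch of that two-step aggregation over plain dicts (all names invented):

  wanted = ["uuid-g1", "uuid-g2"]
  node_to_group = {"node1": "uuid-g1", "node2": "uuid-g1", "node3": "uuid-g2"}
  instance_primary = {"inst-a": "node1", "inst-b": "node3", "inst-c": "node3"}

  # Step 1: group -> nodes, straight from the node objects.
  group_to_nodes = dict((uuid, []) for uuid in wanted)
  for node, group in sorted(node_to_group.items()):
    if group in group_to_nodes:
      group_to_nodes[group].append(node)

  # Step 2: group -> instances, via each instance's primary node.
  group_to_instances = dict((uuid, []) for uuid in wanted)
  for inst, pnode in sorted(instance_primary.items()):
    group_to_instances[node_to_group[pnode]].append(inst)

  print(group_to_nodes)      # node1/node2 under uuid-g1, node3 under uuid-g2
  print(group_to_instances)  # inst-a under uuid-g1, inst-b/inst-c under uuid-g2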
10518 10519 -class LUGroupQuery(NoHooksLU):
10520 """Logical unit for querying node groups. 10521 10522 """ 10523 REQ_BGL = False 10524
10525 - def CheckArguments(self):
10526 self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)
10527
10528 - def ExpandNames(self):
10529 self.gq.ExpandNames(self)
10530
10531 - def Exec(self, feedback_fn):
10532 return self.gq.OldStyleQuery(self)
10533
10534 10535 -class LUGroupSetParams(LogicalUnit):
10536 """Modifies the parameters of a node group. 10537 10538 """ 10539 HPATH = "group-modify" 10540 HTYPE = constants.HTYPE_GROUP 10541 REQ_BGL = False 10542
10543 - def CheckArguments(self):
10544 all_changes = [ 10545 self.op.ndparams, 10546 self.op.alloc_policy, 10547 ] 10548 10549 if all_changes.count(None) == len(all_changes): 10550 raise errors.OpPrereqError("Please pass at least one modification", 10551 errors.ECODE_INVAL)
10552
10553 - def ExpandNames(self):
10554 # This raises errors.OpPrereqError on its own: 10555 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 10556 10557 self.needed_locks = { 10558 locking.LEVEL_NODEGROUP: [self.group_uuid], 10559 }
10560
10561 - def CheckPrereq(self):
10562 """Check prerequisites. 10563 10564 """ 10565 self.group = self.cfg.GetNodeGroup(self.group_uuid) 10566 10567 if self.group is None: 10568 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 10569 (self.op.group_name, self.group_uuid)) 10570 10571 if self.op.ndparams: 10572 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams) 10573 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 10574 self.new_ndparams = new_ndparams
10575
10576 - def BuildHooksEnv(self):
10577 """Build hooks env. 10578 10579 """ 10580 env = { 10581 "GROUP_NAME": self.op.group_name, 10582 "NEW_ALLOC_POLICY": self.op.alloc_policy, 10583 } 10584 mn = self.cfg.GetMasterNode() 10585 return env, [mn], [mn]
10586
10587 - def Exec(self, feedback_fn):
10588 """Modifies the node group. 10589 10590 """ 10591 result = [] 10592 10593 if self.op.ndparams: 10594 self.group.ndparams = self.new_ndparams 10595 result.append(("ndparams", str(self.group.ndparams))) 10596 10597 if self.op.alloc_policy: 10598 self.group.alloc_policy = self.op.alloc_policy 10599 10600 self.cfg.Update(self.group, feedback_fn) 10601 return result
10602
10603 10604 10605 -class LUGroupRemove(LogicalUnit):
10606 HPATH = "group-remove" 10607 HTYPE = constants.HTYPE_GROUP 10608 REQ_BGL = False 10609
10610 - def ExpandNames(self):
10611     # This will raise errors.OpPrereqError on its own:
10612     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10613     self.needed_locks = {
10614       locking.LEVEL_NODEGROUP: [self.group_uuid],
10615       }
10616
10617 - def CheckPrereq(self):
10618     """Check prerequisites.
10619 
10620     This checks that the given group name exists as a node group, that it is
10621     empty (i.e., contains no nodes), and that it is not the last group of the
10622     cluster.
10623 
10624     """
10625     # Verify that the group is empty.
10626     group_nodes = [node.name
10627                    for node in self.cfg.GetAllNodesInfo().values()
10628                    if node.group == self.group_uuid]
10629 
10630     if group_nodes:
10631       raise errors.OpPrereqError("Group '%s' not empty, has the following"
10632                                  " nodes: %s" %
10633                                  (self.op.group_name,
10634                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
10635                                  errors.ECODE_STATE)
10636 
10637     # Verify the cluster would not be left group-less.
10638     if len(self.cfg.GetNodeGroupList()) == 1:
10639       raise errors.OpPrereqError("Group '%s' is the only group,"
10640                                  " cannot be removed" %
10641                                  self.op.group_name,
10642                                  errors.ECODE_STATE)
10643
10644 - def BuildHooksEnv(self):
10645 """Build hooks env. 10646 10647 """ 10648 env = { 10649 "GROUP_NAME": self.op.group_name, 10650 } 10651 mn = self.cfg.GetMasterNode() 10652 return env, [mn], [mn]
10653
10654 - def Exec(self, feedback_fn):
10655 """Remove the node group. 10656 10657 """ 10658 try: 10659 self.cfg.RemoveNodeGroup(self.group_uuid) 10660 except errors.ConfigurationError: 10661 raise errors.OpExecError("Group '%s' with UUID %s disappeared" % 10662 (self.op.group_name, self.group_uuid)) 10663 10664 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10665
10666 10667 -class LUGroupRename(LogicalUnit):
10668 HPATH = "group-rename" 10669 HTYPE = constants.HTYPE_GROUP 10670 REQ_BGL = False 10671
10672 - def ExpandNames(self):
10673 # This raises errors.OpPrereqError on its own: 10674 self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name) 10675 10676 self.needed_locks = { 10677 locking.LEVEL_NODEGROUP: [self.group_uuid], 10678 }
10679
10680 - def CheckPrereq(self):
10681 """Check prerequisites. 10682 10683 This checks that the given old_name exists as a node group, and that 10684 new_name doesn't. 10685 10686 """ 10687 try: 10688 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name) 10689 except errors.OpPrereqError: 10690 pass 10691 else: 10692 raise errors.OpPrereqError("Desired new name '%s' clashes with existing" 10693 " node group (UUID: %s)" % 10694 (self.op.new_name, new_name_uuid), 10695 errors.ECODE_EXISTS)
10696
10697 - def BuildHooksEnv(self):
10698 """Build hooks env. 10699 10700 """ 10701 env = { 10702 "OLD_NAME": self.op.old_name, 10703 "NEW_NAME": self.op.new_name, 10704 } 10705 10706 mn = self.cfg.GetMasterNode() 10707 all_nodes = self.cfg.GetAllNodesInfo() 10708 run_nodes = [mn] 10709 all_nodes.pop(mn, None) 10710 10711 for node in all_nodes.values(): 10712 if node.group == self.group_uuid: 10713 run_nodes.append(node.name) 10714 10715 return env, run_nodes, run_nodes
10716
10717 - def Exec(self, feedback_fn):
10718 """Rename the node group. 10719 10720 """ 10721 group = self.cfg.GetNodeGroup(self.group_uuid) 10722 10723 if group is None: 10724 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 10725 (self.op.old_name, self.group_uuid)) 10726 10727 group.name = self.op.new_name 10728 self.cfg.Update(group, feedback_fn) 10729 10730 return self.op.new_name
10731
10732 10733 -class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10734 """Generic tags LU. 10735 10736 This is an abstract class which is the parent of all the other tags LUs. 10737 10738 """ 10739
10740 - def ExpandNames(self):
10741 self.needed_locks = {} 10742 if self.op.kind == constants.TAG_NODE: 10743 self.op.name = _ExpandNodeName(self.cfg, self.op.name) 10744 self.needed_locks[locking.LEVEL_NODE] = self.op.name 10745 elif self.op.kind == constants.TAG_INSTANCE: 10746 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) 10747 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10748 10749 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's 10750 # not possible to acquire the BGL based on opcode parameters) 10751
10752 - def CheckPrereq(self):
10753 """Check prerequisites. 10754 10755 """ 10756 if self.op.kind == constants.TAG_CLUSTER: 10757 self.target = self.cfg.GetClusterInfo() 10758 elif self.op.kind == constants.TAG_NODE: 10759 self.target = self.cfg.GetNodeInfo(self.op.name) 10760 elif self.op.kind == constants.TAG_INSTANCE: 10761 self.target = self.cfg.GetInstanceInfo(self.op.name) 10762 else: 10763 raise errors.OpPrereqError("Wrong tag type requested (%s)" % 10764 str(self.op.kind), errors.ECODE_INVAL)
10765
10766 10767 -class LUTagsGet(TagsLU):
10768 """Returns the tags of a given object. 10769 10770 """ 10771 REQ_BGL = False 10772
10773 - def ExpandNames(self):
10774 TagsLU.ExpandNames(self) 10775 10776 # Share locks as this is only a read operation 10777 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10778
10779 - def Exec(self, feedback_fn):
10780 """Returns the tag list. 10781 10782 """ 10783 return list(self.target.GetTags())
10784
10785 10786 -class LUTagsSearch(NoHooksLU):
10787 """Searches the tags for a given pattern. 10788 10789 """ 10790 REQ_BGL = False 10791
10792 - def ExpandNames(self):
10793 self.needed_locks = {}
10794
10795 - def CheckPrereq(self):
10796 """Check prerequisites. 10797 10798 This checks the pattern passed for validity by compiling it. 10799 10800 """ 10801 try: 10802 self.re = re.compile(self.op.pattern) 10803 except re.error, err: 10804 raise errors.OpPrereqError("Invalid search pattern '%s': %s" % 10805 (self.op.pattern, err), errors.ECODE_INVAL)
10806
10807 - def Exec(self, feedback_fn):
10808 """Returns the tag list. 10809 10810 """ 10811 cfg = self.cfg 10812 tgts = [("/cluster", cfg.GetClusterInfo())] 10813 ilist = cfg.GetAllInstancesInfo().values() 10814 tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) 10815 nlist = cfg.GetAllNodesInfo().values() 10816 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) 10817 results = [] 10818 for path, target in tgts: 10819 for tag in target.GetTags(): 10820 if self.re.search(tag): 10821 results.append((path, tag)) 10822 return results
10823
10824 10825 -class LUTagsSet(TagsLU):
10826 """Sets a tag on a given object. 10827 10828 """ 10829 REQ_BGL = False 10830
10831 - def CheckPrereq(self):
10832 """Check prerequisites. 10833 10834 This checks the type and length of the tag name and value. 10835 10836 """ 10837 TagsLU.CheckPrereq(self) 10838 for tag in self.op.tags: 10839 objects.TaggableObject.ValidateTag(tag)
10840
10841 - def Exec(self, feedback_fn):
10842 """Sets the tag. 10843 10844 """ 10845 try: 10846 for tag in self.op.tags: 10847 self.target.AddTag(tag) 10848 except errors.TagError, err: 10849 raise errors.OpExecError("Error while setting tag: %s" % str(err)) 10850 self.cfg.Update(self.target, feedback_fn)
10851
10852 10853 -class LUTagsDel(TagsLU):
10854 """Delete a list of tags from a given object. 10855 10856 """ 10857 REQ_BGL = False 10858
10859 - def CheckPrereq(self):
10860 """Check prerequisites. 10861 10862 This checks that we have the given tag. 10863 10864 """ 10865 TagsLU.CheckPrereq(self) 10866 for tag in self.op.tags: 10867 objects.TaggableObject.ValidateTag(tag) 10868 del_tags = frozenset(self.op.tags) 10869 cur_tags = self.target.GetTags() 10870 10871 diff_tags = del_tags - cur_tags 10872 if diff_tags: 10873 diff_names = ("'%s'" % i for i in sorted(diff_tags)) 10874 raise errors.OpPrereqError("Tag(s) %s not found" % 10875 (utils.CommaJoin(diff_names), ), 10876 errors.ECODE_NOENT)
10877
10878 - def Exec(self, feedback_fn):
10879 """Remove the tag from the object. 10880 10881 """ 10882 for tag in self.op.tags: 10883 self.target.RemoveTag(tag) 10884 self.cfg.Update(self.target, feedback_fn)
10885
10886 10887 -class LUTestDelay(NoHooksLU):
10888 """Sleep for a specified amount of time. 10889 10890 This LU sleeps on the master and/or nodes for a specified amount of 10891 time. 10892 10893 """ 10894 REQ_BGL = False 10895
10896 - def ExpandNames(self):
10897 """Expand names and set required locks. 10898 10899 This expands the node list, if any. 10900 10901 """ 10902 self.needed_locks = {} 10903 if self.op.on_nodes: 10904 # _GetWantedNodes can be used here, but is not always appropriate to use 10905 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for 10906 # more information. 10907 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes) 10908 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10909
10910 - def _TestDelay(self):
10911 """Do the actual sleep. 10912 10913 """ 10914 if self.op.on_master: 10915 if not utils.TestDelay(self.op.duration): 10916 raise errors.OpExecError("Error during master delay test") 10917 if self.op.on_nodes: 10918 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration) 10919 for node, node_result in result.items(): 10920 node_result.Raise("Failure during rpc call to node %s" % node)
10921
10922 - def Exec(self, feedback_fn):
10923 """Execute the test delay opcode, with the wanted repetitions. 10924 10925 """ 10926 if self.op.repeat == 0: 10927 self._TestDelay() 10928 else: 10929 top_value = self.op.repeat - 1 10930 for i in range(self.op.repeat): 10931 self.LogInfo("Test delay iteration %d/%d" % (i, top_value)) 10932 self._TestDelay()
10933
10934 10935 -class LUTestJqueue(NoHooksLU):
10936 """Utility LU to test some aspects of the job queue. 10937 10938 """ 10939 REQ_BGL = False 10940 10941 # Must be lower than default timeout for WaitForJobChange to see whether it 10942 # notices changed jobs 10943 _CLIENT_CONNECT_TIMEOUT = 20.0 10944 _CLIENT_CONFIRM_TIMEOUT = 60.0 10945 10946 @classmethod
10947 - def _NotifyUsingSocket(cls, cb, errcls):
10948 """Opens a Unix socket and waits for another program to connect. 10949 10950 @type cb: callable 10951 @param cb: Callback to send socket name to client 10952 @type errcls: class 10953 @param errcls: Exception class to use for errors 10954 10955 """ 10956 # Using a temporary directory as there's no easy way to create temporary 10957 # sockets without writing a custom loop around tempfile.mktemp and 10958 # socket.bind 10959 tmpdir = tempfile.mkdtemp() 10960 try: 10961 tmpsock = utils.PathJoin(tmpdir, "sock") 10962 10963 logging.debug("Creating temporary socket at %s", tmpsock) 10964 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 10965 try: 10966 sock.bind(tmpsock) 10967 sock.listen(1) 10968 10969 # Send details to client 10970 cb(tmpsock) 10971 10972 # Wait for client to connect before continuing 10973 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) 10974 try: 10975 (conn, _) = sock.accept() 10976 except socket.error, err: 10977 raise errcls("Client didn't connect in time (%s)" % err) 10978 finally: 10979 sock.close() 10980 finally: 10981 # Remove as soon as client is connected 10982 shutil.rmtree(tmpdir) 10983 10984 # Wait for client to close 10985 try: 10986 try: 10987 # pylint: disable-msg=E1101 10988 # Instance of '_socketobject' has no ... member 10989 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) 10990 conn.recv(1) 10991 except socket.error, err: 10992 raise errcls("Client failed to confirm notification (%s)" % err) 10993 finally: 10994 conn.close()
10995
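_NotifyUsingSocket above is a small rendezvous pattern: bind a Unix socket inside a throwaway directory, announce its path to the client, wait with a timeout for the connect, then wait for a one-byte confirmation before cleaning up. A stripped-down sketch of the server side of that pattern, outside the LU machinery (the announce callback, timeouts and error handling are simplified placeholders):

  import os
  import shutil
  import socket
  import tempfile

  def wait_for_client(announce, connect_timeout=20.0, confirm_timeout=60.0):
    # Bind a Unix socket in a private temporary directory, hand its path to
    # the client via announce(), then wait for it to connect and to confirm.
    tmpdir = tempfile.mkdtemp()
    try:
      path = os.path.join(tmpdir, "sock")
      server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        server.bind(path)
        server.listen(1)
        announce(path)                # e.g. log the path or send it via RPC
        server.settimeout(connect_timeout)
        (conn, _) = server.accept()   # raises socket.timeout if the client is late
      finally:
        server.close()
    finally:
      shutil.rmtree(tmpdir)           # the socket path is no longer needed
    try:
      conn.settimeout(confirm_timeout)
      conn.recv(1)                    # client sends a byte or closes to confirm
    finally:
      conn.close()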
10996 - def _SendNotification(self, test, arg, sockname):
10997 """Sends a notification to the client. 10998 10999 @type test: string 11000 @param test: Test name 11001 @param arg: Test argument (depends on test) 11002 @type sockname: string 11003 @param sockname: Socket path 11004 11005 """ 11006 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11007
11008 - def _Notify(self, prereq, test, arg):
11009 """Notifies the client of a test. 11010 11011 @type prereq: bool 11012 @param prereq: Whether this is a prereq-phase test 11013 @type test: string 11014 @param test: Test name 11015 @param arg: Test argument (depends on test) 11016 11017 """ 11018 if prereq: 11019 errcls = errors.OpPrereqError 11020 else: 11021 errcls = errors.OpExecError 11022 11023 return self._NotifyUsingSocket(compat.partial(self._SendNotification, 11024 test, arg), 11025 errcls)
11026
11027 - def CheckArguments(self):
11028 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 11029 self.expandnames_calls = 0
11030
11031 - def ExpandNames(self):
11032 checkargs_calls = getattr(self, "checkargs_calls", 0) 11033 if checkargs_calls < 1: 11034 raise errors.ProgrammerError("CheckArguments was not called") 11035 11036 self.expandnames_calls += 1 11037 11038 if self.op.notify_waitlock: 11039 self._Notify(True, constants.JQT_EXPANDNAMES, None) 11040 11041 self.LogInfo("Expanding names") 11042 11043 # Get lock on master node (just to get a lock, not for a particular reason) 11044 self.needed_locks = { 11045 locking.LEVEL_NODE: self.cfg.GetMasterNode(), 11046 }
11047
11048 - def Exec(self, feedback_fn):
11049 if self.expandnames_calls < 1: 11050 raise errors.ProgrammerError("ExpandNames was not called") 11051 11052 if self.op.notify_exec: 11053 self._Notify(False, constants.JQT_EXEC, None) 11054 11055 self.LogInfo("Executing") 11056 11057 if self.op.log_messages: 11058 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) 11059 for idx, msg in enumerate(self.op.log_messages): 11060 self.LogInfo("Sending log message %s", idx + 1) 11061 feedback_fn(constants.JQT_MSGPREFIX + msg) 11062 # Report how many test messages have been sent 11063 self._Notify(False, constants.JQT_LOGMSG, idx + 1) 11064 11065 if self.op.fail: 11066 raise errors.OpExecError("Opcode failure was requested") 11067 11068 return True
11069
11070 11071 -class IAllocator(object):
11072   """IAllocator framework.
11073 
11074   An IAllocator instance has four sets of attributes:
11075     - cfg that is needed to query the cluster
11076     - input data (all members of the _KEYS class attribute are required)
11077     - four buffer attributes (in|out_data|text), that represent the
11078       input (to the external script) in text and data structure format,
11079       and the output from it, again in two formats
11080     - the result variables from the script (success, info, nodes) for
11081       easy usage
11082 
11083   """
11084   # pylint: disable-msg=R0902
11085   # lots of instance attributes
11086   _ALLO_KEYS = [
11087     "name", "mem_size", "disks", "disk_template",
11088     "os", "tags", "nics", "vcpus", "hypervisor",
11089     ]
11090   _RELO_KEYS = [
11091     "name", "relocate_from",
11092     ]
11093   _EVAC_KEYS = [
11094     "evac_nodes",
11095     ]
11096 
11097 - def __init__(self, cfg, rpc, mode, **kwargs):
11098 self.cfg = cfg 11099 self.rpc = rpc 11100 # init buffer variables 11101 self.in_text = self.out_text = self.in_data = self.out_data = None 11102 # init all input fields so that pylint is happy 11103 self.mode = mode 11104 self.mem_size = self.disks = self.disk_template = None 11105 self.os = self.tags = self.nics = self.vcpus = None 11106 self.hypervisor = None 11107 self.relocate_from = None 11108 self.name = None 11109 self.evac_nodes = None 11110 # computed fields 11111 self.required_nodes = None 11112 # init result fields 11113 self.success = self.info = self.result = None 11114 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 11115 keyset = self._ALLO_KEYS 11116 fn = self._AddNewInstance 11117 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 11118 keyset = self._RELO_KEYS 11119 fn = self._AddRelocateInstance 11120 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 11121 keyset = self._EVAC_KEYS 11122 fn = self._AddEvacuateNodes 11123 else: 11124 raise errors.ProgrammerError("Unknown mode '%s' passed to the" 11125 " IAllocator" % self.mode) 11126 for key in kwargs: 11127 if key not in keyset: 11128 raise errors.ProgrammerError("Invalid input parameter '%s' to" 11129 " IAllocator" % key) 11130 setattr(self, key, kwargs[key]) 11131 11132 for key in keyset: 11133 if key not in kwargs: 11134 raise errors.ProgrammerError("Missing input parameter '%s' to" 11135 " IAllocator" % key) 11136 self._BuildInputData(fn)
11137
11138 - def _ComputeClusterData(self):
11139 """Compute the generic allocator input data. 11140 11141 This is the data that is independent of the actual operation. 11142 11143 """ 11144 cfg = self.cfg 11145 cluster_info = cfg.GetClusterInfo() 11146 # cluster data 11147 data = { 11148 "version": constants.IALLOCATOR_VERSION, 11149 "cluster_name": cfg.GetClusterName(), 11150 "cluster_tags": list(cluster_info.GetTags()), 11151 "enabled_hypervisors": list(cluster_info.enabled_hypervisors), 11152 # we don't have job IDs 11153 } 11154 ninfo = cfg.GetAllNodesInfo() 11155 iinfo = cfg.GetAllInstancesInfo().values() 11156 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] 11157 11158 # node data 11159 node_list = [n.name for n in ninfo.values() if n.vm_capable] 11160 11161 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 11162 hypervisor_name = self.hypervisor 11163 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 11164 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor 11165 elif self.mode == constants.IALLOCATOR_MODE_MEVAC: 11166 hypervisor_name = cluster_info.enabled_hypervisors[0] 11167 11168 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(), 11169 hypervisor_name) 11170 node_iinfo = \ 11171 self.rpc.call_all_instances_info(node_list, 11172 cluster_info.enabled_hypervisors) 11173 11174 data["nodegroups"] = self._ComputeNodeGroupData(cfg) 11175 11176 config_ndata = self._ComputeBasicNodeData(ninfo) 11177 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo, 11178 i_list, config_ndata) 11179 assert len(data["nodes"]) == len(ninfo), \ 11180 "Incomplete node data computed" 11181 11182 data["instances"] = self._ComputeInstanceData(cluster_info, i_list) 11183 11184 self.in_data = data
11185 11186 @staticmethod
11187 - def _ComputeNodeGroupData(cfg):
11188 """Compute node groups data. 11189 11190 """ 11191 ng = {} 11192 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items(): 11193 ng[guuid] = { 11194 "name": gdata.name, 11195 "alloc_policy": gdata.alloc_policy, 11196 } 11197 return ng
11198 11199 @staticmethod
11200 - def _ComputeBasicNodeData(node_cfg):
11201 """Compute global node data. 11202 11203 @rtype: dict 11204 @returns: a dict of name: (node dict, node config) 11205 11206 """ 11207 node_results = {} 11208 for ninfo in node_cfg.values(): 11209 # fill in static (config-based) values 11210 pnr = { 11211 "tags": list(ninfo.GetTags()), 11212 "primary_ip": ninfo.primary_ip, 11213 "secondary_ip": ninfo.secondary_ip, 11214 "offline": ninfo.offline, 11215 "drained": ninfo.drained, 11216 "master_candidate": ninfo.master_candidate, 11217 "group": ninfo.group, 11218 "master_capable": ninfo.master_capable, 11219 "vm_capable": ninfo.vm_capable, 11220 } 11221 11222 node_results[ninfo.name] = pnr 11223 11224 return node_results
11225 11226 @staticmethod
11227 - def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list, 11228 node_results):
11229 """Compute global node data. 11230 11231 @param node_results: the basic node structures as filled from the config 11232 11233 """ 11234 # make a copy of the current dict 11235 node_results = dict(node_results) 11236 for nname, nresult in node_data.items(): 11237 assert nname in node_results, "Missing basic data for node %s" % nname 11238 ninfo = node_cfg[nname] 11239 11240 if not (ninfo.offline or ninfo.drained): 11241 nresult.Raise("Can't get data for node %s" % nname) 11242 node_iinfo[nname].Raise("Can't get node instance info from node %s" % 11243 nname) 11244 remote_info = nresult.payload 11245 11246 for attr in ['memory_total', 'memory_free', 'memory_dom0', 11247 'vg_size', 'vg_free', 'cpu_total']: 11248 if attr not in remote_info: 11249 raise errors.OpExecError("Node '%s' didn't return attribute" 11250 " '%s'" % (nname, attr)) 11251 if not isinstance(remote_info[attr], int): 11252 raise errors.OpExecError("Node '%s' returned invalid value" 11253 " for '%s': %s" % 11254 (nname, attr, remote_info[attr])) 11255 # compute memory used by primary instances 11256 i_p_mem = i_p_up_mem = 0 11257 for iinfo, beinfo in i_list: 11258 if iinfo.primary_node == nname: 11259 i_p_mem += beinfo[constants.BE_MEMORY] 11260 if iinfo.name not in node_iinfo[nname].payload: 11261 i_used_mem = 0 11262 else: 11263 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory']) 11264 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem 11265 remote_info['memory_free'] -= max(0, i_mem_diff) 11266 11267 if iinfo.admin_up: 11268 i_p_up_mem += beinfo[constants.BE_MEMORY] 11269 11270 # compute memory used by instances 11271 pnr_dyn = { 11272 "total_memory": remote_info['memory_total'], 11273 "reserved_memory": remote_info['memory_dom0'], 11274 "free_memory": remote_info['memory_free'], 11275 "total_disk": remote_info['vg_size'], 11276 "free_disk": remote_info['vg_free'], 11277 "total_cpus": remote_info['cpu_total'], 11278 "i_pri_memory": i_p_mem, 11279 "i_pri_up_memory": i_p_up_mem, 11280 } 11281 pnr_dyn.update(node_results[nname]) 11282 node_results[nname] = pnr_dyn 11283 11284 return node_results
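The free-memory adjustment in _ComputeDynamicNodeData above charges each primary instance with the difference between its configured memory and what the hypervisor currently reports for it, so stopped or ballooned-down instances still reserve their configured headroom on the node. A small worked example with invented numbers:

  remote_info = {"memory_free": 4096}   # free memory reported by the node, in MiB
  primaries = [
    # configured BE_MEMORY vs. memory currently reported by the hypervisor
    {"be": 1024, "used": 1024},         # running at its configured size
    {"be": 2048, "used": 0},            # stopped: absent from the hv listing
    {"be": 512, "used": 256},           # ballooned below its configured size
    ]

  for inst in primaries:
    diff = inst["be"] - inst["used"]
    # Same rule as above: only subtract when the instance currently uses less
    # than configured, i.e. keep the headroom it may legitimately claim back.
    remote_info["memory_free"] -= max(0, diff)

  print(remote_info["memory_free"])     # 4096 - 0 - 2048 - 256 = 1792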
11285 11286 @staticmethod
11287 - def _ComputeInstanceData(cluster_info, i_list):
11288 """Compute global instance data. 11289 11290 """ 11291 instance_data = {} 11292 for iinfo, beinfo in i_list: 11293 nic_data = [] 11294 for nic in iinfo.nics: 11295 filled_params = cluster_info.SimpleFillNIC(nic.nicparams) 11296 nic_dict = {"mac": nic.mac, 11297 "ip": nic.ip, 11298 "mode": filled_params[constants.NIC_MODE], 11299 "link": filled_params[constants.NIC_LINK], 11300 } 11301 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 11302 nic_dict["bridge"] = filled_params[constants.NIC_LINK] 11303 nic_data.append(nic_dict) 11304 pir = { 11305 "tags": list(iinfo.GetTags()), 11306 "admin_up": iinfo.admin_up, 11307 "vcpus": beinfo[constants.BE_VCPUS], 11308 "memory": beinfo[constants.BE_MEMORY], 11309 "os": iinfo.os, 11310 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes), 11311 "nics": nic_data, 11312 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks], 11313 "disk_template": iinfo.disk_template, 11314 "hypervisor": iinfo.hypervisor, 11315 } 11316 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template, 11317 pir["disks"]) 11318 instance_data[iinfo.name] = pir 11319 11320 return instance_data
11321
11322 - def _AddNewInstance(self):
11323 """Add new instance data to allocator structure. 11324 11325 This in combination with _AllocatorGetClusterData will create the 11326 correct structure needed as input for the allocator. 11327 11328 The checks for the completeness of the opcode must have already been 11329 done. 11330 11331 """ 11332 disk_space = _ComputeDiskSize(self.disk_template, self.disks) 11333 11334 if self.disk_template in constants.DTS_NET_MIRROR: 11335 self.required_nodes = 2 11336 else: 11337 self.required_nodes = 1 11338 request = { 11339 "name": self.name, 11340 "disk_template": self.disk_template, 11341 "tags": self.tags, 11342 "os": self.os, 11343 "vcpus": self.vcpus, 11344 "memory": self.mem_size, 11345 "disks": self.disks, 11346 "disk_space_total": disk_space, 11347 "nics": self.nics, 11348 "required_nodes": self.required_nodes, 11349 } 11350 return request
11351
11352 - def _AddRelocateInstance(self):
11353 """Add relocate instance data to allocator structure. 11354 11355 This in combination with _IAllocatorGetClusterData will create the 11356 correct structure needed as input for the allocator. 11357 11358 The checks for the completeness of the opcode must have already been 11359 done. 11360 11361 """ 11362 instance = self.cfg.GetInstanceInfo(self.name) 11363 if instance is None: 11364 raise errors.ProgrammerError("Unknown instance '%s' passed to" 11365 " IAllocator" % self.name) 11366 11367 if instance.disk_template not in constants.DTS_NET_MIRROR: 11368 raise errors.OpPrereqError("Can't relocate non-mirrored instances", 11369 errors.ECODE_INVAL) 11370 11371 if len(instance.secondary_nodes) != 1: 11372 raise errors.OpPrereqError("Instance has not exactly one secondary node", 11373 errors.ECODE_STATE) 11374 11375 self.required_nodes = 1 11376 disk_sizes = [{'size': disk.size} for disk in instance.disks] 11377 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes) 11378 11379 request = { 11380 "name": self.name, 11381 "disk_space_total": disk_space, 11382 "required_nodes": self.required_nodes, 11383 "relocate_from": self.relocate_from, 11384 } 11385 return request
11386
11387 - def _AddEvacuateNodes(self):
11388 """Add evacuate nodes data to allocator structure. 11389 11390 """ 11391 request = { 11392 "evac_nodes": self.evac_nodes 11393 } 11394 return request
11395
11396 - def _BuildInputData(self, fn):
11397 """Build input data structures. 11398 11399 """ 11400 self._ComputeClusterData() 11401 11402 request = fn() 11403 request["type"] = self.mode 11404 self.in_data["request"] = request 11405 11406 self.in_text = serializer.Dump(self.in_data)
11407
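_BuildInputData above serializes a single document combining the cluster snapshot from _ComputeClusterData with the per-mode request from _AddNewInstance, _AddRelocateInstance or _AddEvacuateNodes. The following heavily trimmed illustration shows roughly what such an input can look like for an allocation request; every value is invented, the real document carries many more node and instance fields, and the module uses serializer.Dump rather than the json module shown here:

  import json

  iallocator_in = {
    "version": 2,                       # constants.IALLOCATOR_VERSION at this time
    "cluster_name": "cluster.example.com",
    "cluster_tags": [],
    "enabled_hypervisors": ["kvm"],
    "nodegroups": {
      "uuid-g1": {"name": "default", "alloc_policy": "preferred"},
      },
    "nodes": {
      "node1.example.com": {"group": "uuid-g1", "total_memory": 16384,
                            "free_memory": 8192, "total_disk": 512000,
                            "free_disk": 300000, "total_cpus": 8},
      },
    "instances": {},
    "request": {                        # shape produced by _AddNewInstance
      "type": "allocate",               # the mode string for new allocations
      "name": "inst1.example.com",
      "disk_template": "drbd",
      "disks": [{"size": 10240, "mode": "w"}],
      "disk_space_total": 10368,        # disk sizes plus DRBD metadata overhead
      "memory": 1024,
      "vcpus": 1,
      "os": "debian-image",
      "tags": [],
      "nics": [{"mac": "auto", "ip": None, "mode": "bridged", "link": "br0"}],
      "required_nodes": 2,              # mirrored templates need two nodes
      },
    }

  print(json.dumps(iallocator_in, indent=2))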
11408 - def Run(self, name, validate=True, call_fn=None):
11409 """Run an instance allocator and return the results. 11410 11411 """ 11412 if call_fn is None: 11413 call_fn = self.rpc.call_iallocator_runner 11414 11415 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) 11416 result.Raise("Failure while running the iallocator script") 11417 11418 self.out_text = result.payload 11419 if validate: 11420 self._ValidateResult()
11421
11422 - def _ValidateResult(self):
11423     """Process the allocator results.
11424 
11425     This will process and, if successful, save the result in
11426     self.out_data and the other parameters.
11427 
11428     """
11429     try:
11430       rdict = serializer.Load(self.out_text)
11431     except Exception, err:
11432       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11433 
11434     if not isinstance(rdict, dict):
11435       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11436 
11437     # TODO: remove backwards compatibility in later versions
11438     if "nodes" in rdict and "result" not in rdict:
11439       rdict["result"] = rdict["nodes"]
11440       del rdict["nodes"]
11441 
11442     for key in "success", "info", "result":
11443       if key not in rdict:
11444         raise errors.OpExecError("Can't parse iallocator results:"
11445                                  " missing key '%s'" % key)
11446       setattr(self, key, rdict[key])
11447 
11448     if not isinstance(rdict["result"], list):
11449       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11450                                " is not a list")
11451     self.out_data = rdict
11452
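_ValidateResult above accepts a JSON object that must carry "success", "info" and "result" (with the legacy spelling "nodes" still honoured for "result"), and "result" must be a list; for an allocation run the list typically holds the chosen node names. A minimal well-formed response and its legacy counterpart, for illustration only (node names invented):

  import json

  # A plausible reply from an allocation run: a list of chosen node names.
  response = {
    "success": True,
    "info": "allocation successful",
    "result": ["node1.example.com", "node2.example.com"],
    }

  # Older scripts answered with "nodes" instead of "result"; it gets renamed.
  legacy = {"success": True, "info": "ok", "nodes": ["node1.example.com"]}
  if "nodes" in legacy and "result" not in legacy:
    legacy["result"] = legacy.pop("nodes")

  for rdict in (response, legacy):
    missing = [key for key in ("success", "info", "result") if key not in rdict]
    assert not missing and isinstance(rdict["result"], list)

  print(json.dumps(response))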
11453 11454 -class LUTestAllocator(NoHooksLU):
11455   """Run allocator tests.
11456 
11457   This LU runs the allocator tests.
11458 
11459   """
11460 - def CheckPrereq(self):
11461 """Check prerequisites. 11462 11463 This checks the opcode parameters depending on the director and mode test. 11464 11465 """ 11466 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 11467 for attr in ["mem_size", "disks", "disk_template", 11468 "os", "tags", "nics", "vcpus"]: 11469 if not hasattr(self.op, attr): 11470 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" % 11471 attr, errors.ECODE_INVAL) 11472 iname = self.cfg.ExpandInstanceName(self.op.name) 11473 if iname is not None: 11474 raise errors.OpPrereqError("Instance '%s' already in the cluster" % 11475 iname, errors.ECODE_EXISTS) 11476 if not isinstance(self.op.nics, list): 11477 raise errors.OpPrereqError("Invalid parameter 'nics'", 11478 errors.ECODE_INVAL) 11479 if not isinstance(self.op.disks, list): 11480 raise errors.OpPrereqError("Invalid parameter 'disks'", 11481 errors.ECODE_INVAL) 11482 for row in self.op.disks: 11483 if (not isinstance(row, dict) or 11484 "size" not in row or 11485 not isinstance(row["size"], int) or 11486 "mode" not in row or 11487 row["mode"] not in ['r', 'w']): 11488 raise errors.OpPrereqError("Invalid contents of the 'disks'" 11489 " parameter", errors.ECODE_INVAL) 11490 if self.op.hypervisor is None: 11491 self.op.hypervisor = self.cfg.GetHypervisorType() 11492 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 11493 fname = _ExpandInstanceName(self.cfg, self.op.name) 11494 self.op.name = fname 11495 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes 11496 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC: 11497 if not hasattr(self.op, "evac_nodes"): 11498 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on" 11499 " opcode input", errors.ECODE_INVAL) 11500 else: 11501 raise errors.OpPrereqError("Invalid test allocator mode '%s'" % 11502 self.op.mode, errors.ECODE_INVAL) 11503 11504 if self.op.direction == constants.IALLOCATOR_DIR_OUT: 11505 if self.op.allocator is None: 11506 raise errors.OpPrereqError("Missing allocator name", 11507 errors.ECODE_INVAL) 11508 elif self.op.direction != constants.IALLOCATOR_DIR_IN: 11509 raise errors.OpPrereqError("Wrong allocator test '%s'" % 11510 self.op.direction, errors.ECODE_INVAL)
11511
11512 - def Exec(self, feedback_fn):
11513     """Run the allocator test.
11514 
11515     """
11516     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
11517       ial = IAllocator(self.cfg, self.rpc,
11518                        mode=self.op.mode,
11519                        name=self.op.name,
11520                        mem_size=self.op.mem_size,
11521                        disks=self.op.disks,
11522                        disk_template=self.op.disk_template,
11523                        os=self.op.os,
11524                        tags=self.op.tags,
11525                        nics=self.op.nics,
11526                        vcpus=self.op.vcpus,
11527                        hypervisor=self.op.hypervisor,
11528                        )
11529     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
11530       ial = IAllocator(self.cfg, self.rpc,
11531                        mode=self.op.mode,
11532                        name=self.op.name,
11533                        relocate_from=list(self.relocate_from),
11534                        )
11535     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
11536       ial = IAllocator(self.cfg, self.rpc,
11537                        mode=self.op.mode,
11538                        evac_nodes=self.op.evac_nodes)
11539     else:
11540       raise errors.ProgrammerError("Unhandled mode %s in"
11541                                    " LUTestAllocator.Exec", self.op.mode)
11542 
11543     if self.op.direction == constants.IALLOCATOR_DIR_IN:
11544       result = ial.in_text
11545     else:
11546       ial.Run(self.op.allocator, validate=False)
11547       result = ial.out_text
11548     return result
11549 11550 11551 #: Query type implementations 11552 _QUERY_IMPL = { 11553 constants.QR_INSTANCE: _InstanceQuery, 11554 constants.QR_NODE: _NodeQuery, 11555 constants.QR_GROUP: _GroupQuery, 11556 }
11557 11558 11559 -def _GetQueryImplementation(name):
11560   """Returns the implementation for a query type.
11561 
11562   @param name: Query type, must be one of L{constants.QR_OP_QUERY}
11563 
11564   """
11565   try:
11566     return _QUERY_IMPL[name]
11567   except KeyError:
11568     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11569                                errors.ECODE_INVAL)
11570