Package ganeti :: Module cmdlib

Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import logging 
   36  import copy 
   37  import OpenSSL 
   38  import socket 
   39  import tempfile 
   40  import shutil 
   41  import itertools 
   42  import operator 
   43   
   44  from ganeti import ssh 
   45  from ganeti import utils 
   46  from ganeti import errors 
   47  from ganeti import hypervisor 
   48  from ganeti import locking 
   49  from ganeti import constants 
   50  from ganeti import objects 
   51  from ganeti import ssconf 
   52  from ganeti import uidpool 
   53  from ganeti import compat 
   54  from ganeti import masterd 
   55  from ganeti import netutils 
   56  from ganeti import query 
   57  from ganeti import qlang 
   58  from ganeti import opcodes 
   59  from ganeti import ht 
   60  from ganeti import rpc 
   61  from ganeti import runtime 
   62  from ganeti import pathutils 
   63  from ganeti import vcluster 
   64  from ganeti import network 
   65  from ganeti.masterd import iallocator 
   66   
   67  import ganeti.masterd.instance # pylint: disable=W0611 
   68   
   69   
   70  # States of instance 
   71  INSTANCE_DOWN = [constants.ADMINST_DOWN] 
   72  INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP] 
   73  INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE] 
   74   
   75  #: Instance status in which an instance can be marked as offline/online 
   76  CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([ 
   77    constants.ADMINST_OFFLINE, 
   78    ])) 
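
These state lists are consumed by the state checks defined further down in this module. As a minimal sketch (the instance name and the calling LU are hypothetical; only _CheckInstanceState and the constants above come from this module), an LU's CheckPrereq might use them like this::

    # Hypothetical check inside an LU's CheckPrereq()
    instance = self.cfg.GetInstanceInfo("instance1.example.com")
    # Refuse to act unless the instance is administratively down or offline
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot operate on a running instance")
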
   79 
   80 
   81  class ResultWithJobs:
   82    """Data container for LU results with jobs.
   83 
   84    Instances of this class returned from L{LogicalUnit.Exec} will be recognized
   85    by L{mcpu._ProcessResult}. The latter will then submit the jobs
   86    contained in the C{jobs} attribute and include the job IDs in the opcode
   87    result.
   88 
   89    """
   90    def __init__(self, jobs, **kwargs):
   91      """Initializes this class.
   92 
   93      Additional return values can be specified as keyword arguments.
   94 
   95      @type jobs: list of lists of L{opcodes.OpCode}
   96      @param jobs: A list of lists of opcode objects
   97 
   98      """
   99      self.jobs = jobs
  100      self.other = kwargs
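
As a hedged sketch of how this container is meant to be used: an LU's Exec can hand follow-up work to the master daemon by returning a ResultWithJobs. The opcode class, its parameters and the local variables below are assumptions made for illustration only::

    # Inside some LU's Exec(); each inner list is one job, and separate inner
    # lists become separate jobs submitted by mcpu._ProcessResult.
    jobs = [[opcodes.OpInstanceStartup(instance_name=name)]  # assumed opcode
            for name in affected_instances]                  # assumed variable
    # Any extra keyword arguments end up in the "other" attribute
    return ResultWithJobs(jobs, warnings=collected_warnings)
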
101
  102 
  103  class LogicalUnit(object):
  104    """Logical Unit base class.
  105 
  106    Subclasses must follow these rules:
  107      - implement ExpandNames
  108      - implement CheckPrereq (except when tasklets are used)
  109      - implement Exec (except when tasklets are used)
  110      - implement BuildHooksEnv
  111      - implement BuildHooksNodes
  112      - redefine HPATH and HTYPE
  113      - optionally redefine their run requirements:
  114          REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  115 
  116    Note that all commands require root permissions.
  117 
  118    @ivar dry_run_result: the value (if any) that will be returned to the caller
  119        in dry-run mode (signalled by opcode dry_run parameter)
  120 
  121    """
  122    HPATH = None
  123    HTYPE = None
  124    REQ_BGL = True
  125 
126 - def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit. 128 129 This needs to be overridden in derived classes in order to check op 130 validity. 131 132 """ 133 self.proc = processor 134 self.op = op 135 self.cfg = context.cfg 136 self.glm = context.glm 137 # readability alias 138 self.owned_locks = context.glm.list_owned 139 self.context = context 140 self.rpc = rpc_runner 141 142 # Dictionaries used to declare locking needs to mcpu 143 self.needed_locks = None 144 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False) 146 147 self.add_locks = {} 148 self.remove_locks = {} 149 150 # Used to force good behavior when calling helper functions 151 self.recalculate_locks = {} 152 153 # logging 154 self.Log = processor.Log # pylint: disable=C0103 155 self.LogWarning = processor.LogWarning # pylint: disable=C0103 156 self.LogInfo = processor.LogInfo # pylint: disable=C0103 157 self.LogStep = processor.LogStep # pylint: disable=C0103 158 # support for dry-run 159 self.dry_run_result = None 160 # support for generic debug attribute 161 if (not hasattr(self.op, "debug_level") or 162 not isinstance(self.op.debug_level, int)): 163 self.op.debug_level = 0 164 165 # Tasklets 166 self.tasklets = None 167 168 # Validate opcode parameters and set defaults 169 self.op.Validate(True) 170 171 self.CheckArguments()
172
  173    def CheckArguments(self):
  174      """Check syntactic validity for the opcode arguments.
  175 
  176      This method is for doing a simple syntactic check and ensuring the
  177      validity of opcode parameters, without any cluster-related
  178      checks. While the same can be accomplished in ExpandNames and/or
  179      CheckPrereq, doing these separately is better because:
  180 
  181        - ExpandNames is left as purely a lock-related function
  182        - CheckPrereq is run after we have acquired locks (and possibly
  183          waited for them)
  184 
  185      The function is allowed to change the self.op attribute so that
  186      later methods need no longer worry about missing parameters.
  187 
  188      """
  189      pass
190
191 - def ExpandNames(self):
192 """Expand names for this LU. 193 194 This method is called before starting to execute the opcode, and it should 195 update all the parameters of the opcode to their canonical form (e.g. a 196 short node name must be fully expanded after this method has successfully 197 completed). This way locking, hooks, logging, etc. can work correctly. 198 199 LUs which implement this method must also populate the self.needed_locks 200 member, as a dict with lock levels as keys, and a list of needed lock names 201 as values. Rules: 202 203 - use an empty dict if you don't need any lock 204 - if you don't need any lock at a particular level omit that 205 level (note that in this case C{DeclareLocks} won't be called 206 at all for that level) 207 - if you need locks at a level, but you can't calculate it in 208 this function, initialise that level with an empty list and do 209 further processing in L{LogicalUnit.DeclareLocks} (see that 210 function's docstring) 211 - don't put anything for the BGL level 212 - if you want all locks at a level use L{locking.ALL_SET} as a value 213 214 If you need to share locks (rather than acquire them exclusively) at one 215 level you can modify self.share_locks, setting a true value (usually 1) for 216 that level. By default locks are not shared. 217 218 This function can also define a list of tasklets, which then will be 219 executed in order instead of the usual LU-level CheckPrereq and Exec 220 functions, if those are not defined by the LU. 221 222 Examples:: 223 224 # Acquire all nodes and one instance 225 self.needed_locks = { 226 locking.LEVEL_NODE: locking.ALL_SET, 227 locking.LEVEL_INSTANCE: ['instance1.example.com'], 228 } 229 # Acquire just two nodes 230 self.needed_locks = { 231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 232 } 233 # Acquire no locks 234 self.needed_locks = {} # No, you can't leave it to the default value None 235 236 """ 237 # The implementation of this method is mandatory only if the new LU is 238 # concurrent, so that old LUs don't need to be changed all at the same 239 # time. 240 if self.REQ_BGL: 241 self.needed_locks = {} # Exclusive LUs don't need locks. 242 else: 243 raise NotImplementedError
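
To complement the docstring examples above, here is a sketch of an ExpandNames that takes node locks in shared mode; the LU itself is hypothetical, while _ExpandInstanceName, locking.ALL_SET and share_locks are the module's own facilities::

    def ExpandNames(self):
      # Lock the one instance exclusively, but share the node locks so that
      # other LUs can read node data concurrently
      self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                  self.op.instance_name)
      self.needed_locks = {
        locking.LEVEL_INSTANCE: [self.op.instance_name],
        locking.LEVEL_NODE: locking.ALL_SET,
        }
      self.share_locks[locking.LEVEL_NODE] = 1
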
244
245 - def DeclareLocks(self, level):
246 """Declare LU locking needs for a level 247 248 While most LUs can just declare their locking needs at ExpandNames time, 249 sometimes there's the need to calculate some locks after having acquired 250 the ones before. This function is called just before acquiring locks at a 251 particular level, but after acquiring the ones at lower levels, and permits 252 such calculations. It can be used to modify self.needed_locks, and by 253 default it does nothing. 254 255 This function is only called if you have something already set in 256 self.needed_locks for the level. 257 258 @param level: Locking level which is going to be locked 259 @type level: member of L{ganeti.locking.LEVELS} 260 261 """
262
263 - def CheckPrereq(self):
264 """Check prerequisites for this LU. 265 266 This method should check that the prerequisites for the execution 267 of this LU are fulfilled. It can do internode communication, but 268 it should be idempotent - no cluster or system changes are 269 allowed. 270 271 The method should raise errors.OpPrereqError in case something is 272 not fulfilled. Its return value is ignored. 273 274 This method should also update all the parameters of the opcode to 275 their canonical form if it hasn't been done by ExpandNames before. 276 277 """ 278 if self.tasklets is not None: 279 for (idx, tl) in enumerate(self.tasklets): 280 logging.debug("Checking prerequisites for tasklet %s/%s", 281 idx + 1, len(self.tasklets)) 282 tl.CheckPrereq() 283 else: 284 pass
285
286 - def Exec(self, feedback_fn):
287 """Execute the LU. 288 289 This method should implement the actual work. It should raise 290 errors.OpExecError for failures that are somewhat dealt with in 291 code, or expected. 292 293 """ 294 if self.tasklets is not None: 295 for (idx, tl) in enumerate(self.tasklets): 296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 297 tl.Exec(feedback_fn) 298 else: 299 raise NotImplementedError
300
301 - def BuildHooksEnv(self):
302 """Build hooks environment for this LU. 303 304 @rtype: dict 305 @return: Dictionary containing the environment that will be used for 306 running the hooks for this LU. The keys of the dict must not be prefixed 307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner 308 will extend the environment with additional variables. If no environment 309 should be defined, an empty dictionary should be returned (not C{None}). 310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 311 will not be called. 312 313 """ 314 raise NotImplementedError
315
316 - def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks. 318 319 @rtype: tuple; (list, list) 320 @return: Tuple containing a list of node names on which the hook 321 should run before the execution and a list of node names on which the 322 hook should run after the execution. No nodes should be returned as an 323 empty list (and not None). 324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 325 will not be called. 326 327 """ 328 raise NotImplementedError
329
  330    def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  331      """Notify the LU about the results of its hooks.
  332 
  333      This method is called every time a hooks phase is executed, and notifies
  334      the Logical Unit about the hooks' result. The LU can then use it to alter
  335      its result based on the hooks. By default the method does nothing and the
  336      previous result is passed back unchanged, but any LU can override it if it
  337      wants to use the local cluster hook-scripts somehow.
  338 
  339      @param phase: one of L{constants.HOOKS_PHASE_POST} or
  340          L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
  341      @param hook_results: the results of the multi-node hooks rpc call
  342      @param feedback_fn: function used to send feedback back to the caller
  343      @param lu_result: the previous Exec result this LU had, or None
  344          in the PRE phase
  345      @return: the new Exec result, based on the previous result
  346          and hook results
  347 
  348      """
  349      # The API must be kept, thus we ignore the unused-argument and
  350      # could-be-a-function warnings
  351      # pylint: disable=W0613,R0201
  352      return lu_result
353
354 - def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance. 356 357 Many LUs that work on an instance take its name in self.op.instance_name 358 and need to expand it and then declare the expanded name for locking. This 359 function does it, and then updates self.op.instance_name to the expanded 360 name. It also initializes needed_locks as a dict, if this hasn't been done 361 before. 362 363 """ 364 if self.needed_locks is None: 365 self.needed_locks = {} 366 else: 367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 368 "_ExpandAndLockInstance called with instance-level locks set" 369 self.op.instance_name = _ExpandInstanceName(self.cfg, 370 self.op.instance_name) 371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
372
  373    def _LockInstancesNodes(self, primary_only=False,
  374                            level=locking.LEVEL_NODE):
  375      """Helper function to declare instances' nodes for locking.
  376 
  377      This function should be called after locking one or more instances to lock
  378      their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
  379      with all primary or secondary nodes for instances already locked and
  380      present in self.needed_locks[locking.LEVEL_INSTANCE].
  381 
  382      It should be called from DeclareLocks, and for safety only works if
  383      self.recalculate_locks[locking.LEVEL_NODE] is set.
  384 
  385      In the future it may grow parameters to just lock some instance's nodes, or
  386      to just lock primaries or secondary nodes, if needed.
  387 
  388      It should be called from DeclareLocks in a way similar to::
  389 
  390        if level == locking.LEVEL_NODE:
  391          self._LockInstancesNodes()
  392 
  393      @type primary_only: boolean
  394      @param primary_only: only lock primary nodes of locked instances
  395      @param level: Which lock level to use for locking nodes
  396 
  397      """
  398      assert level in self.recalculate_locks, \
  399        "_LockInstancesNodes helper function called with no nodes to recalculate"
  400 
  401      # TODO: check if we've really been called with the instance locks held
  402 
  403      # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
  404      # future we might want to have different behaviors depending on the value
  405      # of self.recalculate_locks[locking.LEVEL_NODE]
  406      wanted_nodes = []
  407      locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
  408      for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
  409        wanted_nodes.append(instance.primary_node)
  410        if not primary_only:
  411          wanted_nodes.extend(instance.secondary_nodes)
  412 
  413      if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
  414        self.needed_locks[level] = wanted_nodes
  415      elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
  416        self.needed_locks[level].extend(wanted_nodes)
  417      else:
  418        raise errors.ProgrammerError("Unknown recalculation mode")
  419 
  420      del self.recalculate_locks[level]
421
422 423 -class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks. 425 426 This LU is intended as a parent for other LogicalUnits which will 427 run no hooks, in order to reduce duplicate code. 428 429 """ 430 HPATH = None 431 HTYPE = None 432
433 - def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu. 435 436 This just raises an error. 437 438 """ 439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
440
441 - def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU. 443 444 """ 445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446
447 448 -class Tasklet:
449 """Tasklet base class. 450 451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 452 they can mix legacy code with tasklets. Locking needs to be done in the LU, 453 tasklets know nothing about locks. 454 455 Subclasses must follow these rules: 456 - Implement CheckPrereq 457 - Implement Exec 458 459 """
460 - def __init__(self, lu):
461 self.lu = lu 462 463 # Shortcuts 464 self.cfg = lu.cfg 465 self.rpc = lu.rpc
466
  467    def CheckPrereq(self):
  468      """Check prerequisites for this tasklet.
  469 
  470      This method should check whether the prerequisites for the execution of
  471      this tasklet are fulfilled. It can do internode communication, but it
  472      should be idempotent - no cluster or system changes are allowed.
  473 
  474      The method should raise errors.OpPrereqError in case something is not
  475      fulfilled. Its return value is ignored.
  476 
  477      This method should also update all parameters to their canonical form if it
  478      hasn't been done before.
  479 
  480      """
  481      pass
482
483 - def Exec(self, feedback_fn):
484 """Execute the tasklet. 485 486 This method should implement the actual work. It should raise 487 errors.OpExecError for failures that are somewhat dealt with in code, or 488 expected. 489 490 """ 491 raise NotImplementedError
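
A minimal, purely illustrative tasklet subclass and the way an owning LU would wire it up; the class name, its parameters and the rename scenario are invented::

    class _ExampleRenameTasklet(Tasklet):
      """Hypothetical tasklet; locking stays the owning LU's job."""
      def __init__(self, lu, instance_name, new_name):
        Tasklet.__init__(self, lu)
        self.instance_name = instance_name
        self.new_name = new_name

      def CheckPrereq(self):
        # Idempotent checks only
        if self.cfg.GetInstanceInfo(self.instance_name) is None:
          raise errors.OpPrereqError("Instance %s not known" %
                                     self.instance_name, errors.ECODE_NOENT)

      def Exec(self, feedback_fn):
        feedback_fn("Renaming %s to %s" % (self.instance_name, self.new_name))
        # ... the actual work would go here ...

    # In the owning LU (e.g. in ExpandNames):
    #   self.tasklets = [_ExampleRenameTasklet(self, name, new_name)]
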
492
493 494 -class _QueryBase:
495 """Base for query utility classes. 496 497 """ 498 #: Attribute holding field definitions 499 FIELDS = None 500 501 #: Field to sort by 502 SORT_FIELD = "name" 503
504 - def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class. 506 507 """ 508 self.use_locking = use_locking 509 510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter, 511 namefield=self.SORT_FIELD) 512 self.requested_data = self.query.RequestedData() 513 self.names = self.query.RequestedNames() 514 515 # Sort only if no names were requested 516 self.sort_by_name = not self.names 517 518 self.do_locking = None 519 self.wanted = None
520
521 - def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query. 523 524 """ 525 if self.do_locking: 526 names = lu.owned_locks(lock_level) 527 else: 528 names = all_names 529 530 if self.wanted == locking.ALL_SET: 531 assert not self.names 532 # caller didn't specify names, so ordering is not important 533 return utils.NiceSort(names) 534 535 # caller specified names and we must keep the same order 536 assert self.names 537 assert not self.do_locking or lu.glm.is_owned(lock_level) 538 539 missing = set(self.wanted).difference(names) 540 if missing: 541 raise errors.OpExecError("Some items were removed before retrieving" 542 " their data: %s" % missing) 543 544 # Return expanded names 545 return self.wanted
546
547 - def ExpandNames(self, lu):
548 """Expand names for this query. 549 550 See L{LogicalUnit.ExpandNames}. 551 552 """ 553 raise NotImplementedError()
554
555 - def DeclareLocks(self, lu, level):
556 """Declare locks for this query. 557 558 See L{LogicalUnit.DeclareLocks}. 559 560 """ 561 raise NotImplementedError()
562
563 - def _GetQueryData(self, lu):
564 """Collects all data for this query. 565 566 @return: Query data object 567 568 """ 569 raise NotImplementedError()
570
571 - def NewStyleQuery(self, lu):
572 """Collect data and execute query. 573 574 """ 575 return query.GetQueryResponse(self.query, self._GetQueryData(lu), 576 sort_by_name=self.sort_by_name)
577
578 - def OldStyleQuery(self, lu):
579 """Collect data and execute query. 580 581 """ 582 return self.query.OldStyleQuery(self._GetQueryData(lu), 583 sort_by_name=self.sort_by_name)
584
585 586 -def _ShareAll():
587 """Returns a dict declaring all lock levels shared. 588 589 """ 590 return dict.fromkeys(locking.LEVELS, 1)
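
A typical (sketched) use is in a read-only LU that wants every lock it takes to be shared::

    def ExpandNames(self):
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        }
      # Share all levels (value 1) instead of setting them one by one
      self.share_locks = _ShareAll()
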
591
  592 
  593  def _AnnotateDiskParams(instance, devs, cfg):
  594    """Little helper wrapper to the rpc annotation method.
  595 
  596    @param instance: The instance object
  597    @type devs: List of L{objects.Disk}
  598    @param devs: The root devices (not any of their children!)
  599    @param cfg: The config object
  600    @return: The annotated disk copies
  601    @see: L{rpc.AnnotateDiskParams}
  602 
  603    """
  604    return rpc.AnnotateDiskParams(instance.disk_template, devs,
  605                                  cfg.GetInstanceDiskParams(instance))
606
607 608 -def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes, 609 cur_group_uuid):
610 """Checks if node groups for locked instances are still correct. 611 612 @type cfg: L{config.ConfigWriter} 613 @param cfg: Cluster configuration 614 @type instances: dict; string as key, L{objects.Instance} as value 615 @param instances: Dictionary, instance name as key, instance object as value 616 @type owned_groups: iterable of string 617 @param owned_groups: List of owned groups 618 @type owned_nodes: iterable of string 619 @param owned_nodes: List of owned nodes 620 @type cur_group_uuid: string or None 621 @param cur_group_uuid: Optional group UUID to check against instance's groups 622 623 """ 624 for (name, inst) in instances.items(): 625 assert owned_nodes.issuperset(inst.all_nodes), \ 626 "Instance %s's nodes changed while we kept the lock" % name 627 628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups) 629 630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \ 631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
632
  633 
  634  def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
  635                               primary_only=False):
  636    """Checks if the owned node groups are still correct for an instance.
  637 
  638    @type cfg: L{config.ConfigWriter}
  639    @param cfg: The cluster configuration
  640    @type instance_name: string
  641    @param instance_name: Instance name
  642    @type owned_groups: set or frozenset
  643    @param owned_groups: List of currently owned node groups
  644    @type primary_only: boolean
  645    @param primary_only: Whether to check node groups for only the primary node
  646 
  647    """
  648    inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
  649 
  650    if not owned_groups.issuperset(inst_groups):
  651      raise errors.OpPrereqError("Instance %s's node groups changed since"
  652                                 " locks were acquired, current groups"
  653                                 " are '%s', owning groups '%s'; retry the"
  654                                 " operation" %
  655                                 (instance_name,
  656                                  utils.CommaJoin(inst_groups),
  657                                  utils.CommaJoin(owned_groups)),
  658                                 errors.ECODE_STATE)
  659 
  660    return inst_groups
661
662 663 -def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct. 665 666 @type cfg: L{config.ConfigWriter} 667 @param cfg: The cluster configuration 668 @type group_uuid: string 669 @param group_uuid: Node group UUID 670 @type owned_instances: set or frozenset 671 @param owned_instances: List of currently owned instances 672 673 """ 674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid) 675 if owned_instances != wanted_instances: 676 raise errors.OpPrereqError("Instances in node group '%s' changed since" 677 " locks were acquired, wanted '%s', have '%s';" 678 " retry the operation" % 679 (group_uuid, 680 utils.CommaJoin(wanted_instances), 681 utils.CommaJoin(owned_instances)), 682 errors.ECODE_STATE) 683 684 return wanted_instances
685
686 687 -def _SupportsOob(cfg, node):
688 """Tells if node supports OOB. 689 690 @type cfg: L{config.ConfigWriter} 691 @param cfg: The cluster configuration 692 @type node: L{objects.Node} 693 @param node: The node 694 @return: The OOB script if supported or an empty string otherwise 695 696 """ 697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
698
699 700 -def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node. 702 703 @type cfg: L{config.ConfigWriter} 704 @param cfg: The cluster configuration 705 @type node: L{objects.Node} 706 @param node: The node 707 @rtype: bool 708 @return: The effective value of exclusive_storage 709 710 """ 711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
712
713 714 -def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node. 716 717 @type cfg: L{config.ConfigWriter} 718 @param cfg: The cluster configuration 719 @type nodename: string 720 @param nodename: The node 721 @rtype: bool 722 @return: The effective value of exclusive_storage 723 @raise errors.OpPrereqError: if no node exists with the given name 724 725 """ 726 ni = cfg.GetNodeInfo(nodename) 727 if ni is None: 728 raise errors.OpPrereqError("Invalid node name %s" % nodename, 729 errors.ECODE_NOENT) 730 return _IsExclusiveStorageEnabledNode(cfg, ni)
731
732 733 -def _CopyLockList(names):
734 """Makes a copy of a list of lock names. 735 736 Handles L{locking.ALL_SET} correctly. 737 738 """ 739 if names == locking.ALL_SET: 740 return locking.ALL_SET 741 else: 742 return names[:]
743
744 745 -def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names. 747 748 @type lu: L{LogicalUnit} 749 @param lu: the logical unit on whose behalf we execute 750 @type nodes: list 751 @param nodes: list of node names or None for all nodes 752 @rtype: list 753 @return: the list of nodes, sorted 754 @raise errors.ProgrammerError: if the nodes parameter is wrong type 755 756 """ 757 if nodes: 758 return [_ExpandNodeName(lu.cfg, name) for name in nodes] 759 760 return utils.NiceSort(lu.cfg.GetNodeList())
761
762 763 -def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names. 765 766 @type lu: L{LogicalUnit} 767 @param lu: the logical unit on whose behalf we execute 768 @type instances: list 769 @param instances: list of instance names or None for all instances 770 @rtype: list 771 @return: the list of instances, sorted 772 @raise errors.OpPrereqError: if the instances parameter is wrong type 773 @raise errors.OpPrereqError: if any of the passed instances is not found 774 775 """ 776 if instances: 777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 778 else: 779 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 780 return wanted
781
  782 
  783  def _GetUpdatedParams(old_params, update_dict,
  784                        use_default=True, use_none=False):
  785    """Return the new version of a parameter dictionary.
  786 
  787    @type old_params: dict
  788    @param old_params: old parameters
  789    @type update_dict: dict
  790    @param update_dict: dict containing new parameter values, or
  791        constants.VALUE_DEFAULT to reset the parameter to its default
  792        value
  793    @type use_default: boolean
  794    @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
  795        values as 'to be deleted' values
  796    @type use_none: boolean
  797    @param use_none: whether to recognise C{None} values as 'to be
  798        deleted' values
  799    @rtype: dict
  800    @return: the new parameter dictionary
  801 
  802    """
  803    params_copy = copy.deepcopy(old_params)
  804    for key, val in update_dict.iteritems():
  805      if ((use_default and val == constants.VALUE_DEFAULT) or
  806          (use_none and val is None)):
  807        try:
  808          del params_copy[key]
  809        except KeyError:
  810          pass
  811      else:
  812        params_copy[key] = val
  813    return params_copy
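
A small worked example of the merge semantics (the parameter names and values are invented): with use_default left at True, keys set to constants.VALUE_DEFAULT are dropped, everything else overrides or extends the old dictionary::

    old = {"vcpus": 2, "memory": 512, "kernel_path": "/boot/vmlinuz"}
    update = {"memory": 1024, "kernel_path": constants.VALUE_DEFAULT}
    _GetUpdatedParams(old, update)
    # -> {"vcpus": 2, "memory": 1024}   ("kernel_path" reverts to its default)
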
814
815 816 -def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy. 818 819 @param group_policy: whether this policy applies to a group and thus 820 we should support removal of policy entries 821 822 """ 823 use_none = use_default = group_policy 824 ipolicy = copy.deepcopy(old_ipolicy) 825 for key, value in new_ipolicy.items(): 826 if key not in constants.IPOLICY_ALL_KEYS: 827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key, 828 errors.ECODE_INVAL) 829 if key in constants.IPOLICY_ISPECS: 830 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value, 831 use_none=use_none, 832 use_default=use_default) 833 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES) 834 else: 835 if (not value or value == [constants.VALUE_DEFAULT] or 836 value == constants.VALUE_DEFAULT): 837 if group_policy: 838 del ipolicy[key] 839 else: 840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'" 841 " on the cluster'" % key, 842 errors.ECODE_INVAL) 843 else: 844 if key in constants.IPOLICY_PARAMETERS: 845 # FIXME: we assume all such values are float 846 try: 847 ipolicy[key] = float(value) 848 except (TypeError, ValueError), err: 849 raise errors.OpPrereqError("Invalid value for attribute" 850 " '%s': '%s', error: %s" % 851 (key, value, err), errors.ECODE_INVAL) 852 else: 853 # FIXME: we assume all others are lists; this should be redone 854 # in a nicer way 855 ipolicy[key] = list(value) 856 try: 857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy) 858 except errors.ConfigurationError, err: 859 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 860 errors.ECODE_INVAL) 861 return ipolicy
862
863 864 -def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type. 866 867 @param base: The dict with the old data 868 @param updates: The dict with the new data 869 @param type_check: Dict suitable to ForceDictType to verify correct types 870 @returns: A new dict with updated and verified values 871 872 """ 873 def fn(old, value): 874 new = _GetUpdatedParams(old, value) 875 utils.ForceDictType(new, type_check) 876 return new
877 878 ret = copy.deepcopy(base) 879 ret.update(dict((key, fn(base.get(key, {}), value)) 880 for key, value in updates.items())) 881 return ret 882
883 884 -def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object 886 887 @param op_input: The input dict from the opcode 888 @param obj_input: The input dict from the objects 889 @return: The verified and updated dict 890 891 """ 892 if op_input: 893 invalid_hvs = set(op_input) - constants.HYPER_TYPES 894 if invalid_hvs: 895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:" 896 " %s" % utils.CommaJoin(invalid_hvs), 897 errors.ECODE_INVAL) 898 if obj_input is None: 899 obj_input = {} 900 type_check = constants.HVSTS_PARAMETER_TYPES 901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check) 902 903 return None
904
905 906 -def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object 908 909 @param op_input: The input dict from the opcode 910 @param obj_input: The input dict from the objects 911 @return: The verified and updated dict 912 """ 913 if op_input: 914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES 915 if invalid_dst: 916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" % 917 utils.CommaJoin(invalid_dst), 918 errors.ECODE_INVAL) 919 type_check = constants.DSS_PARAMETER_TYPES 920 if obj_input is None: 921 obj_input = {} 922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value, 923 type_check)) 924 for key, value in op_input.items()) 925 926 return None
927
928 929 -def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU. 931 932 @type lu: L{LogicalUnit} 933 @param level: Lock level 934 @type names: list or None 935 @param names: Names of locks to release 936 @type keep: list or None 937 @param keep: Names of locks to retain 938 939 """ 940 assert not (keep is not None and names is not None), \ 941 "Only one of the 'names' and the 'keep' parameters can be given" 942 943 if names is not None: 944 should_release = names.__contains__ 945 elif keep: 946 should_release = lambda name: name not in keep 947 else: 948 should_release = None 949 950 owned = lu.owned_locks(level) 951 if not owned: 952 # Not owning any lock at this level, do nothing 953 pass 954 955 elif should_release: 956 retain = [] 957 release = [] 958 959 # Determine which locks to release 960 for name in owned: 961 if should_release(name): 962 release.append(name) 963 else: 964 retain.append(name) 965 966 assert len(lu.owned_locks(level)) == (len(retain) + len(release)) 967 968 # Release just some locks 969 lu.glm.release(level, names=release) 970 971 assert frozenset(lu.owned_locks(level)) == frozenset(retain) 972 else: 973 # Release everything 974 lu.glm.release(level) 975 976 assert not lu.glm.is_owned(level), "No locks should be owned"
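
A sketched example of the usual pattern: after CheckPrereq has worked out which nodes the LU really needs, everything else is given back (the self.instance attribute is an assumption about the calling LU)::

    # Keep only the locks of the instance's own nodes, release the rest
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=[self.instance.primary_node] +
                       list(self.instance.secondary_nodes))
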
977
978 979 -def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name. 981 982 @type instances: list of L{objects.Instance} 983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value 984 985 """ 986 return dict(((node, vol), inst.name) 987 for inst in instances 988 for (node, vols) in inst.MapLVsByNode().items() 989 for vol in vols)
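
For clarity, the shape of the returned mapping, with invented node, volume and instance names::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
     ("node2.example.com", "xenvg/disk0"): "inst1.example.com",  # DRBD peer
     ("node1.example.com", "xenvg/disk1"): "inst2.example.com"}
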
990
991 992 -def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node. 994 995 """ 996 hm = lu.proc.BuildHooksManager(lu) 997 try: 998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name]) 999 except Exception, err: # pylint: disable=W0703 1000 lu.LogWarning("Errors occurred running hooks on %s: %s", 1001 node_name, err)
1002
1003 1004 -def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid. 1006 1007 @type static: L{utils.FieldSet} 1008 @param static: static fields set 1009 @type dynamic: L{utils.FieldSet} 1010 @param dynamic: dynamic fields set 1011 1012 """ 1013 f = utils.FieldSet() 1014 f.Extend(static) 1015 f.Extend(dynamic) 1016 1017 delta = f.NonMatching(selected) 1018 if delta: 1019 raise errors.OpPrereqError("Unknown output fields selected: %s" 1020 % ",".join(delta), errors.ECODE_INVAL)
1021
 1022 
 1023  def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
 1024    """Make sure that none of the given parameters is global.
 1025 
 1026    If a global parameter is found, an L{errors.OpPrereqError} exception is
 1027    raised. This is used to avoid setting global parameters for individual nodes.
 1028 
 1029    @type params: dictionary
 1030    @param params: Parameters to check
 1031    @type glob_pars: dictionary
 1032    @param glob_pars: Forbidden parameters
 1033    @type kind: string
 1034    @param kind: Kind of parameters (e.g. "node")
 1035    @type bad_levels: string
 1036    @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
 1037        "instance")
 1038    @type good_levels: string
 1039    @param good_levels: Level(s) at which the parameters are allowed (e.g.
 1040        "cluster or group")
 1041 
 1042    """
 1043    used_globals = glob_pars.intersection(params)
 1044    if used_globals:
 1045      msg = ("The following %s parameters are global and cannot"
 1046             " be customized at %s level, please modify them at"
 1047             " %s level: %s" %
 1048             (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
 1049      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1050
1051 1052 -def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online. 1054 1055 @param lu: the LU on behalf of which we make the check 1056 @param node: the node to check 1057 @param msg: if passed, should be a message to replace the default one 1058 @raise errors.OpPrereqError: if the node is offline 1059 1060 """ 1061 if msg is None: 1062 msg = "Can't use offline node" 1063 if lu.cfg.GetNodeInfo(node).offline: 1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1065
1066 1067 -def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained. 1069 1070 @param lu: the LU on behalf of which we make the check 1071 @param node: the node to check 1072 @raise errors.OpPrereqError: if the node is drained 1073 1074 """ 1075 if lu.cfg.GetNodeInfo(node).drained: 1076 raise errors.OpPrereqError("Can't use drained node %s" % node, 1077 errors.ECODE_STATE)
1078
1079 1080 -def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable. 1082 1083 @param lu: the LU on behalf of which we make the check 1084 @param node: the node to check 1085 @raise errors.OpPrereqError: if the node is not vm capable 1086 1087 """ 1088 if not lu.cfg.GetNodeInfo(node).vm_capable: 1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 1090 errors.ECODE_STATE)
1091
1092 1093 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS. 1095 1096 @param lu: the LU on behalf of which we make the check 1097 @param node: the node to check 1098 @param os_name: the OS to query about 1099 @param force_variant: whether to ignore variant errors 1100 @raise errors.OpPrereqError: if the node is not supporting the OS 1101 1102 """ 1103 result = lu.rpc.call_os_get(node, os_name) 1104 result.Raise("OS '%s' not in supported OS list for node %s" % 1105 (os_name, node), 1106 prereq=True, ecode=errors.ECODE_INVAL) 1107 if not force_variant: 1108 _CheckOSVariant(result.payload, os_name)
1109
1110 1111 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip. 1113 1114 @type lu: L{LogicalUnit} 1115 @param lu: the LU on behalf of which we make the check 1116 @type node: string 1117 @param node: the node to check 1118 @type secondary_ip: string 1119 @param secondary_ip: the ip to check 1120 @type prereq: boolean 1121 @param prereq: whether to throw a prerequisite or an execute error 1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 1124 1125 """ 1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 1127 result.Raise("Failure checking secondary ip on node %s" % node, 1128 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1129 if not result.payload: 1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 1131 " please fix and re-run this command" % secondary_ip) 1132 if prereq: 1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 1134 else: 1135 raise errors.OpExecError(msg)
1136
1137 1138 -def _CheckNodePVs(nresult, exclusive_storage):
1139 """Check node PVs. 1140 1141 """ 1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None) 1143 if pvlist_dict is None: 1144 return (["Can't get PV list from node"], None) 1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict) 1146 errlist = [] 1147 # check that ':' is not present in PV names, since it's a 1148 # special character for lvcreate (denotes the range of PEs to 1149 # use on the PV) 1150 for pv in pvlist: 1151 if ":" in pv.name: 1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" % 1153 (pv.name, pv.vg_name)) 1154 es_pvinfo = None 1155 if exclusive_storage: 1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist) 1157 errlist.extend(errmsgs) 1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None) 1159 if shared_pvs: 1160 for (pvname, lvlist) in shared_pvs: 1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...) 1162 errlist.append("PV %s is shared among unrelated LVs (%s)" % 1163 (pvname, utils.CommaJoin(lvlist))) 1164 return (errlist, es_pvinfo)
1165
1166 1167 -def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret. 1169 1170 """ 1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE, 1172 strict=True)
1173
1174 1175 -def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states. 1177 1178 @param lu: the LU on behalf of which we make the check 1179 @param instance: the instance to check 1180 @param msg: if passed, should be a message to replace the default one 1181 @raise errors.OpPrereqError: if the instance is not in the required state 1182 1183 """ 1184 if msg is None: 1185 msg = ("can't use instance from outside %s states" % 1186 utils.CommaJoin(req_states)) 1187 if instance.admin_state not in req_states: 1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" % 1189 (instance.name, instance.admin_state, msg), 1190 errors.ECODE_STATE) 1191 1192 if constants.ADMINST_UP not in req_states: 1193 pnode = instance.primary_node 1194 if not lu.cfg.GetNodeInfo(pnode).offline: 1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 1196 ins_l.Raise("Can't contact node %s for instance information" % pnode, 1197 prereq=True, ecode=errors.ECODE_ENVIRON) 1198 if instance.name in ins_l.payload: 1199 raise errors.OpPrereqError("Instance %s is running, %s" % 1200 (instance.name, msg), errors.ECODE_STATE) 1201 else: 1202 lu.LogWarning("Primary node offline, ignoring check that instance" 1203 " is down")
1204
1205 1206 -def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range. 1208 1209 @param name: name of the parameter for which we perform the check 1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1', 1211 not just 'disk') 1212 @param ipolicy: dictionary containing min, max and std values 1213 @param value: actual value that we want to use 1214 @return: None or element not meeting the criteria 1215 1216 1217 """ 1218 if value in [None, constants.VALUE_AUTO]: 1219 return None 1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value) 1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value) 1222 if value > max_v or min_v > value: 1223 if qualifier: 1224 fqn = "%s/%s" % (name, qualifier) 1225 else: 1226 fqn = name 1227 return ("%s value %s is not in range [%s, %s]" % 1228 (fqn, value, min_v, max_v)) 1229 return None
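
A worked example against a toy policy fragment (the numbers are invented)::

    ipolicy = {
      constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
      constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
      }
    _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
    # -> None, since 512 lies within [128, 4096]
    _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
    # -> an error string reporting that 8192 is not in range [128, 4096]
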
1230
1231 1232 -def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, 1233 nic_count, disk_sizes, spindle_use, 1234 disk_template, 1235 _compute_fn=_ComputeMinMaxSpec):
1236 """Verifies ipolicy against provided specs. 1237 1238 @type ipolicy: dict 1239 @param ipolicy: The ipolicy 1240 @type mem_size: int 1241 @param mem_size: The memory size 1242 @type cpu_count: int 1243 @param cpu_count: Used cpu cores 1244 @type disk_count: int 1245 @param disk_count: Number of disks used 1246 @type nic_count: int 1247 @param nic_count: Number of nics used 1248 @type disk_sizes: list of ints 1249 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count}) 1250 @type spindle_use: int 1251 @param spindle_use: The number of spindles this instance uses 1252 @type disk_template: string 1253 @param disk_template: The disk template of the instance 1254 @param _compute_fn: The compute function (unittest only) 1255 @return: A list of violations, or an empty list of no violations are found 1256 1257 """ 1258 assert disk_count == len(disk_sizes) 1259 1260 test_settings = [ 1261 (constants.ISPEC_MEM_SIZE, "", mem_size), 1262 (constants.ISPEC_CPU_COUNT, "", cpu_count), 1263 (constants.ISPEC_NIC_COUNT, "", nic_count), 1264 (constants.ISPEC_SPINDLE_USE, "", spindle_use), 1265 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d) 1266 for idx, d in enumerate(disk_sizes)] 1267 if disk_template != constants.DT_DISKLESS: 1268 # This check doesn't make sense for diskless instances 1269 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count)) 1270 ret = [] 1271 allowed_dts = ipolicy[constants.IPOLICY_DTS] 1272 if disk_template not in allowed_dts: 1273 ret.append("Disk template %s is not allowed (allowed templates: %s)" % 1274 (disk_template, utils.CommaJoin(allowed_dts))) 1275 1276 return ret + filter(None, 1277 (_compute_fn(name, qualifier, ipolicy, value) 1278 for (name, qualifier, value) in test_settings))
1279
1280 1281 -def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg, 1282 _compute_fn=_ComputeIPolicySpecViolation):
1283 """Compute if instance meets the specs of ipolicy. 1284 1285 @type ipolicy: dict 1286 @param ipolicy: The ipolicy to verify against 1287 @type instance: L{objects.Instance} 1288 @param instance: The instance to verify 1289 @type cfg: L{config.ConfigWriter} 1290 @param cfg: Cluster configuration 1291 @param _compute_fn: The function to verify ipolicy (unittest only) 1292 @see: L{_ComputeIPolicySpecViolation} 1293 1294 """ 1295 be_full = cfg.GetClusterInfo().FillBE(instance) 1296 mem_size = be_full[constants.BE_MAXMEM] 1297 cpu_count = be_full[constants.BE_VCPUS] 1298 spindle_use = be_full[constants.BE_SPINDLE_USE] 1299 disk_count = len(instance.disks) 1300 disk_sizes = [disk.size for disk in instance.disks] 1301 nic_count = len(instance.nics) 1302 disk_template = instance.disk_template 1303 1304 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1305 disk_sizes, spindle_use, disk_template)
1306
 1307 
 1308  def _ComputeIPolicyInstanceSpecViolation(
 1309      ipolicy, instance_spec, disk_template,
 1310      _compute_fn=_ComputeIPolicySpecViolation):
 1311    """Compute if instance specs meet the specs of ipolicy.
 1312 
 1313    @type ipolicy: dict
 1314    @param ipolicy: The ipolicy to verify against
 1315    @type instance_spec: dict
 1316    @param instance_spec: The instance spec to verify
 1317    @type disk_template: string
 1318    @param disk_template: the disk template of the instance
 1319    @param _compute_fn: The function to verify ipolicy (unittest only)
 1320    @see: L{_ComputeIPolicySpecViolation}
 1321 
 1322    """
 1323    mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
 1324    cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
 1325    disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
 1326    disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
 1327    nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
 1328    spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
 1329 
 1330    return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
 1331                       disk_sizes, spindle_use, disk_template)
1332
1333 1334 -def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group, 1335 target_group, cfg, 1336 _compute_fn=_ComputeIPolicyInstanceViolation):
1337 """Compute if instance meets the specs of the new target group. 1338 1339 @param ipolicy: The ipolicy to verify 1340 @param instance: The instance object to verify 1341 @param current_group: The current group of the instance 1342 @param target_group: The new group of the instance 1343 @type cfg: L{config.ConfigWriter} 1344 @param cfg: Cluster configuration 1345 @param _compute_fn: The function to verify ipolicy (unittest only) 1346 @see: L{_ComputeIPolicySpecViolation} 1347 1348 """ 1349 if current_group == target_group: 1350 return [] 1351 else: 1352 return _compute_fn(ipolicy, instance, cfg)
1353
1354 1355 -def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False, 1356 _compute_fn=_ComputeIPolicyNodeViolation):
1357 """Checks that the target node is correct in terms of instance policy. 1358 1359 @param ipolicy: The ipolicy to verify 1360 @param instance: The instance object to verify 1361 @param node: The new node to relocate 1362 @type cfg: L{config.ConfigWriter} 1363 @param cfg: Cluster configuration 1364 @param ignore: Ignore violations of the ipolicy 1365 @param _compute_fn: The function to verify ipolicy (unittest only) 1366 @see: L{_ComputeIPolicySpecViolation} 1367 1368 """ 1369 primary_node = lu.cfg.GetNodeInfo(instance.primary_node) 1370 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg) 1371 1372 if res: 1373 msg = ("Instance does not meet target node group's (%s) instance" 1374 " policy: %s") % (node.group, utils.CommaJoin(res)) 1375 if ignore: 1376 lu.LogWarning(msg) 1377 else: 1378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1379
 1380 
 1381  def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
 1382    """Computes the set of instances that would violate the new ipolicy.
 1383 
 1384    @param old_ipolicy: The current (still in-place) ipolicy
 1385    @param new_ipolicy: The new (to become) ipolicy
 1386    @param instances: List of instances to verify
 1387    @type cfg: L{config.ConfigWriter}
 1388    @param cfg: Cluster configuration
 1389    @return: A set of instances which violate the new ipolicy but
 1390        did not before
 1391 
 1392    """
 1393    return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
 1394            _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1395
1396 1397 -def _ExpandItemName(fn, name, kind):
1398 """Expand an item name. 1399 1400 @param fn: the function to use for expansion 1401 @param name: requested item name 1402 @param kind: text description ('Node' or 'Instance') 1403 @return: the resolved (full) name 1404 @raise errors.OpPrereqError: if the item is not found 1405 1406 """ 1407 full_name = fn(name) 1408 if full_name is None: 1409 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 1410 errors.ECODE_NOENT) 1411 return full_name
1412
1413 1414 -def _ExpandNodeName(cfg, name):
1415 """Wrapper over L{_ExpandItemName} for nodes.""" 1416 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1417
1418 1419 -def _ExpandInstanceName(cfg, name):
1420 """Wrapper over L{_ExpandItemName} for instance.""" 1421 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1422
1423 1424 -def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6, 1425 mac_prefix, tags):
1426 """Builds network related env variables for hooks 1427 1428 This builds the hook environment from individual variables. 1429 1430 @type name: string 1431 @param name: the name of the network 1432 @type subnet: string 1433 @param subnet: the ipv4 subnet 1434 @type gateway: string 1435 @param gateway: the ipv4 gateway 1436 @type network6: string 1437 @param network6: the ipv6 subnet 1438 @type gateway6: string 1439 @param gateway6: the ipv6 gateway 1440 @type mac_prefix: string 1441 @param mac_prefix: the mac_prefix 1442 @type tags: list 1443 @param tags: the tags of the network 1444 1445 """ 1446 env = {} 1447 if name: 1448 env["NETWORK_NAME"] = name 1449 if subnet: 1450 env["NETWORK_SUBNET"] = subnet 1451 if gateway: 1452 env["NETWORK_GATEWAY"] = gateway 1453 if network6: 1454 env["NETWORK_SUBNET6"] = network6 1455 if gateway6: 1456 env["NETWORK_GATEWAY6"] = gateway6 1457 if mac_prefix: 1458 env["NETWORK_MAC_PREFIX"] = mac_prefix 1459 if tags: 1460 env["NETWORK_TAGS"] = " ".join(tags) 1461 1462 return env
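
For example, a call with invented network data yields an environment like the following; arguments passed as None or empty simply produce no key::

    _BuildNetworkHookEnv("net1", "192.0.2.0/24", "192.0.2.1", None, None,
                         "aa:00:00", ["routed"])
    # -> {"NETWORK_NAME": "net1",
    #     "NETWORK_SUBNET": "192.0.2.0/24",
    #     "NETWORK_GATEWAY": "192.0.2.1",
    #     "NETWORK_MAC_PREFIX": "aa:00:00",
    #     "NETWORK_TAGS": "routed"}
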
1463
1464 1465 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 1466 minmem, maxmem, vcpus, nics, disk_template, disks, 1467 bep, hvp, hypervisor_name, tags):
1468 """Builds instance related env variables for hooks 1469 1470 This builds the hook environment from individual variables. 1471 1472 @type name: string 1473 @param name: the name of the instance 1474 @type primary_node: string 1475 @param primary_node: the name of the instance's primary node 1476 @type secondary_nodes: list 1477 @param secondary_nodes: list of secondary nodes as strings 1478 @type os_type: string 1479 @param os_type: the name of the instance's OS 1480 @type status: string 1481 @param status: the desired status of the instance 1482 @type minmem: string 1483 @param minmem: the minimum memory size of the instance 1484 @type maxmem: string 1485 @param maxmem: the maximum memory size of the instance 1486 @type vcpus: string 1487 @param vcpus: the count of VCPUs the instance has 1488 @type nics: list 1489 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing 1490 the NICs the instance has 1491 @type disk_template: string 1492 @param disk_template: the disk template of the instance 1493 @type disks: list 1494 @param disks: the list of (size, mode) pairs 1495 @type bep: dict 1496 @param bep: the backend parameters for the instance 1497 @type hvp: dict 1498 @param hvp: the hypervisor parameters for the instance 1499 @type hypervisor_name: string 1500 @param hypervisor_name: the hypervisor for the instance 1501 @type tags: list 1502 @param tags: list of instance tags as strings 1503 @rtype: dict 1504 @return: the hook environment for this instance 1505 1506 """ 1507 env = { 1508 "OP_TARGET": name, 1509 "INSTANCE_NAME": name, 1510 "INSTANCE_PRIMARY": primary_node, 1511 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 1512 "INSTANCE_OS_TYPE": os_type, 1513 "INSTANCE_STATUS": status, 1514 "INSTANCE_MINMEM": minmem, 1515 "INSTANCE_MAXMEM": maxmem, 1516 # TODO(2.9) remove deprecated "memory" value 1517 "INSTANCE_MEMORY": maxmem, 1518 "INSTANCE_VCPUS": vcpus, 1519 "INSTANCE_DISK_TEMPLATE": disk_template, 1520 "INSTANCE_HYPERVISOR": hypervisor_name, 1521 } 1522 if nics: 1523 nic_count = len(nics) 1524 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics): 1525 if ip is None: 1526 ip = "" 1527 env["INSTANCE_NIC%d_IP" % idx] = ip 1528 env["INSTANCE_NIC%d_MAC" % idx] = mac 1529 env["INSTANCE_NIC%d_MODE" % idx] = mode 1530 env["INSTANCE_NIC%d_LINK" % idx] = link 1531 if netinfo: 1532 nobj = objects.Network.FromDict(netinfo) 1533 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx)) 1534 elif network: 1535 # FIXME: broken network reference: the instance NIC specifies a 1536 # network, but the relevant network entry was not in the config. This 1537 # should be made impossible. 1538 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net 1539 if mode == constants.NIC_MODE_BRIDGED: 1540 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 1541 else: 1542 nic_count = 0 1543 1544 env["INSTANCE_NIC_COUNT"] = nic_count 1545 1546 if disks: 1547 disk_count = len(disks) 1548 for idx, (size, mode) in enumerate(disks): 1549 env["INSTANCE_DISK%d_SIZE" % idx] = size 1550 env["INSTANCE_DISK%d_MODE" % idx] = mode 1551 else: 1552 disk_count = 0 1553 1554 env["INSTANCE_DISK_COUNT"] = disk_count 1555 1556 if not tags: 1557 tags = [] 1558 1559 env["INSTANCE_TAGS"] = " ".join(tags) 1560 1561 for source, kind in [(bep, "BE"), (hvp, "HV")]: 1562 for key, value in source.items(): 1563 env["INSTANCE_%s_%s" % (kind, key)] = value 1564 1565 return env
1566
 1567 
 1568  def _NICToTuple(lu, nic):
 1569    """Build a tuple of nic information.
 1570 
 1571    @type lu: L{LogicalUnit}
 1572    @param lu: the logical unit on whose behalf we execute
 1573    @type nic: L{objects.NIC}
 1574    @param nic: nic to convert to hooks tuple
 1575 
 1576    """
 1577    cluster = lu.cfg.GetClusterInfo()
 1578    filled_params = cluster.SimpleFillNIC(nic.nicparams)
 1579    mode = filled_params[constants.NIC_MODE]
 1580    link = filled_params[constants.NIC_LINK]
 1581    netinfo = None
 1582    if nic.network:
 1583      nobj = lu.cfg.GetNetwork(nic.network)
 1584      netinfo = objects.Network.ToDict(nobj)
 1585    return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1586
1587 1588 -def _NICListToTuple(lu, nics):
1589 """Build a list of nic information tuples. 1590 1591 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 1592 value in LUInstanceQueryData. 1593 1594 @type lu: L{LogicalUnit} 1595 @param lu: the logical unit on whose behalf we execute 1596 @type nics: list of L{objects.NIC} 1597 @param nics: list of nics to convert to hooks tuples 1598 1599 """ 1600 hooks_nics = [] 1601 for nic in nics: 1602 hooks_nics.append(_NICToTuple(lu, nic)) 1603 return hooks_nics
1604
1605 1606 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1607 """Builds instance related env variables for hooks from an object. 1608 1609 @type lu: L{LogicalUnit} 1610 @param lu: the logical unit on whose behalf we execute 1611 @type instance: L{objects.Instance} 1612 @param instance: the instance for which we should build the 1613 environment 1614 @type override: dict 1615 @param override: dictionary with key/values that will override 1616 our values 1617 @rtype: dict 1618 @return: the hook environment dictionary 1619 1620 """ 1621 cluster = lu.cfg.GetClusterInfo() 1622 bep = cluster.FillBE(instance) 1623 hvp = cluster.FillHV(instance) 1624 args = { 1625 "name": instance.name, 1626 "primary_node": instance.primary_node, 1627 "secondary_nodes": instance.secondary_nodes, 1628 "os_type": instance.os, 1629 "status": instance.admin_state, 1630 "maxmem": bep[constants.BE_MAXMEM], 1631 "minmem": bep[constants.BE_MINMEM], 1632 "vcpus": bep[constants.BE_VCPUS], 1633 "nics": _NICListToTuple(lu, instance.nics), 1634 "disk_template": instance.disk_template, 1635 "disks": [(disk.size, disk.mode) for disk in instance.disks], 1636 "bep": bep, 1637 "hvp": hvp, 1638 "hypervisor_name": instance.hypervisor, 1639 "tags": instance.tags, 1640 } 1641 if override: 1642 args.update(override) 1643 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1644
1645 1646 -def _AdjustCandidatePool(lu, exceptions):
1647 """Adjust the candidate pool after node operations. 1648 1649 """ 1650 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 1651 if mod_list: 1652 lu.LogInfo("Promoted nodes to master candidate role: %s", 1653 utils.CommaJoin(node.name for node in mod_list)) 1654 for name in mod_list: 1655 lu.context.ReaddNode(name) 1656 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1657 if mc_now > mc_max: 1658 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 1659 (mc_now, mc_max))
1660
1661 1662 -def _DecideSelfPromotion(lu, exceptions=None):
1663 """Decide whether I should promote myself as a master candidate. 1664 1665 """ 1666 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 1667 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1668 # the new node will increase mc_max with one, so: 1669 mc_should = min(mc_should + 1, cp_size) 1670 return mc_now < mc_should
1671
1672 1673 -def _ComputeViolatingInstances(ipolicy, instances, cfg):
1674 """Computes a set of instances who violates given ipolicy. 1675 1676 @param ipolicy: The ipolicy to verify 1677 @type instances: L{objects.Instance} 1678 @param instances: List of instances to verify 1679 @type cfg: L{config.ConfigWriter} 1680 @param cfg: Cluster configuration 1681 @return: A frozenset of instance names violating the ipolicy 1682 1683 """ 1684 return frozenset([inst.name for inst in instances 1685 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1686
1687 1688 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
1689 """Check that the brigdes needed by a list of nics exist. 1690 1691 """ 1692 cluster = lu.cfg.GetClusterInfo() 1693 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 1694 brlist = [params[constants.NIC_LINK] for params in paramslist 1695 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 1696 if brlist: 1697 result = lu.rpc.call_bridges_exist(target_node, brlist) 1698 result.Raise("Error checking bridges on destination node '%s'" % 1699 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1700
1701 1702 -def _CheckInstanceBridgesExist(lu, instance, node=None):
1703 """Check that the brigdes needed by an instance exist. 1704 1705 """ 1706 if node is None: 1707 node = instance.primary_node 1708 _CheckNicsBridgesExist(lu, instance.nics, node)
1709
1710 1711 -def _CheckOSVariant(os_obj, name):
1712 """Check whether an OS name conforms to the os variants specification. 1713 1714 @type os_obj: L{objects.OS} 1715 @param os_obj: OS object to check 1716 @type name: string 1717 @param name: OS name passed by the user, to check for validity 1718 1719 """ 1720 variant = objects.OS.GetVariant(name) 1721 if not os_obj.supported_variants: 1722 if variant: 1723 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'" 1724 " passed)" % (os_obj.name, variant), 1725 errors.ECODE_INVAL) 1726 return 1727 if not variant: 1728 raise errors.OpPrereqError("OS name must include a variant", 1729 errors.ECODE_INVAL) 1730 1731 if variant not in os_obj.supported_variants: 1732 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1733
1734 1735 -def _GetNodeInstancesInner(cfg, fn):
1736 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1737
1738 1739 -def _GetNodeInstances(cfg, node_name):
1740 """Returns a list of all primary and secondary instances on a node. 1741 1742 """ 1743 1744 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1745
1746 1747 -def _GetNodePrimaryInstances(cfg, node_name):
1748 """Returns primary instances on a node. 1749 1750 """ 1751 return _GetNodeInstancesInner(cfg, 1752 lambda inst: node_name == inst.primary_node)
1753
1754 1755 -def _GetNodeSecondaryInstances(cfg, node_name):
1756 """Returns secondary instances on a node. 1757 1758 """ 1759 return _GetNodeInstancesInner(cfg, 1760 lambda inst: node_name in inst.secondary_nodes)
1761
1762 1763 -def _GetStorageTypeArgs(cfg, storage_type):
1764 """Returns the arguments for a storage type. 1765 1766 """ 1767 # Special case for file storage 1768 if storage_type == constants.ST_FILE: 1769 # storage.FileStorage wants a list of storage directories 1770 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]] 1771 1772 return []
1773
1774 1775 -def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1776 faulty = [] 1777 1778 for dev in instance.disks: 1779 cfg.SetDiskID(dev, node_name) 1780 1781 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks, 1782 instance)) 1783 result.Raise("Failed to get disk status from node %s" % node_name, 1784 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1785 1786 for idx, bdev_status in enumerate(result.payload): 1787 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1788 faulty.append(idx) 1789 1790 return faulty
1791
1792 1793 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1794 """Check the sanity of iallocator and node arguments and use the 1795 cluster-wide iallocator if appropriate. 1796 1797 Check that at most one of (iallocator, node) is specified. If none is 1798 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, 1799 then the LU's opcode's iallocator slot is filled with the cluster-wide 1800 default iallocator. 1801 1802 @type iallocator_slot: string 1803 @param iallocator_slot: the name of the opcode iallocator slot 1804 @type node_slot: string 1805 @param node_slot: the name of the opcode target node slot 1806 1807 """ 1808 node = getattr(lu.op, node_slot, None) 1809 ialloc = getattr(lu.op, iallocator_slot, None) 1810 if node == []: 1811 node = None 1812 1813 if node is not None and ialloc is not None: 1814 raise errors.OpPrereqError("Do not specify both, iallocator and node", 1815 errors.ECODE_INVAL) 1816 elif ((node is None and ialloc is None) or 1817 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT): 1818 default_iallocator = lu.cfg.GetDefaultIAllocator() 1819 if default_iallocator: 1820 setattr(lu.op, iallocator_slot, default_iallocator) 1821 else: 1822 raise errors.OpPrereqError("No iallocator or node given and no" 1823 " cluster-wide default iallocator found;" 1824 " please specify either an iallocator or a" 1825 " node, or set a cluster-wide default" 1826 " iallocator", errors.ECODE_INVAL)
1827
1828 1829 -def _GetDefaultIAllocator(cfg, ialloc):
1830 """Decides on which iallocator to use. 1831 1832 @type cfg: L{config.ConfigWriter} 1833 @param cfg: Cluster configuration object 1834 @type ialloc: string or None 1835 @param ialloc: Iallocator specified in opcode 1836 @rtype: string 1837 @return: Iallocator name 1838 1839 """ 1840 if not ialloc: 1841 # Use default iallocator 1842 ialloc = cfg.GetDefaultIAllocator() 1843 1844 if not ialloc: 1845 raise errors.OpPrereqError("No iallocator was specified, neither in the" 1846 " opcode nor as a cluster-wide default", 1847 errors.ECODE_INVAL) 1848 1849 return ialloc
1850
1851 1852 -def _CheckHostnameSane(lu, name):
1853 """Ensures that a given hostname resolves to a 'sane' name. 1854 1855 The given name is required to be a prefix of the resolved hostname, 1856 to prevent accidental mismatches. 1857 1858 @param lu: the logical unit on behalf of which we're checking 1859 @param name: the name we should resolve and check 1860 @return: the resolved hostname object 1861 1862 """ 1863 hostname = netutils.GetHostname(name=name) 1864 if hostname.name != name: 1865 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) 1866 if not utils.MatchNameComponent(name, [hostname.name]): 1867 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" 1868 " same as given hostname '%s'") % 1869 (hostname.name, name), errors.ECODE_INVAL) 1870 return hostname
1871
1872 1873 -class LUClusterPostInit(LogicalUnit):
1874 """Logical unit for running hooks after cluster initialization. 1875 1876 """ 1877 HPATH = "cluster-init" 1878 HTYPE = constants.HTYPE_CLUSTER 1879
1880 - def BuildHooksEnv(self):
1881 """Build hooks env. 1882 1883 """ 1884 return { 1885 "OP_TARGET": self.cfg.GetClusterName(), 1886 }
1887
1888 - def BuildHooksNodes(self):
1889 """Build hooks nodes. 1890 1891 """ 1892 return ([], [self.cfg.GetMasterNode()])
1893
1894 - def Exec(self, feedback_fn):
1895 """Nothing to do. 1896 1897 """ 1898 return True
1899
1900 1901 -class LUClusterDestroy(LogicalUnit):
1902 """Logical unit for destroying the cluster. 1903 1904 """ 1905 HPATH = "cluster-destroy" 1906 HTYPE = constants.HTYPE_CLUSTER 1907
1908 - def BuildHooksEnv(self):
1909 """Build hooks env. 1910 1911 """ 1912 return { 1913 "OP_TARGET": self.cfg.GetClusterName(), 1914 }
1915
1916 - def BuildHooksNodes(self):
1917 """Build hooks nodes. 1918 1919 """ 1920 return ([], [])
1921
1922 - def CheckPrereq(self):
1923 """Check prerequisites. 1924 1925 This checks whether the cluster is empty. 1926 1927 Any errors are signaled by raising errors.OpPrereqError. 1928 1929 """ 1930 master = self.cfg.GetMasterNode() 1931 1932 nodelist = self.cfg.GetNodeList() 1933 if len(nodelist) != 1 or nodelist[0] != master: 1934 raise errors.OpPrereqError("There are still %d node(s) in" 1935 " this cluster." % (len(nodelist) - 1), 1936 errors.ECODE_INVAL) 1937 instancelist = self.cfg.GetInstanceList() 1938 if instancelist: 1939 raise errors.OpPrereqError("There are still %d instance(s) in" 1940 " this cluster." % len(instancelist), 1941 errors.ECODE_INVAL)
1942
1943 - def Exec(self, feedback_fn):
1944 """Destroys the cluster. 1945 1946 """ 1947 master_params = self.cfg.GetMasterNetworkParameters() 1948 1949 # Run post hooks on master node before it's removed 1950 _RunPostHook(self, master_params.name) 1951 1952 ems = self.cfg.GetUseExternalMipScript() 1953 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 1954 master_params, ems) 1955 if result.fail_msg: 1956 self.LogWarning("Error disabling the master IP address: %s", 1957 result.fail_msg) 1958 1959 return master_params.name
1960
1961 1962 -def _VerifyCertificate(filename):
1963 """Verifies a certificate for L{LUClusterVerifyConfig}. 1964 1965 @type filename: string 1966 @param filename: Path to PEM file 1967 1968 """ 1969 try: 1970 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1971 utils.ReadFile(filename)) 1972 except Exception, err: # pylint: disable=W0703 1973 return (LUClusterVerifyConfig.ETYPE_ERROR, 1974 "Failed to load X509 certificate %s: %s" % (filename, err)) 1975 1976 (errcode, msg) = \ 1977 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1978 constants.SSL_CERT_EXPIRATION_ERROR) 1979 1980 if msg: 1981 fnamemsg = "While verifying %s: %s" % (filename, msg) 1982 else: 1983 fnamemsg = None 1984 1985 if errcode is None: 1986 return (None, fnamemsg) 1987 elif errcode == utils.CERT_WARNING: 1988 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) 1989 elif errcode == utils.CERT_ERROR: 1990 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) 1991 1992 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1993
1994 1995 -def _GetAllHypervisorParameters(cluster, instances):
1996 """Compute the set of all hypervisor parameters. 1997 1998 @type cluster: L{objects.Cluster} 1999 @param cluster: the cluster object 2000 @param instances: list of L{objects.Instance} 2001 @param instances: additional instances from which to obtain parameters 2002 @rtype: list of (origin, hypervisor, parameters) 2003 @return: a list with all parameters found, indicating the hypervisor they 2004 apply to, and the origin (can be "cluster", "os X", or "instance Y") 2005 2006 """ 2007 hvp_data = [] 2008 2009 for hv_name in cluster.enabled_hypervisors: 2010 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 2011 2012 for os_name, os_hvp in cluster.os_hvp.items(): 2013 for hv_name, hv_params in os_hvp.items(): 2014 if hv_params: 2015 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 2016 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 2017 2018 # TODO: collapse identical parameter values in a single one 2019 for instance in instances: 2020 if instance.hvparams: 2021 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 2022 cluster.FillHV(instance))) 2023 2024 return hvp_data
2025
2026 2027 -class _VerifyErrors(object):
2028 """Mix-in for cluster/group verify LUs. 2029 2030 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 2031 self.op and self._feedback_fn to be available.) 2032 2033 """ 2034 2035 ETYPE_FIELD = "code" 2036 ETYPE_ERROR = "ERROR" 2037 ETYPE_WARNING = "WARNING" 2038
2039 - def _Error(self, ecode, item, msg, *args, **kwargs):
2040 """Format an error message. 2041 2042 Based on the opcode's error_codes parameter, either format a 2043 parseable error code, or a simpler error string. 2044 2045 This must be called only from Exec and functions called from Exec. 2046 2047 """ 2048 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 2049 itype, etxt, _ = ecode 2050 # If the error code is in the list of ignored errors, demote the error to a 2051 # warning 2052 if etxt in self.op.ignore_errors: # pylint: disable=E1101 2053 ltype = self.ETYPE_WARNING 2054 # first complete the msg 2055 if args: 2056 msg = msg % args 2057 # then format the whole message 2058 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 2059 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 2060 else: 2061 if item: 2062 item = " " + item 2063 else: 2064 item = "" 2065 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 2066 # and finally report it via the feedback_fn 2067 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101 2068 # do not mark the operation as failed for WARN cases only 2069 if ltype == self.ETYPE_ERROR: 2070 self.bad = True
2071
2072 - def _ErrorIf(self, cond, *args, **kwargs):
2073 """Log an error message if the passed condition is True. 2074 2075 """ 2076 if (bool(cond) 2077 or self.op.debug_simulate_errors): # pylint: disable=E1101 2078 self._Error(*args, **kwargs)
2079
2080 2081 -class LUClusterVerify(NoHooksLU):
2082 """Submits all jobs necessary to verify the cluster. 2083 2084 """ 2085 REQ_BGL = False 2086
2087 - def ExpandNames(self):
2088 self.needed_locks = {}
2089
2090 - def Exec(self, feedback_fn):
2091 jobs = [] 2092 2093 if self.op.group_name: 2094 groups = [self.op.group_name] 2095 depends_fn = lambda: None 2096 else: 2097 groups = self.cfg.GetNodeGroupList() 2098 2099 # Verify global configuration 2100 jobs.append([ 2101 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), 2102 ]) 2103 2104 # Always depend on global verification 2105 depends_fn = lambda: [(-len(jobs), [])] 2106 2107 jobs.extend( 2108 [opcodes.OpClusterVerifyGroup(group_name=group, 2109 ignore_errors=self.op.ignore_errors, 2110 depends=depends_fn())] 2111 for group in groups) 2112 2113 # Fix up all parameters 2114 for op in itertools.chain(*jobs): # pylint: disable=W0142 2115 op.debug_simulate_errors = self.op.debug_simulate_errors 2116 op.verbose = self.op.verbose 2117 op.error_codes = self.op.error_codes 2118 try: 2119 op.skip_checks = self.op.skip_checks 2120 except AttributeError: 2121 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 2122 2123 return ResultWithJobs(jobs)
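Note how depends_fn yields a relative reference: -len(jobs), evaluated at submission time, points back at the config-verification job however many group jobs follow (negative dependency IDs being taken relative to the depending job within one submission is an assumption about the queue's convention). A compact sketch of that indexing with plain dictionaries standing in for opcodes:

  jobs = []
  jobs.append([{"op": "verify-config"}])

  # Every later job depends on the first one via a relative index.
  depends_fn = lambda: [(-len(jobs), [])]

  for group in ["default", "rack2"]:
    jobs.append([{"op": "verify-group", "group": group,
                  "depends": depends_fn()}])

  # jobs[1][0]["depends"] == [(-1, [])], jobs[2][0]["depends"] == [(-2, [])];
  # relative to their own position, both resolve to the config job.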
2124
2125 2126 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2127 """Verifies the cluster config. 2128 2129 """ 2130 REQ_BGL = False 2131
2132 - def _VerifyHVP(self, hvp_data):
2133 """Verifies locally the syntax of the hypervisor parameters. 2134 2135 """ 2136 for item, hv_name, hv_params in hvp_data: 2137 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 2138 (item, hv_name)) 2139 try: 2140 hv_class = hypervisor.GetHypervisorClass(hv_name) 2141 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2142 hv_class.CheckParameterSyntax(hv_params) 2143 except errors.GenericError, err: 2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2145
2146 - def ExpandNames(self):
2147 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 2148 self.share_locks = _ShareAll()
2149
2150 - def CheckPrereq(self):
2151 """Check prerequisites. 2152 2153 """ 2154 # Retrieve all information 2155 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 2156 self.all_node_info = self.cfg.GetAllNodesInfo() 2157 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2158
2159 - def Exec(self, feedback_fn):
2160 """Verify integrity of cluster, performing various test on nodes. 2161 2162 """ 2163 self.bad = False 2164 self._feedback_fn = feedback_fn 2165 2166 feedback_fn("* Verifying cluster config") 2167 2168 for msg in self.cfg.VerifyConfig(): 2169 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 2170 2171 feedback_fn("* Verifying cluster certificate files") 2172 2173 for cert_filename in pathutils.ALL_CERT_FILES: 2174 (errcode, msg) = _VerifyCertificate(cert_filename) 2175 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 2176 2177 feedback_fn("* Verifying hypervisor parameters") 2178 2179 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 2180 self.all_inst_info.values())) 2181 2182 feedback_fn("* Verifying all nodes belong to an existing group") 2183 2184 # We do this verification here because, should this bogus circumstance 2185 # occur, it would never be caught by VerifyGroup, which only acts on 2186 # nodes/instances reachable from existing node groups. 2187 2188 dangling_nodes = set(node.name for node in self.all_node_info.values() 2189 if node.group not in self.all_group_info) 2190 2191 dangling_instances = {} 2192 no_node_instances = [] 2193 2194 for inst in self.all_inst_info.values(): 2195 if inst.primary_node in dangling_nodes: 2196 dangling_instances.setdefault(inst.primary_node, []).append(inst.name) 2197 elif inst.primary_node not in self.all_node_info: 2198 no_node_instances.append(inst.name) 2199 2200 pretty_dangling = [ 2201 "%s (%s)" % 2202 (node.name, 2203 utils.CommaJoin(dangling_instances.get(node.name, 2204 ["no instances"]))) 2205 for node in dangling_nodes] 2206 2207 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 2208 None, 2209 "the following nodes (and their instances) belong to a non" 2210 " existing group: %s", utils.CommaJoin(pretty_dangling)) 2211 2212 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 2213 None, 2214 "the following instances have a non-existing primary-node:" 2215 " %s", utils.CommaJoin(no_node_instances)) 2216 2217 return not self.bad
2218
2219 2220 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2221 """Verifies the status of a node group. 2222 2223 """ 2224 HPATH = "cluster-verify" 2225 HTYPE = constants.HTYPE_CLUSTER 2226 REQ_BGL = False 2227 2228 _HOOKS_INDENT_RE = re.compile("^", re.M) 2229
2230 - class NodeImage(object):
2231 """A class representing the logical and physical status of a node. 2232 2233 @type name: string 2234 @ivar name: the node name to which this object refers 2235 @ivar volumes: a structure as returned from 2236 L{ganeti.backend.GetVolumeList} (runtime) 2237 @ivar instances: a list of running instances (runtime) 2238 @ivar pinst: list of configured primary instances (config) 2239 @ivar sinst: list of configured secondary instances (config) 2240 @ivar sbp: dictionary of {primary-node: list of instances} for all 2241 instances for which this node is secondary (config) 2242 @ivar mfree: free memory, as reported by hypervisor (runtime) 2243 @ivar dfree: free disk, as reported by the node (runtime) 2244 @ivar offline: the offline status (config) 2245 @type rpc_fail: boolean 2246 @ivar rpc_fail: whether the RPC verify call was successfull (overall, 2247 not whether the individual keys were correct) (runtime) 2248 @type lvm_fail: boolean 2249 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 2250 @type hyp_fail: boolean 2251 @ivar hyp_fail: whether the RPC call didn't return the instance list 2252 @type ghost: boolean 2253 @ivar ghost: whether this is a known node or not (config) 2254 @type os_fail: boolean 2255 @ivar os_fail: whether the RPC call didn't return valid OS data 2256 @type oslist: list 2257 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 2258 @type vm_capable: boolean 2259 @ivar vm_capable: whether the node can host instances 2260 @type pv_min: float 2261 @ivar pv_min: size in MiB of the smallest PVs 2262 @type pv_max: float 2263 @ivar pv_max: size in MiB of the biggest PVs 2264 2265 """
2266 - def __init__(self, offline=False, name=None, vm_capable=True):
2267 self.name = name 2268 self.volumes = {} 2269 self.instances = [] 2270 self.pinst = [] 2271 self.sinst = [] 2272 self.sbp = {} 2273 self.mfree = 0 2274 self.dfree = 0 2275 self.offline = offline 2276 self.vm_capable = vm_capable 2277 self.rpc_fail = False 2278 self.lvm_fail = False 2279 self.hyp_fail = False 2280 self.ghost = False 2281 self.os_fail = False 2282 self.oslist = {} 2283 self.pv_min = None 2284 self.pv_max = None
2285
2286 - def ExpandNames(self):
2287 # This raises errors.OpPrereqError on its own: 2288 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 2289 2290 # Get instances in node group; this is unsafe and needs verification later 2291 inst_names = \ 2292 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2293 2294 self.needed_locks = { 2295 locking.LEVEL_INSTANCE: inst_names, 2296 locking.LEVEL_NODEGROUP: [self.group_uuid], 2297 locking.LEVEL_NODE: [], 2298 2299 # This opcode is run by watcher every five minutes and acquires all nodes 2300 # for a group. It doesn't run for a long time, so it's better to acquire 2301 # the node allocation lock as well. 2302 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 2303 } 2304 2305 self.share_locks = _ShareAll()
2306
2307 - def DeclareLocks(self, level):
2308 if level == locking.LEVEL_NODE: 2309 # Get members of node group; this is unsafe and needs verification later 2310 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 2311 2312 all_inst_info = self.cfg.GetAllInstancesInfo() 2313 2314 # In Exec(), we warn about mirrored instances that have primary and 2315 # secondary living in separate node groups. To fully verify that 2316 # volumes for these instances are healthy, we will need to do an 2317 # extra call to their secondaries. We ensure here those nodes will 2318 # be locked. 2319 for inst in self.owned_locks(locking.LEVEL_INSTANCE): 2320 # Important: access only the instances whose lock is owned 2321 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR: 2322 nodes.update(all_inst_info[inst].secondary_nodes) 2323 2324 self.needed_locks[locking.LEVEL_NODE] = nodes
2325
2326 - def CheckPrereq(self):
2327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 2328 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 2329 2330 group_nodes = set(self.group_info.members) 2331 group_instances = \ 2332 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2333 2334 unlocked_nodes = \ 2335 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2336 2337 unlocked_instances = \ 2338 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) 2339 2340 if unlocked_nodes: 2341 raise errors.OpPrereqError("Missing lock for nodes: %s" % 2342 utils.CommaJoin(unlocked_nodes), 2343 errors.ECODE_STATE) 2344 2345 if unlocked_instances: 2346 raise errors.OpPrereqError("Missing lock for instances: %s" % 2347 utils.CommaJoin(unlocked_instances), 2348 errors.ECODE_STATE) 2349 2350 self.all_node_info = self.cfg.GetAllNodesInfo() 2351 self.all_inst_info = self.cfg.GetAllInstancesInfo() 2352 2353 self.my_node_names = utils.NiceSort(group_nodes) 2354 self.my_inst_names = utils.NiceSort(group_instances) 2355 2356 self.my_node_info = dict((name, self.all_node_info[name]) 2357 for name in self.my_node_names) 2358 2359 self.my_inst_info = dict((name, self.all_inst_info[name]) 2360 for name in self.my_inst_names) 2361 2362 # We detect here the nodes that will need the extra RPC calls for verifying 2363 # split LV volumes; they should be locked. 2364 extra_lv_nodes = set() 2365 2366 for inst in self.my_inst_info.values(): 2367 if inst.disk_template in constants.DTS_INT_MIRROR: 2368 for nname in inst.all_nodes: 2369 if self.all_node_info[nname].group != self.group_uuid: 2370 extra_lv_nodes.add(nname) 2371 2372 unlocked_lv_nodes = \ 2373 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2374 2375 if unlocked_lv_nodes: 2376 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 2377 utils.CommaJoin(unlocked_lv_nodes), 2378 errors.ECODE_STATE) 2379 self.extra_lv_nodes = list(extra_lv_nodes)
2380
2381 - def _VerifyNode(self, ninfo, nresult):
2382 """Perform some basic validation on data returned from a node. 2383 2384 - check the result data structure is well formed and has all the 2385 mandatory fields 2386 - check ganeti version 2387 2388 @type ninfo: L{objects.Node} 2389 @param ninfo: the node to check 2390 @param nresult: the results from the node 2391 @rtype: boolean 2392 @return: whether overall this call was successful (and we can expect 2393 reasonable values in the respose) 2394 2395 """ 2396 node = ninfo.name 2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2398 2399 # main result, nresult should be a non-empty dict 2400 test = not nresult or not isinstance(nresult, dict) 2401 _ErrorIf(test, constants.CV_ENODERPC, node, 2402 "unable to verify node: no data returned") 2403 if test: 2404 return False 2405 2406 # compares ganeti version 2407 local_version = constants.PROTOCOL_VERSION 2408 remote_version = nresult.get("version", None) 2409 test = not (remote_version and 2410 isinstance(remote_version, (list, tuple)) and 2411 len(remote_version) == 2) 2412 _ErrorIf(test, constants.CV_ENODERPC, node, 2413 "connection to node returned invalid data") 2414 if test: 2415 return False 2416 2417 test = local_version != remote_version[0] 2418 _ErrorIf(test, constants.CV_ENODEVERSION, node, 2419 "incompatible protocol versions: master %s," 2420 " node %s", local_version, remote_version[0]) 2421 if test: 2422 return False 2423 2424 # node seems compatible, we can actually try to look into its results 2425 2426 # full package version 2427 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2428 constants.CV_ENODEVERSION, node, 2429 "software version mismatch: master %s, node %s", 2430 constants.RELEASE_VERSION, remote_version[1], 2431 code=self.ETYPE_WARNING) 2432 2433 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2434 if ninfo.vm_capable and isinstance(hyp_result, dict): 2435 for hv_name, hv_result in hyp_result.iteritems(): 2436 test = hv_result is not None 2437 _ErrorIf(test, constants.CV_ENODEHV, node, 2438 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2439 2440 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2441 if ninfo.vm_capable and isinstance(hvp_result, list): 2442 for item, hv_name, hv_result in hvp_result: 2443 _ErrorIf(True, constants.CV_ENODEHV, node, 2444 "hypervisor %s parameter verify failure (source %s): %s", 2445 hv_name, item, hv_result) 2446 2447 test = nresult.get(constants.NV_NODESETUP, 2448 ["Missing NODESETUP results"]) 2449 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", 2450 "; ".join(test)) 2451 2452 return True
2453
2454 - def _VerifyNodeTime(self, ninfo, nresult, 2455 nvinfo_starttime, nvinfo_endtime):
2456 """Check the node time. 2457 2458 @type ninfo: L{objects.Node} 2459 @param ninfo: the node to check 2460 @param nresult: the remote results for the node 2461 @param nvinfo_starttime: the start time of the RPC call 2462 @param nvinfo_endtime: the end time of the RPC call 2463 2464 """ 2465 node = ninfo.name 2466 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2467 2468 ntime = nresult.get(constants.NV_TIME, None) 2469 try: 2470 ntime_merged = utils.MergeTime(ntime) 2471 except (ValueError, TypeError): 2472 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") 2473 return 2474 2475 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2476 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2477 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2478 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2479 else: 2480 ntime_diff = None 2481 2482 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, 2483 "Node time diverges by at least %s from master node time", 2484 ntime_diff)
2485
2486 - def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2487 """Check the node LVM results and update info for cross-node checks. 2488 2489 @type ninfo: L{objects.Node} 2490 @param ninfo: the node to check 2491 @param nresult: the remote results for the node 2492 @param vg_name: the configured VG name 2493 @type nimg: L{NodeImage} 2494 @param nimg: node image 2495 2496 """ 2497 if vg_name is None: 2498 return 2499 2500 node = ninfo.name 2501 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2502 2503 # checks vg existence and size > 20G 2504 vglist = nresult.get(constants.NV_VGLIST, None) 2505 test = not vglist 2506 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") 2507 if not test: 2508 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2509 constants.MIN_VG_SIZE) 2510 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) 2511 2512 # Check PVs 2513 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage) 2514 for em in errmsgs: 2515 self._Error(constants.CV_ENODELVM, node, em) 2516 if pvminmax is not None: 2517 (nimg.pv_min, nimg.pv_max) = pvminmax
2518
2519 - def _VerifyGroupLVM(self, node_image, vg_name):
2520 """Check cross-node consistency in LVM. 2521 2522 @type node_image: dict 2523 @param node_image: info about nodes, mapping from node to names to 2524 L{NodeImage} objects 2525 @param vg_name: the configured VG name 2526 2527 """ 2528 if vg_name is None: 2529 return 2530 2531 # Only exlcusive storage needs this kind of checks 2532 if not self._exclusive_storage: 2533 return 2534 2535 # exclusive_storage wants all PVs to have the same size (approximately), 2536 # if the smallest and the biggest ones are okay, everything is fine. 2537 # pv_min is None iff pv_max is None 2538 vals = filter((lambda ni: ni.pv_min is not None), node_image.values()) 2539 if not vals: 2540 return 2541 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals) 2542 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals) 2543 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax) 2544 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name, 2545 "PV sizes differ too much in the group; smallest (%s MB) is" 2546 " on %s, biggest (%s MB) is on %s", 2547 pvmin, minnode, pvmax, maxnode)
2548
2549 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2550 """Check the node bridges. 2551 2552 @type ninfo: L{objects.Node} 2553 @param ninfo: the node to check 2554 @param nresult: the remote results for the node 2555 @param bridges: the expected list of bridges 2556 2557 """ 2558 if not bridges: 2559 return 2560 2561 node = ninfo.name 2562 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2563 2564 missing = nresult.get(constants.NV_BRIDGES, None) 2565 test = not isinstance(missing, list) 2566 _ErrorIf(test, constants.CV_ENODENET, node, 2567 "did not return valid bridge information") 2568 if not test: 2569 _ErrorIf(bool(missing), constants.CV_ENODENET, node, 2570 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2571
2572 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2573 """Check the results of user scripts presence and executability on the node 2574 2575 @type ninfo: L{objects.Node} 2576 @param ninfo: the node to check 2577 @param nresult: the remote results for the node 2578 2579 """ 2580 node = ninfo.name 2581 2582 test = not constants.NV_USERSCRIPTS in nresult 2583 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node, 2584 "did not return user scripts information") 2585 2586 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) 2587 if not test: 2588 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node, 2589 "user scripts not present or not executable: %s" % 2590 utils.CommaJoin(sorted(broken_scripts)))
2591
2592 - def _VerifyNodeNetwork(self, ninfo, nresult):
2593 """Check the node network connectivity results. 2594 2595 @type ninfo: L{objects.Node} 2596 @param ninfo: the node to check 2597 @param nresult: the remote results for the node 2598 2599 """ 2600 node = ninfo.name 2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2602 2603 test = constants.NV_NODELIST not in nresult 2604 _ErrorIf(test, constants.CV_ENODESSH, node, 2605 "node hasn't returned node ssh connectivity data") 2606 if not test: 2607 if nresult[constants.NV_NODELIST]: 2608 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2609 _ErrorIf(True, constants.CV_ENODESSH, node, 2610 "ssh communication with node '%s': %s", a_node, a_msg) 2611 2612 test = constants.NV_NODENETTEST not in nresult 2613 _ErrorIf(test, constants.CV_ENODENET, node, 2614 "node hasn't returned node tcp connectivity data") 2615 if not test: 2616 if nresult[constants.NV_NODENETTEST]: 2617 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2618 for anode in nlist: 2619 _ErrorIf(True, constants.CV_ENODENET, node, 2620 "tcp communication with node '%s': %s", 2621 anode, nresult[constants.NV_NODENETTEST][anode]) 2622 2623 test = constants.NV_MASTERIP not in nresult 2624 _ErrorIf(test, constants.CV_ENODENET, node, 2625 "node hasn't returned node master IP reachability data") 2626 if not test: 2627 if not nresult[constants.NV_MASTERIP]: 2628 if node == self.master_node: 2629 msg = "the master node cannot reach the master IP (not configured?)" 2630 else: 2631 msg = "cannot reach the master IP" 2632 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2633
2634 - def _VerifyInstance(self, instance, inst_config, node_image, 2635 diskstatus):
2636 """Verify an instance. 2637 2638 This function checks to see if the required block devices are 2639 available on the instance's node, and that the nodes are in the correct 2640 state. 2641 2642 """ 2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2644 pnode = inst_config.primary_node 2645 pnode_img = node_image[pnode] 2646 groupinfo = self.cfg.GetAllNodeGroupsInfo() 2647 2648 node_vol_should = {} 2649 inst_config.MapLVsByNode(node_vol_should) 2650 2651 cluster = self.cfg.GetClusterInfo() 2652 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 2653 self.group_info) 2654 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg) 2655 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err), 2656 code=self.ETYPE_WARNING) 2657 2658 for node in node_vol_should: 2659 n_img = node_image[node] 2660 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2661 # ignore missing volumes on offline or broken nodes 2662 continue 2663 for volume in node_vol_should[node]: 2664 test = volume not in n_img.volumes 2665 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, 2666 "volume %s missing on node %s", volume, node) 2667 2668 if inst_config.admin_state == constants.ADMINST_UP: 2669 test = instance not in pnode_img.instances and not pnode_img.offline 2670 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, 2671 "instance not running on its primary node %s", 2672 pnode) 2673 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance, 2674 "instance is marked as running and lives on offline node %s", 2675 pnode) 2676 2677 diskdata = [(nname, success, status, idx) 2678 for (nname, disks) in diskstatus.items() 2679 for idx, (success, status) in enumerate(disks)] 2680 2681 for nname, success, bdev_status, idx in diskdata: 2682 # the 'ghost node' construction in Exec() ensures that we have a 2683 # node here 2684 snode = node_image[nname] 2685 bad_snode = snode.ghost or snode.offline 2686 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and 2687 not success and not bad_snode, 2688 constants.CV_EINSTANCEFAULTYDISK, instance, 2689 "couldn't retrieve status for disk/%s on %s: %s", 2690 idx, nname, bdev_status) 2691 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and 2692 success and bdev_status.ldisk_status == constants.LDS_FAULTY), 2693 constants.CV_EINSTANCEFAULTYDISK, instance, 2694 "disk/%s on %s is faulty", idx, nname) 2695 2696 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2697 constants.CV_ENODERPC, pnode, "instance %s, connection to" 2698 " primary node failed", instance) 2699 2700 _ErrorIf(len(inst_config.secondary_nodes) > 1, 2701 constants.CV_EINSTANCELAYOUT, 2702 instance, "instance has multiple secondary nodes: %s", 2703 utils.CommaJoin(inst_config.secondary_nodes), 2704 code=self.ETYPE_WARNING) 2705 2706 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE: 2707 # Disk template not compatible with exclusive_storage: no instance 2708 # node should have the flag set 2709 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, 2710 inst_config.all_nodes) 2711 es_nodes = [n for (n, es) in es_flags.items() 2712 if es] 2713 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance, 2714 "instance has template %s, which is not supported on nodes" 2715 " that have exclusive storage set: %s", 2716 inst_config.disk_template, utils.CommaJoin(es_nodes)) 2717 2718 if inst_config.disk_template in constants.DTS_INT_MIRROR: 2719 instance_nodes = utils.NiceSort(inst_config.all_nodes) 2720 
instance_groups = {} 2721 2722 for node in instance_nodes: 2723 instance_groups.setdefault(self.all_node_info[node].group, 2724 []).append(node) 2725 2726 pretty_list = [ 2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) 2728 # Sort so that we always list the primary node first. 2729 for group, nodes in sorted(instance_groups.items(), 2730 key=lambda (_, nodes): pnode in nodes, 2731 reverse=True)] 2732 2733 self._ErrorIf(len(instance_groups) > 1, 2734 constants.CV_EINSTANCESPLITGROUPS, 2735 instance, "instance has primary and secondary nodes in" 2736 " different groups: %s", utils.CommaJoin(pretty_list), 2737 code=self.ETYPE_WARNING) 2738 2739 inst_nodes_offline = [] 2740 for snode in inst_config.secondary_nodes: 2741 s_img = node_image[snode] 2742 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 2743 snode, "instance %s, connection to secondary node failed", 2744 instance) 2745 2746 if s_img.offline: 2747 inst_nodes_offline.append(snode) 2748 2749 # warn that the instance lives on offline nodes 2750 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance, 2751 "instance has offline secondary node(s) %s", 2752 utils.CommaJoin(inst_nodes_offline)) 2753 # ... or ghost/non-vm_capable nodes 2754 for node in inst_config.all_nodes: 2755 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE, 2756 instance, "instance lives on ghost node %s", node) 2757 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE, 2758 instance, "instance lives on non-vm_capable node %s", node)
2759
2760 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2761 """Verify if there are any unknown volumes in the cluster. 2762 2763 The .os, .swap and backup volumes are ignored. All other volumes are 2764 reported as unknown. 2765 2766 @type reserved: L{ganeti.utils.FieldSet} 2767 @param reserved: a FieldSet of reserved volume names 2768 2769 """ 2770 for node, n_img in node_image.items(): 2771 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2772 self.all_node_info[node].group != self.group_uuid): 2773 # skip non-healthy nodes 2774 continue 2775 for volume in n_img.volumes: 2776 test = ((node not in node_vol_should or 2777 volume not in node_vol_should[node]) and 2778 not reserved.Matches(volume)) 2779 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, 2780 "volume %s is unknown", volume)
2781
2782 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2783 """Verify N+1 Memory Resilience. 2784 2785 Check that if one single node dies we can still start all the 2786 instances it was primary for. 2787 2788 """ 2789 cluster_info = self.cfg.GetClusterInfo() 2790 for node, n_img in node_image.items(): 2791 # This code checks that every node which is now listed as 2792 # secondary has enough memory to host all instances it is 2793 # supposed to should a single other node in the cluster fail. 2794 # FIXME: not ready for failover to an arbitrary node 2795 # FIXME: does not support file-backed instances 2796 # WARNING: we currently take into account down instances as well 2797 # as up ones, considering that even if they're down someone 2798 # might want to start them even in the event of a node failure. 2799 if n_img.offline or self.all_node_info[node].group != self.group_uuid: 2800 # we're skipping nodes marked offline and nodes in other groups from 2801 # the N+1 warning, since most likely we don't have good memory 2802 # infromation from them; we already list instances living on such 2803 # nodes, and that's enough warning 2804 continue 2805 #TODO(dynmem): also consider ballooning out other instances 2806 for prinode, instances in n_img.sbp.items(): 2807 needed_mem = 0 2808 for instance in instances: 2809 bep = cluster_info.FillBE(instance_cfg[instance]) 2810 if bep[constants.BE_AUTO_BALANCE]: 2811 needed_mem += bep[constants.BE_MINMEM] 2812 test = n_img.mfree < needed_mem 2813 self._ErrorIf(test, constants.CV_ENODEN1, node, 2814 "not enough memory to accomodate instance failovers" 2815 " should node %s fail (%dMiB needed, %dMiB available)", 2816 prinode, needed_mem, n_img.mfree)
2817 2818 @classmethod
2819 - def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo, 2820 (files_all, files_opt, files_mc, files_vm)):
2821 """Verifies file checksums collected from all nodes. 2822 2823 @param errorif: Callback for reporting errors 2824 @param nodeinfo: List of L{objects.Node} objects 2825 @param master_node: Name of master node 2826 @param all_nvinfo: RPC results 2827 2828 """ 2829 # Define functions determining which nodes to consider for a file 2830 files2nodefn = [ 2831 (files_all, None), 2832 (files_mc, lambda node: (node.master_candidate or 2833 node.name == master_node)), 2834 (files_vm, lambda node: node.vm_capable), 2835 ] 2836 2837 # Build mapping from filename to list of nodes which should have the file 2838 nodefiles = {} 2839 for (files, fn) in files2nodefn: 2840 if fn is None: 2841 filenodes = nodeinfo 2842 else: 2843 filenodes = filter(fn, nodeinfo) 2844 nodefiles.update((filename, 2845 frozenset(map(operator.attrgetter("name"), filenodes))) 2846 for filename in files) 2847 2848 assert set(nodefiles) == (files_all | files_mc | files_vm) 2849 2850 fileinfo = dict((filename, {}) for filename in nodefiles) 2851 ignore_nodes = set() 2852 2853 for node in nodeinfo: 2854 if node.offline: 2855 ignore_nodes.add(node.name) 2856 continue 2857 2858 nresult = all_nvinfo[node.name] 2859 2860 if nresult.fail_msg or not nresult.payload: 2861 node_files = None 2862 else: 2863 fingerprints = nresult.payload.get(constants.NV_FILELIST, None) 2864 node_files = dict((vcluster.LocalizeVirtualPath(key), value) 2865 for (key, value) in fingerprints.items()) 2866 del fingerprints 2867 2868 test = not (node_files and isinstance(node_files, dict)) 2869 errorif(test, constants.CV_ENODEFILECHECK, node.name, 2870 "Node did not return file checksum data") 2871 if test: 2872 ignore_nodes.add(node.name) 2873 continue 2874 2875 # Build per-checksum mapping from filename to nodes having it 2876 for (filename, checksum) in node_files.items(): 2877 assert filename in nodefiles 2878 fileinfo[filename].setdefault(checksum, set()).add(node.name) 2879 2880 for (filename, checksums) in fileinfo.items(): 2881 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2882 2883 # Nodes having the file 2884 with_file = frozenset(node_name 2885 for nodes in fileinfo[filename].values() 2886 for node_name in nodes) - ignore_nodes 2887 2888 expected_nodes = nodefiles[filename] - ignore_nodes 2889 2890 # Nodes missing file 2891 missing_file = expected_nodes - with_file 2892 2893 if filename in files_opt: 2894 # All or no nodes 2895 errorif(missing_file and missing_file != expected_nodes, 2896 constants.CV_ECLUSTERFILECHECK, None, 2897 "File %s is optional, but it must exist on all or no" 2898 " nodes (not found on %s)", 2899 filename, utils.CommaJoin(utils.NiceSort(missing_file))) 2900 else: 2901 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2902 "File %s is missing from node(s) %s", filename, 2903 utils.CommaJoin(utils.NiceSort(missing_file))) 2904 2905 # Warn if a node has a file it shouldn't 2906 unexpected = with_file - expected_nodes 2907 errorif(unexpected, 2908 constants.CV_ECLUSTERFILECHECK, None, 2909 "File %s should not exist on node(s) %s", 2910 filename, utils.CommaJoin(utils.NiceSort(unexpected))) 2911 2912 # See if there are multiple versions of the file 2913 test = len(checksums) > 1 2914 if test: 2915 variants = ["variant %s on %s" % 2916 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes))) 2917 for (idx, (checksum, nodes)) in 2918 enumerate(sorted(checksums.items()))] 2919 else: 2920 variants = [] 2921 2922 errorif(test, constants.CV_ECLUSTERFILECHECK, None, 2923 "File %s found with %s different 
checksums (%s)", 2924 filename, len(checksums), "; ".join(variants))
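Stripped of the optional-file and offline-node handling, the consensus logic groups nodes by the checksum they reported per file and flags missing or divergent copies. A reduced sketch over plain dictionaries (node name -> {filename: checksum}):

  def CheckFileConsensus(expected_nodes, reports):
    """Yield (filename, problem) pairs for missing or divergent files."""
    per_file = {}
    for node, files in reports.items():
      for fname, csum in files.items():
        per_file.setdefault(fname, {}).setdefault(csum, set()).add(node)

    for fname, checksums in sorted(per_file.items()):
      with_file = set()
      for nodes in checksums.values():
        with_file.update(nodes)
      missing = expected_nodes - with_file
      if missing:
        yield (fname, "missing on %s" % ", ".join(sorted(missing)))
      if len(checksums) > 1:
        yield (fname, "%d different checksums" % len(checksums))

  reports = {
    "node1": {"/etc/x.conf": "abc"},
    "node2": {"/etc/x.conf": "def"},
  }
  for problem in CheckFileConsensus(set(["node1", "node2", "node3"]), reports):
    print("%s: %s" % problem)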
2925
2926 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2927 drbd_map):
2928 """Verifies and the node DRBD status. 2929 2930 @type ninfo: L{objects.Node} 2931 @param ninfo: the node to check 2932 @param nresult: the remote results for the node 2933 @param instanceinfo: the dict of instances 2934 @param drbd_helper: the configured DRBD usermode helper 2935 @param drbd_map: the DRBD map as returned by 2936 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 2937 2938 """ 2939 node = ninfo.name 2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2941 2942 if drbd_helper: 2943 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 2944 test = (helper_result is None) 2945 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2946 "no drbd usermode helper returned") 2947 if helper_result: 2948 status, payload = helper_result 2949 test = not status 2950 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2951 "drbd usermode helper check unsuccessful: %s", payload) 2952 test = status and (payload != drbd_helper) 2953 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2954 "wrong drbd usermode helper: %s", payload) 2955 2956 # compute the DRBD minors 2957 node_drbd = {} 2958 for minor, instance in drbd_map[node].items(): 2959 test = instance not in instanceinfo 2960 _ErrorIf(test, constants.CV_ECLUSTERCFG, None, 2961 "ghost instance '%s' in temporary DRBD map", instance) 2962 # ghost instance should not be running, but otherwise we 2963 # don't give double warnings (both ghost instance and 2964 # unallocated minor in use) 2965 if test: 2966 node_drbd[minor] = (instance, False) 2967 else: 2968 instance = instanceinfo[instance] 2969 node_drbd[minor] = (instance.name, 2970 instance.admin_state == constants.ADMINST_UP) 2971 2972 # and now check them 2973 used_minors = nresult.get(constants.NV_DRBDLIST, []) 2974 test = not isinstance(used_minors, (tuple, list)) 2975 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2976 "cannot parse drbd status file: %s", str(used_minors)) 2977 if test: 2978 # we cannot check drbd status 2979 return 2980 2981 for minor, (iname, must_exist) in node_drbd.items(): 2982 test = minor not in used_minors and must_exist 2983 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2984 "drbd minor %d of instance %s is not active", minor, iname) 2985 for minor in used_minors: 2986 test = minor not in node_drbd 2987 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2988 "unallocated drbd minor %d is in use", minor)
2989
2990 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2991 """Builds the node OS structures. 2992 2993 @type ninfo: L{objects.Node} 2994 @param ninfo: the node to check 2995 @param nresult: the remote results for the node 2996 @param nimg: the node image object 2997 2998 """ 2999 node = ninfo.name 3000 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3001 3002 remote_os = nresult.get(constants.NV_OSLIST, None) 3003 test = (not isinstance(remote_os, list) or 3004 not compat.all(isinstance(v, list) and len(v) == 7 3005 for v in remote_os)) 3006 3007 _ErrorIf(test, constants.CV_ENODEOS, node, 3008 "node hasn't returned valid OS data") 3009 3010 nimg.os_fail = test 3011 3012 if test: 3013 return 3014 3015 os_dict = {} 3016 3017 for (name, os_path, status, diagnose, 3018 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 3019 3020 if name not in os_dict: 3021 os_dict[name] = [] 3022 3023 # parameters is a list of lists instead of list of tuples due to 3024 # JSON lacking a real tuple type, fix it: 3025 parameters = [tuple(v) for v in parameters] 3026 os_dict[name].append((os_path, status, diagnose, 3027 set(variants), set(parameters), set(api_ver))) 3028 3029 nimg.oslist = os_dict
3030
3031 - def _VerifyNodeOS(self, ninfo, nimg, base):
3032 """Verifies the node OS list. 3033 3034 @type ninfo: L{objects.Node} 3035 @param ninfo: the node to check 3036 @param nimg: the node image object 3037 @param base: the 'template' node we match against (e.g. from the master) 3038 3039 """ 3040 node = ninfo.name 3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3042 3043 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 3044 3045 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 3046 for os_name, os_data in nimg.oslist.items(): 3047 assert os_data, "Empty OS status for OS %s?!" % os_name 3048 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 3049 _ErrorIf(not f_status, constants.CV_ENODEOS, node, 3050 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 3051 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node, 3052 "OS '%s' has multiple entries (first one shadows the rest): %s", 3053 os_name, utils.CommaJoin([v[0] for v in os_data])) 3054 # comparisons with the 'base' image 3055 test = os_name not in base.oslist 3056 _ErrorIf(test, constants.CV_ENODEOS, node, 3057 "Extra OS %s not present on reference node (%s)", 3058 os_name, base.name) 3059 if test: 3060 continue 3061 assert base.oslist[os_name], "Base node has empty OS status?" 3062 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 3063 if not b_status: 3064 # base OS is invalid, skipping 3065 continue 3066 for kind, a, b in [("API version", f_api, b_api), 3067 ("variants list", f_var, b_var), 3068 ("parameters", beautify_params(f_param), 3069 beautify_params(b_param))]: 3070 _ErrorIf(a != b, constants.CV_ENODEOS, node, 3071 "OS %s for %s differs from reference node %s: [%s] vs. [%s]", 3072 kind, os_name, base.name, 3073 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 3074 3075 # check any missing OSes 3076 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 3077 _ErrorIf(missing, constants.CV_ENODEOS, node, 3078 "OSes present on reference node %s but missing on this node: %s", 3079 base.name, utils.CommaJoin(missing))
3080
3081 - def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3082 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. 3083 3084 @type ninfo: L{objects.Node} 3085 @param ninfo: the node to check 3086 @param nresult: the remote results for the node 3087 @type is_master: bool 3088 @param is_master: Whether node is the master node 3089 3090 """ 3091 node = ninfo.name 3092 3093 if (is_master and 3094 (constants.ENABLE_FILE_STORAGE or 3095 constants.ENABLE_SHARED_FILE_STORAGE)): 3096 try: 3097 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS] 3098 except KeyError: 3099 # This should never happen 3100 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node, 3101 "Node did not return forbidden file storage paths") 3102 else: 3103 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node, 3104 "Found forbidden file storage paths: %s", 3105 utils.CommaJoin(fspaths)) 3106 else: 3107 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult, 3108 constants.CV_ENODEFILESTORAGEPATHS, node, 3109 "Node should not have returned forbidden file storage" 3110 " paths")
3111
3112 - def _VerifyOob(self, ninfo, nresult):
3113 """Verifies out of band functionality of a node. 3114 3115 @type ninfo: L{objects.Node} 3116 @param ninfo: the node to check 3117 @param nresult: the remote results for the node 3118 3119 """ 3120 node = ninfo.name 3121 # We just have to verify the paths on master and/or master candidates 3122 # as the oob helper is invoked on the master 3123 if ((ninfo.master_candidate or ninfo.master_capable) and 3124 constants.NV_OOB_PATHS in nresult): 3125 for path_result in nresult[constants.NV_OOB_PATHS]: 3126 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3127
3128 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3129 """Verifies and updates the node volume data. 3130 3131 This function will update a L{NodeImage}'s internal structures 3132 with data from the remote call. 3133 3134 @type ninfo: L{objects.Node} 3135 @param ninfo: the node to check 3136 @param nresult: the remote results for the node 3137 @param nimg: the node image object 3138 @param vg_name: the configured VG name 3139 3140 """ 3141 node = ninfo.name 3142 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3143 3144 nimg.lvm_fail = True 3145 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 3146 if vg_name is None: 3147 pass 3148 elif isinstance(lvdata, basestring): 3149 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s", 3150 utils.SafeEncode(lvdata)) 3151 elif not isinstance(lvdata, dict): 3152 _ErrorIf(True, constants.CV_ENODELVM, node, 3153 "rpc call to node failed (lvlist)") 3154 else: 3155 nimg.volumes = lvdata 3156 nimg.lvm_fail = False
3157
3158 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3159 """Verifies and updates the node instance list. 3160 3161 If the listing was successful, then updates this node's instance 3162 list. Otherwise, it marks the RPC call as failed for the instance 3163 list key. 3164 3165 @type ninfo: L{objects.Node} 3166 @param ninfo: the node to check 3167 @param nresult: the remote results for the node 3168 @param nimg: the node image object 3169 3170 """ 3171 idata = nresult.get(constants.NV_INSTANCELIST, None) 3172 test = not isinstance(idata, list) 3173 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 3174 "rpc call to node failed (instancelist): %s", 3175 utils.SafeEncode(str(idata))) 3176 if test: 3177 nimg.hyp_fail = True 3178 else: 3179 nimg.instances = idata
3180
3181 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3182 """Verifies and computes a node information map 3183 3184 @type ninfo: L{objects.Node} 3185 @param ninfo: the node to check 3186 @param nresult: the remote results for the node 3187 @param nimg: the node image object 3188 @param vg_name: the configured VG name 3189 3190 """ 3191 node = ninfo.name 3192 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3193 3194 # try to read free memory (from the hypervisor) 3195 hv_info = nresult.get(constants.NV_HVINFO, None) 3196 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 3197 _ErrorIf(test, constants.CV_ENODEHV, node, 3198 "rpc call to node failed (hvinfo)") 3199 if not test: 3200 try: 3201 nimg.mfree = int(hv_info["memory_free"]) 3202 except (ValueError, TypeError): 3203 _ErrorIf(True, constants.CV_ENODERPC, node, 3204 "node returned invalid nodeinfo, check hypervisor") 3205 3206 # FIXME: devise a free space model for file based instances as well 3207 if vg_name is not None: 3208 test = (constants.NV_VGLIST not in nresult or 3209 vg_name not in nresult[constants.NV_VGLIST]) 3210 _ErrorIf(test, constants.CV_ENODELVM, node, 3211 "node didn't return data for the volume group '%s'" 3212 " - it is either missing or broken", vg_name) 3213 if not test: 3214 try: 3215 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 3216 except (ValueError, TypeError): 3217 _ErrorIf(True, constants.CV_ENODERPC, node, 3218 "node returned invalid LVM info, check LVM status")
3219
3220 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3221 """Gets per-disk status information for all instances. 3222 3223 @type nodelist: list of strings 3224 @param nodelist: Node names 3225 @type node_image: dict of (name, L{objects.Node}) 3226 @param node_image: Node objects 3227 @type instanceinfo: dict of (name, L{objects.Instance}) 3228 @param instanceinfo: Instance objects 3229 @rtype: {instance: {node: [(succes, payload)]}} 3230 @return: a dictionary of per-instance dictionaries with nodes as 3231 keys and disk information as values; the disk information is a 3232 list of tuples (success, payload) 3233 3234 """ 3235 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3236 3237 node_disks = {} 3238 node_disks_devonly = {} 3239 diskless_instances = set() 3240 diskless = constants.DT_DISKLESS 3241 3242 for nname in nodelist: 3243 node_instances = list(itertools.chain(node_image[nname].pinst, 3244 node_image[nname].sinst)) 3245 diskless_instances.update(inst for inst in node_instances 3246 if instanceinfo[inst].disk_template == diskless) 3247 disks = [(inst, disk) 3248 for inst in node_instances 3249 for disk in instanceinfo[inst].disks] 3250 3251 if not disks: 3252 # No need to collect data 3253 continue 3254 3255 node_disks[nname] = disks 3256 3257 # _AnnotateDiskParams makes already copies of the disks 3258 devonly = [] 3259 for (inst, dev) in disks: 3260 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg) 3261 self.cfg.SetDiskID(anno_disk, nname) 3262 devonly.append(anno_disk) 3263 3264 node_disks_devonly[nname] = devonly 3265 3266 assert len(node_disks) == len(node_disks_devonly) 3267 3268 # Collect data from all nodes with disks 3269 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 3270 node_disks_devonly) 3271 3272 assert len(result) == len(node_disks) 3273 3274 instdisk = {} 3275 3276 for (nname, nres) in result.items(): 3277 disks = node_disks[nname] 3278 3279 if nres.offline: 3280 # No data from this node 3281 data = len(disks) * [(False, "node offline")] 3282 else: 3283 msg = nres.fail_msg 3284 _ErrorIf(msg, constants.CV_ENODERPC, nname, 3285 "while getting disk information: %s", msg) 3286 if msg: 3287 # No data from this node 3288 data = len(disks) * [(False, msg)] 3289 else: 3290 data = [] 3291 for idx, i in enumerate(nres.payload): 3292 if isinstance(i, (tuple, list)) and len(i) == 2: 3293 data.append(i) 3294 else: 3295 logging.warning("Invalid result from node %s, entry %d: %s", 3296 nname, idx, i) 3297 data.append((False, "Invalid result from the remote node")) 3298 3299 for ((inst, _), status) in zip(disks, data): 3300 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 3301 3302 # Add empty entries for diskless instances. 3303 for inst in diskless_instances: 3304 assert inst not in instdisk 3305 instdisk[inst] = {} 3306 3307 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 3308 len(nnames) <= len(instanceinfo[inst].all_nodes) and 3309 compat.all(isinstance(s, (tuple, list)) and 3310 len(s) == 2 for s in statuses) 3311 for inst, nnames in instdisk.items() 3312 for nname, statuses in nnames.items()) 3313 if __debug__: 3314 instdisk_keys = set(instdisk) 3315 instanceinfo_keys = set(instanceinfo) 3316 assert instdisk_keys == instanceinfo_keys, \ 3317 ("instdisk keys (%s) do not match instanceinfo keys (%s)" % 3318 (instdisk_keys, instanceinfo_keys)) 3319 3320 return instdisk
3321 3322 @staticmethod
3323 - def _SshNodeSelector(group_uuid, all_nodes):
3324 """Create endless iterators for all potential SSH check hosts. 3325 3326 """ 3327 nodes = [node for node in all_nodes 3328 if (node.group != group_uuid and 3329 not node.offline)] 3330 keyfunc = operator.attrgetter("group") 3331 3332 return map(itertools.cycle, 3333 [sorted(map(operator.attrgetter("name"), names)) 3334 for _, names in itertools.groupby(sorted(nodes, key=keyfunc), 3335 keyfunc)])
3336 3337 @classmethod
3338 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3339 """Choose which nodes should talk to which other nodes. 3340 3341 We will make nodes contact all nodes in their group, and one node from 3342 every other group. 3343 3344 @warning: This algorithm has a known issue if one node group is much 3345 smaller than others (e.g. just one node). In such a case all other 3346 nodes will talk to the single node. 3347 3348 """ 3349 online_nodes = sorted(node.name for node in group_nodes if not node.offline) 3350 sel = cls._SshNodeSelector(group_uuid, all_nodes) 3351 3352 return (online_nodes, 3353 dict((name, sorted([i.next() for i in sel])) 3354 for name in online_nodes))
3355
3356 - def BuildHooksEnv(self):
3357 """Build hooks env. 3358 3359 Cluster-Verify hooks just ran in the post phase and their failure makes 3360 the output be logged in the verify output and the verification to fail. 3361 3362 """ 3363 env = { 3364 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()), 3365 } 3366 3367 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags())) 3368 for node in self.my_node_info.values()) 3369 3370 return env
3371
3372 - def BuildHooksNodes(self):
3373 """Build hooks nodes. 3374 3375 """ 3376 return ([], self.my_node_names)
3377
3378 - def Exec(self, feedback_fn):
3379 """Verify integrity of the node group, performing various test on nodes. 3380 3381 """ 3382 # This method has too many local variables. pylint: disable=R0914 3383 feedback_fn("* Verifying group '%s'" % self.group_info.name) 3384 3385 if not self.my_node_names: 3386 # empty node group 3387 feedback_fn("* Empty node group, skipping verification") 3388 return True 3389 3390 self.bad = False 3391 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3392 verbose = self.op.verbose 3393 self._feedback_fn = feedback_fn 3394 3395 vg_name = self.cfg.GetVGName() 3396 drbd_helper = self.cfg.GetDRBDHelper() 3397 cluster = self.cfg.GetClusterInfo() 3398 hypervisors = cluster.enabled_hypervisors 3399 node_data_list = [self.my_node_info[name] for name in self.my_node_names] 3400 3401 i_non_redundant = [] # Non redundant instances 3402 i_non_a_balanced = [] # Non auto-balanced instances 3403 i_offline = 0 # Count of offline instances 3404 n_offline = 0 # Count of offline nodes 3405 n_drained = 0 # Count of nodes being drained 3406 node_vol_should = {} 3407 3408 # FIXME: verify OS list 3409 3410 # File verification 3411 filemap = _ComputeAncillaryFiles(cluster, False) 3412 3413 # do local checksums 3414 master_node = self.master_node = self.cfg.GetMasterNode() 3415 master_ip = self.cfg.GetMasterIP() 3416 3417 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names)) 3418 3419 user_scripts = [] 3420 if self.cfg.GetUseExternalMipScript(): 3421 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) 3422 3423 node_verify_param = { 3424 constants.NV_FILELIST: 3425 map(vcluster.MakeVirtualPath, 3426 utils.UniqueSequence(filename 3427 for files in filemap 3428 for filename in files)), 3429 constants.NV_NODELIST: 3430 self._SelectSshCheckNodes(node_data_list, self.group_uuid, 3431 self.all_node_info.values()), 3432 constants.NV_HYPERVISOR: hypervisors, 3433 constants.NV_HVPARAMS: 3434 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), 3435 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) 3436 for node in node_data_list 3437 if not node.offline], 3438 constants.NV_INSTANCELIST: hypervisors, 3439 constants.NV_VERSION: None, 3440 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 3441 constants.NV_NODESETUP: None, 3442 constants.NV_TIME: None, 3443 constants.NV_MASTERIP: (master_node, master_ip), 3444 constants.NV_OSLIST: None, 3445 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 3446 constants.NV_USERSCRIPTS: user_scripts, 3447 } 3448 3449 if vg_name is not None: 3450 node_verify_param[constants.NV_VGLIST] = None 3451 node_verify_param[constants.NV_LVLIST] = vg_name 3452 node_verify_param[constants.NV_PVLIST] = [vg_name] 3453 3454 if drbd_helper: 3455 node_verify_param[constants.NV_DRBDLIST] = None 3456 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 3457 3458 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE: 3459 # Load file storage paths only from master node 3460 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node 3461 3462 # bridge checks 3463 # FIXME: this needs to be changed per node-group, not cluster-wide 3464 bridges = set() 3465 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 3466 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3467 bridges.add(default_nicpp[constants.NIC_LINK]) 3468 for instance in self.my_inst_info.values(): 3469 for nic in instance.nics: 3470 full_nic = cluster.SimpleFillNIC(nic.nicparams) 3471 if full_nic[constants.NIC_MODE] == 
constants.NIC_MODE_BRIDGED: 3472 bridges.add(full_nic[constants.NIC_LINK]) 3473 3474 if bridges: 3475 node_verify_param[constants.NV_BRIDGES] = list(bridges) 3476 3477 # Build our expected cluster state 3478 node_image = dict((node.name, self.NodeImage(offline=node.offline, 3479 name=node.name, 3480 vm_capable=node.vm_capable)) 3481 for node in node_data_list) 3482 3483 # Gather OOB paths 3484 oob_paths = [] 3485 for node in self.all_node_info.values(): 3486 path = _SupportsOob(self.cfg, node) 3487 if path and path not in oob_paths: 3488 oob_paths.append(path) 3489 3490 if oob_paths: 3491 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 3492 3493 for instance in self.my_inst_names: 3494 inst_config = self.my_inst_info[instance] 3495 if inst_config.admin_state == constants.ADMINST_OFFLINE: 3496 i_offline += 1 3497 3498 for nname in inst_config.all_nodes: 3499 if nname not in node_image: 3500 gnode = self.NodeImage(name=nname) 3501 gnode.ghost = (nname not in self.all_node_info) 3502 node_image[nname] = gnode 3503 3504 inst_config.MapLVsByNode(node_vol_should) 3505 3506 pnode = inst_config.primary_node 3507 node_image[pnode].pinst.append(instance) 3508 3509 for snode in inst_config.secondary_nodes: 3510 nimg = node_image[snode] 3511 nimg.sinst.append(instance) 3512 if pnode not in nimg.sbp: 3513 nimg.sbp[pnode] = [] 3514 nimg.sbp[pnode].append(instance) 3515 3516 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names) 3517 # The value of exclusive_storage should be the same across the group, so if 3518 # it's True for at least a node, we act as if it were set for all the nodes 3519 self._exclusive_storage = compat.any(es_flags.values()) 3520 if self._exclusive_storage: 3521 node_verify_param[constants.NV_EXCLUSIVEPVS] = True 3522 3523 # At this point, we have the in-memory data structures complete, 3524 # except for the runtime information, which we'll gather next 3525 3526 # Due to the way our RPC system works, exact response times cannot be 3527 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 3528 # time before and after executing the request, we can at least have a time 3529 # window. 3530 nvinfo_starttime = time.time() 3531 all_nvinfo = self.rpc.call_node_verify(self.my_node_names, 3532 node_verify_param, 3533 self.cfg.GetClusterName()) 3534 nvinfo_endtime = time.time() 3535 3536 if self.extra_lv_nodes and vg_name is not None: 3537 extra_lv_nvinfo = \ 3538 self.rpc.call_node_verify(self.extra_lv_nodes, 3539 {constants.NV_LVLIST: vg_name}, 3540 self.cfg.GetClusterName()) 3541 else: 3542 extra_lv_nvinfo = {} 3543 3544 all_drbd_map = self.cfg.ComputeDRBDMap() 3545 3546 feedback_fn("* Gathering disk information (%s nodes)" % 3547 len(self.my_node_names)) 3548 instdisk = self._CollectDiskInfo(self.my_node_names, node_image, 3549 self.my_inst_info) 3550 3551 feedback_fn("* Verifying configuration file consistency") 3552 3553 # If not all nodes are being checked, we need to make sure the master node 3554 # and a non-checked vm_capable node are in the list. 
3555 absent_nodes = set(self.all_node_info).difference(self.my_node_info) 3556 if absent_nodes: 3557 vf_nvinfo = all_nvinfo.copy() 3558 vf_node_info = list(self.my_node_info.values()) 3559 additional_nodes = [] 3560 if master_node not in self.my_node_info: 3561 additional_nodes.append(master_node) 3562 vf_node_info.append(self.all_node_info[master_node]) 3563 # Add the first vm_capable node we find which is not included, 3564 # excluding the master node (which we already have) 3565 for node in absent_nodes: 3566 nodeinfo = self.all_node_info[node] 3567 if (nodeinfo.vm_capable and not nodeinfo.offline and 3568 node != master_node): 3569 additional_nodes.append(node) 3570 vf_node_info.append(self.all_node_info[node]) 3571 break 3572 key = constants.NV_FILELIST 3573 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes, 3574 {key: node_verify_param[key]}, 3575 self.cfg.GetClusterName())) 3576 else: 3577 vf_nvinfo = all_nvinfo 3578 vf_node_info = self.my_node_info.values() 3579 3580 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap) 3581 3582 feedback_fn("* Verifying node status") 3583 3584 refos_img = None 3585 3586 for node_i in node_data_list: 3587 node = node_i.name 3588 nimg = node_image[node] 3589 3590 if node_i.offline: 3591 if verbose: 3592 feedback_fn("* Skipping offline node %s" % (node,)) 3593 n_offline += 1 3594 continue 3595 3596 if node == master_node: 3597 ntype = "master" 3598 elif node_i.master_candidate: 3599 ntype = "master candidate" 3600 elif node_i.drained: 3601 ntype = "drained" 3602 n_drained += 1 3603 else: 3604 ntype = "regular" 3605 if verbose: 3606 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 3607 3608 msg = all_nvinfo[node].fail_msg 3609 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s", 3610 msg) 3611 if msg: 3612 nimg.rpc_fail = True 3613 continue 3614 3615 nresult = all_nvinfo[node].payload 3616 3617 nimg.call_ok = self._VerifyNode(node_i, nresult) 3618 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 3619 self._VerifyNodeNetwork(node_i, nresult) 3620 self._VerifyNodeUserScripts(node_i, nresult) 3621 self._VerifyOob(node_i, nresult) 3622 self._VerifyFileStoragePaths(node_i, nresult, 3623 node == master_node) 3624 3625 if nimg.vm_capable: 3626 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg) 3627 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper, 3628 all_drbd_map) 3629 3630 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 3631 self._UpdateNodeInstances(node_i, nresult, nimg) 3632 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 3633 self._UpdateNodeOS(node_i, nresult, nimg) 3634 3635 if not nimg.os_fail: 3636 if refos_img is None: 3637 refos_img = nimg 3638 self._VerifyNodeOS(node_i, nimg, refos_img) 3639 self._VerifyNodeBridges(node_i, nresult, bridges) 3640 3641 # Check whether all running instancies are primary for the node. (This 3642 # can no longer be done from _VerifyInstance below, since some of the 3643 # wrong instances could be from other node groups.) 
3644 non_primary_inst = set(nimg.instances).difference(nimg.pinst) 3645 3646 for inst in non_primary_inst: 3647 test = inst in self.all_inst_info 3648 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst, 3649 "instance should not run on node %s", node_i.name) 3650 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name, 3651 "node is running unknown instance %s", inst) 3652 3653 self._VerifyGroupLVM(node_image, vg_name) 3654 3655 for node, result in extra_lv_nvinfo.items(): 3656 self._UpdateNodeVolumes(self.all_node_info[node], result.payload, 3657 node_image[node], vg_name) 3658 3659 feedback_fn("* Verifying instance status") 3660 for instance in self.my_inst_names: 3661 if verbose: 3662 feedback_fn("* Verifying instance %s" % instance) 3663 inst_config = self.my_inst_info[instance] 3664 self._VerifyInstance(instance, inst_config, node_image, 3665 instdisk[instance]) 3666 3667 # If the instance is non-redundant we cannot survive losing its primary 3668 # node, so we are not N+1 compliant. 3669 if inst_config.disk_template not in constants.DTS_MIRRORED: 3670 i_non_redundant.append(instance) 3671 3672 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 3673 i_non_a_balanced.append(instance) 3674 3675 feedback_fn("* Verifying orphan volumes") 3676 reserved = utils.FieldSet(*cluster.reserved_lvs) 3677 3678 # We will get spurious "unknown volume" warnings if any node of this group 3679 # is secondary for an instance whose primary is in another group. To avoid 3680 # them, we find these instances and add their volumes to node_vol_should. 3681 for inst in self.all_inst_info.values(): 3682 for secondary in inst.secondary_nodes: 3683 if (secondary in self.my_node_info 3684 and inst.name not in self.my_inst_info): 3685 inst.MapLVsByNode(node_vol_should) 3686 break 3687 3688 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 3689 3690 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 3691 feedback_fn("* Verifying N+1 Memory redundancy") 3692 self._VerifyNPlusOneMemory(node_image, self.my_inst_info) 3693 3694 feedback_fn("* Other Notes") 3695 if i_non_redundant: 3696 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 3697 % len(i_non_redundant)) 3698 3699 if i_non_a_balanced: 3700 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 3701 % len(i_non_a_balanced)) 3702 3703 if i_offline: 3704 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline) 3705 3706 if n_offline: 3707 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 3708 3709 if n_drained: 3710 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 3711 3712 return not self.bad
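The comment in the method above about recording the time before and after the node verification RPC can be illustrated with a small hedged sketch (the helper name and drift constant are invented, not part of the module): a remote clock reading is only considered plausible if it falls inside the window during which the RPC was outstanding, widened by an allowed drift.

import time

MAX_DRIFT = 1.0  # seconds; assumed value for the example only

def clock_within_window(remote_time, start, end, drift=MAX_DRIFT):
  # The remote timestamp may legitimately fall anywhere inside the window in
  # which the RPC was outstanding, plus the allowed drift on either side.
  return (start - drift) <= remote_time <= (end + drift)

start = time.time()
# ... the verification RPC would be issued here ...
end = time.time()
assert clock_within_window(time.time(), start, end)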
3713
3714 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3715 """Analyze the post-hooks' result 3716 3717 This method analyses the hook result, handles it, and sends some 3718 nicely-formatted feedback back to the user. 3719 3720 @param phase: one of L{constants.HOOKS_PHASE_POST} or 3721 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 3722 @param hooks_results: the results of the multi-node hooks rpc call 3723 @param feedback_fn: function used send feedback back to the caller 3724 @param lu_result: previous Exec result 3725 @return: the new Exec result, based on the previous result 3726 and hook results 3727 3728 """ 3729 # We only really run POST phase hooks, only for non-empty groups, 3730 # and are only interested in their results 3731 if not self.my_node_names: 3732 # empty node group 3733 pass 3734 elif phase == constants.HOOKS_PHASE_POST: 3735 # Used to change hooks' output to proper indentation 3736 feedback_fn("* Hooks Results") 3737 assert hooks_results, "invalid result from hooks" 3738 3739 for node_name in hooks_results: 3740 res = hooks_results[node_name] 3741 msg = res.fail_msg 3742 test = msg and not res.offline 3743 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3744 "Communication failure in hooks execution: %s", msg) 3745 if res.offline or msg: 3746 # No need to investigate payload if node is offline or gave 3747 # an error. 3748 continue 3749 for script, hkr, output in res.payload: 3750 test = hkr == constants.HKR_FAIL 3751 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3752 "Script %s failed, output:", script) 3753 if test: 3754 output = self._HOOKS_INDENT_RE.sub(" ", output) 3755 feedback_fn("%s" % output) 3756 lu_result = False 3757 3758 return lu_result
3759
3760 3761 -class LUClusterVerifyDisks(NoHooksLU):
3762 """Verifies the cluster disks status. 3763 3764 """ 3765 REQ_BGL = False 3766
3767 - def ExpandNames(self):
3768 self.share_locks = _ShareAll() 3769 self.needed_locks = { 3770 locking.LEVEL_NODEGROUP: locking.ALL_SET, 3771 }
3772
3773 - def Exec(self, feedback_fn):
3774 group_names = self.owned_locks(locking.LEVEL_NODEGROUP) 3775 3776 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group 3777 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)] 3778 for group in group_names])
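As a plain-data illustration (no Ganeti imports; the tuple below merely stands in for an opcode object, and the group names are invented), the value built above is a list of jobs in which each job is itself a one-element list holding the per-group verification opcode:

group_names = ["default", "rack2"]
jobs = [[("OpGroupVerifyDisks", {"group_name": group})]
        for group in group_names]
assert len(jobs) == len(group_names)
assert all(len(job) == 1 for job in jobs)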
3779
3780 3781 -class LUGroupVerifyDisks(NoHooksLU):
3782 """Verifies the status of all disks in a node group. 3783 3784 """ 3785 REQ_BGL = False 3786
3787 - def ExpandNames(self):
3788 # Raises errors.OpPrereqError on its own if group can't be found 3789 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 3790 3791 self.share_locks = _ShareAll() 3792 self.needed_locks = { 3793 locking.LEVEL_INSTANCE: [], 3794 locking.LEVEL_NODEGROUP: [], 3795 locking.LEVEL_NODE: [], 3796 3797 # This opcode acquires all node locks in a group. LUClusterVerifyDisks 3798 # starts one instance of this opcode for every group, which means all 3799 # nodes will be locked for a short amount of time, so it's better to 3800 # acquire the node allocation lock as well. 3801 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 3802 }
3803
3804 - def DeclareLocks(self, level):
3805 if level == locking.LEVEL_INSTANCE: 3806 assert not self.needed_locks[locking.LEVEL_INSTANCE] 3807 3808 # Lock instances optimistically, needs verification once node and group 3809 # locks have been acquired 3810 self.needed_locks[locking.LEVEL_INSTANCE] = \ 3811 self.cfg.GetNodeGroupInstances(self.group_uuid) 3812 3813 elif level == locking.LEVEL_NODEGROUP: 3814 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 3815 3816 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 3817 set([self.group_uuid] + 3818 # Lock all groups used by instances optimistically; this requires 3819 # going via the node before it's locked, requiring verification 3820 # later on 3821 [group_uuid 3822 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE) 3823 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)]) 3824 3825 elif level == locking.LEVEL_NODE: 3826 # This will only lock the nodes in the group to be verified which contain 3827 # actual instances 3828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 3829 self._LockInstancesNodes() 3830 3831 # Lock all nodes in group to be verified 3832 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 3833 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members 3834 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3835
3836 - def CheckPrereq(self):
3837 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 3838 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 3839 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 3840 3841 assert self.group_uuid in owned_groups 3842 3843 # Check if locked instances are still correct 3844 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 3845 3846 # Get instance information 3847 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances)) 3848 3849 # Check if node groups for locked instances are still correct 3850 _CheckInstancesNodeGroups(self.cfg, self.instances, 3851 owned_groups, owned_nodes, self.group_uuid)
3852
3853 - def Exec(self, feedback_fn):
3854 """Verify integrity of cluster disks. 3855 3856 @rtype: tuple of three items 3857 @return: a tuple of (dict of node-to-node_error, list of instances 3858 which need activate-disks, dict of instance: (node, volume) for 3859 missing volumes 3860 3861 """ 3862 res_nodes = {} 3863 res_instances = set() 3864 res_missing = {} 3865 3866 nv_dict = _MapInstanceDisksToNodes( 3867 [inst for inst in self.instances.values() 3868 if inst.admin_state == constants.ADMINST_UP]) 3869 3870 if nv_dict: 3871 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) & 3872 set(self.cfg.GetVmCapableNodeList())) 3873 3874 node_lvs = self.rpc.call_lv_list(nodes, []) 3875 3876 for (node, node_res) in node_lvs.items(): 3877 if node_res.offline: 3878 continue 3879 3880 msg = node_res.fail_msg 3881 if msg: 3882 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 3883 res_nodes[node] = msg 3884 continue 3885 3886 for lv_name, (_, _, lv_online) in node_res.payload.items(): 3887 inst = nv_dict.pop((node, lv_name), None) 3888 if not (lv_online or inst is None): 3889 res_instances.add(inst) 3890 3891 # any leftover items in nv_dict are missing LVs, let's arrange the data 3892 # better 3893 for key, inst in nv_dict.iteritems(): 3894 res_missing.setdefault(inst, []).append(list(key)) 3895 3896 return (res_nodes, list(res_instances), res_missing)
3897
3898 3899 -class LUClusterRepairDiskSizes(NoHooksLU):
3900 """Verifies the cluster disks sizes. 3901 3902 """ 3903 REQ_BGL = False 3904
3905 - def ExpandNames(self):
3906 if self.op.instances: 3907 self.wanted_names = _GetWantedInstances(self, self.op.instances) 3908 # Not getting the node allocation lock as only a specific set of 3909 # instances (and their nodes) is going to be acquired 3910 self.needed_locks = { 3911 locking.LEVEL_NODE_RES: [], 3912 locking.LEVEL_INSTANCE: self.wanted_names, 3913 } 3914 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 3915 else: 3916 self.wanted_names = None 3917 self.needed_locks = { 3918 locking.LEVEL_NODE_RES: locking.ALL_SET, 3919 locking.LEVEL_INSTANCE: locking.ALL_SET, 3920 3921 # This opcode acquires the node locks for all instances 3922 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 3923 } 3924 3925 self.share_locks = { 3926 locking.LEVEL_NODE_RES: 1, 3927 locking.LEVEL_INSTANCE: 0, 3928 locking.LEVEL_NODE_ALLOC: 1, 3929 }
3930
3931 - def DeclareLocks(self, level):
3932 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: 3933 self._LockInstancesNodes(primary_only=True, level=level)
3934
3935 - def CheckPrereq(self):
3936 """Check prerequisites. 3937 3938 This only checks the optional instance list against the existing names. 3939 3940 """ 3941 if self.wanted_names is None: 3942 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) 3943 3944 self.wanted_instances = \ 3945 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3946
3947 - def _EnsureChildSizes(self, disk):
3948 """Ensure children of the disk have the needed disk size. 3949 3950 This is valid mainly for DRBD8 and fixes an issue where the 3951 children have smaller disk size. 3952 3953 @param disk: an L{ganeti.objects.Disk} object 3954 3955 """ 3956 if disk.dev_type == constants.LD_DRBD8: 3957 assert disk.children, "Empty children for DRBD8?" 3958 fchild = disk.children[0] 3959 mismatch = fchild.size < disk.size 3960 if mismatch: 3961 self.LogInfo("Child disk has size %d, parent %d, fixing", 3962 fchild.size, disk.size) 3963 fchild.size = disk.size 3964 3965 # and we recurse on this child only, not on the metadev 3966 return self._EnsureChildSizes(fchild) or mismatch 3967 else: 3968 return False
3969
3970 - def Exec(self, feedback_fn):
3971 """Verify the size of cluster disks. 3972 3973 """ 3974 # TODO: check child disks too 3975 # TODO: check differences in size between primary/secondary nodes 3976 per_node_disks = {} 3977 for instance in self.wanted_instances: 3978 pnode = instance.primary_node 3979 if pnode not in per_node_disks: 3980 per_node_disks[pnode] = [] 3981 for idx, disk in enumerate(instance.disks): 3982 per_node_disks[pnode].append((instance, idx, disk)) 3983 3984 assert not (frozenset(per_node_disks.keys()) - 3985 self.owned_locks(locking.LEVEL_NODE_RES)), \ 3986 "Not owning correct locks" 3987 assert not self.owned_locks(locking.LEVEL_NODE) 3988 3989 changed = [] 3990 for node, dskl in per_node_disks.items(): 3991 newl = [v[2].Copy() for v in dskl] 3992 for dsk in newl: 3993 self.cfg.SetDiskID(dsk, node) 3994 result = self.rpc.call_blockdev_getsize(node, newl) 3995 if result.fail_msg: 3996 self.LogWarning("Failure in blockdev_getsize call to node" 3997 " %s, ignoring", node) 3998 continue 3999 if len(result.payload) != len(dskl): 4000 logging.warning("Invalid result from node %s: len(dksl)=%d," 4001 " result.payload=%s", node, len(dskl), result.payload) 4002 self.LogWarning("Invalid result from node %s, ignoring node results", 4003 node) 4004 continue 4005 for ((instance, idx, disk), size) in zip(dskl, result.payload): 4006 if size is None: 4007 self.LogWarning("Disk %d of instance %s did not return size" 4008 " information, ignoring", idx, instance.name) 4009 continue 4010 if not isinstance(size, (int, long)): 4011 self.LogWarning("Disk %d of instance %s did not return valid" 4012 " size information, ignoring", idx, instance.name) 4013 continue 4014 size = size >> 20 4015 if size != disk.size: 4016 self.LogInfo("Disk %d of instance %s has mismatched size," 4017 " correcting: recorded %d, actual %d", idx, 4018 instance.name, disk.size, size) 4019 disk.size = size 4020 self.cfg.Update(instance, feedback_fn) 4021 changed.append((instance.name, idx, size)) 4022 if self._EnsureChildSizes(disk): 4023 self.cfg.Update(instance, feedback_fn) 4024 changed.append((instance.name, idx, disk.size)) 4025 return changed
4026
4027 4028 -class LUClusterRename(LogicalUnit):
4029 """Rename the cluster. 4030 4031 """ 4032 HPATH = "cluster-rename" 4033 HTYPE = constants.HTYPE_CLUSTER 4034
4035 - def BuildHooksEnv(self):
4036 """Build hooks env. 4037 4038 """ 4039 return { 4040 "OP_TARGET": self.cfg.GetClusterName(), 4041 "NEW_NAME": self.op.name, 4042 }
4043
4044 - def BuildHooksNodes(self):
4045 """Build hooks nodes. 4046 4047 """ 4048 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4049
4050 - def CheckPrereq(self):
4051 """Verify that the passed name is a valid one. 4052 4053 """ 4054 hostname = netutils.GetHostname(name=self.op.name, 4055 family=self.cfg.GetPrimaryIPFamily()) 4056 4057 new_name = hostname.name 4058 self.ip = new_ip = hostname.ip 4059 old_name = self.cfg.GetClusterName() 4060 old_ip = self.cfg.GetMasterIP() 4061 if new_name == old_name and new_ip == old_ip: 4062 raise errors.OpPrereqError("Neither the name nor the IP address of the" 4063 " cluster has changed", 4064 errors.ECODE_INVAL) 4065 if new_ip != old_ip: 4066 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 4067 raise errors.OpPrereqError("The given cluster IP address (%s) is" 4068 " reachable on the network" % 4069 new_ip, errors.ECODE_NOTUNIQUE) 4070 4071 self.op.name = new_name
4072
4073 - def Exec(self, feedback_fn):
4074 """Rename the cluster. 4075 4076 """ 4077 clustername = self.op.name 4078 new_ip = self.ip 4079 4080 # shutdown the master IP 4081 master_params = self.cfg.GetMasterNetworkParameters() 4082 ems = self.cfg.GetUseExternalMipScript() 4083 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 4084 master_params, ems) 4085 result.Raise("Could not disable the master role") 4086 4087 try: 4088 cluster = self.cfg.GetClusterInfo() 4089 cluster.cluster_name = clustername 4090 cluster.master_ip = new_ip 4091 self.cfg.Update(cluster, feedback_fn) 4092 4093 # update the known hosts file 4094 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) 4095 node_list = self.cfg.GetOnlineNodeList() 4096 try: 4097 node_list.remove(master_params.name) 4098 except ValueError: 4099 pass 4100 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) 4101 finally: 4102 master_params.ip = new_ip 4103 result = self.rpc.call_node_activate_master_ip(master_params.name, 4104 master_params, ems) 4105 msg = result.fail_msg 4106 if msg: 4107 self.LogWarning("Could not re-enable the master role on" 4108 " the master, please restart manually: %s", msg) 4109 4110 return clustername
4111
4112 4113 -def _ValidateNetmask(cfg, netmask):
4114 """Checks if a netmask is valid. 4115 4116 @type cfg: L{config.ConfigWriter} 4117 @param cfg: The cluster configuration 4118 @type netmask: int 4119 @param netmask: the netmask to be verified 4120 @raise errors.OpPrereqError: if the validation fails 4121 4122 """ 4123 ip_family = cfg.GetPrimaryIPFamily() 4124 try: 4125 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family) 4126 except errors.ProgrammerError: 4127 raise errors.OpPrereqError("Invalid primary ip family: %s." % 4128 ip_family, errors.ECODE_INVAL) 4129 if not ipcls.ValidateNetmask(netmask): 4130 raise errors.OpPrereqError("CIDR netmask (%s) not valid" % 4131 (netmask), errors.ECODE_INVAL)
4132
4133 4134 -class LUClusterSetParams(LogicalUnit):
4135 """Change the parameters of the cluster. 4136 4137 """ 4138 HPATH = "cluster-modify" 4139 HTYPE = constants.HTYPE_CLUSTER 4140 REQ_BGL = False 4141
4142 - def CheckArguments(self):
4143 """Check parameters 4144 4145 """ 4146 if self.op.uid_pool: 4147 uidpool.CheckUidPool(self.op.uid_pool) 4148 4149 if self.op.add_uids: 4150 uidpool.CheckUidPool(self.op.add_uids) 4151 4152 if self.op.remove_uids: 4153 uidpool.CheckUidPool(self.op.remove_uids) 4154 4155 if self.op.master_netmask is not None: 4156 _ValidateNetmask(self.cfg, self.op.master_netmask) 4157 4158 if self.op.diskparams: 4159 for dt_params in self.op.diskparams.values(): 4160 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 4161 try: 4162 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) 4163 except errors.OpPrereqError, err: 4164 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 4165 errors.ECODE_INVAL)
4166
4167 - def ExpandNames(self):
4168 # FIXME: in the future maybe other cluster params won't require checking on 4169 # all nodes to be modified. 4170 # FIXME: This opcode changes cluster-wide settings. Is acquiring all 4171 # resource locks the right thing, shouldn't it be the BGL instead? 4172 self.needed_locks = { 4173 locking.LEVEL_NODE: locking.ALL_SET, 4174 locking.LEVEL_INSTANCE: locking.ALL_SET, 4175 locking.LEVEL_NODEGROUP: locking.ALL_SET, 4176 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 4177 } 4178 self.share_locks = _ShareAll()
4179
4180 - def BuildHooksEnv(self):
4181 """Build hooks env. 4182 4183 """ 4184 return { 4185 "OP_TARGET": self.cfg.GetClusterName(), 4186 "NEW_VG_NAME": self.op.vg_name, 4187 }
4188
4189 - def BuildHooksNodes(self):
4190 """Build hooks nodes. 4191 4192 """ 4193 mn = self.cfg.GetMasterNode() 4194 return ([mn], [mn])
4195
4196 - def CheckPrereq(self):
4197 """Check prerequisites. 4198 4199 This checks whether the given params don't conflict and 4200 if the given volume group is valid. 4201 4202 """ 4203 if self.op.vg_name is not None and not self.op.vg_name: 4204 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 4205 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 4206 " instances exist", errors.ECODE_INVAL) 4207 4208 if self.op.drbd_helper is not None and not self.op.drbd_helper: 4209 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 4210 raise errors.OpPrereqError("Cannot disable drbd helper while" 4211 " drbd-based instances exist", 4212 errors.ECODE_INVAL) 4213 4214 node_list = self.owned_locks(locking.LEVEL_NODE) 4215 4216 vm_capable_nodes = [node.name 4217 for node in self.cfg.GetAllNodesInfo().values() 4218 if node.name in node_list and node.vm_capable] 4219 4220 # if vg_name not None, checks given volume group on all nodes 4221 if self.op.vg_name: 4222 vglist = self.rpc.call_vg_list(vm_capable_nodes) 4223 for node in vm_capable_nodes: 4224 msg = vglist[node].fail_msg 4225 if msg: 4226 # ignoring down node 4227 self.LogWarning("Error while gathering data on node %s" 4228 " (ignoring node): %s", node, msg) 4229 continue 4230 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 4231 self.op.vg_name, 4232 constants.MIN_VG_SIZE) 4233 if vgstatus: 4234 raise errors.OpPrereqError("Error on node '%s': %s" % 4235 (node, vgstatus), errors.ECODE_ENVIRON) 4236 4237 if self.op.drbd_helper: 4238 # checks given drbd helper on all nodes 4239 helpers = self.rpc.call_drbd_helper(node_list) 4240 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list): 4241 if ninfo.offline: 4242 self.LogInfo("Not checking drbd helper on offline node %s", node) 4243 continue 4244 msg = helpers[node].fail_msg 4245 if msg: 4246 raise errors.OpPrereqError("Error checking drbd helper on node" 4247 " '%s': %s" % (node, msg), 4248 errors.ECODE_ENVIRON) 4249 node_helper = helpers[node].payload 4250 if node_helper != self.op.drbd_helper: 4251 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 4252 (node, node_helper), errors.ECODE_ENVIRON) 4253 4254 self.cluster = cluster = self.cfg.GetClusterInfo() 4255 # validate params changes 4256 if self.op.beparams: 4257 objects.UpgradeBeParams(self.op.beparams) 4258 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 4259 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 4260 4261 if self.op.ndparams: 4262 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 4263 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 4264 4265 # TODO: we need a more general way to handle resetting 4266 # cluster-level parameters to default values 4267 if self.new_ndparams["oob_program"] == "": 4268 self.new_ndparams["oob_program"] = \ 4269 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 4270 4271 if self.op.hv_state: 4272 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 4273 self.cluster.hv_state_static) 4274 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) 4275 for hv, values in new_hv_state.items()) 4276 4277 if self.op.disk_state: 4278 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, 4279 self.cluster.disk_state_static) 4280 self.new_disk_state = \ 4281 dict((storage, dict((name, cluster.SimpleFillDiskState(values)) 4282 for name, values in svalues.items())) 4283 for storage, svalues in new_disk_state.items()) 4284 4285 if self.op.ipolicy: 4286 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, 
4287 group_policy=False) 4288 4289 all_instances = self.cfg.GetAllInstancesInfo().values() 4290 violations = set() 4291 for group in self.cfg.GetAllNodeGroupsInfo().values(): 4292 instances = frozenset([inst for inst in all_instances 4293 if compat.any(node in group.members 4294 for node in inst.all_nodes)]) 4295 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) 4296 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group) 4297 new = _ComputeNewInstanceViolations(ipol, 4298 new_ipolicy, instances, self.cfg) 4299 if new: 4300 violations.update(new) 4301 4302 if violations: 4303 self.LogWarning("After the ipolicy change the following instances" 4304 " violate them: %s", 4305 utils.CommaJoin(utils.NiceSort(violations))) 4306 4307 if self.op.nicparams: 4308 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 4309 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 4310 objects.NIC.CheckParameterSyntax(self.new_nicparams) 4311 nic_errors = [] 4312 4313 # check all instances for consistency 4314 for instance in self.cfg.GetAllInstancesInfo().values(): 4315 for nic_idx, nic in enumerate(instance.nics): 4316 params_copy = copy.deepcopy(nic.nicparams) 4317 params_filled = objects.FillDict(self.new_nicparams, params_copy) 4318 4319 # check parameter syntax 4320 try: 4321 objects.NIC.CheckParameterSyntax(params_filled) 4322 except errors.ConfigurationError, err: 4323 nic_errors.append("Instance %s, nic/%d: %s" % 4324 (instance.name, nic_idx, err)) 4325 4326 # if we're moving instances to routed, check that they have an ip 4327 target_mode = params_filled[constants.NIC_MODE] 4328 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 4329 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 4330 " address" % (instance.name, nic_idx)) 4331 if nic_errors: 4332 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 4333 "\n".join(nic_errors), errors.ECODE_INVAL) 4334 4335 # hypervisor list/parameters 4336 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 4337 if self.op.hvparams: 4338 for hv_name, hv_dict in self.op.hvparams.items(): 4339 if hv_name not in self.new_hvparams: 4340 self.new_hvparams[hv_name] = hv_dict 4341 else: 4342 self.new_hvparams[hv_name].update(hv_dict) 4343 4344 # disk template parameters 4345 self.new_diskparams = objects.FillDict(cluster.diskparams, {}) 4346 if self.op.diskparams: 4347 for dt_name, dt_params in self.op.diskparams.items(): 4348 if dt_name not in self.new_diskparams: 4349 self.new_diskparams[dt_name] = dt_params 4350 else: 4351 self.new_diskparams[dt_name].update(dt_params) 4352 4353 # os hypervisor parameters 4354 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 4355 if self.op.os_hvp: 4356 for os_name, hvs in self.op.os_hvp.items(): 4357 if os_name not in self.new_os_hvp: 4358 self.new_os_hvp[os_name] = hvs 4359 else: 4360 for hv_name, hv_dict in hvs.items(): 4361 if hv_dict is None: 4362 # Delete if it exists 4363 self.new_os_hvp[os_name].pop(hv_name, None) 4364 elif hv_name not in self.new_os_hvp[os_name]: 4365 self.new_os_hvp[os_name][hv_name] = hv_dict 4366 else: 4367 self.new_os_hvp[os_name][hv_name].update(hv_dict) 4368 4369 # os parameters 4370 self.new_osp = objects.FillDict(cluster.osparams, {}) 4371 if self.op.osparams: 4372 for os_name, osp in self.op.osparams.items(): 4373 if os_name not in self.new_osp: 4374 self.new_osp[os_name] = {} 4375 4376 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 4377 use_none=True) 4378 4379 
if not self.new_osp[os_name]: 4380 # we removed all parameters 4381 del self.new_osp[os_name] 4382 else: 4383 # check the parameter validity (remote check) 4384 _CheckOSParams(self, False, [self.cfg.GetMasterNode()], 4385 os_name, self.new_osp[os_name]) 4386 4387 # changes to the hypervisor list 4388 if self.op.enabled_hypervisors is not None: 4389 self.hv_list = self.op.enabled_hypervisors 4390 for hv in self.hv_list: 4391 # if the hypervisor doesn't already exist in the cluster 4392 # hvparams, we initialize it to empty, and then (in both 4393 # cases) we make sure to fill the defaults, as we might not 4394 # have a complete defaults list if the hypervisor wasn't 4395 # enabled before 4396 if hv not in new_hvp: 4397 new_hvp[hv] = {} 4398 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 4399 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 4400 else: 4401 self.hv_list = cluster.enabled_hypervisors 4402 4403 if self.op.hvparams or self.op.enabled_hypervisors is not None: 4404 # either the enabled list has changed, or the parameters have, validate 4405 for hv_name, hv_params in self.new_hvparams.items(): 4406 if ((self.op.hvparams and hv_name in self.op.hvparams) or 4407 (self.op.enabled_hypervisors and 4408 hv_name in self.op.enabled_hypervisors)): 4409 # either this is a new hypervisor, or its parameters have changed 4410 hv_class = hypervisor.GetHypervisorClass(hv_name) 4411 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 4412 hv_class.CheckParameterSyntax(hv_params) 4413 _CheckHVParams(self, node_list, hv_name, hv_params) 4414 4415 if self.op.os_hvp: 4416 # no need to check any newly-enabled hypervisors, since the 4417 # defaults have already been checked in the above code-block 4418 for os_name, os_hvp in self.new_os_hvp.items(): 4419 for hv_name, hv_params in os_hvp.items(): 4420 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 4421 # we need to fill in the new os_hvp on top of the actual hv_p 4422 cluster_defaults = self.new_hvparams.get(hv_name, {}) 4423 new_osp = objects.FillDict(cluster_defaults, hv_params) 4424 hv_class = hypervisor.GetHypervisorClass(hv_name) 4425 hv_class.CheckParameterSyntax(new_osp) 4426 _CheckHVParams(self, node_list, hv_name, new_osp) 4427 4428 if self.op.default_iallocator: 4429 alloc_script = utils.FindFile(self.op.default_iallocator, 4430 constants.IALLOCATOR_SEARCH_PATH, 4431 os.path.isfile) 4432 if alloc_script is None: 4433 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 4434 " specified" % self.op.default_iallocator, 4435 errors.ECODE_INVAL)
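The per-key merging done repeatedly above (for hvparams, diskparams, os_hvp and osparams) follows one pattern: copy the cluster-level dictionaries, then overlay the opcode's values so unrelated keys survive. A standalone sketch with invented parameter values:

cluster_hvparams = {"kvm": {"kernel_path": "/boot/vmlinuz", "acpi": True}}
op_hvparams = {"kvm": {"acpi": False}, "xen-pvm": {"bootloader_path": ""}}

# Start from a copy of the existing settings ...
new_hvparams = dict((hv, params.copy())
                    for (hv, params) in cluster_hvparams.items())
# ... then overlay or add the requested changes per hypervisor.
for hv_name, hv_dict in op_hvparams.items():
  if hv_name not in new_hvparams:
    new_hvparams[hv_name] = hv_dict
  else:
    new_hvparams[hv_name].update(hv_dict)

assert new_hvparams["kvm"] == {"kernel_path": "/boot/vmlinuz", "acpi": False}
assert new_hvparams["xen-pvm"] == {"bootloader_path": ""}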
4436
4437 - def Exec(self, feedback_fn):
4438 """Change the parameters of the cluster. 4439 4440 """ 4441 if self.op.vg_name is not None: 4442 new_volume = self.op.vg_name 4443 if not new_volume: 4444 new_volume = None 4445 if new_volume != self.cfg.GetVGName(): 4446 self.cfg.SetVGName(new_volume) 4447 else: 4448 feedback_fn("Cluster LVM configuration already in desired" 4449 " state, not changing") 4450 if self.op.drbd_helper is not None: 4451 new_helper = self.op.drbd_helper 4452 if not new_helper: 4453 new_helper = None 4454 if new_helper != self.cfg.GetDRBDHelper(): 4455 self.cfg.SetDRBDHelper(new_helper) 4456 else: 4457 feedback_fn("Cluster DRBD helper already in desired state," 4458 " not changing") 4459 if self.op.hvparams: 4460 self.cluster.hvparams = self.new_hvparams 4461 if self.op.os_hvp: 4462 self.cluster.os_hvp = self.new_os_hvp 4463 if self.op.enabled_hypervisors is not None: 4464 self.cluster.hvparams = self.new_hvparams 4465 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 4466 if self.op.beparams: 4467 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 4468 if self.op.nicparams: 4469 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 4470 if self.op.ipolicy: 4471 self.cluster.ipolicy = self.new_ipolicy 4472 if self.op.osparams: 4473 self.cluster.osparams = self.new_osp 4474 if self.op.ndparams: 4475 self.cluster.ndparams = self.new_ndparams 4476 if self.op.diskparams: 4477 self.cluster.diskparams = self.new_diskparams 4478 if self.op.hv_state: 4479 self.cluster.hv_state_static = self.new_hv_state 4480 if self.op.disk_state: 4481 self.cluster.disk_state_static = self.new_disk_state 4482 4483 if self.op.candidate_pool_size is not None: 4484 self.cluster.candidate_pool_size = self.op.candidate_pool_size 4485 # we need to update the pool size here, otherwise the save will fail 4486 _AdjustCandidatePool(self, []) 4487 4488 if self.op.maintain_node_health is not None: 4489 if self.op.maintain_node_health and not constants.ENABLE_CONFD: 4490 feedback_fn("Note: CONFD was disabled at build time, node health" 4491 " maintenance is not useful (still enabling it)") 4492 self.cluster.maintain_node_health = self.op.maintain_node_health 4493 4494 if self.op.prealloc_wipe_disks is not None: 4495 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 4496 4497 if self.op.add_uids is not None: 4498 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 4499 4500 if self.op.remove_uids is not None: 4501 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 4502 4503 if self.op.uid_pool is not None: 4504 self.cluster.uid_pool = self.op.uid_pool 4505 4506 if self.op.default_iallocator is not None: 4507 self.cluster.default_iallocator = self.op.default_iallocator 4508 4509 if self.op.reserved_lvs is not None: 4510 self.cluster.reserved_lvs = self.op.reserved_lvs 4511 4512 if self.op.use_external_mip_script is not None: 4513 self.cluster.use_external_mip_script = self.op.use_external_mip_script 4514 4515 def helper_os(aname, mods, desc): 4516 desc += " OS list" 4517 lst = getattr(self.cluster, aname) 4518 for key, val in mods: 4519 if key == constants.DDM_ADD: 4520 if val in lst: 4521 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 4522 else: 4523 lst.append(val) 4524 elif key == constants.DDM_REMOVE: 4525 if val in lst: 4526 lst.remove(val) 4527 else: 4528 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 4529 else: 4530 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4531 4532 if self.op.hidden_os: 4533 helper_os("hidden_os", self.op.hidden_os, "hidden") 4534 4535 if self.op.blacklisted_os: 4536 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 4537 4538 if self.op.master_netdev: 4539 master_params = self.cfg.GetMasterNetworkParameters() 4540 ems = self.cfg.GetUseExternalMipScript() 4541 feedback_fn("Shutting down master ip on the current netdev (%s)" % 4542 self.cluster.master_netdev) 4543 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 4544 master_params, ems) 4545 result.Raise("Could not disable the master ip") 4546 feedback_fn("Changing master_netdev from %s to %s" % 4547 (master_params.netdev, self.op.master_netdev)) 4548 self.cluster.master_netdev = self.op.master_netdev 4549 4550 if self.op.master_netmask: 4551 master_params = self.cfg.GetMasterNetworkParameters() 4552 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) 4553 result = self.rpc.call_node_change_master_netmask(master_params.name, 4554 master_params.netmask, 4555 self.op.master_netmask, 4556 master_params.ip, 4557 master_params.netdev) 4558 if result.fail_msg: 4559 msg = "Could not change the master IP netmask: %s" % result.fail_msg 4560 feedback_fn(msg) 4561 4562 self.cluster.master_netmask = self.op.master_netmask 4563 4564 self.cfg.Update(self.cluster, feedback_fn) 4565 4566 if self.op.master_netdev: 4567 master_params = self.cfg.GetMasterNetworkParameters() 4568 feedback_fn("Starting the master ip on the new master netdev (%s)" % 4569 self.op.master_netdev) 4570 ems = self.cfg.GetUseExternalMipScript() 4571 result = self.rpc.call_node_activate_master_ip(master_params.name, 4572 master_params, ems) 4573 if result.fail_msg: 4574 self.LogWarning("Could not re-enable the master ip on" 4575 " the master, please restart manually: %s", 4576 result.fail_msg)
4577
4578 4579 -def _UploadHelper(lu, nodes, fname):
4580 """Helper for uploading a file and showing warnings. 4581 4582 """ 4583 if os.path.exists(fname): 4584 result = lu.rpc.call_upload_file(nodes, fname) 4585 for to_node, to_result in result.items(): 4586 msg = to_result.fail_msg 4587 if msg: 4588 msg = ("Copy of file %s to node %s failed: %s" % 4589 (fname, to_node, msg)) 4590 lu.LogWarning(msg)
4591
4592 4593 -def _ComputeAncillaryFiles(cluster, redist):
4594 """Compute files external to Ganeti which need to be consistent. 4595 4596 @type redist: boolean 4597 @param redist: Whether to include files which need to be redistributed 4598 4599 """ 4600 # Compute files for all nodes 4601 files_all = set([ 4602 pathutils.SSH_KNOWN_HOSTS_FILE, 4603 pathutils.CONFD_HMAC_KEY, 4604 pathutils.CLUSTER_DOMAIN_SECRET_FILE, 4605 pathutils.SPICE_CERT_FILE, 4606 pathutils.SPICE_CACERT_FILE, 4607 pathutils.RAPI_USERS_FILE, 4608 ]) 4609 4610 if redist: 4611 # we need to ship at least the RAPI certificate 4612 files_all.add(pathutils.RAPI_CERT_FILE) 4613 else: 4614 files_all.update(pathutils.ALL_CERT_FILES) 4615 files_all.update(ssconf.SimpleStore().GetFileList()) 4616 4617 if cluster.modify_etc_hosts: 4618 files_all.add(pathutils.ETC_HOSTS) 4619 4620 if cluster.use_external_mip_script: 4621 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT) 4622 4623 # Files which are optional, these must: 4624 # - be present in one other category as well 4625 # - either exist or not exist on all nodes of that category (mc, vm all) 4626 files_opt = set([ 4627 pathutils.RAPI_USERS_FILE, 4628 ]) 4629 4630 # Files which should only be on master candidates 4631 files_mc = set() 4632 4633 if not redist: 4634 files_mc.add(pathutils.CLUSTER_CONF_FILE) 4635 4636 # File storage 4637 if (not redist and 4638 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)): 4639 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE) 4640 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE) 4641 4642 # Files which should only be on VM-capable nodes 4643 files_vm = set( 4644 filename 4645 for hv_name in cluster.enabled_hypervisors 4646 for filename in 4647 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0]) 4648 4649 files_opt |= set( 4650 filename 4651 for hv_name in cluster.enabled_hypervisors 4652 for filename in 4653 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1]) 4654 4655 # Filenames in each category must be unique 4656 all_files_set = files_all | files_mc | files_vm 4657 assert (len(all_files_set) == 4658 sum(map(len, [files_all, files_mc, files_vm]))), \ 4659 "Found file listed in more than one file list" 4660 4661 # Optional files must be present in one other category 4662 assert all_files_set.issuperset(files_opt), \ 4663 "Optional file not in a different required list" 4664 4665 # This one file should never ever be re-distributed via RPC 4666 assert not (redist and 4667 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set) 4668 4669 return (files_all, files_opt, files_mc, files_vm)
4670
4671 4672 -def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4673 """Distribute additional files which are part of the cluster configuration. 4674 4675 ConfigWriter takes care of distributing the config and ssconf files, but 4676 there are more files which should be distributed to all nodes. This function 4677 makes sure those are copied. 4678 4679 @param lu: calling logical unit 4680 @param additional_nodes: list of nodes not in the config to distribute to 4681 @type additional_vm: boolean 4682 @param additional_vm: whether the additional nodes are vm-capable or not 4683 4684 """ 4685 # Gather target nodes 4686 cluster = lu.cfg.GetClusterInfo() 4687 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 4688 4689 online_nodes = lu.cfg.GetOnlineNodeList() 4690 online_set = frozenset(online_nodes) 4691 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList())) 4692 4693 if additional_nodes is not None: 4694 online_nodes.extend(additional_nodes) 4695 if additional_vm: 4696 vm_nodes.extend(additional_nodes) 4697 4698 # Never distribute to master node 4699 for nodelist in [online_nodes, vm_nodes]: 4700 if master_info.name in nodelist: 4701 nodelist.remove(master_info.name) 4702 4703 # Gather file lists 4704 (files_all, _, files_mc, files_vm) = \ 4705 _ComputeAncillaryFiles(cluster, True) 4706 4707 # Never re-distribute configuration file from here 4708 assert not (pathutils.CLUSTER_CONF_FILE in files_all or 4709 pathutils.CLUSTER_CONF_FILE in files_vm) 4710 assert not files_mc, "Master candidates not handled in this function" 4711 4712 filemap = [ 4713 (online_nodes, files_all), 4714 (vm_nodes, files_vm), 4715 ] 4716 4717 # Upload the files 4718 for (node_list, files) in filemap: 4719 for fname in files: 4720 _UploadHelper(lu, node_list, fname)
4721
4722 4723 -class LUClusterRedistConf(NoHooksLU):
4724 """Force the redistribution of cluster configuration. 4725 4726 This is a very simple LU. 4727 4728 """ 4729 REQ_BGL = False 4730
4731 - def ExpandNames(self):
4732 self.needed_locks = { 4733 locking.LEVEL_NODE: locking.ALL_SET, 4734 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 4735 } 4736 self.share_locks = _ShareAll()
4737
4738 - def Exec(self, feedback_fn):
4739 """Redistribute the configuration. 4740 4741 """ 4742 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 4743 _RedistributeAncillaryFiles(self)
4744
4745 4746 -class LUClusterActivateMasterIp(NoHooksLU):
4747 """Activate the master IP on the master node. 4748 4749 """
4750 - def Exec(self, feedback_fn):
4751 """Activate the master IP. 4752 4753 """ 4754 master_params = self.cfg.GetMasterNetworkParameters() 4755 ems = self.cfg.GetUseExternalMipScript() 4756 result = self.rpc.call_node_activate_master_ip(master_params.name, 4757 master_params, ems) 4758 result.Raise("Could not activate the master IP")
4759
4760 4761 -class LUClusterDeactivateMasterIp(NoHooksLU):
4762 """Deactivate the master IP on the master node. 4763 4764 """
4765 - def Exec(self, feedback_fn):
4766 """Deactivate the master IP. 4767 4768 """ 4769 master_params = self.cfg.GetMasterNetworkParameters() 4770 ems = self.cfg.GetUseExternalMipScript() 4771 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 4772 master_params, ems) 4773 result.Raise("Could not deactivate the master IP")
4774
4775 4776 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
4777 """Sleep and poll for an instance's disk to sync. 4778 4779 """ 4780 if not instance.disks or disks is not None and not disks: 4781 return True 4782 4783 disks = _ExpandCheckDisks(instance, disks) 4784 4785 if not oneshot: 4786 lu.LogInfo("Waiting for instance %s to sync disks", instance.name) 4787 4788 node = instance.primary_node 4789 4790 for dev in disks: 4791 lu.cfg.SetDiskID(dev, node) 4792 4793 # TODO: Convert to utils.Retry 4794 4795 retries = 0 4796 degr_retries = 10 # in seconds, as we sleep 1 second each time 4797 while True: 4798 max_time = 0 4799 done = True 4800 cumul_degraded = False 4801 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance)) 4802 msg = rstats.fail_msg 4803 if msg: 4804 lu.LogWarning("Can't get any data from node %s: %s", node, msg) 4805 retries += 1 4806 if retries >= 10: 4807 raise errors.RemoteError("Can't contact node %s for mirror data," 4808 " aborting." % node) 4809 time.sleep(6) 4810 continue 4811 rstats = rstats.payload 4812 retries = 0 4813 for i, mstat in enumerate(rstats): 4814 if mstat is None: 4815 lu.LogWarning("Can't compute data for node %s/%s", 4816 node, disks[i].iv_name) 4817 continue 4818 4819 cumul_degraded = (cumul_degraded or 4820 (mstat.is_degraded and mstat.sync_percent is None)) 4821 if mstat.sync_percent is not None: 4822 done = False 4823 if mstat.estimated_time is not None: 4824 rem_time = ("%s remaining (estimated)" % 4825 utils.FormatSeconds(mstat.estimated_time)) 4826 max_time = mstat.estimated_time 4827 else: 4828 rem_time = "no time estimate" 4829 lu.LogInfo("- device %s: %5.2f%% done, %s", 4830 disks[i].iv_name, mstat.sync_percent, rem_time) 4831 4832 # if we're done but degraded, let's do a few small retries, to 4833 # make sure we see a stable and not transient situation; therefore 4834 # we force restart of the loop 4835 if (done or oneshot) and cumul_degraded and degr_retries > 0: 4836 logging.info("Degraded disks found, %d retries left", degr_retries) 4837 degr_retries -= 1 4838 time.sleep(1) 4839 continue 4840 4841 if done or oneshot: 4842 break 4843 4844 time.sleep(min(60, max_time)) 4845 4846 if done: 4847 lu.LogInfo("Instance %s's disks are in sync", instance.name) 4848 4849 return not cumul_degraded
4850
4851 4852 -def _BlockdevFind(lu, node, dev, instance):
4853 """Wrapper around call_blockdev_find to annotate diskparams. 4854 4855 @param lu: A reference to the lu object 4856 @param node: The node to call out 4857 @param dev: The device to find 4858 @param instance: The instance object the device belongs to 4859 @returns The result of the rpc call 4860 4861 """ 4862 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg) 4863 return lu.rpc.call_blockdev_find(node, disk)
4864
4865 4866 -def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4867 """Wrapper around L{_CheckDiskConsistencyInner}. 4868 4869 """ 4870 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg) 4871 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary, 4872 ldisk=ldisk)
4873
4874 4875 -def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary, 4876 ldisk=False):
4877 """Check that mirrors are not degraded. 4878 4879 @attention: The device has to be annotated already. 4880 4881 The ldisk parameter, if True, will change the test from the 4882 is_degraded attribute (which represents overall non-ok status for 4883 the device(s)) to the ldisk (representing the local storage status). 4884 4885 """ 4886 lu.cfg.SetDiskID(dev, node) 4887 4888 result = True 4889 4890 if on_primary or dev.AssembleOnSecondary(): 4891 rstats = lu.rpc.call_blockdev_find(node, dev) 4892 msg = rstats.fail_msg 4893 if msg: 4894 lu.LogWarning("Can't find disk on node %s: %s", node, msg) 4895 result = False 4896 elif not rstats.payload: 4897 lu.LogWarning("Can't find disk on node %s", node) 4898 result = False 4899 else: 4900 if ldisk: 4901 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY 4902 else: 4903 result = result and not rstats.payload.is_degraded 4904 4905 if dev.children: 4906 for child in dev.children: 4907 result = result and _CheckDiskConsistencyInner(lu, instance, child, node, 4908 on_primary) 4909 4910 return result
4911
4912 4913 -class LUOobCommand(NoHooksLU):
4914 """Logical unit for OOB handling. 4915 4916 """ 4917 REQ_BGL = False 4918 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE) 4919
4920 - def ExpandNames(self):
4921 """Gather locks we need. 4922 4923 """ 4924 if self.op.node_names: 4925 self.op.node_names = _GetWantedNodes(self, self.op.node_names) 4926 lock_names = self.op.node_names 4927 else: 4928 lock_names = locking.ALL_SET 4929 4930 self.needed_locks = { 4931 locking.LEVEL_NODE: lock_names, 4932 } 4933 4934 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 4935 4936 if not self.op.node_names: 4937 # Acquire node allocation lock only if all nodes are affected 4938 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4939
4940 - def CheckPrereq(self):
4941 """Check prerequisites. 4942 4943 This checks: 4944 - the node exists in the configuration 4945 - OOB is supported 4946 4947 Any errors are signaled by raising errors.OpPrereqError. 4948 4949 """ 4950 self.nodes = [] 4951 self.master_node = self.cfg.GetMasterNode() 4952 4953 assert self.op.power_delay >= 0.0 4954 4955 if self.op.node_names: 4956 if (self.op.command in self._SKIP_MASTER and 4957 self.master_node in self.op.node_names): 4958 master_node_obj = self.cfg.GetNodeInfo(self.master_node) 4959 master_oob_handler = _SupportsOob(self.cfg, master_node_obj) 4960 4961 if master_oob_handler: 4962 additional_text = ("run '%s %s %s' if you want to operate on the" 4963 " master regardless") % (master_oob_handler, 4964 self.op.command, 4965 self.master_node) 4966 else: 4967 additional_text = "it does not support out-of-band operations" 4968 4969 raise errors.OpPrereqError(("Operating on the master node %s is not" 4970 " allowed for %s; %s") % 4971 (self.master_node, self.op.command, 4972 additional_text), errors.ECODE_INVAL) 4973 else: 4974 self.op.node_names = self.cfg.GetNodeList() 4975 if self.op.command in self._SKIP_MASTER: 4976 self.op.node_names.remove(self.master_node) 4977 4978 if self.op.command in self._SKIP_MASTER: 4979 assert self.master_node not in self.op.node_names 4980 4981 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names): 4982 if node is None: 4983 raise errors.OpPrereqError("Node %s not found" % node_name, 4984 errors.ECODE_NOENT) 4985 else: 4986 self.nodes.append(node) 4987 4988 if (not self.op.ignore_status and 4989 (self.op.command == constants.OOB_POWER_OFF and not node.offline)): 4990 raise errors.OpPrereqError(("Cannot power off node %s because it is" 4991 " not marked offline") % node_name, 4992 errors.ECODE_STATE)
4993
4994 - def Exec(self, feedback_fn):
4995 """Execute OOB and return result if we expect any. 4996 4997 """ 4998 master_node = self.master_node 4999 ret = [] 5000 5001 for idx, node in enumerate(utils.NiceSort(self.nodes, 5002 key=lambda node: node.name)): 5003 node_entry = [(constants.RS_NORMAL, node.name)] 5004 ret.append(node_entry) 5005 5006 oob_program = _SupportsOob(self.cfg, node) 5007 5008 if not oob_program: 5009 node_entry.append((constants.RS_UNAVAIL, None)) 5010 continue 5011 5012 logging.info("Executing out-of-band command '%s' using '%s' on %s", 5013 self.op.command, oob_program, node.name) 5014 result = self.rpc.call_run_oob(master_node, oob_program, 5015 self.op.command, node.name, 5016 self.op.timeout) 5017 5018 if result.fail_msg: 5019 self.LogWarning("Out-of-band RPC failed on node '%s': %s", 5020 node.name, result.fail_msg) 5021 node_entry.append((constants.RS_NODATA, None)) 5022 else: 5023 try: 5024 self._CheckPayload(result) 5025 except errors.OpExecError, err: 5026 self.LogWarning("Payload returned by node '%s' is not valid: %s", 5027 node.name, err) 5028 node_entry.append((constants.RS_NODATA, None)) 5029 else: 5030 if self.op.command == constants.OOB_HEALTH: 5031 # For health we should log important events 5032 for item, status in result.payload: 5033 if status in [constants.OOB_STATUS_WARNING, 5034 constants.OOB_STATUS_CRITICAL]: 5035 self.LogWarning("Item '%s' on node '%s' has status '%s'", 5036 item, node.name, status) 5037 5038 if self.op.command == constants.OOB_POWER_ON: 5039 node.powered = True 5040 elif self.op.command == constants.OOB_POWER_OFF: 5041 node.powered = False 5042 elif self.op.command == constants.OOB_POWER_STATUS: 5043 powered = result.payload[constants.OOB_POWER_STATUS_POWERED] 5044 if powered != node.powered: 5045 logging.warning(("Recorded power state (%s) of node '%s' does not" 5046 " match actual power state (%s)"), node.powered, 5047 node.name, powered) 5048 5049 # For configuration changing commands we should update the node 5050 if self.op.command in (constants.OOB_POWER_ON, 5051 constants.OOB_POWER_OFF): 5052 self.cfg.Update(node, feedback_fn) 5053 5054 node_entry.append((constants.RS_NORMAL, result.payload)) 5055 5056 if (self.op.command == constants.OOB_POWER_ON and 5057 idx < len(self.nodes) - 1): 5058 time.sleep(self.op.power_delay) 5059 5060 return ret
5061
5062 - def _CheckPayload(self, result):
5063 """Checks if the payload is valid. 5064 5065 @param result: RPC result 5066 @raises errors.OpExecError: If payload is not valid 5067 5068 """ 5069 errs = [] 5070 if self.op.command == constants.OOB_HEALTH: 5071 if not isinstance(result.payload, list): 5072 errs.append("command 'health' is expected to return a list but got %s" % 5073 type(result.payload)) 5074 else: 5075 for item, status in result.payload: 5076 if status not in constants.OOB_STATUSES: 5077 errs.append("health item '%s' has invalid status '%s'" % 5078 (item, status)) 5079 5080 if self.op.command == constants.OOB_POWER_STATUS: 5081 if not isinstance(result.payload, dict): 5082 errs.append("power-status is expected to return a dict but got %s" % 5083 type(result.payload)) 5084 5085 if self.op.command in [ 5086 constants.OOB_POWER_ON, 5087 constants.OOB_POWER_OFF, 5088 constants.OOB_POWER_CYCLE, 5089 ]: 5090 if result.payload is not None: 5091 errs.append("%s is expected to not return payload but got '%s'" % 5092 (self.op.command, result.payload)) 5093 5094 if errs: 5095 raise errors.OpExecError("Check of out-of-band payload failed due to %s" % 5096 utils.CommaJoin(errs))
5097
5098 5099 -class _OsQuery(_QueryBase):
5100 FIELDS = query.OS_FIELDS 5101
5102 - def ExpandNames(self, lu):
5103 # Lock all nodes in shared mode 5104 # Temporary removal of locks, should be reverted later 5105 # TODO: reintroduce locks when they are lighter-weight 5106 lu.needed_locks = {} 5107 #self.share_locks[locking.LEVEL_NODE] = 1 5108 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 5109 5110 # The following variables interact with _QueryBase._GetNames 5111 if self.names: 5112 self.wanted = self.names 5113 else: 5114 self.wanted = locking.ALL_SET 5115 5116 self.do_locking = self.use_locking
5117
5118 - def DeclareLocks(self, lu, level):
5119 pass
5120 5121 @staticmethod
5122 - def _DiagnoseByOS(rlist):
5123 """Remaps a per-node return list into a per-os per-node dictionary 5124 5125 @param rlist: a map with node names as keys and OS objects as values 5126 5127 @rtype: dict 5128 @return: a dictionary with osnames as keys and as value another 5129 map, with nodes as keys and tuples of (path, status, diagnose, 5130 variants, parameters, api_versions) as values, eg:: 5131 5132 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []), 5133 (/srv/..., False, "invalid api")], 5134 "node2": [(/srv/..., True, "", [], [])]} 5135 } 5136 5137 """ 5138 all_os = {} 5139 # we build here the list of nodes that didn't fail the RPC (at RPC 5140 # level), so that nodes with a non-responding node daemon don't 5141 # make all OSes invalid 5142 good_nodes = [node_name for node_name in rlist 5143 if not rlist[node_name].fail_msg] 5144 for node_name, nr in rlist.items(): 5145 if nr.fail_msg or not nr.payload: 5146 continue 5147 for (name, path, status, diagnose, variants, 5148 params, api_versions) in nr.payload: 5149 if name not in all_os: 5150 # build a list of nodes for this os containing empty lists 5151 # for each node in node_list 5152 all_os[name] = {} 5153 for nname in good_nodes: 5154 all_os[name][nname] = [] 5155 # convert params from [name, help] to (name, help) 5156 params = [tuple(v) for v in params] 5157 all_os[name][node_name].append((path, status, diagnose, 5158 variants, params, api_versions)) 5159 return all_os
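Editor's note: a small worked example of the remapping, using a bare stand-in for the RPC result objects (only fail_msg and payload are consulted here; node and OS names are hypothetical):

class FakeRpcResult(object):  # minimal stand-in for the real RPC result wrapper
    def __init__(self, payload, fail_msg=None):
        self.payload = payload
        self.fail_msg = fail_msg

rlist = {
    "node1": FakeRpcResult([("debian-etch", "/usr/lib/os/debian-etch", True,
                             "", ["etch"], [["dhcp", "DHCP or not"]], [20])]),
    "node2": FakeRpcResult(None, fail_msg="node daemon not responding"),
}

# _DiagnoseByOS(rlist) would then yield, roughly:
# {"debian-etch": {"node1": [("/usr/lib/os/debian-etch", True, "",
#                             ["etch"], [("dhcp", "DHCP or not")], [20])]}}
# node2 failed at the RPC level, so it is left out of good_nodes: it neither
# shows up as an empty per-node entry nor marks the OS invalid.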
5160
5161 - def _GetQueryData(self, lu):
5162 """Computes the list of nodes and their attributes. 5163 5164 """ 5165 # Locking is not used 5166 assert not (compat.any(lu.glm.is_owned(level) 5167 for level in locking.LEVELS 5168 if level != locking.LEVEL_CLUSTER) or 5169 self.do_locking or self.use_locking) 5170 5171 valid_nodes = [node.name 5172 for node in lu.cfg.GetAllNodesInfo().values() 5173 if not node.offline and node.vm_capable] 5174 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes)) 5175 cluster = lu.cfg.GetClusterInfo() 5176 5177 data = {} 5178 5179 for (os_name, os_data) in pol.items(): 5180 info = query.OsInfo(name=os_name, valid=True, node_status=os_data, 5181 hidden=(os_name in cluster.hidden_os), 5182 blacklisted=(os_name in cluster.blacklisted_os)) 5183 5184 variants = set() 5185 parameters = set() 5186 api_versions = set() 5187 5188 for idx, osl in enumerate(os_data.values()): 5189 info.valid = bool(info.valid and osl and osl[0][1]) 5190 if not info.valid: 5191 break 5192 5193 (node_variants, node_params, node_api) = osl[0][3:6] 5194 if idx == 0: 5195 # First entry 5196 variants.update(node_variants) 5197 parameters.update(node_params) 5198 api_versions.update(node_api) 5199 else: 5200 # Filter out inconsistent values 5201 variants.intersection_update(node_variants) 5202 parameters.intersection_update(node_params) 5203 api_versions.intersection_update(node_api) 5204 5205 info.variants = list(variants) 5206 info.parameters = list(parameters) 5207 info.api_versions = list(api_versions) 5208 5209 data[os_name] = info 5210 5211 # Prepare data in requested order 5212 return [data[name] for name in self._GetNames(lu, pol.keys(), None) 5213 if name in data]
5214
5215 5216 -class LUOsDiagnose(NoHooksLU):
5217 """Logical unit for OS diagnose/query. 5218 5219 """ 5220 REQ_BGL = False 5221 5222 @staticmethod
5223 - def _BuildFilter(fields, names):
5224 """Builds a filter for querying OSes. 5225 5226 """ 5227 name_filter = qlang.MakeSimpleFilter("name", names) 5228 5229 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the 5230 # respective field is not requested 5231 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]] 5232 for fname in ["hidden", "blacklisted"] 5233 if fname not in fields] 5234 if "valid" not in fields: 5235 status_filter.append([qlang.OP_TRUE, "valid"]) 5236 5237 if status_filter: 5238 status_filter.insert(0, qlang.OP_AND) 5239 else: 5240 status_filter = None 5241 5242 if name_filter and status_filter: 5243 return [qlang.OP_AND, name_filter, status_filter] 5244 elif name_filter: 5245 return name_filter 5246 else: 5247 return status_filter
5248
5249 - def CheckArguments(self):
5250 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names), 5251 self.op.output_fields, False)
5252
5253 - def ExpandNames(self):
5254 self.oq.ExpandNames(self)
5255
5256 - def Exec(self, feedback_fn):
5257 return self.oq.OldStyleQuery(self)
5258
5259 5260 -class _ExtStorageQuery(_QueryBase):
5261 FIELDS = query.EXTSTORAGE_FIELDS 5262
5263 - def ExpandNames(self, lu):
5264 # Lock all nodes in shared mode 5265 # Temporary removal of locks, should be reverted later 5266 # TODO: reintroduce locks when they are lighter-weight 5267 lu.needed_locks = {} 5268 #self.share_locks[locking.LEVEL_NODE] = 1 5269 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 5270 5271 # The following variables interact with _QueryBase._GetNames 5272 if self.names: 5273 self.wanted = self.names 5274 else: 5275 self.wanted = locking.ALL_SET 5276 5277 self.do_locking = self.use_locking
5278
5279 - def DeclareLocks(self, lu, level):
5280 pass
5281 5282 @staticmethod
5283 - def _DiagnoseByProvider(rlist):
5284 """Remaps a per-node return list into a per-provider per-node dictionary 5285 5286 @param rlist: a map with node names as keys and ExtStorage objects as values 5287 5288 @rtype: dict 5289 @return: a dictionary with extstorage providers as keys and as 5290 value another map, with nodes as keys and tuples of 5291 (path, status, diagnose, parameters) as values, eg:: 5292 5293 {"provider1": {"node1": [(/usr/lib/..., True, "", [])] 5294 "node2": [(/srv/..., False, "missing file")] 5295 "node3": [(/srv/..., True, "", [])] 5296 } 5297 5298 """ 5299 all_es = {} 5300 # we build here the list of nodes that didn't fail the RPC (at RPC 5301 # level), so that nodes with a non-responding node daemon don't 5302 # make all providers invalid 5303 good_nodes = [node_name for node_name in rlist 5304 if not rlist[node_name].fail_msg] 5305 for node_name, nr in rlist.items(): 5306 if nr.fail_msg or not nr.payload: 5307 continue 5308 for (name, path, status, diagnose, params) in nr.payload: 5309 if name not in all_es: 5310 # build a list of nodes for this provider containing empty lists 5311 # for each node in node_list 5312 all_es[name] = {} 5313 for nname in good_nodes: 5314 all_es[name][nname] = [] 5315 # convert params from [name, help] to (name, help) 5316 params = [tuple(v) for v in params] 5317 all_es[name][node_name].append((path, status, diagnose, params)) 5318 return all_es
5319
5320 - def _GetQueryData(self, lu):
5321 """Computes the list of nodes and their attributes. 5322 5323 """ 5324 # Locking is not used 5325 assert not (compat.any(lu.glm.is_owned(level) 5326 for level in locking.LEVELS 5327 if level != locking.LEVEL_CLUSTER) or 5328 self.do_locking or self.use_locking) 5329 5330 valid_nodes = [node.name 5331 for node in lu.cfg.GetAllNodesInfo().values() 5332 if not node.offline and node.vm_capable] 5333 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes)) 5334 5335 data = {} 5336 5337 nodegroup_list = lu.cfg.GetNodeGroupList() 5338 5339 for (es_name, es_data) in pol.items(): 5340 # For every provider compute the nodegroup validity. 5341 # To do this we need to check the validity of each node in es_data 5342 # and then construct the corresponding nodegroup dict: 5343 # { nodegroup1: status 5344 # nodegroup2: status 5345 # } 5346 ndgrp_data = {} 5347 for nodegroup in nodegroup_list: 5348 ndgrp = lu.cfg.GetNodeGroup(nodegroup) 5349 5350 nodegroup_nodes = ndgrp.members 5351 nodegroup_name = ndgrp.name 5352 node_statuses = [] 5353 5354 for node in nodegroup_nodes: 5355 if node in valid_nodes: 5356 if es_data[node] != []: 5357 node_status = es_data[node][0][1] 5358 node_statuses.append(node_status) 5359 else: 5360 node_statuses.append(False) 5361 5362 if False in node_statuses: 5363 ndgrp_data[nodegroup_name] = False 5364 else: 5365 ndgrp_data[nodegroup_name] = True 5366 5367 # Compute the provider's parameters 5368 parameters = set() 5369 for idx, esl in enumerate(es_data.values()): 5370 valid = bool(esl and esl[0][1]) 5371 if not valid: 5372 break 5373 5374 node_params = esl[0][3] 5375 if idx == 0: 5376 # First entry 5377 parameters.update(node_params) 5378 else: 5379 # Filter out inconsistent values 5380 parameters.intersection_update(node_params) 5381 5382 params = list(parameters) 5383 5384 # Now fill all the info for this provider 5385 info = query.ExtStorageInfo(name=es_name, node_status=es_data, 5386 nodegroup_status=ndgrp_data, 5387 parameters=params) 5388 5389 data[es_name] = info 5390 5391 # Prepare data in requested order 5392 return [data[name] for name in self._GetNames(lu, pol.keys(), None) 5393 if name in data]
5394
5395 5396 -class LUExtStorageDiagnose(NoHooksLU):
5397 """Logical unit for ExtStorage diagnose/query. 5398 5399 """ 5400 REQ_BGL = False 5401
5402 - def CheckArguments(self):
5403 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names), 5404 self.op.output_fields, False)
5405
5406 - def ExpandNames(self):
5407 self.eq.ExpandNames(self)
5408
5409 - def Exec(self, feedback_fn):
5410 return self.eq.OldStyleQuery(self)
5411
5412 5413 -class LUNodeRemove(LogicalUnit):
5414 """Logical unit for removing a node. 5415 5416 """ 5417 HPATH = "node-remove" 5418 HTYPE = constants.HTYPE_NODE 5419
5420 - def BuildHooksEnv(self):
5421 """Build hooks env. 5422 5423 """ 5424 return { 5425 "OP_TARGET": self.op.node_name, 5426 "NODE_NAME": self.op.node_name, 5427 }
5428
5429 - def BuildHooksNodes(self):
5430 """Build hooks nodes. 5431 5432 This doesn't run on the target node in the pre phase as a failed 5433 node would then be impossible to remove. 5434 5435 """ 5436 all_nodes = self.cfg.GetNodeList() 5437 try: 5438 all_nodes.remove(self.op.node_name) 5439 except ValueError: 5440 pass 5441 return (all_nodes, all_nodes)
5442
5443 - def CheckPrereq(self):
5444 """Check prerequisites. 5445 5446 This checks: 5447 - the node exists in the configuration 5448 - it does not have primary or secondary instances 5449 - it's not the master 5450 5451 Any errors are signaled by raising errors.OpPrereqError. 5452 5453 """ 5454 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5455 node = self.cfg.GetNodeInfo(self.op.node_name) 5456 assert node is not None 5457 5458 masternode = self.cfg.GetMasterNode() 5459 if node.name == masternode: 5460 raise errors.OpPrereqError("Node is the master node, failover to another" 5461 " node is required", errors.ECODE_INVAL) 5462 5463 for instance_name, instance in self.cfg.GetAllInstancesInfo().items(): 5464 if node.name in instance.all_nodes: 5465 raise errors.OpPrereqError("Instance %s is still running on the node," 5466 " please remove first" % instance_name, 5467 errors.ECODE_INVAL) 5468 self.op.node_name = node.name 5469 self.node = node
5470
5471 - def Exec(self, feedback_fn):
5472 """Removes the node from the cluster. 5473 5474 """ 5475 node = self.node 5476 logging.info("Stopping the node daemon and removing configs from node %s", 5477 node.name) 5478 5479 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 5480 5481 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ 5482 "Not owning BGL" 5483 5484 # Promote nodes to master candidate as needed 5485 _AdjustCandidatePool(self, exceptions=[node.name]) 5486 self.context.RemoveNode(node.name) 5487 5488 # Run post hooks on the node before it's removed 5489 _RunPostHook(self, node.name) 5490 5491 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup) 5492 msg = result.fail_msg 5493 if msg: 5494 self.LogWarning("Errors encountered on the remote node while leaving" 5495 " the cluster: %s", msg) 5496 5497 # Remove node from our /etc/hosts 5498 if self.cfg.GetClusterInfo().modify_etc_hosts: 5499 master_node = self.cfg.GetMasterNode() 5500 result = self.rpc.call_etc_hosts_modify(master_node, 5501 constants.ETC_HOSTS_REMOVE, 5502 node.name, None) 5503 result.Raise("Can't update hosts file with new host data") 5504 _RedistributeAncillaryFiles(self)
5505
5506 5507 -class _NodeQuery(_QueryBase):
5508 FIELDS = query.NODE_FIELDS 5509
5510 - def ExpandNames(self, lu):
5511 lu.needed_locks = {} 5512 lu.share_locks = _ShareAll() 5513 5514 if self.names: 5515 self.wanted = _GetWantedNodes(lu, self.names) 5516 else: 5517 self.wanted = locking.ALL_SET 5518 5519 self.do_locking = (self.use_locking and 5520 query.NQ_LIVE in self.requested_data) 5521 5522 if self.do_locking: 5523 # If any non-static field is requested we need to lock the nodes 5524 lu.needed_locks[locking.LEVEL_NODE] = self.wanted 5525 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5526
5527 - def DeclareLocks(self, lu, level):
5528 pass
5529
5530 - def _GetQueryData(self, lu):
5531 """Computes the list of nodes and their attributes. 5532 5533 """ 5534 all_info = lu.cfg.GetAllNodesInfo() 5535 5536 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE) 5537 5538 # Gather data as requested 5539 if query.NQ_LIVE in self.requested_data: 5540 # filter out non-vm_capable nodes 5541 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable] 5542 5543 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes) 5544 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()], 5545 [lu.cfg.GetHypervisorType()], es_flags) 5546 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload)) 5547 for (name, nresult) in node_data.items() 5548 if not nresult.fail_msg and nresult.payload) 5549 else: 5550 live_data = None 5551 5552 if query.NQ_INST in self.requested_data: 5553 node_to_primary = dict([(name, set()) for name in nodenames]) 5554 node_to_secondary = dict([(name, set()) for name in nodenames]) 5555 5556 inst_data = lu.cfg.GetAllInstancesInfo() 5557 5558 for inst in inst_data.values(): 5559 if inst.primary_node in node_to_primary: 5560 node_to_primary[inst.primary_node].add(inst.name) 5561 for secnode in inst.secondary_nodes: 5562 if secnode in node_to_secondary: 5563 node_to_secondary[secnode].add(inst.name) 5564 else: 5565 node_to_primary = None 5566 node_to_secondary = None 5567 5568 if query.NQ_OOB in self.requested_data: 5569 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node))) 5570 for name, node in all_info.iteritems()) 5571 else: 5572 oob_support = None 5573 5574 if query.NQ_GROUP in self.requested_data: 5575 groups = lu.cfg.GetAllNodeGroupsInfo() 5576 else: 5577 groups = {} 5578 5579 return query.NodeQueryData([all_info[name] for name in nodenames], 5580 live_data, lu.cfg.GetMasterNode(), 5581 node_to_primary, node_to_secondary, groups, 5582 oob_support, lu.cfg.GetClusterInfo())
5583
5584 5585 -class LUNodeQuery(NoHooksLU):
5586 """Logical unit for querying nodes. 5587 5588 """ 5589 # pylint: disable=W0142 5590 REQ_BGL = False 5591
5592 - def CheckArguments(self):
5593 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names), 5594 self.op.output_fields, self.op.use_locking)
5595
5596 - def ExpandNames(self):
5597 self.nq.ExpandNames(self)
5598
5599 - def DeclareLocks(self, level):
5600 self.nq.DeclareLocks(self, level)
5601
5602 - def Exec(self, feedback_fn):
5603 return self.nq.OldStyleQuery(self)
5604
5605 5606 -class LUNodeQueryvols(NoHooksLU):
5607 """Logical unit for getting volumes on node(s). 5608 5609 """ 5610 REQ_BGL = False 5611 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") 5612 _FIELDS_STATIC = utils.FieldSet("node") 5613
5614 - def CheckArguments(self):
5615 _CheckOutputFields(static=self._FIELDS_STATIC, 5616 dynamic=self._FIELDS_DYNAMIC, 5617 selected=self.op.output_fields)
5618
5619 - def ExpandNames(self):
5620 self.share_locks = _ShareAll() 5621 5622 if self.op.nodes: 5623 self.needed_locks = { 5624 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes), 5625 } 5626 else: 5627 self.needed_locks = { 5628 locking.LEVEL_NODE: locking.ALL_SET, 5629 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 5630 }
5631
5632 - def Exec(self, feedback_fn):
5633 """Computes the list of nodes and their attributes. 5634 5635 """ 5636 nodenames = self.owned_locks(locking.LEVEL_NODE) 5637 volumes = self.rpc.call_node_volumes(nodenames) 5638 5639 ilist = self.cfg.GetAllInstancesInfo() 5640 vol2inst = _MapInstanceDisksToNodes(ilist.values()) 5641 5642 output = [] 5643 for node in nodenames: 5644 nresult = volumes[node] 5645 if nresult.offline: 5646 continue 5647 msg = nresult.fail_msg 5648 if msg: 5649 self.LogWarning("Can't compute volume data on node %s: %s", node, msg) 5650 continue 5651 5652 node_vols = sorted(nresult.payload, 5653 key=operator.itemgetter("dev")) 5654 5655 for vol in node_vols: 5656 node_output = [] 5657 for field in self.op.output_fields: 5658 if field == "node": 5659 val = node 5660 elif field == "phys": 5661 val = vol["dev"] 5662 elif field == "vg": 5663 val = vol["vg"] 5664 elif field == "name": 5665 val = vol["name"] 5666 elif field == "size": 5667 val = int(float(vol["size"])) 5668 elif field == "instance": 5669 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-") 5670 else: 5671 raise errors.ParameterError(field) 5672 node_output.append(str(val)) 5673 5674 output.append(node_output) 5675 5676 return output
5677
5678 5679 -class LUNodeQueryStorage(NoHooksLU):
5680 """Logical unit for getting information on storage units on node(s). 5681 5682 """ 5683 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) 5684 REQ_BGL = False 5685
5686 - def CheckArguments(self):
5687 _CheckOutputFields(static=self._FIELDS_STATIC, 5688 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), 5689 selected=self.op.output_fields)
5690
5691 - def ExpandNames(self):
5692 self.share_locks = _ShareAll() 5693 5694 if self.op.nodes: 5695 self.needed_locks = { 5696 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes), 5697 } 5698 else: 5699 self.needed_locks = { 5700 locking.LEVEL_NODE: locking.ALL_SET, 5701 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 5702 }
5703
5704 - def Exec(self, feedback_fn):
5705 """Computes the list of nodes and their attributes. 5706 5707 """ 5708 self.nodes = self.owned_locks(locking.LEVEL_NODE) 5709 5710 # Always get name to sort by 5711 if constants.SF_NAME in self.op.output_fields: 5712 fields = self.op.output_fields[:] 5713 else: 5714 fields = [constants.SF_NAME] + self.op.output_fields 5715 5716 # Never ask for node or type as it's only known to the LU 5717 for extra in [constants.SF_NODE, constants.SF_TYPE]: 5718 while extra in fields: 5719 fields.remove(extra) 5720 5721 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) 5722 name_idx = field_idx[constants.SF_NAME] 5723 5724 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 5725 data = self.rpc.call_storage_list(self.nodes, 5726 self.op.storage_type, st_args, 5727 self.op.name, fields) 5728 5729 result = [] 5730 5731 for node in utils.NiceSort(self.nodes): 5732 nresult = data[node] 5733 if nresult.offline: 5734 continue 5735 5736 msg = nresult.fail_msg 5737 if msg: 5738 self.LogWarning("Can't get storage data from node %s: %s", node, msg) 5739 continue 5740 5741 rows = dict([(row[name_idx], row) for row in nresult.payload]) 5742 5743 for name in utils.NiceSort(rows.keys()): 5744 row = rows[name] 5745 5746 out = [] 5747 5748 for field in self.op.output_fields: 5749 if field == constants.SF_NODE: 5750 val = node 5751 elif field == constants.SF_TYPE: 5752 val = self.op.storage_type 5753 elif field in field_idx: 5754 val = row[field_idx[field]] 5755 else: 5756 raise errors.ParameterError(field) 5757 5758 out.append(val) 5759 5760 result.append(out) 5761 5762 return result
5763
5764 5765 -class _InstanceQuery(_QueryBase):
5766 FIELDS = query.INSTANCE_FIELDS 5767
5768 - def ExpandNames(self, lu):
5769 lu.needed_locks = {} 5770 lu.share_locks = _ShareAll() 5771 5772 if self.names: 5773 self.wanted = _GetWantedInstances(lu, self.names) 5774 else: 5775 self.wanted = locking.ALL_SET 5776 5777 self.do_locking = (self.use_locking and 5778 query.IQ_LIVE in self.requested_data) 5779 if self.do_locking: 5780 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted 5781 lu.needed_locks[locking.LEVEL_NODEGROUP] = [] 5782 lu.needed_locks[locking.LEVEL_NODE] = [] 5783 lu.needed_locks[locking.LEVEL_NETWORK] = [] 5784 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 5785 5786 self.do_grouplocks = (self.do_locking and 5787 query.IQ_NODES in self.requested_data)
5788
5789 - def DeclareLocks(self, lu, level):
5790 if self.do_locking: 5791 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks: 5792 assert not lu.needed_locks[locking.LEVEL_NODEGROUP] 5793 5794 # Lock all groups used by instances optimistically; this requires going 5795 # via the node before it's locked, requiring verification later on 5796 lu.needed_locks[locking.LEVEL_NODEGROUP] = \ 5797 set(group_uuid 5798 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) 5799 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name)) 5800 elif level == locking.LEVEL_NODE: 5801 lu._LockInstancesNodes() # pylint: disable=W0212 5802 5803 elif level == locking.LEVEL_NETWORK: 5804 lu.needed_locks[locking.LEVEL_NETWORK] = \ 5805 frozenset(net_uuid 5806 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) 5807 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5808 5809 @staticmethod
5810 - def _CheckGroupLocks(lu):
5811 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE)) 5812 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP)) 5813 5814 # Check if node groups for locked instances are still correct 5815 for instance_name in owned_instances: 5816 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5817
5818 - def _GetQueryData(self, lu):
5819 """Computes the list of instances and their attributes. 5820 5821 """ 5822 if self.do_grouplocks: 5823 self._CheckGroupLocks(lu) 5824 5825 cluster = lu.cfg.GetClusterInfo() 5826 all_info = lu.cfg.GetAllInstancesInfo() 5827 5828 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE) 5829 5830 instance_list = [all_info[name] for name in instance_names] 5831 nodes = frozenset(itertools.chain(*(inst.all_nodes 5832 for inst in instance_list))) 5833 hv_list = list(set([inst.hypervisor for inst in instance_list])) 5834 bad_nodes = [] 5835 offline_nodes = [] 5836 wrongnode_inst = set() 5837 5838 # Gather data as requested 5839 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]): 5840 live_data = {} 5841 node_data = lu.rpc.call_all_instances_info(nodes, hv_list) 5842 for name in nodes: 5843 result = node_data[name] 5844 if result.offline: 5845 # offline nodes will be in both lists 5846 assert result.fail_msg 5847 offline_nodes.append(name) 5848 if result.fail_msg: 5849 bad_nodes.append(name) 5850 elif result.payload: 5851 for inst in result.payload: 5852 if inst in all_info: 5853 if all_info[inst].primary_node == name: 5854 live_data.update(result.payload) 5855 else: 5856 wrongnode_inst.add(inst) 5857 else: 5858 # orphan instance; we don't list it here as we don't 5859 # handle this case yet in the output of instance listing 5860 logging.warning("Orphan instance '%s' found on node %s", 5861 inst, name) 5862 # else no instance is alive 5863 else: 5864 live_data = {} 5865 5866 if query.IQ_DISKUSAGE in self.requested_data: 5867 gmi = ganeti.masterd.instance 5868 disk_usage = dict((inst.name, 5869 gmi.ComputeDiskSize(inst.disk_template, 5870 [{constants.IDISK_SIZE: disk.size} 5871 for disk in inst.disks])) 5872 for inst in instance_list) 5873 else: 5874 disk_usage = None 5875 5876 if query.IQ_CONSOLE in self.requested_data: 5877 consinfo = {} 5878 for inst in instance_list: 5879 if inst.name in live_data: 5880 # Instance is running 5881 consinfo[inst.name] = _GetInstanceConsole(cluster, inst) 5882 else: 5883 consinfo[inst.name] = None 5884 assert set(consinfo.keys()) == set(instance_names) 5885 else: 5886 consinfo = None 5887 5888 if query.IQ_NODES in self.requested_data: 5889 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"), 5890 instance_list))) 5891 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names)) 5892 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid)) 5893 for uuid in set(map(operator.attrgetter("group"), 5894 nodes.values()))) 5895 else: 5896 nodes = None 5897 groups = None 5898 5899 if query.IQ_NETWORKS in self.requested_data: 5900 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name) 5901 for i in instance_list)) 5902 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids) 5903 else: 5904 networks = None 5905 5906 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(), 5907 disk_usage, offline_nodes, bad_nodes, 5908 live_data, wrongnode_inst, consinfo, 5909 nodes, groups, networks)
5910
5911 5912 -class LUQuery(NoHooksLU):
5913 """Query for resources/items of a certain kind. 5914 5915 """ 5916 # pylint: disable=W0142 5917 REQ_BGL = False 5918
5919 - def CheckArguments(self):
5920 qcls = _GetQueryImplementation(self.op.what) 5921 5922 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5923
5924 - def ExpandNames(self):
5925 self.impl.ExpandNames(self)
5926
5927 - def DeclareLocks(self, level):
5928 self.impl.DeclareLocks(self, level)
5929
5930 - def Exec(self, feedback_fn):
5931 return self.impl.NewStyleQuery(self)
5932
5933 5934 -class LUQueryFields(NoHooksLU):
5935 """Query for resources/items of a certain kind. 5936 5937 """ 5938 # pylint: disable=W0142 5939 REQ_BGL = False 5940
5941 - def CheckArguments(self):
5942 self.qcls = _GetQueryImplementation(self.op.what)
5943
5944 - def ExpandNames(self):
5945 self.needed_locks = {}
5946
5947 - def Exec(self, feedback_fn):
5948 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5949
5950 5951 -class LUNodeModifyStorage(NoHooksLU):
5952 """Logical unit for modifying a storage volume on a node. 5953 5954 """ 5955 REQ_BGL = False 5956
5957 - def CheckArguments(self):
5958 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5959 5960 storage_type = self.op.storage_type 5961 5962 try: 5963 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] 5964 except KeyError: 5965 raise errors.OpPrereqError("Storage units of type '%s' can not be" 5966 " modified" % storage_type, 5967 errors.ECODE_INVAL) 5968 5969 diff = set(self.op.changes.keys()) - modifiable 5970 if diff: 5971 raise errors.OpPrereqError("The following fields can not be modified for" 5972 " storage units of type '%s': %r" % 5973 (storage_type, list(diff)), 5974 errors.ECODE_INVAL)
5975
5976 - def ExpandNames(self):
5977 self.needed_locks = { 5978 locking.LEVEL_NODE: self.op.node_name, 5979 }
5980
5981 - def Exec(self, feedback_fn):
5982 """Computes the list of nodes and their attributes. 5983 5984 """ 5985 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 5986 result = self.rpc.call_storage_modify(self.op.node_name, 5987 self.op.storage_type, st_args, 5988 self.op.name, self.op.changes) 5989 result.Raise("Failed to modify storage unit '%s' on %s" % 5990 (self.op.name, self.op.node_name))
5991
5992 5993 -class LUNodeAdd(LogicalUnit):
5994 """Logical unit for adding node to the cluster. 5995 5996 """ 5997 HPATH = "node-add" 5998 HTYPE = constants.HTYPE_NODE 5999 _NFLAGS = ["master_capable", "vm_capable"] 6000
6001 - def CheckArguments(self):
6002 self.primary_ip_family = self.cfg.GetPrimaryIPFamily() 6003 # validate/normalize the node name 6004 self.hostname = netutils.GetHostname(name=self.op.node_name, 6005 family=self.primary_ip_family) 6006 self.op.node_name = self.hostname.name 6007 6008 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode(): 6009 raise errors.OpPrereqError("Cannot readd the master node", 6010 errors.ECODE_STATE) 6011 6012 if self.op.readd and self.op.group: 6013 raise errors.OpPrereqError("Cannot pass a node group when a node is" 6014 " being readded", errors.ECODE_INVAL)
6015
6016 - def BuildHooksEnv(self):
6017 """Build hooks env. 6018 6019 This will run on all nodes before, and on all nodes + the new node after. 6020 6021 """ 6022 return { 6023 "OP_TARGET": self.op.node_name, 6024 "NODE_NAME": self.op.node_name, 6025 "NODE_PIP": self.op.primary_ip, 6026 "NODE_SIP": self.op.secondary_ip, 6027 "MASTER_CAPABLE": str(self.op.master_capable), 6028 "VM_CAPABLE": str(self.op.vm_capable), 6029 }
6030
6031 - def BuildHooksNodes(self):
6032 """Build hooks nodes. 6033 6034 """ 6035 # Exclude added node 6036 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name])) 6037 post_nodes = pre_nodes + [self.op.node_name, ] 6038 6039 return (pre_nodes, post_nodes)
6040
6041 - def CheckPrereq(self):
6042 """Check prerequisites. 6043 6044 This checks: 6045 - the new node is not already in the config 6046 - it is resolvable 6047 - its parameters (single/dual homed) matches the cluster 6048 6049 Any errors are signaled by raising errors.OpPrereqError. 6050 6051 """ 6052 cfg = self.cfg 6053 hostname = self.hostname 6054 node = hostname.name 6055 primary_ip = self.op.primary_ip = hostname.ip 6056 if self.op.secondary_ip is None: 6057 if self.primary_ip_family == netutils.IP6Address.family: 6058 raise errors.OpPrereqError("When using a IPv6 primary address, a valid" 6059 " IPv4 address must be given as secondary", 6060 errors.ECODE_INVAL) 6061 self.op.secondary_ip = primary_ip 6062 6063 secondary_ip = self.op.secondary_ip 6064 if not netutils.IP4Address.IsValid(secondary_ip): 6065 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 6066 " address" % secondary_ip, errors.ECODE_INVAL) 6067 6068 node_list = cfg.GetNodeList() 6069 if not self.op.readd and node in node_list: 6070 raise errors.OpPrereqError("Node %s is already in the configuration" % 6071 node, errors.ECODE_EXISTS) 6072 elif self.op.readd and node not in node_list: 6073 raise errors.OpPrereqError("Node %s is not in the configuration" % node, 6074 errors.ECODE_NOENT) 6075 6076 self.changed_primary_ip = False 6077 6078 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list): 6079 if self.op.readd and node == existing_node_name: 6080 if existing_node.secondary_ip != secondary_ip: 6081 raise errors.OpPrereqError("Readded node doesn't have the same IP" 6082 " address configuration as before", 6083 errors.ECODE_INVAL) 6084 if existing_node.primary_ip != primary_ip: 6085 self.changed_primary_ip = True 6086 6087 continue 6088 6089 if (existing_node.primary_ip == primary_ip or 6090 existing_node.secondary_ip == primary_ip or 6091 existing_node.primary_ip == secondary_ip or 6092 existing_node.secondary_ip == secondary_ip): 6093 raise errors.OpPrereqError("New node ip address(es) conflict with" 6094 " existing node %s" % existing_node.name, 6095 errors.ECODE_NOTUNIQUE) 6096 6097 # After this 'if' block, None is no longer a valid value for the 6098 # _capable op attributes 6099 if self.op.readd: 6100 old_node = self.cfg.GetNodeInfo(node) 6101 assert old_node is not None, "Can't retrieve locked node %s" % node 6102 for attr in self._NFLAGS: 6103 if getattr(self.op, attr) is None: 6104 setattr(self.op, attr, getattr(old_node, attr)) 6105 else: 6106 for attr in self._NFLAGS: 6107 if getattr(self.op, attr) is None: 6108 setattr(self.op, attr, True) 6109 6110 if self.op.readd and not self.op.vm_capable: 6111 pri, sec = cfg.GetNodeInstances(node) 6112 if pri or sec: 6113 raise errors.OpPrereqError("Node %s being re-added with vm_capable" 6114 " flag set to false, but it already holds" 6115 " instances" % node, 6116 errors.ECODE_STATE) 6117 6118 # check that the type of the node (single versus dual homed) is the 6119 # same as for the master 6120 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) 6121 master_singlehomed = myself.secondary_ip == myself.primary_ip 6122 newbie_singlehomed = secondary_ip == primary_ip 6123 if master_singlehomed != newbie_singlehomed: 6124 if master_singlehomed: 6125 raise errors.OpPrereqError("The master has no secondary ip but the" 6126 " new node has one", 6127 errors.ECODE_INVAL) 6128 else: 6129 raise errors.OpPrereqError("The master has a secondary ip but the" 6130 " new node doesn't have one", 6131 errors.ECODE_INVAL) 6132 6133 # checks reachability 6134 if not 
netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): 6135 raise errors.OpPrereqError("Node not reachable by ping", 6136 errors.ECODE_ENVIRON) 6137 6138 if not newbie_singlehomed: 6139 # check reachability from my secondary ip to newbie's secondary ip 6140 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, 6141 source=myself.secondary_ip): 6142 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 6143 " based ping to node daemon port", 6144 errors.ECODE_ENVIRON) 6145 6146 if self.op.readd: 6147 exceptions = [node] 6148 else: 6149 exceptions = [] 6150 6151 if self.op.master_capable: 6152 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) 6153 else: 6154 self.master_candidate = False 6155 6156 if self.op.readd: 6157 self.new_node = old_node 6158 else: 6159 node_group = cfg.LookupNodeGroup(self.op.group) 6160 self.new_node = objects.Node(name=node, 6161 primary_ip=primary_ip, 6162 secondary_ip=secondary_ip, 6163 master_candidate=self.master_candidate, 6164 offline=False, drained=False, 6165 group=node_group, ndparams={}) 6166 6167 if self.op.ndparams: 6168 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 6169 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node", 6170 "node", "cluster or group") 6171 6172 if self.op.hv_state: 6173 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None) 6174 6175 if self.op.disk_state: 6176 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None) 6177 6178 # TODO: If we need to have multiple DnsOnlyRunner we probably should make 6179 # it a property on the base class. 6180 rpcrunner = rpc.DnsOnlyRunner() 6181 result = rpcrunner.call_version([node])[node] 6182 result.Raise("Can't get version information from node %s" % node) 6183 if constants.PROTOCOL_VERSION == result.payload: 6184 logging.info("Communication to node %s fine, sw version %s match", 6185 node, result.payload) 6186 else: 6187 raise errors.OpPrereqError("Version mismatch master version %s," 6188 " node version %s" % 6189 (constants.PROTOCOL_VERSION, result.payload), 6190 errors.ECODE_ENVIRON) 6191 6192 vg_name = cfg.GetVGName() 6193 if vg_name is not None: 6194 vparams = {constants.NV_PVLIST: [vg_name]} 6195 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node) 6196 cname = self.cfg.GetClusterName() 6197 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node] 6198 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor) 6199 if errmsgs: 6200 raise errors.OpPrereqError("Checks on node PVs failed: %s" % 6201 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6202
6203 - def Exec(self, feedback_fn):
6204 """Adds the new node to the cluster. 6205 6206 """ 6207 new_node = self.new_node 6208 node = new_node.name 6209 6210 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ 6211 "Not owning BGL" 6212 6213 # We adding a new node so we assume it's powered 6214 new_node.powered = True 6215 6216 # for re-adds, reset the offline/drained/master-candidate flags; 6217 # we need to reset here, otherwise offline would prevent RPC calls 6218 # later in the procedure; this also means that if the re-add 6219 # fails, we are left with a non-offlined, broken node 6220 if self.op.readd: 6221 new_node.drained = new_node.offline = False # pylint: disable=W0201 6222 self.LogInfo("Readding a node, the offline/drained flags were reset") 6223 # if we demote the node, we do cleanup later in the procedure 6224 new_node.master_candidate = self.master_candidate 6225 if self.changed_primary_ip: 6226 new_node.primary_ip = self.op.primary_ip 6227 6228 # copy the master/vm_capable flags 6229 for attr in self._NFLAGS: 6230 setattr(new_node, attr, getattr(self.op, attr)) 6231 6232 # notify the user about any possible mc promotion 6233 if new_node.master_candidate: 6234 self.LogInfo("Node will be a master candidate") 6235 6236 if self.op.ndparams: 6237 new_node.ndparams = self.op.ndparams 6238 else: 6239 new_node.ndparams = {} 6240 6241 if self.op.hv_state: 6242 new_node.hv_state_static = self.new_hv_state 6243 6244 if self.op.disk_state: 6245 new_node.disk_state_static = self.new_disk_state 6246 6247 # Add node to our /etc/hosts, and add key to known_hosts 6248 if self.cfg.GetClusterInfo().modify_etc_hosts: 6249 master_node = self.cfg.GetMasterNode() 6250 result = self.rpc.call_etc_hosts_modify(master_node, 6251 constants.ETC_HOSTS_ADD, 6252 self.hostname.name, 6253 self.hostname.ip) 6254 result.Raise("Can't update hosts file with new host data") 6255 6256 if new_node.secondary_ip != new_node.primary_ip: 6257 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip, 6258 False) 6259 6260 node_verify_list = [self.cfg.GetMasterNode()] 6261 node_verify_param = { 6262 constants.NV_NODELIST: ([node], {}), 6263 # TODO: do a node-net-test as well? 6264 } 6265 6266 result = self.rpc.call_node_verify(node_verify_list, node_verify_param, 6267 self.cfg.GetClusterName()) 6268 for verifier in node_verify_list: 6269 result[verifier].Raise("Cannot communicate with node %s" % verifier) 6270 nl_payload = result[verifier].payload[constants.NV_NODELIST] 6271 if nl_payload: 6272 for failed in nl_payload: 6273 feedback_fn("ssh/hostname verification failed" 6274 " (checking from %s): %s" % 6275 (verifier, nl_payload[failed])) 6276 raise errors.OpExecError("ssh/hostname verification failed") 6277 6278 if self.op.readd: 6279 _RedistributeAncillaryFiles(self) 6280 self.context.ReaddNode(new_node) 6281 # make sure we redistribute the config 6282 self.cfg.Update(new_node, feedback_fn) 6283 # and make sure the new node will not have old files around 6284 if not new_node.master_candidate: 6285 result = self.rpc.call_node_demote_from_mc(new_node.name) 6286 msg = result.fail_msg 6287 if msg: 6288 self.LogWarning("Node failed to demote itself from master" 6289 " candidate status: %s" % msg) 6290 else: 6291 _RedistributeAncillaryFiles(self, additional_nodes=[node], 6292 additional_vm=self.op.vm_capable) 6293 self.context.AddNode(new_node, self.proc.GetECId())
6294
6295 6296 -class LUNodeSetParams(LogicalUnit):
6297 """Modifies the parameters of a node. 6298 6299 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) 6300 to the node role (as _ROLE_*) 6301 @cvar _R2F: a dictionary from node role to tuples of flags 6302 @cvar _FLAGS: a list of attribute names corresponding to the flags 6303 6304 """ 6305 HPATH = "node-modify" 6306 HTYPE = constants.HTYPE_NODE 6307 REQ_BGL = False 6308 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) 6309 _F2R = { 6310 (True, False, False): _ROLE_CANDIDATE, 6311 (False, True, False): _ROLE_DRAINED, 6312 (False, False, True): _ROLE_OFFLINE, 6313 (False, False, False): _ROLE_REGULAR, 6314 } 6315 _R2F = dict((v, k) for k, v in _F2R.items()) 6316 _FLAGS = ["master_candidate", "drained", "offline"] 6317
6318 - def CheckArguments(self):
6319 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 6320 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, 6321 self.op.master_capable, self.op.vm_capable, 6322 self.op.secondary_ip, self.op.ndparams, self.op.hv_state, 6323 self.op.disk_state] 6324 if all_mods.count(None) == len(all_mods): 6325 raise errors.OpPrereqError("Please pass at least one modification", 6326 errors.ECODE_INVAL) 6327 if all_mods.count(True) > 1: 6328 raise errors.OpPrereqError("Can't set the node into more than one" 6329 " state at the same time", 6330 errors.ECODE_INVAL) 6331 6332 # Boolean value that tells us whether we might be demoting from MC 6333 self.might_demote = (self.op.master_candidate is False or 6334 self.op.offline is True or 6335 self.op.drained is True or 6336 self.op.master_capable is False) 6337 6338 if self.op.secondary_ip: 6339 if not netutils.IP4Address.IsValid(self.op.secondary_ip): 6340 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 6341 " address" % self.op.secondary_ip, 6342 errors.ECODE_INVAL) 6343 6344 self.lock_all = self.op.auto_promote and self.might_demote 6345 self.lock_instances = self.op.secondary_ip is not None
6346
6347 - def _InstanceFilter(self, instance):
6348 """Filter for getting affected instances. 6349 6350 """ 6351 return (instance.disk_template in constants.DTS_INT_MIRROR and 6352 self.op.node_name in instance.all_nodes)
6353
6354 - def ExpandNames(self):
6355 if self.lock_all: 6356 self.needed_locks = { 6357 locking.LEVEL_NODE: locking.ALL_SET, 6358 6359 # Block allocations when all nodes are locked 6360 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 6361 } 6362 else: 6363 self.needed_locks = { 6364 locking.LEVEL_NODE: self.op.node_name, 6365 } 6366 6367 # Since modifying a node can have severe effects on currently running 6368 # operations the resource lock is at least acquired in shared mode 6369 self.needed_locks[locking.LEVEL_NODE_RES] = \ 6370 self.needed_locks[locking.LEVEL_NODE] 6371 6372 # Get all locks except nodes in shared mode; they are not used for anything 6373 # but read-only access 6374 self.share_locks = _ShareAll() 6375 self.share_locks[locking.LEVEL_NODE] = 0 6376 self.share_locks[locking.LEVEL_NODE_RES] = 0 6377 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0 6378 6379 if self.lock_instances: 6380 self.needed_locks[locking.LEVEL_INSTANCE] = \ 6381 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6382
6383 - def BuildHooksEnv(self):
6384 """Build hooks env. 6385 6386 This runs on the master node. 6387 6388 """ 6389 return { 6390 "OP_TARGET": self.op.node_name, 6391 "MASTER_CANDIDATE": str(self.op.master_candidate), 6392 "OFFLINE": str(self.op.offline), 6393 "DRAINED": str(self.op.drained), 6394 "MASTER_CAPABLE": str(self.op.master_capable), 6395 "VM_CAPABLE": str(self.op.vm_capable), 6396 }
6397
6398 - def BuildHooksNodes(self):
6399 """Build hooks nodes. 6400 6401 """ 6402 nl = [self.cfg.GetMasterNode(), self.op.node_name] 6403 return (nl, nl)
6404
6405 - def CheckPrereq(self):
6406 """Check prerequisites. 6407 6408 This only checks the instance list against the existing names. 6409 6410 """ 6411 node = self.node = self.cfg.GetNodeInfo(self.op.node_name) 6412 6413 if self.lock_instances: 6414 affected_instances = \ 6415 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter) 6416 6417 # Verify instance locks 6418 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE) 6419 wanted_instances = frozenset(affected_instances.keys()) 6420 if wanted_instances - owned_instances: 6421 raise errors.OpPrereqError("Instances affected by changing node %s's" 6422 " secondary IP address have changed since" 6423 " locks were acquired, wanted '%s', have" 6424 " '%s'; retry the operation" % 6425 (self.op.node_name, 6426 utils.CommaJoin(wanted_instances), 6427 utils.CommaJoin(owned_instances)), 6428 errors.ECODE_STATE) 6429 else: 6430 affected_instances = None 6431 6432 if (self.op.master_candidate is not None or 6433 self.op.drained is not None or 6434 self.op.offline is not None): 6435 # we can't change the master's node flags 6436 if self.op.node_name == self.cfg.GetMasterNode(): 6437 raise errors.OpPrereqError("The master role can be changed" 6438 " only via master-failover", 6439 errors.ECODE_INVAL) 6440 6441 if self.op.master_candidate and not node.master_capable: 6442 raise errors.OpPrereqError("Node %s is not master capable, cannot make" 6443 " it a master candidate" % node.name, 6444 errors.ECODE_STATE) 6445 6446 if self.op.vm_capable is False: 6447 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name) 6448 if ipri or isec: 6449 raise errors.OpPrereqError("Node %s hosts instances, cannot unset" 6450 " the vm_capable flag" % node.name, 6451 errors.ECODE_STATE) 6452 6453 if node.master_candidate and self.might_demote and not self.lock_all: 6454 assert not self.op.auto_promote, "auto_promote set but lock_all not" 6455 # check if after removing the current node, we're missing master 6456 # candidates 6457 (mc_remaining, mc_should, _) = \ 6458 self.cfg.GetMasterCandidateStats(exceptions=[node.name]) 6459 if mc_remaining < mc_should: 6460 raise errors.OpPrereqError("Not enough master candidates, please" 6461 " pass auto promote option to allow" 6462 " promotion (--auto-promote or RAPI" 6463 " auto_promote=True)", errors.ECODE_STATE) 6464 6465 self.old_flags = old_flags = (node.master_candidate, 6466 node.drained, node.offline) 6467 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) 6468 self.old_role = old_role = self._F2R[old_flags] 6469 6470 # Check for ineffective changes 6471 for attr in self._FLAGS: 6472 if (getattr(self.op, attr) is False and getattr(node, attr) is False): 6473 self.LogInfo("Ignoring request to unset flag %s, already unset", attr) 6474 setattr(self.op, attr, None) 6475 6476 # Past this point, any flag change to False means a transition 6477 # away from the respective state, as only real changes are kept 6478 6479 # TODO: We might query the real power state if it supports OOB 6480 if _SupportsOob(self.cfg, node): 6481 if self.op.offline is False and not (node.powered or 6482 self.op.powered is True): 6483 raise errors.OpPrereqError(("Node %s needs to be turned on before its" 6484 " offline status can be reset") % 6485 self.op.node_name, errors.ECODE_STATE) 6486 elif self.op.powered is not None: 6487 raise errors.OpPrereqError(("Unable to change powered state for node %s" 6488 " as it does not support out-of-band" 6489 " handling") % self.op.node_name, 6490 errors.ECODE_STATE) 6491 6492 # If we're being deofflined/drained, 
we'll MC ourself if needed 6493 if (self.op.drained is False or self.op.offline is False or 6494 (self.op.master_capable and not node.master_capable)): 6495 if _DecideSelfPromotion(self): 6496 self.op.master_candidate = True 6497 self.LogInfo("Auto-promoting node to master candidate") 6498 6499 # If we're no longer master capable, we'll demote ourselves from MC 6500 if self.op.master_capable is False and node.master_candidate: 6501 self.LogInfo("Demoting from master candidate") 6502 self.op.master_candidate = False 6503 6504 # Compute new role 6505 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 6506 if self.op.master_candidate: 6507 new_role = self._ROLE_CANDIDATE 6508 elif self.op.drained: 6509 new_role = self._ROLE_DRAINED 6510 elif self.op.offline: 6511 new_role = self._ROLE_OFFLINE 6512 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: 6513 # False is still in new flags, which means we're un-setting (the 6514 # only) True flag 6515 new_role = self._ROLE_REGULAR 6516 else: # no new flags, nothing, keep old role 6517 new_role = old_role 6518 6519 self.new_role = new_role 6520 6521 if old_role == self._ROLE_OFFLINE and new_role != old_role: 6522 # Trying to transition out of offline status 6523 result = self.rpc.call_version([node.name])[node.name] 6524 if result.fail_msg: 6525 raise errors.OpPrereqError("Node %s is being de-offlined but fails" 6526 " to report its version: %s" % 6527 (node.name, result.fail_msg), 6528 errors.ECODE_STATE) 6529 else: 6530 self.LogWarning("Transitioning node from offline to online state" 6531 " without using re-add. Please make sure the node" 6532 " is healthy!") 6533 6534 # When changing the secondary ip, verify if this is a single-homed to 6535 # multi-homed transition or vice versa, and apply the relevant 6536 # restrictions. 
6537 if self.op.secondary_ip: 6538 # Ok even without locking, because this can't be changed by any LU 6539 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) 6540 master_singlehomed = master.secondary_ip == master.primary_ip 6541 if master_singlehomed and self.op.secondary_ip != node.primary_ip: 6542 if self.op.force and node.name == master.name: 6543 self.LogWarning("Transitioning from single-homed to multi-homed" 6544 " cluster; all nodes will require a secondary IP" 6545 " address") 6546 else: 6547 raise errors.OpPrereqError("Changing the secondary ip on a" 6548 " single-homed cluster requires the" 6549 " --force option to be passed, and the" 6550 " target node to be the master", 6551 errors.ECODE_INVAL) 6552 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip: 6553 if self.op.force and node.name == master.name: 6554 self.LogWarning("Transitioning from multi-homed to single-homed" 6555 " cluster; secondary IP addresses will have to be" 6556 " removed") 6557 else: 6558 raise errors.OpPrereqError("Cannot set the secondary IP to be the" 6559 " same as the primary IP on a multi-homed" 6560 " cluster, unless the --force option is" 6561 " passed, and the target node is the" 6562 " master", errors.ECODE_INVAL) 6563 6564 assert not (frozenset(affected_instances) - 6565 self.owned_locks(locking.LEVEL_INSTANCE)) 6566 6567 if node.offline: 6568 if affected_instances: 6569 msg = ("Cannot change secondary IP address: offline node has" 6570 " instances (%s) configured to use it" % 6571 utils.CommaJoin(affected_instances.keys())) 6572 raise errors.OpPrereqError(msg, errors.ECODE_STATE) 6573 else: 6574 # On online nodes, check that no instances are running, and that 6575 # the node has the new ip and we can reach it. 6576 for instance in affected_instances.values(): 6577 _CheckInstanceState(self, instance, INSTANCE_DOWN, 6578 msg="cannot change secondary ip") 6579 6580 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True) 6581 if master.name != node.name: 6582 # check reachability from master secondary ip to new secondary ip 6583 if not netutils.TcpPing(self.op.secondary_ip, 6584 constants.DEFAULT_NODED_PORT, 6585 source=master.secondary_ip): 6586 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 6587 " based ping to node daemon port", 6588 errors.ECODE_ENVIRON) 6589 6590 if self.op.ndparams: 6591 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams) 6592 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) 6593 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node", 6594 "node", "cluster or group") 6595 self.new_ndparams = new_ndparams 6596 6597 if self.op.hv_state: 6598 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 6599 self.node.hv_state_static) 6600 6601 if self.op.disk_state: 6602 self.new_disk_state = \ 6603 _MergeAndVerifyDiskState(self.op.disk_state, 6604 self.node.disk_state_static)
6605
6606 - def Exec(self, feedback_fn):
6607 """Modifies a node. 6608 6609 """ 6610 node = self.node 6611 old_role = self.old_role 6612 new_role = self.new_role 6613 6614 result = [] 6615 6616 if self.op.ndparams: 6617 node.ndparams = self.new_ndparams 6618 6619 if self.op.powered is not None: 6620 node.powered = self.op.powered 6621 6622 if self.op.hv_state: 6623 node.hv_state_static = self.new_hv_state 6624 6625 if self.op.disk_state: 6626 node.disk_state_static = self.new_disk_state 6627 6628 for attr in ["master_capable", "vm_capable"]: 6629 val = getattr(self.op, attr) 6630 if val is not None: 6631 setattr(node, attr, val) 6632 result.append((attr, str(val))) 6633 6634 if new_role != old_role: 6635 # Tell the node to demote itself, if no longer MC and not offline 6636 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE: 6637 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg 6638 if msg: 6639 self.LogWarning("Node failed to demote itself: %s", msg) 6640 6641 new_flags = self._R2F[new_role] 6642 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): 6643 if of != nf: 6644 result.append((desc, str(nf))) 6645 (node.master_candidate, node.drained, node.offline) = new_flags 6646 6647 # we locked all nodes, we adjust the CP before updating this node 6648 if self.lock_all: 6649 _AdjustCandidatePool(self, [node.name]) 6650 6651 if self.op.secondary_ip: 6652 node.secondary_ip = self.op.secondary_ip 6653 result.append(("secondary_ip", self.op.secondary_ip)) 6654 6655 # this will trigger configuration file update, if needed 6656 self.cfg.Update(node, feedback_fn) 6657 6658 # this will trigger job queue propagation or cleanup if the mc 6659 # flag changed 6660 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1: 6661 self.context.ReaddNode(node) 6662 6663 return result
6664
6665 6666 -class LUNodePowercycle(NoHooksLU):
6667 """Powercycles a node. 6668 6669 """ 6670 REQ_BGL = False 6671
6672 - def CheckArguments(self):
6673 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 6674 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force: 6675 raise errors.OpPrereqError("The node is the master and the force" 6676 " parameter was not set", 6677 errors.ECODE_INVAL)
6678
6679 - def ExpandNames(self):
6680 """Locking for PowercycleNode. 6681 6682 This is a last-resort option and shouldn't block on other 6683 jobs. Therefore, we grab no locks. 6684 6685 """ 6686 self.needed_locks = {}
6687
6688 - def Exec(self, feedback_fn):
6689 """Reboots a node. 6690 6691 """ 6692 result = self.rpc.call_node_powercycle(self.op.node_name, 6693 self.cfg.GetHypervisorType()) 6694 result.Raise("Failed to schedule the reboot") 6695 return result.payload
6696
6697 6698 -class LUClusterQuery(NoHooksLU):
6699 """Query cluster configuration. 6700 6701 """ 6702 REQ_BGL = False 6703
6704 - def ExpandNames(self):
6705 self.needed_locks = {}
6706
6707 - def Exec(self, feedback_fn):
6708 """Return cluster config. 6709 6710 """ 6711 cluster = self.cfg.GetClusterInfo() 6712 os_hvp = {} 6713 6714 # Filter just for enabled hypervisors 6715 for os_name, hv_dict in cluster.os_hvp.items(): 6716 os_hvp[os_name] = {} 6717 for hv_name, hv_params in hv_dict.items(): 6718 if hv_name in cluster.enabled_hypervisors: 6719 os_hvp[os_name][hv_name] = hv_params 6720 6721 # Convert ip_family to ip_version 6722 primary_ip_version = constants.IP4_VERSION 6723 if cluster.primary_ip_family == netutils.IP6Address.family: 6724 primary_ip_version = constants.IP6_VERSION 6725 6726 result = { 6727 "software_version": constants.RELEASE_VERSION, 6728 "protocol_version": constants.PROTOCOL_VERSION, 6729 "config_version": constants.CONFIG_VERSION, 6730 "os_api_version": max(constants.OS_API_VERSIONS), 6731 "export_version": constants.EXPORT_VERSION, 6732 "architecture": runtime.GetArchInfo(), 6733 "name": cluster.cluster_name, 6734 "master": cluster.master_node, 6735 "default_hypervisor": cluster.primary_hypervisor, 6736 "enabled_hypervisors": cluster.enabled_hypervisors, 6737 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 6738 for hypervisor_name in cluster.enabled_hypervisors]), 6739 "os_hvp": os_hvp, 6740 "beparams": cluster.beparams, 6741 "osparams": cluster.osparams, 6742 "ipolicy": cluster.ipolicy, 6743 "nicparams": cluster.nicparams, 6744 "ndparams": cluster.ndparams, 6745 "diskparams": cluster.diskparams, 6746 "candidate_pool_size": cluster.candidate_pool_size, 6747 "master_netdev": cluster.master_netdev, 6748 "master_netmask": cluster.master_netmask, 6749 "use_external_mip_script": cluster.use_external_mip_script, 6750 "volume_group_name": cluster.volume_group_name, 6751 "drbd_usermode_helper": cluster.drbd_usermode_helper, 6752 "file_storage_dir": cluster.file_storage_dir, 6753 "shared_file_storage_dir": cluster.shared_file_storage_dir, 6754 "maintain_node_health": cluster.maintain_node_health, 6755 "ctime": cluster.ctime, 6756 "mtime": cluster.mtime, 6757 "uuid": cluster.uuid, 6758 "tags": list(cluster.GetTags()), 6759 "uid_pool": cluster.uid_pool, 6760 "default_iallocator": cluster.default_iallocator, 6761 "reserved_lvs": cluster.reserved_lvs, 6762 "primary_ip_version": primary_ip_version, 6763 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 6764 "hidden_os": cluster.hidden_os, 6765 "blacklisted_os": cluster.blacklisted_os, 6766 } 6767 6768 return result
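# Editor's sketch (not part of the module): the os_hvp filtering performed
# above, extracted into a pure function over plain dicts; all names and the
# example values are illustrative only.
def _filter_os_hvp(os_hvp, enabled_hypervisors):
  """Keep only the per-OS hypervisor overrides for enabled hypervisors."""
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())

# Example (hypothetical values):
#   _filter_os_hvp({"debootstrap": {"kvm": {"acpi": True}, "xen-pvm": {}}},
#                  ["kvm"])
#   returns {"debootstrap": {"kvm": {"acpi": True}}}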
6769
6770 6771 -class LUClusterConfigQuery(NoHooksLU):
6772 """Return configuration values. 6773 6774 """ 6775 REQ_BGL = False 6776
6777 - def CheckArguments(self):
6778 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6779
6780 - def ExpandNames(self):
6781 self.cq.ExpandNames(self)
6782
6783 - def DeclareLocks(self, level):
6784 self.cq.DeclareLocks(self, level)
6785
6786 - def Exec(self, feedback_fn):
6787 result = self.cq.OldStyleQuery(self) 6788 6789 assert len(result) == 1 6790 6791 return result[0]
6792
6793 6794 -class _ClusterQuery(_QueryBase):
6795 FIELDS = query.CLUSTER_FIELDS 6796 6797 #: Do not sort (there is only one item) 6798 SORT_FIELD = None 6799
6800 - def ExpandNames(self, lu):
6801 lu.needed_locks = {} 6802 6803 # The following variables interact with _QueryBase._GetNames 6804 self.wanted = locking.ALL_SET 6805 self.do_locking = self.use_locking 6806 6807 if self.do_locking: 6808 raise errors.OpPrereqError("Can not use locking for cluster queries", 6809 errors.ECODE_INVAL)
6810
6811 - def DeclareLocks(self, lu, level):
6812 pass
6813
6814 - def _GetQueryData(self, lu):
6815 """Computes the list of nodes and their attributes. 6816 6817 """ 6818 # Locking is not used 6819 assert not (compat.any(lu.glm.is_owned(level) 6820 for level in locking.LEVELS 6821 if level != locking.LEVEL_CLUSTER) or 6822 self.do_locking or self.use_locking) 6823 6824 if query.CQ_CONFIG in self.requested_data: 6825 cluster = lu.cfg.GetClusterInfo() 6826 else: 6827 cluster = NotImplemented 6828 6829 if query.CQ_QUEUE_DRAINED in self.requested_data: 6830 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) 6831 else: 6832 drain_flag = NotImplemented 6833 6834 if query.CQ_WATCHER_PAUSE in self.requested_data: 6835 master_name = lu.cfg.GetMasterNode() 6836 6837 result = lu.rpc.call_get_watcher_pause(master_name) 6838 result.Raise("Can't retrieve watcher pause from master node '%s'" % 6839 master_name) 6840 6841 watcher_pause = result.payload 6842 else: 6843 watcher_pause = NotImplemented 6844 6845 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
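# Editor's sketch (illustrative only): the pattern used above fetches each
# piece of cluster data only when the corresponding field group was
# requested, and otherwise passes NotImplemented as the "not requested"
# placeholder handed to query.ClusterQueryData.
def _fetch_if_requested(requested_data, flag, fetch_fn):
  """Return fetch_fn() if flag was requested, else NotImplemented."""
  if flag in requested_data:
    return fetch_fn()
  return NotImplemented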
6846
6847 6848 -class LUInstanceActivateDisks(NoHooksLU):
6849 """Bring up an instance's disks. 6850 6851 """ 6852 REQ_BGL = False 6853
6854 - def ExpandNames(self):
6855 self._ExpandAndLockInstance() 6856 self.needed_locks[locking.LEVEL_NODE] = [] 6857 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6858
6859 - def DeclareLocks(self, level):
6860 if level == locking.LEVEL_NODE: 6861 self._LockInstancesNodes()
6862
6863 - def CheckPrereq(self):
6864 """Check prerequisites. 6865 6866 This checks that the instance is in the cluster. 6867 6868 """ 6869 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6870 assert self.instance is not None, \ 6871 "Cannot retrieve locked instance %s" % self.op.instance_name 6872 _CheckNodeOnline(self, self.instance.primary_node)
6873
6874 - def Exec(self, feedback_fn):
6875 """Activate the disks. 6876 6877 """ 6878 disks_ok, disks_info = \ 6879 _AssembleInstanceDisks(self, self.instance, 6880 ignore_size=self.op.ignore_size) 6881 if not disks_ok: 6882 raise errors.OpExecError("Cannot activate block devices") 6883 6884 if self.op.wait_for_sync: 6885 if not _WaitForSync(self, self.instance): 6886 raise errors.OpExecError("Some disks of the instance are degraded!") 6887 6888 return disks_info
6889
6890 6891 -def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, 6892 ignore_size=False):
6893 """Prepare the block devices for an instance. 6894 6895 This sets up the block devices on all nodes. 6896 6897 @type lu: L{LogicalUnit} 6898 @param lu: the logical unit on whose behalf we execute 6899 @type instance: L{objects.Instance} 6900 @param instance: the instance for whose disks we assemble 6901 @type disks: list of L{objects.Disk} or None 6902 @param disks: which disks to assemble (or all, if None) 6903 @type ignore_secondaries: boolean 6904 @param ignore_secondaries: if true, errors on secondary nodes 6905 won't result in an error return from the function 6906 @type ignore_size: boolean 6907 @param ignore_size: if true, the current known size of the disk 6908 will not be used during the disk activation, useful for cases 6909 when the size is wrong 6910 @return: False if the operation failed, otherwise a list of 6911 (host, instance_visible_name, node_visible_name) 6912 with the mapping from node devices to instance devices 6913 6914 """ 6915 device_info = [] 6916 disks_ok = True 6917 iname = instance.name 6918 disks = _ExpandCheckDisks(instance, disks) 6919 6920 # With the two passes mechanism we try to reduce the window of 6921 # opportunity for the race condition of switching DRBD to primary 6922 # before handshaking occured, but we do not eliminate it 6923 6924 # The proper fix would be to wait (with some limits) until the 6925 # connection has been made and drbd transitions from WFConnection 6926 # into any other network-connected state (Connected, SyncTarget, 6927 # SyncSource, etc.) 6928 6929 # 1st pass, assemble on all nodes in secondary mode 6930 for idx, inst_disk in enumerate(disks): 6931 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 6932 if ignore_size: 6933 node_disk = node_disk.Copy() 6934 node_disk.UnsetSize() 6935 lu.cfg.SetDiskID(node_disk, node) 6936 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, 6937 False, idx) 6938 msg = result.fail_msg 6939 if msg: 6940 is_offline_secondary = (node in instance.secondary_nodes and 6941 result.offline) 6942 lu.LogWarning("Could not prepare block device %s on node %s" 6943 " (is_primary=False, pass=1): %s", 6944 inst_disk.iv_name, node, msg) 6945 if not (ignore_secondaries or is_offline_secondary): 6946 disks_ok = False 6947 6948 # FIXME: race condition on drbd migration to primary 6949 6950 # 2nd pass, do only the primary node 6951 for idx, inst_disk in enumerate(disks): 6952 dev_path = None 6953 6954 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 6955 if node != instance.primary_node: 6956 continue 6957 if ignore_size: 6958 node_disk = node_disk.Copy() 6959 node_disk.UnsetSize() 6960 lu.cfg.SetDiskID(node_disk, node) 6961 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, 6962 True, idx) 6963 msg = result.fail_msg 6964 if msg: 6965 lu.LogWarning("Could not prepare block device %s on node %s" 6966 " (is_primary=True, pass=2): %s", 6967 inst_disk.iv_name, node, msg) 6968 disks_ok = False 6969 else: 6970 dev_path = result.payload 6971 6972 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path)) 6973 6974 # leave the disks configured for the primary node 6975 # this is a workaround that would be fixed better by 6976 # improving the logical/physical id handling 6977 for disk in disks: 6978 lu.cfg.SetDiskID(disk, instance.primary_node) 6979 6980 return disks_ok, device_info
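# Editor's sketch (illustrative only, not part of the module): the two-pass
# scheme above reduced to its control flow. "assemble" stands in for the
# blockdev_assemble RPC and is a hypothetical callable; disks are plain
# dicts here instead of objects.Disk.
def _two_pass_assemble(disks, primary_node, assemble):
  """Assemble every disk as secondary first, then promote on the primary."""
  ok = True
  # 1st pass: bring the device up in secondary mode on every node it spans,
  # narrowing the window in which one side becomes primary before the peers
  # have connected
  for disk in disks:
    for node in disk["nodes"]:
      if not assemble(node, disk, as_primary=False):
        ok = False
  # 2nd pass: switch to primary mode only on the instance's primary node
  for disk in disks:
    if not assemble(primary_node, disk, as_primary=True):
      ok = False
  return ok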
6981
6982 6983 -def _StartInstanceDisks(lu, instance, force):
6984 """Start the disks of an instance. 6985 6986 """ 6987 disks_ok, _ = _AssembleInstanceDisks(lu, instance, 6988 ignore_secondaries=force) 6989 if not disks_ok: 6990 _ShutdownInstanceDisks(lu, instance) 6991 if force is not None and not force: 6992 lu.LogWarning("", 6993 hint=("If the message above refers to a secondary node," 6994 " you can retry the operation using '--force'")) 6995 raise errors.OpExecError("Disk consistency error")
6996
6997 6998 -class LUInstanceDeactivateDisks(NoHooksLU):
6999 """Shutdown an instance's disks. 7000 7001 """ 7002 REQ_BGL = False 7003
7004 - def ExpandNames(self):
7005 self._ExpandAndLockInstance() 7006 self.needed_locks[locking.LEVEL_NODE] = [] 7007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7008
7009 - def DeclareLocks(self, level):
7010 if level == locking.LEVEL_NODE: 7011 self._LockInstancesNodes()
7012
7013 - def CheckPrereq(self):
7014 """Check prerequisites. 7015 7016 This checks that the instance is in the cluster. 7017 7018 """ 7019 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7020 assert self.instance is not None, \ 7021 "Cannot retrieve locked instance %s" % self.op.instance_name
7022
7023 - def Exec(self, feedback_fn):
7024 """Deactivate the disks 7025 7026 """ 7027 instance = self.instance 7028 if self.op.force: 7029 _ShutdownInstanceDisks(self, instance) 7030 else: 7031 _SafeShutdownInstanceDisks(self, instance)
7032
7033 7034 -def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7035 """Shutdown block devices of an instance. 7036 7037 This function checks if an instance is running, before calling 7038 _ShutdownInstanceDisks. 7039 7040 """ 7041 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks") 7042 _ShutdownInstanceDisks(lu, instance, disks=disks)
7043
7044 7045 -def _ExpandCheckDisks(instance, disks):
7046 """Return the instance disks selected by the disks list 7047 7048 @type disks: list of L{objects.Disk} or None 7049 @param disks: selected disks 7050 @rtype: list of L{objects.Disk} 7051 @return: selected instance disks to act on 7052 7053 """ 7054 if disks is None: 7055 return instance.disks 7056 else: 7057 if not set(disks).issubset(instance.disks): 7058 raise errors.ProgrammerError("Can only act on disks belonging to the" 7059 " target instance: expected a subset of %r," 7060 " got %r" % (instance.disks, disks)) 7061 return disks
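# Editor's note (illustrative): with disks=None the helper above selects all
# of the instance's disks; an explicit list must be a subset of them, e.g.
#   _ExpandCheckDisks(instance, None)               -> instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1]) -> just the first disk
#   _ExpandCheckDisks(instance, [some_other_disk])  -> ProgrammerError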
7062
7063 7064 -def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7065 """Shutdown block devices of an instance. 7066 7067 This does the shutdown on all nodes of the instance. 7068 7069 If ignore_primary is true, errors on the primary node are 7070 ignored. 7071 7072 """ 7073 all_result = True 7074 disks = _ExpandCheckDisks(instance, disks) 7075 7076 for disk in disks: 7077 for node, top_disk in disk.ComputeNodeTree(instance.primary_node): 7078 lu.cfg.SetDiskID(top_disk, node) 7079 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance)) 7080 msg = result.fail_msg 7081 if msg: 7082 lu.LogWarning("Could not shutdown block device %s on node %s: %s", 7083 disk.iv_name, node, msg) 7084 if ((node == instance.primary_node and not ignore_primary) or 7085 (node != instance.primary_node and not result.offline)): 7086 all_result = False 7087 return all_result
7088
7089 7090 -def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7091 """Checks if a node has enough free memory. 7092 7093 This function checks if a given node has the needed amount of free 7094 memory. In case the node has less memory or we cannot get the 7095 information from the node, this function raises an OpPrereqError 7096 exception. 7097 7098 @type lu: C{LogicalUnit} 7099 @param lu: a logical unit from which we get configuration data 7100 @type node: C{str} 7101 @param node: the node to check 7102 @type reason: C{str} 7103 @param reason: string to use in the error message 7104 @type requested: C{int} 7105 @param requested: the amount of memory in MiB to check for 7106 @type hypervisor_name: C{str} 7107 @param hypervisor_name: the hypervisor to ask for memory stats 7108 @rtype: integer 7109 @return: node current free memory 7110 @raise errors.OpPrereqError: if the node doesn't have enough memory, or 7111 we cannot check the node 7112 7113 """ 7114 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False) 7115 nodeinfo[node].Raise("Can't get data from node %s" % node, 7116 prereq=True, ecode=errors.ECODE_ENVIRON) 7117 (_, _, (hv_info, )) = nodeinfo[node].payload 7118 7119 free_mem = hv_info.get("memory_free", None) 7120 if not isinstance(free_mem, int): 7121 raise errors.OpPrereqError("Can't compute free memory on node %s, result" 7122 " was '%s'" % (node, free_mem), 7123 errors.ECODE_ENVIRON) 7124 if requested > free_mem: 7125 raise errors.OpPrereqError("Not enough memory on node %s for %s:" 7126 " needed %s MiB, available %s MiB" % 7127 (node, reason, requested, free_mem), 7128 errors.ECODE_NORES) 7129 return free_mem
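# Editor's note (hypothetical call site, mirroring the usage further down in
# this module): check for 1024 MiB before starting an instance and keep the
# reported free memory.
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   1024, instance.hypervisor)
# On success the node's currently free memory in MiB is returned; otherwise
# OpPrereqError is raised with ECODE_NORES or ECODE_ENVIRON.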
7130
7131 7132 -def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7133 """Checks if nodes have enough free disk space in all the VGs. 7134 7135 This function checks if all given nodes have the needed amount of 7136 free disk. In case any node has less disk or we cannot get the 7137 information from the node, this function raises an OpPrereqError 7138 exception. 7139 7140 @type lu: C{LogicalUnit} 7141 @param lu: a logical unit from which we get configuration data 7142 @type nodenames: C{list} 7143 @param nodenames: the list of node names to check 7144 @type req_sizes: C{dict} 7145 @param req_sizes: the hash of vg and corresponding amount of disk in 7146 MiB to check for 7147 @raise errors.OpPrereqError: if the node doesn't have enough disk, 7148 or we cannot check the node 7149 7150 """ 7151 for vg, req_size in req_sizes.items(): 7152 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
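# Editor's note (illustrative): req_sizes maps a volume group name to the
# total amount of new space (in MiB) needed on it; e.g. two LVs of 10 GiB
# and 2 GiB on a hypothetical VG named "xenvg" would be checked with:
#   _CheckNodesFreeDiskPerVG(lu, nodenames, {"xenvg": 10240 + 2048})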
7153
7154 7155 -def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7156 """Checks if nodes have enough free disk space in the specified VG. 7157 7158 This function checks if all given nodes have the needed amount of 7159 free disk. In case any node has less disk or we cannot get the 7160 information from the node, this function raises an OpPrereqError 7161 exception. 7162 7163 @type lu: C{LogicalUnit} 7164 @param lu: a logical unit from which we get configuration data 7165 @type nodenames: C{list} 7166 @param nodenames: the list of node names to check 7167 @type vg: C{str} 7168 @param vg: the volume group to check 7169 @type requested: C{int} 7170 @param requested: the amount of disk in MiB to check for 7171 @raise errors.OpPrereqError: if the node doesn't have enough disk, 7172 or we cannot check the node 7173 7174 """ 7175 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames) 7176 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags) 7177 for node in nodenames: 7178 info = nodeinfo[node] 7179 info.Raise("Cannot get current information from node %s" % node, 7180 prereq=True, ecode=errors.ECODE_ENVIRON) 7181 (_, (vg_info, ), _) = info.payload 7182 vg_free = vg_info.get("vg_free", None) 7183 if not isinstance(vg_free, int): 7184 raise errors.OpPrereqError("Can't compute free disk space on node" 7185 " %s for vg %s, result was '%s'" % 7186 (node, vg, vg_free), errors.ECODE_ENVIRON) 7187 if requested > vg_free: 7188 raise errors.OpPrereqError("Not enough disk space on target node %s" 7189 " vg %s: required %d MiB, available %d MiB" % 7190 (node, vg, requested, vg_free), 7191 errors.ECODE_NORES)
7192
7193 7194 -def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7195 """Checks if nodes have enough physical CPUs 7196 7197 This function checks if all given nodes have the needed number of 7198 physical CPUs. In case any node has less CPUs or we cannot get the 7199 information from the node, this function raises an OpPrereqError 7200 exception. 7201 7202 @type lu: C{LogicalUnit} 7203 @param lu: a logical unit from which we get configuration data 7204 @type nodenames: C{list} 7205 @param nodenames: the list of node names to check 7206 @type requested: C{int} 7207 @param requested: the minimum acceptable number of physical CPUs 7208 @raise errors.OpPrereqError: if the node doesn't have enough CPUs, 7209 or we cannot check the node 7210 7211 """ 7212 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None) 7213 for node in nodenames: 7214 info = nodeinfo[node] 7215 info.Raise("Cannot get current information from node %s" % node, 7216 prereq=True, ecode=errors.ECODE_ENVIRON) 7217 (_, _, (hv_info, )) = info.payload 7218 num_cpus = hv_info.get("cpu_total", None) 7219 if not isinstance(num_cpus, int): 7220 raise errors.OpPrereqError("Can't compute the number of physical CPUs" 7221 " on node %s, result was '%s'" % 7222 (node, num_cpus), errors.ECODE_ENVIRON) 7223 if requested > num_cpus: 7224 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are " 7225 "required" % (node, num_cpus, requested), 7226 errors.ECODE_NORES)
7227
7228 7229 -class LUInstanceStartup(LogicalUnit):
7230 """Starts an instance. 7231 7232 """ 7233 HPATH = "instance-start" 7234 HTYPE = constants.HTYPE_INSTANCE 7235 REQ_BGL = False 7236
7237 - def CheckArguments(self):
7238 # extra beparams 7239 if self.op.beparams: 7240 # fill the beparams dict 7241 objects.UpgradeBeParams(self.op.beparams) 7242 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7243
7244 - def ExpandNames(self):
7245 self._ExpandAndLockInstance() 7246 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7247
7248 - def DeclareLocks(self, level):
7249 if level == locking.LEVEL_NODE_RES: 7250 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7251
7252 - def BuildHooksEnv(self):
7253 """Build hooks env. 7254 7255 This runs on master, primary and secondary nodes of the instance. 7256 7257 """ 7258 env = { 7259 "FORCE": self.op.force, 7260 } 7261 7262 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 7263 7264 return env
7265
7266 - def BuildHooksNodes(self):
7267 """Build hooks nodes. 7268 7269 """ 7270 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7271 return (nl, nl)
7272
7273 - def CheckPrereq(self):
7274 """Check prerequisites. 7275 7276 This checks that the instance is in the cluster. 7277 7278 """ 7279 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7280 assert self.instance is not None, \ 7281 "Cannot retrieve locked instance %s" % self.op.instance_name 7282 7283 # extra hvparams 7284 if self.op.hvparams: 7285 # check hypervisor parameter syntax (locally) 7286 cluster = self.cfg.GetClusterInfo() 7287 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 7288 filled_hvp = cluster.FillHV(instance) 7289 filled_hvp.update(self.op.hvparams) 7290 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor) 7291 hv_type.CheckParameterSyntax(filled_hvp) 7292 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) 7293 7294 _CheckInstanceState(self, instance, INSTANCE_ONLINE) 7295 7296 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline 7297 7298 if self.primary_offline and self.op.ignore_offline_nodes: 7299 self.LogWarning("Ignoring offline primary node") 7300 7301 if self.op.hvparams or self.op.beparams: 7302 self.LogWarning("Overridden parameters are ignored") 7303 else: 7304 _CheckNodeOnline(self, instance.primary_node) 7305 7306 bep = self.cfg.GetClusterInfo().FillBE(instance) 7307 bep.update(self.op.beparams) 7308 7309 # check bridges existence 7310 _CheckInstanceBridgesExist(self, instance) 7311 7312 remote_info = self.rpc.call_instance_info(instance.primary_node, 7313 instance.name, 7314 instance.hypervisor) 7315 remote_info.Raise("Error checking node %s" % instance.primary_node, 7316 prereq=True, ecode=errors.ECODE_ENVIRON) 7317 if not remote_info.payload: # not running already 7318 _CheckNodeFreeMemory(self, instance.primary_node, 7319 "starting instance %s" % instance.name, 7320 bep[constants.BE_MINMEM], instance.hypervisor)
7321
7322 - def Exec(self, feedback_fn):
7323 """Start the instance. 7324 7325 """ 7326 instance = self.instance 7327 force = self.op.force 7328 7329 if not self.op.no_remember: 7330 self.cfg.MarkInstanceUp(instance.name) 7331 7332 if self.primary_offline: 7333 assert self.op.ignore_offline_nodes 7334 self.LogInfo("Primary node offline, marked instance as started") 7335 else: 7336 node_current = instance.primary_node 7337 7338 _StartInstanceDisks(self, instance, force) 7339 7340 result = \ 7341 self.rpc.call_instance_start(node_current, 7342 (instance, self.op.hvparams, 7343 self.op.beparams), 7344 self.op.startup_paused) 7345 msg = result.fail_msg 7346 if msg: 7347 _ShutdownInstanceDisks(self, instance) 7348 raise errors.OpExecError("Could not start instance: %s" % msg)
7349
7350 7351 -class LUInstanceReboot(LogicalUnit):
7352 """Reboot an instance. 7353 7354 """ 7355 HPATH = "instance-reboot" 7356 HTYPE = constants.HTYPE_INSTANCE 7357 REQ_BGL = False 7358
7359 - def ExpandNames(self):
7360 self._ExpandAndLockInstance() 7361
7362 - def BuildHooksEnv(self):
7363 """Build hooks env. 7364 7365 This runs on master, primary and secondary nodes of the instance. 7366 7367 """ 7368 env = { 7369 "IGNORE_SECONDARIES": self.op.ignore_secondaries, 7370 "REBOOT_TYPE": self.op.reboot_type, 7371 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 7372 } 7373 7374 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 7375 7376 return env
7377
7378 - def BuildHooksNodes(self):
7379 """Build hooks nodes. 7380 7381 """ 7382 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7383 return (nl, nl)
7384
7385 - def CheckPrereq(self):
7386 """Check prerequisites. 7387 7388 This checks that the instance is in the cluster. 7389 7390 """ 7391 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7392 assert self.instance is not None, \ 7393 "Cannot retrieve locked instance %s" % self.op.instance_name 7394 _CheckInstanceState(self, instance, INSTANCE_ONLINE) 7395 _CheckNodeOnline(self, instance.primary_node) 7396 7397 # check bridges existence 7398 _CheckInstanceBridgesExist(self, instance)
7399
7400 - def Exec(self, feedback_fn):
7401 """Reboot the instance. 7402 7403 """ 7404 instance = self.instance 7405 ignore_secondaries = self.op.ignore_secondaries 7406 reboot_type = self.op.reboot_type 7407 7408 remote_info = self.rpc.call_instance_info(instance.primary_node, 7409 instance.name, 7410 instance.hypervisor) 7411 remote_info.Raise("Error checking node %s" % instance.primary_node) 7412 instance_running = bool(remote_info.payload) 7413 7414 node_current = instance.primary_node 7415 7416 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT, 7417 constants.INSTANCE_REBOOT_HARD]: 7418 for disk in instance.disks: 7419 self.cfg.SetDiskID(disk, node_current) 7420 result = self.rpc.call_instance_reboot(node_current, instance, 7421 reboot_type, 7422 self.op.shutdown_timeout) 7423 result.Raise("Could not reboot instance") 7424 else: 7425 if instance_running: 7426 result = self.rpc.call_instance_shutdown(node_current, instance, 7427 self.op.shutdown_timeout) 7428 result.Raise("Could not shutdown instance for full reboot") 7429 _ShutdownInstanceDisks(self, instance) 7430 else: 7431 self.LogInfo("Instance %s was already stopped, starting now", 7432 instance.name) 7433 _StartInstanceDisks(self, instance, ignore_secondaries) 7434 result = self.rpc.call_instance_start(node_current, 7435 (instance, None, None), False) 7436 msg = result.fail_msg 7437 if msg: 7438 _ShutdownInstanceDisks(self, instance) 7439 raise errors.OpExecError("Could not start instance for" 7440 " full reboot: %s" % msg) 7441 7442 self.cfg.MarkInstanceUp(instance.name)
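# Editor's note (summary of the logic above, illustrative wording): a soft or
# hard reboot of a running instance is delegated to the node daemon via
# call_instance_reboot; otherwise the reboot is emulated by shutting the
# instance and its disks down if it is running, then (re)activating the disks
# and starting the instance again.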
7443
7444 7445 -class LUInstanceShutdown(LogicalUnit):
7446 """Shutdown an instance. 7447 7448 """ 7449 HPATH = "instance-stop" 7450 HTYPE = constants.HTYPE_INSTANCE 7451 REQ_BGL = False 7452
7453 - def ExpandNames(self):
7454 self._ExpandAndLockInstance() 7455
7456 - def BuildHooksEnv(self):
7457 """Build hooks env. 7458 7459 This runs on master, primary and secondary nodes of the instance. 7460 7461 """ 7462 env = _BuildInstanceHookEnvByObject(self, self.instance) 7463 env["TIMEOUT"] = self.op.timeout 7464 return env
7465
7466 - def BuildHooksNodes(self):
7467 """Build hooks nodes. 7468 7469 """ 7470 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7471 return (nl, nl)
7472
7473 - def CheckPrereq(self):
7474 """Check prerequisites. 7475 7476 This checks that the instance is in the cluster. 7477 7478 """ 7479 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7480 assert self.instance is not None, \ 7481 "Cannot retrieve locked instance %s" % self.op.instance_name 7482 7483 if not self.op.force: 7484 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE) 7485 else: 7486 self.LogWarning("Ignoring offline instance check") 7487 7488 self.primary_offline = \ 7489 self.cfg.GetNodeInfo(self.instance.primary_node).offline 7490 7491 if self.primary_offline and self.op.ignore_offline_nodes: 7492 self.LogWarning("Ignoring offline primary node") 7493 else: 7494 _CheckNodeOnline(self, self.instance.primary_node)
7495
7496 - def Exec(self, feedback_fn):
7497 """Shutdown the instance. 7498 7499 """ 7500 instance = self.instance 7501 node_current = instance.primary_node 7502 timeout = self.op.timeout 7503 7504 # If the instance is offline we shouldn't mark it as down, as that 7505 # resets the offline flag. 7506 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE: 7507 self.cfg.MarkInstanceDown(instance.name) 7508 7509 if self.primary_offline: 7510 assert self.op.ignore_offline_nodes 7511 self.LogInfo("Primary node offline, marked instance as stopped") 7512 else: 7513 result = self.rpc.call_instance_shutdown(node_current, instance, timeout) 7514 msg = result.fail_msg 7515 if msg: 7516 self.LogWarning("Could not shutdown instance: %s", msg) 7517 7518 _ShutdownInstanceDisks(self, instance)
7519
7520 7521 -class LUInstanceReinstall(LogicalUnit):
7522 """Reinstall an instance. 7523 7524 """ 7525 HPATH = "instance-reinstall" 7526 HTYPE = constants.HTYPE_INSTANCE 7527 REQ_BGL = False 7528
7529 - def ExpandNames(self):
7530 self._ExpandAndLockInstance() 7531
7532 - def BuildHooksEnv(self):
7533 """Build hooks env. 7534 7535 This runs on master, primary and secondary nodes of the instance. 7536 7537 """ 7538 return _BuildInstanceHookEnvByObject(self, self.instance)
7539
7540 - def BuildHooksNodes(self):
7541 """Build hooks nodes. 7542 7543 """ 7544 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7545 return (nl, nl)
7546
7547 - def CheckPrereq(self):
7548 """Check prerequisites. 7549 7550 This checks that the instance is in the cluster and is not running. 7551 7552 """ 7553 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7554 assert instance is not None, \ 7555 "Cannot retrieve locked instance %s" % self.op.instance_name 7556 _CheckNodeOnline(self, instance.primary_node, "Instance primary node" 7557 " offline, cannot reinstall") 7558 7559 if instance.disk_template == constants.DT_DISKLESS: 7560 raise errors.OpPrereqError("Instance '%s' has no disks" % 7561 self.op.instance_name, 7562 errors.ECODE_INVAL) 7563 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall") 7564 7565 if self.op.os_type is not None: 7566 # OS verification 7567 pnode = _ExpandNodeName(self.cfg, instance.primary_node) 7568 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) 7569 instance_os = self.op.os_type 7570 else: 7571 instance_os = instance.os 7572 7573 nodelist = list(instance.all_nodes) 7574 7575 if self.op.osparams: 7576 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 7577 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 7578 self.os_inst = i_osdict # the new dict (without defaults) 7579 else: 7580 self.os_inst = None 7581 7582 self.instance = instance
7583
7584 - def Exec(self, feedback_fn):
7585 """Reinstall the instance. 7586 7587 """ 7588 inst = self.instance 7589 7590 if self.op.os_type is not None: 7591 feedback_fn("Changing OS to '%s'..." % self.op.os_type) 7592 inst.os = self.op.os_type 7593 # Write to configuration 7594 self.cfg.Update(inst, feedback_fn) 7595 7596 _StartInstanceDisks(self, inst, None) 7597 try: 7598 feedback_fn("Running the instance OS create scripts...") 7599 # FIXME: pass debug option from opcode to backend 7600 result = self.rpc.call_instance_os_add(inst.primary_node, 7601 (inst, self.os_inst), True, 7602 self.op.debug_level) 7603 result.Raise("Could not install OS for instance %s on node %s" % 7604 (inst.name, inst.primary_node)) 7605 finally: 7606 _ShutdownInstanceDisks(self, inst)
7607
7608 7609 -class LUInstanceRecreateDisks(LogicalUnit):
7610 """Recreate an instance's missing disks. 7611 7612 """ 7613 HPATH = "instance-recreate-disks" 7614 HTYPE = constants.HTYPE_INSTANCE 7615 REQ_BGL = False 7616 7617 _MODIFYABLE = compat.UniqueFrozenset([ 7618 constants.IDISK_SIZE, 7619 constants.IDISK_MODE, 7620 ]) 7621 7622 # New or changed disk parameters may have different semantics 7623 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([ 7624 constants.IDISK_ADOPT, 7625 7626 # TODO: Implement support changing VG while recreating 7627 constants.IDISK_VG, 7628 constants.IDISK_METAVG, 7629 constants.IDISK_PROVIDER, 7630 ])) 7631
7632 - def _RunAllocator(self):
7633 """Run the allocator based on input opcode. 7634 7635 """ 7636 be_full = self.cfg.GetClusterInfo().FillBE(self.instance) 7637 7638 # FIXME 7639 # The allocator should actually run in "relocate" mode, but current 7640 # allocators don't support relocating all the nodes of an instance at 7641 # the same time. As a workaround we use "allocate" mode, but this is 7642 # suboptimal for two reasons: 7643 # - The instance name passed to the allocator is present in the list of 7644 # existing instances, so there could be a conflict within the 7645 # internal structures of the allocator. This doesn't happen with the 7646 # current allocators, but it's a liability. 7647 # - The allocator counts the resources used by the instance twice: once 7648 # because the instance exists already, and once because it tries to 7649 # allocate a new instance. 7650 # The allocator could choose some of the nodes on which the instance is 7651 # running, but that's not a problem. If the instance nodes are broken, 7652 # they should be already be marked as drained or offline, and hence 7653 # skipped by the allocator. If instance disks have been lost for other 7654 # reasons, then recreating the disks on the same nodes should be fine. 7655 disk_template = self.instance.disk_template 7656 spindle_use = be_full[constants.BE_SPINDLE_USE] 7657 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name, 7658 disk_template=disk_template, 7659 tags=list(self.instance.GetTags()), 7660 os=self.instance.os, 7661 nics=[{}], 7662 vcpus=be_full[constants.BE_VCPUS], 7663 memory=be_full[constants.BE_MAXMEM], 7664 spindle_use=spindle_use, 7665 disks=[{constants.IDISK_SIZE: d.size, 7666 constants.IDISK_MODE: d.mode} 7667 for d in self.instance.disks], 7668 hypervisor=self.instance.hypervisor, 7669 node_whitelist=None) 7670 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 7671 7672 ial.Run(self.op.iallocator) 7673 7674 assert req.RequiredNodes() == len(self.instance.all_nodes) 7675 7676 if not ial.success: 7677 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 7678 " %s" % (self.op.iallocator, ial.info), 7679 errors.ECODE_NORES) 7680 7681 self.op.nodes = ial.result 7682 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 7683 self.op.instance_name, self.op.iallocator, 7684 utils.CommaJoin(ial.result))
7685
7686 - def CheckArguments(self):
7687 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]): 7688 # Normalize and convert deprecated list of disk indices 7689 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))] 7690 7691 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks)) 7692 if duplicates: 7693 raise errors.OpPrereqError("Some disks have been specified more than" 7694 " once: %s" % utils.CommaJoin(duplicates), 7695 errors.ECODE_INVAL) 7696 7697 # We don't want _CheckIAllocatorOrNode selecting the default iallocator 7698 # when neither iallocator nor nodes are specified 7699 if self.op.iallocator or self.op.nodes: 7700 _CheckIAllocatorOrNode(self, "iallocator", "nodes") 7701 7702 for (idx, params) in self.op.disks: 7703 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES) 7704 unsupported = frozenset(params.keys()) - self._MODIFYABLE 7705 if unsupported: 7706 raise errors.OpPrereqError("Parameters for disk %s try to change" 7707 " unmodifiable parameter(s): %s" % 7708 (idx, utils.CommaJoin(unsupported)), 7709 errors.ECODE_INVAL)
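# Editor's sketch (illustrative only): the normalization of the deprecated
# index-only form above, as a standalone function on plain values; the
# isinstance check is a simplified stand-in for ht.TNonNegativeInt.
def _normalize_recreate_disks(disks):
  """Turn e.g. [2, 0, 2] into [(0, {}), (2, {})]; pass (idx, params) pairs through."""
  if disks and isinstance(disks[0], int) and disks[0] >= 0:
    return [(idx, {}) for idx in sorted(frozenset(disks))]
  return disks

# _normalize_recreate_disks([2, 0, 2]) -> [(0, {}), (2, {})]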
7710
7711 - def ExpandNames(self):
7712 self._ExpandAndLockInstance() 7713 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 7714 7715 if self.op.nodes: 7716 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes] 7717 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes) 7718 else: 7719 self.needed_locks[locking.LEVEL_NODE] = [] 7720 if self.op.iallocator: 7721 # iallocator will select a new node in the same group 7722 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 7723 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET 7724 7725 self.needed_locks[locking.LEVEL_NODE_RES] = []
7726
7727 - def DeclareLocks(self, level):
7728 if level == locking.LEVEL_NODEGROUP: 7729 assert self.op.iallocator is not None 7730 assert not self.op.nodes 7731 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 7732 self.share_locks[locking.LEVEL_NODEGROUP] = 1 7733 # Lock the primary group used by the instance optimistically; this 7734 # requires going via the node before it's locked, requiring 7735 # verification later on 7736 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 7737 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True) 7738 7739 elif level == locking.LEVEL_NODE: 7740 # If an allocator is used, then we lock all the nodes in the current 7741 # instance group, as we don't know yet which ones will be selected; 7742 # if we replace the nodes without using an allocator, locks are 7743 # already declared in ExpandNames; otherwise, we need to lock all the 7744 # instance nodes for disk re-creation 7745 if self.op.iallocator: 7746 assert not self.op.nodes 7747 assert not self.needed_locks[locking.LEVEL_NODE] 7748 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1 7749 7750 # Lock member nodes of the group of the primary node 7751 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP): 7752 self.needed_locks[locking.LEVEL_NODE].extend( 7753 self.cfg.GetNodeGroup(group_uuid).members) 7754 7755 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC) 7756 elif not self.op.nodes: 7757 self._LockInstancesNodes(primary_only=False) 7758 elif level == locking.LEVEL_NODE_RES: 7759 # Copy node locks 7760 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7761 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7762
7763 - def BuildHooksEnv(self):
7764 """Build hooks env. 7765 7766 This runs on master, primary and secondary nodes of the instance. 7767 7768 """ 7769 return _BuildInstanceHookEnvByObject(self, self.instance)
7770
7771 - def BuildHooksNodes(self):
7772 """Build hooks nodes. 7773 7774 """ 7775 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7776 return (nl, nl)
7777
7778 - def CheckPrereq(self):
7779 """Check prerequisites. 7780 7781 This checks that the instance is in the cluster and is not running. 7782 7783 """ 7784 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7785 assert instance is not None, \ 7786 "Cannot retrieve locked instance %s" % self.op.instance_name 7787 if self.op.nodes: 7788 if len(self.op.nodes) != len(instance.all_nodes): 7789 raise errors.OpPrereqError("Instance %s currently has %d nodes, but" 7790 " %d replacement nodes were specified" % 7791 (instance.name, len(instance.all_nodes), 7792 len(self.op.nodes)), 7793 errors.ECODE_INVAL) 7794 assert instance.disk_template != constants.DT_DRBD8 or \ 7795 len(self.op.nodes) == 2 7796 assert instance.disk_template != constants.DT_PLAIN or \ 7797 len(self.op.nodes) == 1 7798 primary_node = self.op.nodes[0] 7799 else: 7800 primary_node = instance.primary_node 7801 if not self.op.iallocator: 7802 _CheckNodeOnline(self, primary_node) 7803 7804 if instance.disk_template == constants.DT_DISKLESS: 7805 raise errors.OpPrereqError("Instance '%s' has no disks" % 7806 self.op.instance_name, errors.ECODE_INVAL) 7807 7808 # Verify if node group locks are still correct 7809 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) 7810 if owned_groups: 7811 # Node group locks are acquired only for the primary node (and only 7812 # when the allocator is used) 7813 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups, 7814 primary_only=True) 7815 7816 # if we replace nodes *and* the old primary is offline, we don't 7817 # check the instance state 7818 old_pnode = self.cfg.GetNodeInfo(instance.primary_node) 7819 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline): 7820 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING, 7821 msg="cannot recreate disks") 7822 7823 if self.op.disks: 7824 self.disks = dict(self.op.disks) 7825 else: 7826 self.disks = dict((idx, {}) for idx in range(len(instance.disks))) 7827 7828 maxidx = max(self.disks.keys()) 7829 if maxidx >= len(instance.disks): 7830 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx, 7831 errors.ECODE_INVAL) 7832 7833 if ((self.op.nodes or self.op.iallocator) and 7834 sorted(self.disks.keys()) != range(len(instance.disks))): 7835 raise errors.OpPrereqError("Can't recreate disks partially and" 7836 " change the nodes at the same time", 7837 errors.ECODE_INVAL) 7838 7839 self.instance = instance 7840 7841 if self.op.iallocator: 7842 self._RunAllocator() 7843 # Release unneeded node and node resource locks 7844 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes) 7845 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes) 7846 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC) 7847 7848 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7849
7850 - def Exec(self, feedback_fn):
7851 """Recreate the disks. 7852 7853 """ 7854 instance = self.instance 7855 7856 assert (self.owned_locks(locking.LEVEL_NODE) == 7857 self.owned_locks(locking.LEVEL_NODE_RES)) 7858 7859 to_skip = [] 7860 mods = [] # keeps track of needed changes 7861 7862 for idx, disk in enumerate(instance.disks): 7863 try: 7864 changes = self.disks[idx] 7865 except KeyError: 7866 # Disk should not be recreated 7867 to_skip.append(idx) 7868 continue 7869 7870 # update secondaries for disks, if needed 7871 if self.op.nodes and disk.dev_type == constants.LD_DRBD8: 7872 # need to update the nodes and minors 7873 assert len(self.op.nodes) == 2 7874 assert len(disk.logical_id) == 6 # otherwise disk internals 7875 # have changed 7876 (_, _, old_port, _, _, old_secret) = disk.logical_id 7877 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name) 7878 new_id = (self.op.nodes[0], self.op.nodes[1], old_port, 7879 new_minors[0], new_minors[1], old_secret) 7880 assert len(disk.logical_id) == len(new_id) 7881 else: 7882 new_id = None 7883 7884 mods.append((idx, new_id, changes)) 7885 7886 # now that we have passed all asserts above, we can apply the mods 7887 # in a single run (to avoid partial changes) 7888 for idx, new_id, changes in mods: 7889 disk = instance.disks[idx] 7890 if new_id is not None: 7891 assert disk.dev_type == constants.LD_DRBD8 7892 disk.logical_id = new_id 7893 if changes: 7894 disk.Update(size=changes.get(constants.IDISK_SIZE, None), 7895 mode=changes.get(constants.IDISK_MODE, None)) 7896 7897 # change primary node, if needed 7898 if self.op.nodes: 7899 instance.primary_node = self.op.nodes[0] 7900 self.LogWarning("Changing the instance's nodes, you will have to" 7901 " remove any disks left on the older nodes manually") 7902 7903 if self.op.nodes: 7904 self.cfg.Update(instance, feedback_fn) 7905 7906 # All touched nodes must be locked 7907 mylocks = self.owned_locks(locking.LEVEL_NODE) 7908 assert mylocks.issuperset(frozenset(instance.all_nodes)) 7909 new_disks = _CreateDisks(self, instance, to_skip=to_skip) 7910 7911 # TODO: Release node locks before wiping, or explain why it's not possible 7912 if self.cfg.GetClusterInfo().prealloc_wipe_disks: 7913 wipedisks = [(idx, disk, 0) 7914 for (idx, disk) in enumerate(instance.disks) 7915 if idx not in to_skip] 7916 _WipeOrCleanupDisks(self, instance, disks=wipedisks, cleanup=new_disks)
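# Editor's note (illustrative values): for a DRBD8 disk the logical_id used
# above is the 6-tuple (node_a, node_b, port, minor_a, minor_b, secret);
# recreating the disk on new nodes keeps the port and the shared secret and
# replaces the node names and minors, e.g.
#   old: ("node1.example.com", "node2.example.com", 11000, 0, 0, "s3cr3t")
#   new: ("node3.example.com", "node4.example.com", 11000, 1, 1, "s3cr3t")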
7917
7918 7919 -class LUInstanceRename(LogicalUnit):
7920 """Rename an instance. 7921 7922 """ 7923 HPATH = "instance-rename" 7924 HTYPE = constants.HTYPE_INSTANCE 7925
7926 - def CheckArguments(self):
7927 """Check arguments. 7928 7929 """ 7930 if self.op.ip_check and not self.op.name_check: 7931 # TODO: make the ip check more flexible and not depend on the name check 7932 raise errors.OpPrereqError("IP address check requires a name check", 7933 errors.ECODE_INVAL)
7934
7935 - def BuildHooksEnv(self):
7936 """Build hooks env. 7937 7938 This runs on master, primary and secondary nodes of the instance. 7939 7940 """ 7941 env = _BuildInstanceHookEnvByObject(self, self.instance) 7942 env["INSTANCE_NEW_NAME"] = self.op.new_name 7943 return env
7944
7945 - def BuildHooksNodes(self):
7946 """Build hooks nodes. 7947 7948 """ 7949 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7950 return (nl, nl)
7951
7952 - def CheckPrereq(self):
7953 """Check prerequisites. 7954 7955 This checks that the instance is in the cluster and is not running. 7956 7957 """ 7958 self.op.instance_name = _ExpandInstanceName(self.cfg, 7959 self.op.instance_name) 7960 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7961 assert instance is not None 7962 _CheckNodeOnline(self, instance.primary_node) 7963 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING, 7964 msg="cannot rename") 7965 self.instance = instance 7966 7967 new_name = self.op.new_name 7968 if self.op.name_check: 7969 hostname = _CheckHostnameSane(self, new_name) 7970 new_name = self.op.new_name = hostname.name 7971 if (self.op.ip_check and 7972 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)): 7973 raise errors.OpPrereqError("IP %s of instance %s already in use" % 7974 (hostname.ip, new_name), 7975 errors.ECODE_NOTUNIQUE) 7976 7977 instance_list = self.cfg.GetInstanceList() 7978 if new_name in instance_list and new_name != instance.name: 7979 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 7980 new_name, errors.ECODE_EXISTS)
7981
7982 - def Exec(self, feedback_fn):
7983 """Rename the instance. 7984 7985 """ 7986 inst = self.instance 7987 old_name = inst.name 7988 7989 rename_file_storage = False 7990 if (inst.disk_template in constants.DTS_FILEBASED and 7991 self.op.new_name != inst.name): 7992 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 7993 rename_file_storage = True 7994 7995 self.cfg.RenameInstance(inst.name, self.op.new_name) 7996 # Change the instance lock. This is definitely safe while we hold the BGL. 7997 # Otherwise the new lock would have to be added in acquired mode. 7998 assert self.REQ_BGL 7999 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER) 8000 self.glm.remove(locking.LEVEL_INSTANCE, old_name) 8001 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) 8002 8003 # re-read the instance from the configuration after rename 8004 inst = self.cfg.GetInstanceInfo(self.op.new_name) 8005 8006 if rename_file_storage: 8007 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 8008 result = self.rpc.call_file_storage_dir_rename(inst.primary_node, 8009 old_file_storage_dir, 8010 new_file_storage_dir) 8011 result.Raise("Could not rename on node %s directory '%s' to '%s'" 8012 " (but the instance has been renamed in Ganeti)" % 8013 (inst.primary_node, old_file_storage_dir, 8014 new_file_storage_dir)) 8015 8016 _StartInstanceDisks(self, inst, None) 8017 # update info on disks 8018 info = _GetInstanceInfoText(inst) 8019 for (idx, disk) in enumerate(inst.disks): 8020 for node in inst.all_nodes: 8021 self.cfg.SetDiskID(disk, node) 8022 result = self.rpc.call_blockdev_setinfo(node, disk, info) 8023 if result.fail_msg: 8024 self.LogWarning("Error setting info on node %s for disk %s: %s", 8025 node, idx, result.fail_msg) 8026 try: 8027 result = self.rpc.call_instance_run_rename(inst.primary_node, inst, 8028 old_name, self.op.debug_level) 8029 msg = result.fail_msg 8030 if msg: 8031 msg = ("Could not run OS rename script for instance %s on node %s" 8032 " (but the instance has been renamed in Ganeti): %s" % 8033 (inst.name, inst.primary_node, msg)) 8034 self.LogWarning(msg) 8035 finally: 8036 _ShutdownInstanceDisks(self, inst) 8037 8038 return inst.name
8039
8040 8041 -class LUInstanceRemove(LogicalUnit):
8042 """Remove an instance. 8043 8044 """ 8045 HPATH = "instance-remove" 8046 HTYPE = constants.HTYPE_INSTANCE 8047 REQ_BGL = False 8048
8049 - def ExpandNames(self):
8050 self._ExpandAndLockInstance() 8051 self.needed_locks[locking.LEVEL_NODE] = [] 8052 self.needed_locks[locking.LEVEL_NODE_RES] = [] 8053 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8054
8055 - def DeclareLocks(self, level):
8056 if level == locking.LEVEL_NODE: 8057 self._LockInstancesNodes() 8058 elif level == locking.LEVEL_NODE_RES: 8059 # Copy node locks 8060 self.needed_locks[locking.LEVEL_NODE_RES] = \ 8061 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8062
8063 - def BuildHooksEnv(self):
8064 """Build hooks env. 8065 8066 This runs on master, primary and secondary nodes of the instance. 8067 8068 """ 8069 env = _BuildInstanceHookEnvByObject(self, self.instance) 8070 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout 8071 return env
8072
8073 - def BuildHooksNodes(self):
8074 """Build hooks nodes. 8075 8076 """ 8077 nl = [self.cfg.GetMasterNode()] 8078 nl_post = list(self.instance.all_nodes) + nl 8079 return (nl, nl_post)
8080
8081 - def CheckPrereq(self):
8082 """Check prerequisites. 8083 8084 This checks that the instance is in the cluster. 8085 8086 """ 8087 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8088 assert self.instance is not None, \ 8089 "Cannot retrieve locked instance %s" % self.op.instance_name
8090
8091 - def Exec(self, feedback_fn):
8092 """Remove the instance. 8093 8094 """ 8095 instance = self.instance 8096 logging.info("Shutting down instance %s on node %s", 8097 instance.name, instance.primary_node) 8098 8099 result = self.rpc.call_instance_shutdown(instance.primary_node, instance, 8100 self.op.shutdown_timeout) 8101 msg = result.fail_msg 8102 if msg: 8103 if self.op.ignore_failures: 8104 feedback_fn("Warning: can't shutdown instance: %s" % msg) 8105 else: 8106 raise errors.OpExecError("Could not shutdown instance %s on" 8107 " node %s: %s" % 8108 (instance.name, instance.primary_node, msg)) 8109 8110 assert (self.owned_locks(locking.LEVEL_NODE) == 8111 self.owned_locks(locking.LEVEL_NODE_RES)) 8112 assert not (set(instance.all_nodes) - 8113 self.owned_locks(locking.LEVEL_NODE)), \ 8114 "Not owning correct locks" 8115 8116 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8117
8118 8119 -def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8120 """Utility function to remove an instance. 8121 8122 """ 8123 logging.info("Removing block devices for instance %s", instance.name) 8124 8125 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures): 8126 if not ignore_failures: 8127 raise errors.OpExecError("Can't remove instance's disks") 8128 feedback_fn("Warning: can't remove instance's disks") 8129 8130 logging.info("Removing instance %s out of cluster config", instance.name) 8131 8132 lu.cfg.RemoveInstance(instance.name) 8133 8134 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ 8135 "Instance lock removal conflict" 8136 8137 # Remove lock for the instance 8138 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8139
8140 8141 -class LUInstanceQuery(NoHooksLU):
8142 """Logical unit for querying instances. 8143 8144 """ 8145 # pylint: disable=W0142 8146 REQ_BGL = False 8147
8148 - def CheckArguments(self):
8149 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names), 8150 self.op.output_fields, self.op.use_locking)
8151
8152 - def ExpandNames(self):
8153 self.iq.ExpandNames(self)
8154
8155 - def DeclareLocks(self, level):
8156 self.iq.DeclareLocks(self, level)
8157
8158 - def Exec(self, feedback_fn):
8159 return self.iq.OldStyleQuery(self)
8160
8161 8162 -def _ExpandNamesForMigration(lu):
8163 """Expands names for use with L{TLMigrateInstance}. 8164 8165 @type lu: L{LogicalUnit} 8166 8167 """ 8168 if lu.op.target_node is not None: 8169 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node) 8170 8171 lu.needed_locks[locking.LEVEL_NODE] = [] 8172 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 8173 8174 lu.needed_locks[locking.LEVEL_NODE_RES] = [] 8175 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 8176 8177 # The node allocation lock is actually only needed for externally replicated 8178 # instances (e.g. sharedfile or RBD) and if an iallocator is used. 8179 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8180
8181 8182 -def _DeclareLocksForMigration(lu, level):
8183 """Declares locks for L{TLMigrateInstance}. 8184 8185 @type lu: L{LogicalUnit} 8186 @param level: Lock level 8187 8188 """ 8189 if level == locking.LEVEL_NODE_ALLOC: 8190 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) 8191 8192 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name) 8193 8194 # Node locks are already declared here rather than at LEVEL_NODE as we need 8195 # the instance object anyway to declare the node allocation lock. 8196 if instance.disk_template in constants.DTS_EXT_MIRROR: 8197 if lu.op.target_node is None: 8198 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 8199 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET 8200 else: 8201 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node, 8202 lu.op.target_node] 8203 del lu.recalculate_locks[locking.LEVEL_NODE] 8204 else: 8205 lu._LockInstancesNodes() # pylint: disable=W0212 8206 8207 elif level == locking.LEVEL_NODE: 8208 # Node locks are declared together with the node allocation lock 8209 assert (lu.needed_locks[locking.LEVEL_NODE] or 8210 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET) 8211 8212 elif level == locking.LEVEL_NODE_RES: 8213 # Copy node locks 8214 lu.needed_locks[locking.LEVEL_NODE_RES] = \ 8215 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8216
8217 8218 -class LUInstanceFailover(LogicalUnit):
8219 """Failover an instance. 8220 8221 """ 8222 HPATH = "instance-failover" 8223 HTYPE = constants.HTYPE_INSTANCE 8224 REQ_BGL = False 8225
8226 - def CheckArguments(self):
8227 """Check the arguments. 8228 8229 """ 8230 self.iallocator = getattr(self.op, "iallocator", None) 8231 self.target_node = getattr(self.op, "target_node", None)
8232
8233 - def ExpandNames(self):
8234 self._ExpandAndLockInstance() 8235 _ExpandNamesForMigration(self) 8236 8237 self._migrater = \ 8238 TLMigrateInstance(self, self.op.instance_name, False, True, False, 8239 self.op.ignore_consistency, True, 8240 self.op.shutdown_timeout, self.op.ignore_ipolicy) 8241 8242 self.tasklets = [self._migrater]
8243
8244 - def DeclareLocks(self, level):
8245 _DeclareLocksForMigration(self, level)
8246
8247 - def BuildHooksEnv(self):
8248 """Build hooks env. 8249 8250 This runs on master, primary and secondary nodes of the instance. 8251 8252 """ 8253 instance = self._migrater.instance 8254 source_node = instance.primary_node 8255 target_node = self.op.target_node 8256 env = { 8257 "IGNORE_CONSISTENCY": self.op.ignore_consistency, 8258 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 8259 "OLD_PRIMARY": source_node, 8260 "NEW_PRIMARY": target_node, 8261 } 8262 8263 if instance.disk_template in constants.DTS_INT_MIRROR: 8264 env["OLD_SECONDARY"] = instance.secondary_nodes[0] 8265 env["NEW_SECONDARY"] = source_node 8266 else: 8267 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = "" 8268 8269 env.update(_BuildInstanceHookEnvByObject(self, instance)) 8270 8271 return env
8272
8273 - def BuildHooksNodes(self):
8274 """Build hooks nodes. 8275 8276 """ 8277 instance = self._migrater.instance 8278 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 8279 return (nl, nl + [instance.primary_node])
8280
8281 8282 -class LUInstanceMigrate(LogicalUnit):
8283 """Migrate an instance. 8284 8285 This is migration without shutting down, compared to the failover, 8286 which is done with shutdown. 8287 8288 """ 8289 HPATH = "instance-migrate" 8290 HTYPE = constants.HTYPE_INSTANCE 8291 REQ_BGL = False 8292
8293 - def ExpandNames(self):
8294 self._ExpandAndLockInstance() 8295 _ExpandNamesForMigration(self) 8296 8297 self._migrater = \ 8298 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup, 8299 False, self.op.allow_failover, False, 8300 self.op.allow_runtime_changes, 8301 constants.DEFAULT_SHUTDOWN_TIMEOUT, 8302 self.op.ignore_ipolicy) 8303 8304 self.tasklets = [self._migrater]
8305
8306 - def DeclareLocks(self, level):
8307 _DeclareLocksForMigration(self, level)
8308
8309 - def BuildHooksEnv(self):
8310 """Build hooks env. 8311 8312 This runs on master, primary and secondary nodes of the instance. 8313 8314 """ 8315 instance = self._migrater.instance 8316 source_node = instance.primary_node 8317 target_node = self.op.target_node 8318 env = _BuildInstanceHookEnvByObject(self, instance) 8319 env.update({ 8320 "MIGRATE_LIVE": self._migrater.live, 8321 "MIGRATE_CLEANUP": self.op.cleanup, 8322 "OLD_PRIMARY": source_node, 8323 "NEW_PRIMARY": target_node, 8324 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, 8325 }) 8326 8327 if instance.disk_template in constants.DTS_INT_MIRROR: 8328 env["OLD_SECONDARY"] = target_node 8329 env["NEW_SECONDARY"] = source_node 8330 else: 8331 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None 8332 8333 return env
8334
8335 - def BuildHooksNodes(self):
8336 """Build hooks nodes. 8337 8338 """ 8339 instance = self._migrater.instance 8340 snodes = list(instance.secondary_nodes) 8341 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes 8342 return (nl, nl)
8343
8344 8345 -class LUInstanceMove(LogicalUnit):
8346 """Move an instance by data-copying. 8347 8348 """ 8349 HPATH = "instance-move" 8350 HTYPE = constants.HTYPE_INSTANCE 8351 REQ_BGL = False 8352
8353 - def ExpandNames(self):
8354 self._ExpandAndLockInstance() 8355 target_node = _ExpandNodeName(self.cfg, self.op.target_node) 8356 self.op.target_node = target_node 8357 self.needed_locks[locking.LEVEL_NODE] = [target_node] 8358 self.needed_locks[locking.LEVEL_NODE_RES] = [] 8359 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8360
8361 - def DeclareLocks(self, level):
8362 if level == locking.LEVEL_NODE: 8363 self._LockInstancesNodes(primary_only=True) 8364 elif level == locking.LEVEL_NODE_RES: 8365 # Copy node locks 8366 self.needed_locks[locking.LEVEL_NODE_RES] = \ 8367 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8368
8369 - def BuildHooksEnv(self):
8370 """Build hooks env. 8371 8372 This runs on master, primary and secondary nodes of the instance. 8373 8374 """ 8375 env = { 8376 "TARGET_NODE": self.op.target_node, 8377 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 8378 } 8379 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 8380 return env
8381
8382 - def BuildHooksNodes(self):
8383 """Build hooks nodes. 8384 8385 """ 8386 nl = [ 8387 self.cfg.GetMasterNode(), 8388 self.instance.primary_node, 8389 self.op.target_node, 8390 ] 8391 return (nl, nl)
8392
8393 - def CheckPrereq(self):
8394 """Check prerequisites. 8395 8396 This checks that the instance is in the cluster. 8397 8398 """ 8399 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 8400 assert self.instance is not None, \ 8401 "Cannot retrieve locked instance %s" % self.op.instance_name 8402 8403 if instance.disk_template not in constants.DTS_COPYABLE: 8404 raise errors.OpPrereqError("Disk template %s not suitable for copying" % 8405 instance.disk_template, errors.ECODE_STATE) 8406 8407 node = self.cfg.GetNodeInfo(self.op.target_node) 8408 assert node is not None, \ 8409 "Cannot retrieve locked node %s" % self.op.target_node 8410 8411 self.target_node = target_node = node.name 8412 8413 if target_node == instance.primary_node: 8414 raise errors.OpPrereqError("Instance %s is already on the node %s" % 8415 (instance.name, target_node), 8416 errors.ECODE_STATE) 8417 8418 bep = self.cfg.GetClusterInfo().FillBE(instance) 8419 8420 for idx, dsk in enumerate(instance.disks): 8421 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE): 8422 raise errors.OpPrereqError("Instance disk %d has a complex layout," 8423 " cannot copy" % idx, errors.ECODE_STATE) 8424 8425 _CheckNodeOnline(self, target_node) 8426 _CheckNodeNotDrained(self, target_node) 8427 _CheckNodeVmCapable(self, target_node) 8428 cluster = self.cfg.GetClusterInfo() 8429 group_info = self.cfg.GetNodeGroup(node.group) 8430 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info) 8431 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg, 8432 ignore=self.op.ignore_ipolicy) 8433 8434 if instance.admin_state == constants.ADMINST_UP: 8435 # check memory requirements on the secondary node 8436 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 8437 instance.name, bep[constants.BE_MAXMEM], 8438 instance.hypervisor) 8439 else: 8440 self.LogInfo("Not checking memory on the secondary node as" 8441 " instance will not be started") 8442 8443 # check bridge existance 8444 _CheckInstanceBridgesExist(self, instance, node=target_node)
8445
8446 - def Exec(self, feedback_fn):
8447 """Move an instance. 8448 8449 The move is done by shutting it down on its present node, copying 8450 the data over (slow) and starting it on the new node. 8451 8452 """ 8453 instance = self.instance 8454 8455 source_node = instance.primary_node 8456 target_node = self.target_node 8457 8458 self.LogInfo("Shutting down instance %s on source node %s", 8459 instance.name, source_node) 8460 8461 assert (self.owned_locks(locking.LEVEL_NODE) == 8462 self.owned_locks(locking.LEVEL_NODE_RES)) 8463 8464 result = self.rpc.call_instance_shutdown(source_node, instance, 8465 self.op.shutdown_timeout) 8466 msg = result.fail_msg 8467 if msg: 8468 if self.op.ignore_consistency: 8469 self.LogWarning("Could not shutdown instance %s on node %s." 8470 " Proceeding anyway. Please make sure node" 8471 " %s is down. Error details: %s", 8472 instance.name, source_node, source_node, msg) 8473 else: 8474 raise errors.OpExecError("Could not shutdown instance %s on" 8475 " node %s: %s" % 8476 (instance.name, source_node, msg)) 8477 8478 # create the target disks 8479 try: 8480 _CreateDisks(self, instance, target_node=target_node) 8481 except errors.OpExecError: 8482 self.LogWarning("Device creation failed") 8483 self.cfg.ReleaseDRBDMinors(instance.name) 8484 raise 8485 8486 cluster_name = self.cfg.GetClusterInfo().cluster_name 8487 8488 errs = [] 8489 # activate, get path, copy the data over 8490 for idx, disk in enumerate(instance.disks): 8491 self.LogInfo("Copying data for disk %d", idx) 8492 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance), 8493 instance.name, True, idx) 8494 if result.fail_msg: 8495 self.LogWarning("Can't assemble newly created disk %d: %s", 8496 idx, result.fail_msg) 8497 errs.append(result.fail_msg) 8498 break 8499 dev_path = result.payload 8500 result = self.rpc.call_blockdev_export(source_node, (disk, instance), 8501 target_node, dev_path, 8502 cluster_name) 8503 if result.fail_msg: 8504 self.LogWarning("Can't copy data over for disk %d: %s", 8505 idx, result.fail_msg) 8506 errs.append(result.fail_msg) 8507 break 8508 8509 if errs: 8510 self.LogWarning("Some disks failed to copy, aborting") 8511 try: 8512 _RemoveDisks(self, instance, target_node=target_node) 8513 finally: 8514 self.cfg.ReleaseDRBDMinors(instance.name) 8515 raise errors.OpExecError("Errors during disk copy: %s" % 8516 (",".join(errs),)) 8517 8518 instance.primary_node = target_node 8519 self.cfg.Update(instance, feedback_fn) 8520 8521 self.LogInfo("Removing the disks on the original node") 8522 _RemoveDisks(self, instance, target_node=source_node) 8523 8524 # Only start the instance if it's marked as up 8525 if instance.admin_state == constants.ADMINST_UP: 8526 self.LogInfo("Starting instance %s on node %s", 8527 instance.name, target_node) 8528 8529 disks_ok, _ = _AssembleInstanceDisks(self, instance, 8530 ignore_secondaries=True) 8531 if not disks_ok: 8532 _ShutdownInstanceDisks(self, instance) 8533 raise errors.OpExecError("Can't activate the instance's disks") 8534 8535 result = self.rpc.call_instance_start(target_node, 8536 (instance, None, None), False) 8537 msg = result.fail_msg 8538 if msg: 8539 _ShutdownInstanceDisks(self, instance) 8540 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 8541 (instance.name, target_node, msg))
8542
8543 8544 -class LUNodeMigrate(LogicalUnit):
8545 """Migrate all instances from a node. 8546 8547 """ 8548 HPATH = "node-migrate" 8549 HTYPE = constants.HTYPE_NODE 8550 REQ_BGL = False 8551
8552 - def CheckArguments(self):
8553 pass
8554
8555 - def ExpandNames(self):
8556 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 8557 8558 self.share_locks = _ShareAll() 8559 self.needed_locks = { 8560 locking.LEVEL_NODE: [self.op.node_name], 8561 }
8562
8563 - def BuildHooksEnv(self):
8564 """Build hooks env. 8565 8566 This runs on the master, the primary and all the secondaries. 8567 8568 """ 8569 return { 8570 "NODE_NAME": self.op.node_name, 8571 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, 8572 }
8573
8574 - def BuildHooksNodes(self):
8575 """Build hooks nodes. 8576 8577 """ 8578 nl = [self.cfg.GetMasterNode()] 8579 return (nl, nl)
8580
8581 - def CheckPrereq(self):
8582 pass
8583
8584 - def Exec(self, feedback_fn):
8585 # Prepare jobs for migration instances 8586 allow_runtime_changes = self.op.allow_runtime_changes 8587 jobs = [ 8588 [opcodes.OpInstanceMigrate(instance_name=inst.name, 8589 mode=self.op.mode, 8590 live=self.op.live, 8591 iallocator=self.op.iallocator, 8592 target_node=self.op.target_node, 8593 allow_runtime_changes=allow_runtime_changes, 8594 ignore_ipolicy=self.op.ignore_ipolicy)] 8595 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)] 8596 8597 # TODO: Run iallocator in this opcode and pass correct placement options to 8598 # OpInstanceMigrate. Since other jobs can modify the cluster between 8599 # running the iallocator and the actual migration, a good consistency model 8600 # will have to be found. 8601 8602 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == 8603 frozenset([self.op.node_name])) 8604 8605 return ResultWithJobs(jobs)
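An illustrative sketch (not part of this module) of the shape of the value returned by LUNodeMigrate.Exec: one job per primary instance, each job holding a single migrate opcode; the instance names and the plain dict standing in for opcodes.OpInstanceMigrate are made up.

def sketch_node_migrate_jobs(primary_instances):
  """Builds a jobs list mirroring the structure used above."""
  jobs = []
  for name in primary_instances:
    # each inner list is one job; a dict stands in for the real opcode object
    jobs.append([{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": name}])
  return jobs

print(sketch_node_migrate_jobs(["inst1.example.com", "inst2.example.com"]))
# -> [[{... inst1 ...}], [{... inst2 ...}]], i.e. each instance migrates in
#    its own job, so the jobs can run independently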
8606
8607 8608 -class TLMigrateInstance(Tasklet):
8609 """Tasklet class for instance migration. 8610 8611 @type live: boolean 8612 @ivar live: whether the migration will be done live or non-live; 8613 this variable is initalized only after CheckPrereq has run 8614 @type cleanup: boolean 8615 @ivar cleanup: Wheater we cleanup from a failed migration 8616 @type iallocator: string 8617 @ivar iallocator: The iallocator used to determine target_node 8618 @type target_node: string 8619 @ivar target_node: If given, the target_node to reallocate the instance to 8620 @type failover: boolean 8621 @ivar failover: Whether operation results in failover or migration 8622 @type fallback: boolean 8623 @ivar fallback: Whether fallback to failover is allowed if migration not 8624 possible 8625 @type ignore_consistency: boolean 8626 @ivar ignore_consistency: Wheter we should ignore consistency between source 8627 and target node 8628 @type shutdown_timeout: int 8629 @ivar shutdown_timeout: In case of failover timeout of the shutdown 8630 @type ignore_ipolicy: bool 8631 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating 8632 8633 """ 8634 8635 # Constants 8636 _MIGRATION_POLL_INTERVAL = 1 # seconds 8637 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds 8638
8639 - def __init__(self, lu, instance_name, cleanup, failover, fallback, 8640 ignore_consistency, allow_runtime_changes, shutdown_timeout, 8641 ignore_ipolicy):
8642 """Initializes this class. 8643 8644 """ 8645 Tasklet.__init__(self, lu) 8646 8647 # Parameters 8648 self.instance_name = instance_name 8649 self.cleanup = cleanup 8650 self.live = False # will be overridden later 8651 self.failover = failover 8652 self.fallback = fallback 8653 self.ignore_consistency = ignore_consistency 8654 self.shutdown_timeout = shutdown_timeout 8655 self.ignore_ipolicy = ignore_ipolicy 8656 self.allow_runtime_changes = allow_runtime_changes
8657
8658 - def CheckPrereq(self):
8659 """Check prerequisites. 8660 8661 This checks that the instance is in the cluster. 8662 8663 """ 8664 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name) 8665 instance = self.cfg.GetInstanceInfo(instance_name) 8666 assert instance is not None 8667 self.instance = instance 8668 cluster = self.cfg.GetClusterInfo() 8669 8670 if (not self.cleanup and 8671 not instance.admin_state == constants.ADMINST_UP and 8672 not self.failover and self.fallback): 8673 self.lu.LogInfo("Instance is marked down or offline, fallback allowed," 8674 " switching to failover") 8675 self.failover = True 8676 8677 if instance.disk_template not in constants.DTS_MIRRORED: 8678 if self.failover: 8679 text = "failovers" 8680 else: 8681 text = "migrations" 8682 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow" 8683 " %s" % (instance.disk_template, text), 8684 errors.ECODE_STATE) 8685 8686 if instance.disk_template in constants.DTS_EXT_MIRROR: 8687 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node") 8688 8689 if self.lu.op.iallocator: 8690 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC) 8691 self._RunAllocator() 8692 else: 8693 # We set set self.target_node as it is required by 8694 # BuildHooksEnv 8695 self.target_node = self.lu.op.target_node 8696 8697 # Check that the target node is correct in terms of instance policy 8698 nodeinfo = self.cfg.GetNodeInfo(self.target_node) 8699 group_info = self.cfg.GetNodeGroup(nodeinfo.group) 8700 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 8701 group_info) 8702 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg, 8703 ignore=self.ignore_ipolicy) 8704 8705 # self.target_node is already populated, either directly or by the 8706 # iallocator run 8707 target_node = self.target_node 8708 if self.target_node == instance.primary_node: 8709 raise errors.OpPrereqError("Cannot migrate instance %s" 8710 " to its primary (%s)" % 8711 (instance.name, instance.primary_node), 8712 errors.ECODE_STATE) 8713 8714 if len(self.lu.tasklets) == 1: 8715 # It is safe to release locks only when we're the only tasklet 8716 # in the LU 8717 _ReleaseLocks(self.lu, locking.LEVEL_NODE, 8718 keep=[instance.primary_node, self.target_node]) 8719 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC) 8720 8721 else: 8722 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC) 8723 8724 secondary_nodes = instance.secondary_nodes 8725 if not secondary_nodes: 8726 raise errors.ConfigurationError("No secondary node but using" 8727 " %s disk template" % 8728 instance.disk_template) 8729 target_node = secondary_nodes[0] 8730 if self.lu.op.iallocator or (self.lu.op.target_node and 8731 self.lu.op.target_node != target_node): 8732 if self.failover: 8733 text = "failed over" 8734 else: 8735 text = "migrated" 8736 raise errors.OpPrereqError("Instances with disk template %s cannot" 8737 " be %s to arbitrary nodes" 8738 " (neither an iallocator nor a target" 8739 " node can be passed)" % 8740 (instance.disk_template, text), 8741 errors.ECODE_INVAL) 8742 nodeinfo = self.cfg.GetNodeInfo(target_node) 8743 group_info = self.cfg.GetNodeGroup(nodeinfo.group) 8744 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 8745 group_info) 8746 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg, 8747 ignore=self.ignore_ipolicy) 8748 8749 i_be = cluster.FillBE(instance) 8750 8751 # check memory requirements on the secondary node 8752 if (not self.cleanup and 8753 (not self.failover or instance.admin_state 
== constants.ADMINST_UP)): 8754 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node, 8755 "migrating instance %s" % 8756 instance.name, 8757 i_be[constants.BE_MINMEM], 8758 instance.hypervisor) 8759 else: 8760 self.lu.LogInfo("Not checking memory on the secondary node as" 8761 " instance will not be started") 8762 8763 # check if failover must be forced instead of migration 8764 if (not self.cleanup and not self.failover and 8765 i_be[constants.BE_ALWAYS_FAILOVER]): 8766 self.lu.LogInfo("Instance configured to always failover; fallback" 8767 " to failover") 8768 self.failover = True 8769 8770 # check bridge existance 8771 _CheckInstanceBridgesExist(self.lu, instance, node=target_node) 8772 8773 if not self.cleanup: 8774 _CheckNodeNotDrained(self.lu, target_node) 8775 if not self.failover: 8776 result = self.rpc.call_instance_migratable(instance.primary_node, 8777 instance) 8778 if result.fail_msg and self.fallback: 8779 self.lu.LogInfo("Can't migrate, instance offline, fallback to" 8780 " failover") 8781 self.failover = True 8782 else: 8783 result.Raise("Can't migrate, please use failover", 8784 prereq=True, ecode=errors.ECODE_STATE) 8785 8786 assert not (self.failover and self.cleanup) 8787 8788 if not self.failover: 8789 if self.lu.op.live is not None and self.lu.op.mode is not None: 8790 raise errors.OpPrereqError("Only one of the 'live' and 'mode'" 8791 " parameters are accepted", 8792 errors.ECODE_INVAL) 8793 if self.lu.op.live is not None: 8794 if self.lu.op.live: 8795 self.lu.op.mode = constants.HT_MIGRATION_LIVE 8796 else: 8797 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE 8798 # reset the 'live' parameter to None so that repeated 8799 # invocations of CheckPrereq do not raise an exception 8800 self.lu.op.live = None 8801 elif self.lu.op.mode is None: 8802 # read the default value from the hypervisor 8803 i_hv = cluster.FillHV(self.instance, skip_globals=False) 8804 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] 8805 8806 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE 8807 else: 8808 # Failover is never live 8809 self.live = False 8810 8811 if not (self.failover or self.cleanup): 8812 remote_info = self.rpc.call_instance_info(instance.primary_node, 8813 instance.name, 8814 instance.hypervisor) 8815 remote_info.Raise("Error checking instance on node %s" % 8816 instance.primary_node) 8817 instance_running = bool(remote_info.payload) 8818 if instance_running: 8819 self.current_mem = int(remote_info.payload["memory"])
8820
8821 - def _RunAllocator(self):
8822 """Run the allocator based on input opcode. 8823 8824 """ 8825 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC) 8826 8827 # FIXME: add a self.ignore_ipolicy option 8828 req = iallocator.IAReqRelocate(name=self.instance_name, 8829 relocate_from=[self.instance.primary_node]) 8830 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 8831 8832 ial.Run(self.lu.op.iallocator) 8833 8834 if not ial.success: 8835 raise errors.OpPrereqError("Can't compute nodes using" 8836 " iallocator '%s': %s" % 8837 (self.lu.op.iallocator, ial.info), 8838 errors.ECODE_NORES) 8839 self.target_node = ial.result[0] 8840 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 8841 self.instance_name, self.lu.op.iallocator, 8842 utils.CommaJoin(ial.result))
8843
8844 - def _WaitUntilSync(self):
8845 """Poll with custom rpc for disk sync. 8846 8847 This uses our own step-based rpc call. 8848 8849 """ 8850 self.feedback_fn("* wait until resync is done") 8851 all_done = False 8852 while not all_done: 8853 all_done = True 8854 result = self.rpc.call_drbd_wait_sync(self.all_nodes, 8855 self.nodes_ip, 8856 (self.instance.disks, 8857 self.instance)) 8858 min_percent = 100 8859 for node, nres in result.items(): 8860 nres.Raise("Cannot resync disks on node %s" % node) 8861 node_done, node_percent = nres.payload 8862 all_done = all_done and node_done 8863 if node_percent is not None: 8864 min_percent = min(min_percent, node_percent) 8865 if not all_done: 8866 if min_percent < 100: 8867 self.feedback_fn(" - progress: %.1f%%" % min_percent) 8868 time.sleep(2)
8869
8870 - def _EnsureSecondary(self, node):
8871 """Demote a node to secondary. 8872 8873 """ 8874 self.feedback_fn("* switching node %s to secondary mode" % node) 8875 8876 for dev in self.instance.disks: 8877 self.cfg.SetDiskID(dev, node) 8878 8879 result = self.rpc.call_blockdev_close(node, self.instance.name, 8880 self.instance.disks) 8881 result.Raise("Cannot change disk to secondary on node %s" % node)
8882
8883 - def _GoStandalone(self):
8884 """Disconnect from the network. 8885 8886 """ 8887 self.feedback_fn("* changing into standalone mode") 8888 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, 8889 self.instance.disks) 8890 for node, nres in result.items(): 8891 nres.Raise("Cannot disconnect disks node %s" % node)
8892
8893 - def _GoReconnect(self, multimaster):
8894 """Reconnect to the network. 8895 8896 """ 8897 if multimaster: 8898 msg = "dual-master" 8899 else: 8900 msg = "single-master" 8901 self.feedback_fn("* changing disks into %s mode" % msg) 8902 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, 8903 (self.instance.disks, self.instance), 8904 self.instance.name, multimaster) 8905 for node, nres in result.items(): 8906 nres.Raise("Cannot change disks config on node %s" % node)
8907
8908 - def _ExecCleanup(self):
8909 """Try to cleanup after a failed migration. 8910 8911 The cleanup is done by: 8912 - check that the instance is running only on one node 8913 (and update the config if needed) 8914 - change disks on its secondary node to secondary 8915 - wait until disks are fully synchronized 8916 - disconnect from the network 8917 - change disks into single-master mode 8918 - wait again until disks are fully synchronized 8919 8920 """ 8921 instance = self.instance 8922 target_node = self.target_node 8923 source_node = self.source_node 8924 8925 # check running on only one node 8926 self.feedback_fn("* checking where the instance actually runs" 8927 " (if this hangs, the hypervisor might be in" 8928 " a bad state)") 8929 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor]) 8930 for node, result in ins_l.items(): 8931 result.Raise("Can't contact node %s" % node) 8932 8933 runningon_source = instance.name in ins_l[source_node].payload 8934 runningon_target = instance.name in ins_l[target_node].payload 8935 8936 if runningon_source and runningon_target: 8937 raise errors.OpExecError("Instance seems to be running on two nodes," 8938 " or the hypervisor is confused; you will have" 8939 " to ensure manually that it runs only on one" 8940 " and restart this operation") 8941 8942 if not (runningon_source or runningon_target): 8943 raise errors.OpExecError("Instance does not seem to be running at all;" 8944 " in this case it's safer to repair by" 8945 " running 'gnt-instance stop' to ensure disk" 8946 " shutdown, and then restarting it") 8947 8948 if runningon_target: 8949 # the migration has actually succeeded, we need to update the config 8950 self.feedback_fn("* instance running on secondary node (%s)," 8951 " updating config" % target_node) 8952 instance.primary_node = target_node 8953 self.cfg.Update(instance, self.feedback_fn) 8954 demoted_node = source_node 8955 else: 8956 self.feedback_fn("* instance confirmed to be running on its" 8957 " primary node (%s)" % source_node) 8958 demoted_node = target_node 8959 8960 if instance.disk_template in constants.DTS_INT_MIRROR: 8961 self._EnsureSecondary(demoted_node) 8962 try: 8963 self._WaitUntilSync() 8964 except errors.OpExecError: 8965 # we ignore here errors, since if the device is standalone, it 8966 # won't be able to sync 8967 pass 8968 self._GoStandalone() 8969 self._GoReconnect(False) 8970 self._WaitUntilSync() 8971 8972 self.feedback_fn("* done")
8973
8974 - def _RevertDiskStatus(self):
8975 """Try to revert the disk status after a failed migration. 8976 8977 """ 8978 target_node = self.target_node 8979 if self.instance.disk_template in constants.DTS_EXT_MIRROR: 8980 return 8981 8982 try: 8983 self._EnsureSecondary(target_node) 8984 self._GoStandalone() 8985 self._GoReconnect(False) 8986 self._WaitUntilSync() 8987 except errors.OpExecError, err: 8988 self.lu.LogWarning("Migration failed and I can't reconnect the drives," 8989 " please try to recover the instance manually;" 8990 " error '%s'" % str(err))
8991
8992 - def _AbortMigration(self):
8993 """Call the hypervisor code to abort a started migration. 8994 8995 """ 8996 instance = self.instance 8997 target_node = self.target_node 8998 source_node = self.source_node 8999 migration_info = self.migration_info 9000 9001 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node, 9002 instance, 9003 migration_info, 9004 False) 9005 abort_msg = abort_result.fail_msg 9006 if abort_msg: 9007 logging.error("Aborting migration failed on target node %s: %s", 9008 target_node, abort_msg) 9009 # Don't raise an exception here, as we stil have to try to revert the 9010 # disk status, even if this step failed. 9011 9012 abort_result = self.rpc.call_instance_finalize_migration_src( 9013 source_node, instance, False, self.live) 9014 abort_msg = abort_result.fail_msg 9015 if abort_msg: 9016 logging.error("Aborting migration failed on source node %s: %s", 9017 source_node, abort_msg)
9018
9019 - def _ExecMigration(self):
9020 """Migrate an instance. 9021 9022 The migrate is done by: 9023 - change the disks into dual-master mode 9024 - wait until disks are fully synchronized again 9025 - migrate the instance 9026 - change disks on the new secondary node (the old primary) to secondary 9027 - wait until disks are fully synchronized 9028 - change disks into single-master mode 9029 9030 """ 9031 instance = self.instance 9032 target_node = self.target_node 9033 source_node = self.source_node 9034 9035 # Check for hypervisor version mismatch and warn the user. 9036 nodeinfo = self.rpc.call_node_info([source_node, target_node], 9037 None, [self.instance.hypervisor], False) 9038 for ninfo in nodeinfo.values(): 9039 ninfo.Raise("Unable to retrieve node information from node '%s'" % 9040 ninfo.node) 9041 (_, _, (src_info, )) = nodeinfo[source_node].payload 9042 (_, _, (dst_info, )) = nodeinfo[target_node].payload 9043 9044 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and 9045 (constants.HV_NODEINFO_KEY_VERSION in dst_info)): 9046 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION] 9047 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION] 9048 if src_version != dst_version: 9049 self.feedback_fn("* warning: hypervisor version mismatch between" 9050 " source (%s) and target (%s) node" % 9051 (src_version, dst_version)) 9052 9053 self.feedback_fn("* checking disk consistency between source and target") 9054 for (idx, dev) in enumerate(instance.disks): 9055 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False): 9056 raise errors.OpExecError("Disk %s is degraded or not fully" 9057 " synchronized on target node," 9058 " aborting migration" % idx) 9059 9060 if self.current_mem > self.tgt_free_mem: 9061 if not self.allow_runtime_changes: 9062 raise errors.OpExecError("Memory ballooning not allowed and not enough" 9063 " free memory to fit instance %s on target" 9064 " node %s (have %dMB, need %dMB)" % 9065 (instance.name, target_node, 9066 self.tgt_free_mem, self.current_mem)) 9067 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem) 9068 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node, 9069 instance, 9070 self.tgt_free_mem) 9071 rpcres.Raise("Cannot modify instance runtime memory") 9072 9073 # First get the migration information from the remote node 9074 result = self.rpc.call_migration_info(source_node, instance) 9075 msg = result.fail_msg 9076 if msg: 9077 log_err = ("Failed fetching source migration information from %s: %s" % 9078 (source_node, msg)) 9079 logging.error(log_err) 9080 raise errors.OpExecError(log_err) 9081 9082 self.migration_info = migration_info = result.payload 9083 9084 if self.instance.disk_template not in constants.DTS_EXT_MIRROR: 9085 # Then switch the disks to master/master mode 9086 self._EnsureSecondary(target_node) 9087 self._GoStandalone() 9088 self._GoReconnect(True) 9089 self._WaitUntilSync() 9090 9091 self.feedback_fn("* preparing %s to accept the instance" % target_node) 9092 result = self.rpc.call_accept_instance(target_node, 9093 instance, 9094 migration_info, 9095 self.nodes_ip[target_node]) 9096 9097 msg = result.fail_msg 9098 if msg: 9099 logging.error("Instance pre-migration failed, trying to revert" 9100 " disk status: %s", msg) 9101 self.feedback_fn("Pre-migration failed, aborting") 9102 self._AbortMigration() 9103 self._RevertDiskStatus() 9104 raise errors.OpExecError("Could not pre-migrate instance %s: %s" % 9105 (instance.name, msg)) 9106 9107 self.feedback_fn("* migrating instance to %s" % target_node) 
9108 result = self.rpc.call_instance_migrate(source_node, instance, 9109 self.nodes_ip[target_node], 9110 self.live) 9111 msg = result.fail_msg 9112 if msg: 9113 logging.error("Instance migration failed, trying to revert" 9114 " disk status: %s", msg) 9115 self.feedback_fn("Migration failed, aborting") 9116 self._AbortMigration() 9117 self._RevertDiskStatus() 9118 raise errors.OpExecError("Could not migrate instance %s: %s" % 9119 (instance.name, msg)) 9120 9121 self.feedback_fn("* starting memory transfer") 9122 last_feedback = time.time() 9123 while True: 9124 result = self.rpc.call_instance_get_migration_status(source_node, 9125 instance) 9126 msg = result.fail_msg 9127 ms = result.payload # MigrationStatus instance 9128 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES): 9129 logging.error("Instance migration failed, trying to revert" 9130 " disk status: %s", msg) 9131 self.feedback_fn("Migration failed, aborting") 9132 self._AbortMigration() 9133 self._RevertDiskStatus() 9134 if not msg: 9135 msg = "hypervisor returned failure" 9136 raise errors.OpExecError("Could not migrate instance %s: %s" % 9137 (instance.name, msg)) 9138 9139 if result.payload.status != constants.HV_MIGRATION_ACTIVE: 9140 self.feedback_fn("* memory transfer complete") 9141 break 9142 9143 if (utils.TimeoutExpired(last_feedback, 9144 self._MIGRATION_FEEDBACK_INTERVAL) and 9145 ms.transferred_ram is not None): 9146 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram) 9147 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress) 9148 last_feedback = time.time() 9149 9150 time.sleep(self._MIGRATION_POLL_INTERVAL) 9151 9152 result = self.rpc.call_instance_finalize_migration_src(source_node, 9153 instance, 9154 True, 9155 self.live) 9156 msg = result.fail_msg 9157 if msg: 9158 logging.error("Instance migration succeeded, but finalization failed" 9159 " on the source node: %s", msg) 9160 raise errors.OpExecError("Could not finalize instance migration: %s" % 9161 msg) 9162 9163 instance.primary_node = target_node 9164 9165 # distribute new instance config to the other nodes 9166 self.cfg.Update(instance, self.feedback_fn) 9167 9168 result = self.rpc.call_instance_finalize_migration_dst(target_node, 9169 instance, 9170 migration_info, 9171 True) 9172 msg = result.fail_msg 9173 if msg: 9174 logging.error("Instance migration succeeded, but finalization failed" 9175 " on the target node: %s", msg) 9176 raise errors.OpExecError("Could not finalize instance migration: %s" % 9177 msg) 9178 9179 if self.instance.disk_template not in constants.DTS_EXT_MIRROR: 9180 self._EnsureSecondary(source_node) 9181 self._WaitUntilSync() 9182 self._GoStandalone() 9183 self._GoReconnect(False) 9184 self._WaitUntilSync() 9185 9186 # If the instance's disk template is `rbd' or `ext' and there was a 9187 # successful migration, unmap the device from the source node. 9188 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT): 9189 disks = _ExpandCheckDisks(instance, instance.disks) 9190 self.feedback_fn("* unmapping instance's disks from %s" % source_node) 9191 for disk in disks: 9192 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance)) 9193 msg = result.fail_msg 9194 if msg: 9195 logging.error("Migration was successful, but couldn't unmap the" 9196 " block device %s on source node %s: %s", 9197 disk.iv_name, source_node, msg) 9198 logging.error("You need to unmap the device %s manually on %s", 9199 disk.iv_name, source_node) 9200 9201 self.feedback_fn("* done")
9202
9203 - def _ExecFailover(self):
9204 """Failover an instance. 9205 9206 The failover is done by shutting it down on its present node and 9207 starting it on the secondary. 9208 9209 """ 9210 instance = self.instance 9211 primary_node = self.cfg.GetNodeInfo(instance.primary_node) 9212 9213 source_node = instance.primary_node 9214 target_node = self.target_node 9215 9216 if instance.admin_state == constants.ADMINST_UP: 9217 self.feedback_fn("* checking disk consistency between source and target") 9218 for (idx, dev) in enumerate(instance.disks): 9219 # for drbd, these are drbd over lvm 9220 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, 9221 False): 9222 if primary_node.offline: 9223 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on" 9224 " target node %s" % 9225 (primary_node.name, idx, target_node)) 9226 elif not self.ignore_consistency: 9227 raise errors.OpExecError("Disk %s is degraded on target node," 9228 " aborting failover" % idx) 9229 else: 9230 self.feedback_fn("* not checking disk consistency as instance is not" 9231 " running") 9232 9233 self.feedback_fn("* shutting down instance on source node") 9234 logging.info("Shutting down instance %s on node %s", 9235 instance.name, source_node) 9236 9237 result = self.rpc.call_instance_shutdown(source_node, instance, 9238 self.shutdown_timeout) 9239 msg = result.fail_msg 9240 if msg: 9241 if self.ignore_consistency or primary_node.offline: 9242 self.lu.LogWarning("Could not shutdown instance %s on node %s," 9243 " proceeding anyway; please make sure node" 9244 " %s is down; error details: %s", 9245 instance.name, source_node, source_node, msg) 9246 else: 9247 raise errors.OpExecError("Could not shutdown instance %s on" 9248 " node %s: %s" % 9249 (instance.name, source_node, msg)) 9250 9251 self.feedback_fn("* deactivating the instance's disks on source node") 9252 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True): 9253 raise errors.OpExecError("Can't shut down the instance's disks") 9254 9255 instance.primary_node = target_node 9256 # distribute new instance config to the other nodes 9257 self.cfg.Update(instance, self.feedback_fn) 9258 9259 # Only start the instance if it's marked as up 9260 if instance.admin_state == constants.ADMINST_UP: 9261 self.feedback_fn("* activating the instance's disks on target node %s" % 9262 target_node) 9263 logging.info("Starting instance %s on node %s", 9264 instance.name, target_node) 9265 9266 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance, 9267 ignore_secondaries=True) 9268 if not disks_ok: 9269 _ShutdownInstanceDisks(self.lu, instance) 9270 raise errors.OpExecError("Can't activate the instance's disks") 9271 9272 self.feedback_fn("* starting the instance on the target node %s" % 9273 target_node) 9274 result = self.rpc.call_instance_start(target_node, (instance, None, None), 9275 False) 9276 msg = result.fail_msg 9277 if msg: 9278 _ShutdownInstanceDisks(self.lu, instance) 9279 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 9280 (instance.name, target_node, msg))
9281
9282 - def Exec(self, feedback_fn):
9283 """Perform the migration. 9284 9285 """ 9286 self.feedback_fn = feedback_fn 9287 self.source_node = self.instance.primary_node 9288 9289 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing 9290 if self.instance.disk_template in constants.DTS_INT_MIRROR: 9291 self.target_node = self.instance.secondary_nodes[0] 9292 # Otherwise self.target_node has been populated either 9293 # directly, or through an iallocator. 9294 9295 self.all_nodes = [self.source_node, self.target_node] 9296 self.nodes_ip = dict((name, node.secondary_ip) for (name, node) 9297 in self.cfg.GetMultiNodeInfo(self.all_nodes)) 9298 9299 if self.failover: 9300 feedback_fn("Failover instance %s" % self.instance.name) 9301 self._ExecFailover() 9302 else: 9303 feedback_fn("Migrating instance %s" % self.instance.name) 9304 9305 if self.cleanup: 9306 return self._ExecCleanup() 9307 else: 9308 return self._ExecMigration()
9309
9310 9311 -def _CreateBlockDev(lu, node, instance, device, force_create, info, 9312 force_open):
9313 """Wrapper around L{_CreateBlockDevInner}. 9314 9315 This method annotates the root device first. 9316 9317 """ 9318 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg) 9319 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node) 9320 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info, 9321 force_open, excl_stor)
9322
9323 9324 -def _CreateBlockDevInner(lu, node, instance, device, force_create, 9325 info, force_open, excl_stor):
9326 """Create a tree of block devices on a given node. 9327 9328 If this device type has to be created on secondaries, create it and 9329 all its children. 9330 9331 If not, just recurse to children keeping the same 'force' value. 9332 9333 @attention: The device has to be annotated already. 9334 9335 @param lu: the lu on whose behalf we execute 9336 @param node: the node on which to create the device 9337 @type instance: L{objects.Instance} 9338 @param instance: the instance which owns the device 9339 @type device: L{objects.Disk} 9340 @param device: the device to create 9341 @type force_create: boolean 9342 @param force_create: whether to force creation of this device; this 9343 will be change to True whenever we find a device which has 9344 CreateOnSecondary() attribute 9345 @param info: the extra 'metadata' we should attach to the device 9346 (this will be represented as a LVM tag) 9347 @type force_open: boolean 9348 @param force_open: this parameter will be passes to the 9349 L{backend.BlockdevCreate} function where it specifies 9350 whether we run on primary or not, and it affects both 9351 the child assembly and the device own Open() execution 9352 @type excl_stor: boolean 9353 @param excl_stor: Whether exclusive_storage is active for the node 9354 9355 @return: list of created devices 9356 """ 9357 created_devices = [] 9358 try: 9359 if device.CreateOnSecondary(): 9360 force_create = True 9361 9362 if device.children: 9363 for child in device.children: 9364 devs = _CreateBlockDevInner(lu, node, instance, child, force_create, 9365 info, force_open, excl_stor) 9366 created_devices.extend(devs) 9367 9368 if not force_create: 9369 return created_devices 9370 9371 _CreateSingleBlockDev(lu, node, instance, device, info, force_open, 9372 excl_stor) 9373 # The device has been completely created, so there is no point in keeping 9374 # its subdevices in the list. We just add the device itself instead. 9375 created_devices = [(node, device)] 9376 return created_devices 9377 9378 except errors.DeviceCreationError, e: 9379 e.created_devices.extend(created_devices) 9380 raise e 9381 except errors.OpExecError, e: 9382 raise errors.DeviceCreationError(str(e), created_devices)
9383
9384 9385 -def _CreateSingleBlockDev(lu, node, instance, device, info, force_open, 9386 excl_stor):
9387 """Create a single block device on a given node. 9388 9389 This will not recurse over children of the device, so they must be 9390 created in advance. 9391 9392 @param lu: the lu on whose behalf we execute 9393 @param node: the node on which to create the device 9394 @type instance: L{objects.Instance} 9395 @param instance: the instance which owns the device 9396 @type device: L{objects.Disk} 9397 @param device: the device to create 9398 @param info: the extra 'metadata' we should attach to the device 9399 (this will be represented as a LVM tag) 9400 @type force_open: boolean 9401 @param force_open: this parameter will be passes to the 9402 L{backend.BlockdevCreate} function where it specifies 9403 whether we run on primary or not, and it affects both 9404 the child assembly and the device own Open() execution 9405 @type excl_stor: boolean 9406 @param excl_stor: Whether exclusive_storage is active for the node 9407 9408 """ 9409 lu.cfg.SetDiskID(device, node) 9410 result = lu.rpc.call_blockdev_create(node, device, device.size, 9411 instance.name, force_open, info, 9412 excl_stor) 9413 result.Raise("Can't create block device %s on" 9414 " node %s for instance %s" % (device, node, instance.name)) 9415 if device.physical_id is None: 9416 device.physical_id = result.payload
9417
9418 9419 -def _GenerateUniqueNames(lu, exts):
9420 """Generate a suitable LV name. 9421 9422 This will generate a logical volume name for the given instance. 9423 9424 """ 9425 results = [] 9426 for val in exts: 9427 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) 9428 results.append("%s%s" % (new_id, val)) 9429 return results
9430
9431 9432 -def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names, 9433 iv_name, p_minor, s_minor):
9434 """Generate a drbd8 device complete with its children. 9435 9436 """ 9437 assert len(vgnames) == len(names) == 2 9438 port = lu.cfg.AllocatePort() 9439 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) 9440 9441 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, 9442 logical_id=(vgnames[0], names[0]), 9443 params={}) 9444 dev_meta = objects.Disk(dev_type=constants.LD_LV, 9445 size=constants.DRBD_META_SIZE, 9446 logical_id=(vgnames[1], names[1]), 9447 params={}) 9448 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, 9449 logical_id=(primary, secondary, port, 9450 p_minor, s_minor, 9451 shared_secret), 9452 children=[dev_data, dev_meta], 9453 iv_name=iv_name, params={}) 9454 return drbd_dev
9455 9456 9457 _DISK_TEMPLATE_NAME_PREFIX = { 9458 constants.DT_PLAIN: "", 9459 constants.DT_RBD: ".rbd", 9460 constants.DT_EXT: ".ext", 9461 } 9462 9463 9464 _DISK_TEMPLATE_DEVICE_TYPE = { 9465 constants.DT_PLAIN: constants.LD_LV, 9466 constants.DT_FILE: constants.LD_FILE, 9467 constants.DT_SHARED_FILE: constants.LD_FILE, 9468 constants.DT_BLOCK: constants.LD_BLOCKDEV, 9469 constants.DT_RBD: constants.LD_RBD, 9470 constants.DT_EXT: constants.LD_EXT, 9471 }
9472 9473 9474 -def _GenerateDiskTemplate( 9475 lu, template_name, instance_name, primary_node, secondary_nodes, 9476 disk_info, file_storage_dir, file_driver, base_index, 9477 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage, 9478 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9479 """Generate the entire disk layout for a given template type. 9480 9481 """ 9482 vgname = lu.cfg.GetVGName() 9483 disk_count = len(disk_info) 9484 disks = [] 9485 9486 if template_name == constants.DT_DISKLESS: 9487 pass 9488 elif template_name == constants.DT_DRBD8: 9489 if len(secondary_nodes) != 1: 9490 raise errors.ProgrammerError("Wrong template configuration") 9491 remote_node = secondary_nodes[0] 9492 minors = lu.cfg.AllocateDRBDMinor( 9493 [primary_node, remote_node] * len(disk_info), instance_name) 9494 9495 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name, 9496 full_disk_params) 9497 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG] 9498 9499 names = [] 9500 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 9501 for i in range(disk_count)]): 9502 names.append(lv_prefix + "_data") 9503 names.append(lv_prefix + "_meta") 9504 for idx, disk in enumerate(disk_info): 9505 disk_index = idx + base_index 9506 data_vg = disk.get(constants.IDISK_VG, vgname) 9507 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg) 9508 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, 9509 disk[constants.IDISK_SIZE], 9510 [data_vg, meta_vg], 9511 names[idx * 2:idx * 2 + 2], 9512 "disk/%d" % disk_index, 9513 minors[idx * 2], minors[idx * 2 + 1]) 9514 disk_dev.mode = disk[constants.IDISK_MODE] 9515 disks.append(disk_dev) 9516 else: 9517 if secondary_nodes: 9518 raise errors.ProgrammerError("Wrong template configuration") 9519 9520 if template_name == constants.DT_FILE: 9521 _req_file_storage() 9522 elif template_name == constants.DT_SHARED_FILE: 9523 _req_shr_file_storage() 9524 9525 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None) 9526 if name_prefix is None: 9527 names = None 9528 else: 9529 names = _GenerateUniqueNames(lu, ["%s.disk%s" % 9530 (name_prefix, base_index + i) 9531 for i in range(disk_count)]) 9532 9533 if template_name == constants.DT_PLAIN: 9534 9535 def logical_id_fn(idx, _, disk): 9536 vg = disk.get(constants.IDISK_VG, vgname) 9537 return (vg, names[idx])
9538 9539 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE): 9540 logical_id_fn = \ 9541 lambda _, disk_index, disk: (file_driver, 9542 "%s/disk%d" % (file_storage_dir, 9543 disk_index)) 9544 elif template_name == constants.DT_BLOCK: 9545 logical_id_fn = \ 9546 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL, 9547 disk[constants.IDISK_ADOPT]) 9548 elif template_name == constants.DT_RBD: 9549 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx]) 9550 elif template_name == constants.DT_EXT: 9551 def logical_id_fn(idx, _, disk): 9552 provider = disk.get(constants.IDISK_PROVIDER, None) 9553 if provider is None: 9554 raise errors.ProgrammerError("Disk template is %s, but '%s' is" 9555 " not found", constants.DT_EXT, 9556 constants.IDISK_PROVIDER) 9557 return (provider, names[idx]) 9558 else: 9559 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name) 9560 9561 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name] 9562 9563 for idx, disk in enumerate(disk_info): 9564 params = {} 9565 # Only for the Ext template add disk_info to params 9566 if template_name == constants.DT_EXT: 9567 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER] 9568 for key in disk: 9569 if key not in constants.IDISK_PARAMS: 9570 params[key] = disk[key] 9571 disk_index = idx + base_index 9572 size = disk[constants.IDISK_SIZE] 9573 feedback_fn("* disk %s, size %s" % 9574 (disk_index, utils.FormatUnit(size, "h"))) 9575 disks.append(objects.Disk(dev_type=dev_type, size=size, 9576 logical_id=logical_id_fn(idx, disk_index, disk), 9577 iv_name="disk/%d" % disk_index, 9578 mode=disk[constants.IDISK_MODE], 9579 params=params)) 9580 9581 return disks 9582
9583 9584 -def _GetInstanceInfoText(instance):
9585 """Compute that text that should be added to the disk's metadata. 9586 9587 """ 9588 return "originstname+%s" % instance.name
9589
9590 9591 -def _CalcEta(time_taken, written, total_size):
9592 """Calculates the ETA based on size written and total size. 9593 9594 @param time_taken: The time taken so far 9595 @param written: amount written so far 9596 @param total_size: The total size of data to be written 9597 @return: The remaining time in seconds 9598 9599 """ 9600 avg_time = time_taken / float(written) 9601 return (total_size - written) * avg_time
9602
9603 9604 -def _WipeDisks(lu, instance, disks=None):
9605 """Wipes instance disks. 9606 9607 @type lu: L{LogicalUnit} 9608 @param lu: the logical unit on whose behalf we execute 9609 @type instance: L{objects.Instance} 9610 @param instance: the instance whose disks we should create 9611 @type disks: None or list of tuple of (number, L{objects.Disk}, number) 9612 @param disks: Disk details; tuple contains disk index, disk object and the 9613 start offset 9614 9615 """ 9616 node = instance.primary_node 9617 9618 if disks is None: 9619 disks = [(idx, disk, 0) 9620 for (idx, disk) in enumerate(instance.disks)] 9621 9622 for (_, device, _) in disks: 9623 lu.cfg.SetDiskID(device, node) 9624 9625 logging.info("Pausing synchronization of disks of instance '%s'", 9626 instance.name) 9627 result = lu.rpc.call_blockdev_pause_resume_sync(node, 9628 (map(compat.snd, disks), 9629 instance), 9630 True) 9631 result.Raise("Failed to pause disk synchronization on node '%s'" % node) 9632 9633 for idx, success in enumerate(result.payload): 9634 if not success: 9635 logging.warn("Pausing synchronization of disk %s of instance '%s'" 9636 " failed", idx, instance.name) 9637 9638 try: 9639 for (idx, device, offset) in disks: 9640 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but 9641 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors. 9642 wipe_chunk_size = \ 9643 int(min(constants.MAX_WIPE_CHUNK, 9644 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT)) 9645 9646 size = device.size 9647 last_output = 0 9648 start_time = time.time() 9649 9650 if offset == 0: 9651 info_text = "" 9652 else: 9653 info_text = (" (from %s to %s)" % 9654 (utils.FormatUnit(offset, "h"), 9655 utils.FormatUnit(size, "h"))) 9656 9657 lu.LogInfo("* Wiping disk %s%s", idx, info_text) 9658 9659 logging.info("Wiping disk %d for instance %s on node %s using" 9660 " chunk size %s", idx, instance.name, node, wipe_chunk_size) 9661 9662 while offset < size: 9663 wipe_size = min(wipe_chunk_size, size - offset) 9664 9665 logging.debug("Wiping disk %d, offset %s, chunk %s", 9666 idx, offset, wipe_size) 9667 9668 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset, 9669 wipe_size) 9670 result.Raise("Could not wipe disk %d at offset %d for size %d" % 9671 (idx, offset, wipe_size)) 9672 9673 now = time.time() 9674 offset += wipe_size 9675 if now - last_output >= 60: 9676 eta = _CalcEta(now - start_time, offset, size) 9677 lu.LogInfo(" - done: %.1f%% ETA: %s", 9678 offset / float(size) * 100, utils.FormatSeconds(eta)) 9679 last_output = now 9680 finally: 9681 logging.info("Resuming synchronization of disks for instance '%s'", 9682 instance.name) 9683 9684 result = lu.rpc.call_blockdev_pause_resume_sync(node, 9685 (map(compat.snd, disks), 9686 instance), 9687 False) 9688 9689 if result.fail_msg: 9690 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s", 9691 node, result.fail_msg) 9692 else: 9693 for idx, success in enumerate(result.payload): 9694 if not success: 9695 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'" 9696 " failed", idx, instance.name)
9697
9698 9699 -def _WipeOrCleanupDisks(lu, instance, disks=None, cleanup=None):
9700 """Wrapper for L{_WipeDisks} that handles errors. 9701 9702 @type lu: L{LogicalUnit} 9703 @param lu: the logical unit on whose behalf we execute 9704 @type instance: L{objects.Instance} 9705 @param instance: the instance whose disks we should wipe 9706 @param disks: see L{_WipeDisks} 9707 @param cleanup: the result returned by L{_CreateDisks}, used for cleanup in 9708 case of error 9709 @raise errors.OpPrereqError: in case of failure 9710 9711 """ 9712 try: 9713 _WipeDisks(lu, instance, disks=disks) 9714 except errors.OpExecError: 9715 logging.warning("Wiping disks for instance '%s' failed", 9716 instance.name) 9717 _UndoCreateDisks(lu, cleanup) 9718 raise
9719
9720 9721 -def _UndoCreateDisks(lu, disks_created):
9722 """Undo the work performed by L{_CreateDisks}. 9723 9724 This function is called in case of an error to undo the work of 9725 L{_CreateDisks}. 9726 9727 @type lu: L{LogicalUnit} 9728 @param lu: the logical unit on whose behalf we execute 9729 @param disks_created: the result returned by L{_CreateDisks} 9730 9731 """ 9732 for (node, disk) in disks_created: 9733 lu.cfg.SetDiskID(disk, node) 9734 result = lu.rpc.call_blockdev_remove(node, disk) 9735 if result.fail_msg: 9736 logging.warning("Failed to remove newly-created disk %s on node %s:" 9737 " %s", disk, node, result.fail_msg)
9738
9739 9740 -def _CreateDisks(lu, instance, to_skip=None, target_node=None, disks=None):
9741 """Create all disks for an instance. 9742 9743 This abstracts away some work from AddInstance. 9744 9745 @type lu: L{LogicalUnit} 9746 @param lu: the logical unit on whose behalf we execute 9747 @type instance: L{objects.Instance} 9748 @param instance: the instance whose disks we should create 9749 @type to_skip: list 9750 @param to_skip: list of indices to skip 9751 @type target_node: string 9752 @param target_node: if passed, overrides the target node for creation 9753 @type disks: list of {objects.Disk} 9754 @param disks: the disks to create; if not specified, all the disks of the 9755 instance are created 9756 @return: information about the created disks, to be used to call 9757 L{_UndoCreateDisks} 9758 @raise errors.OpPrereqError: in case of error 9759 9760 """ 9761 info = _GetInstanceInfoText(instance) 9762 if target_node is None: 9763 pnode = instance.primary_node 9764 all_nodes = instance.all_nodes 9765 else: 9766 pnode = target_node 9767 all_nodes = [pnode] 9768 9769 if disks is None: 9770 disks = instance.disks 9771 9772 if instance.disk_template in constants.DTS_FILEBASED: 9773 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 9774 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir) 9775 9776 result.Raise("Failed to create directory '%s' on" 9777 " node %s" % (file_storage_dir, pnode)) 9778 9779 disks_created = [] 9780 for idx, device in enumerate(disks): 9781 if to_skip and idx in to_skip: 9782 continue 9783 logging.info("Creating disk %s for instance '%s'", idx, instance.name) 9784 for node in all_nodes: 9785 f_create = node == pnode 9786 try: 9787 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create) 9788 disks_created.append((node, device)) 9789 except errors.DeviceCreationError, e: 9790 logging.warning("Creating disk %s for instance '%s' failed", 9791 idx, instance.name) 9792 disks_created.extend(e.created_devices) 9793 _UndoCreateDisks(lu, disks_created) 9794 raise errors.OpExecError(e.message) 9795 return disks_created
9796
9797 9798 -def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9799 """Remove all disks for an instance. 9800 9801 This abstracts away some work from `AddInstance()` and 9802 `RemoveInstance()`. Note that in case some of the devices couldn't 9803 be removed, the removal will continue with the other ones. 9804 9805 @type lu: L{LogicalUnit} 9806 @param lu: the logical unit on whose behalf we execute 9807 @type instance: L{objects.Instance} 9808 @param instance: the instance whose disks we should remove 9809 @type target_node: string 9810 @param target_node: used to override the node on which to remove the disks 9811 @rtype: boolean 9812 @return: the success of the removal 9813 9814 """ 9815 logging.info("Removing block devices for instance %s", instance.name) 9816 9817 all_result = True 9818 ports_to_release = set() 9819 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg) 9820 for (idx, device) in enumerate(anno_disks): 9821 if target_node: 9822 edata = [(target_node, device)] 9823 else: 9824 edata = device.ComputeNodeTree(instance.primary_node) 9825 for node, disk in edata: 9826 lu.cfg.SetDiskID(disk, node) 9827 result = lu.rpc.call_blockdev_remove(node, disk) 9828 if result.fail_msg: 9829 lu.LogWarning("Could not remove disk %s on node %s," 9830 " continuing anyway: %s", idx, node, result.fail_msg) 9831 if not (result.offline and node != instance.primary_node): 9832 all_result = False 9833 9834 # if this is a DRBD disk, return its port to the pool 9835 if device.dev_type in constants.LDS_DRBD: 9836 ports_to_release.add(device.logical_id[2]) 9837 9838 if all_result or ignore_failures: 9839 for port in ports_to_release: 9840 lu.cfg.AddTcpUdpPort(port) 9841 9842 if instance.disk_template in constants.DTS_FILEBASED: 9843 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 9844 if target_node: 9845 tgt = target_node 9846 else: 9847 tgt = instance.primary_node 9848 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir) 9849 if result.fail_msg: 9850 lu.LogWarning("Could not remove directory '%s' on node %s: %s", 9851 file_storage_dir, instance.primary_node, result.fail_msg) 9852 all_result = False 9853 9854 return all_result
9855
9856 9857 -def _ComputeDiskSizePerVG(disk_template, disks):
9858 """Compute disk size requirements in the volume group 9859 9860 """ 9861 def _compute(disks, payload): 9862 """Universal algorithm. 9863 9864 """ 9865 vgs = {} 9866 for disk in disks: 9867 vgs[disk[constants.IDISK_VG]] = \ 9868 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload 9869 9870 return vgs
9871 9872 # Required free disk space as a function of disk and swap space 9873 req_size_dict = { 9874 constants.DT_DISKLESS: {}, 9875 constants.DT_PLAIN: _compute(disks, 0), 9876 # 128 MB are added for drbd metadata for each disk 9877 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE), 9878 constants.DT_FILE: {}, 9879 constants.DT_SHARED_FILE: {}, 9880 } 9881 9882 if disk_template not in req_size_dict: 9883 raise errors.ProgrammerError("Disk template '%s' size requirement" 9884 " is unknown" % disk_template) 9885 9886 return req_size_dict[disk_template] 9887
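A worked example (not ganeti code) of the per-VG accounting done by _ComputeDiskSizePerVG: two disks in "xenvg" and one in "othervg", with the 128 MiB DRBD metadata overhead applied per disk for the drbd8 template and no overhead for plain LVM.

def sketch_size_per_vg(disks, payload):
  """Sums disk sizes per volume group, adding `payload` MiB per disk."""
  vgs = {}
  for vg, size in disks:
    vgs[vg] = vgs.get(vg, 0) + size + payload
  return vgs

disks = [("xenvg", 1024), ("xenvg", 2048), ("othervg", 512)]
print(sketch_size_per_vg(disks, 0))    # plain: {'xenvg': 3072, 'othervg': 512}
print(sketch_size_per_vg(disks, 128))  # drbd8: {'xenvg': 3328, 'othervg': 640}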
9888 9889 -def _FilterVmNodes(lu, nodenames):
9890 """Filters out non-vm_capable nodes from a list. 9891 9892 @type lu: L{LogicalUnit} 9893 @param lu: the logical unit for which we check 9894 @type nodenames: list 9895 @param nodenames: the list of nodes on which we should check 9896 @rtype: list 9897 @return: the list of vm-capable nodes 9898 9899 """ 9900 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList()) 9901 return [name for name in nodenames if name not in vm_nodes]
9902
9903 9904 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
9905 """Hypervisor parameter validation. 9906 9907 This function abstract the hypervisor parameter validation to be 9908 used in both instance create and instance modify. 9909 9910 @type lu: L{LogicalUnit} 9911 @param lu: the logical unit for which we check 9912 @type nodenames: list 9913 @param nodenames: the list of nodes on which we should check 9914 @type hvname: string 9915 @param hvname: the name of the hypervisor we should use 9916 @type hvparams: dict 9917 @param hvparams: the parameters which we need to check 9918 @raise errors.OpPrereqError: if the parameters are not valid 9919 9920 """ 9921 nodenames = _FilterVmNodes(lu, nodenames) 9922 9923 cluster = lu.cfg.GetClusterInfo() 9924 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams) 9925 9926 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull) 9927 for node in nodenames: 9928 info = hvinfo[node] 9929 if info.offline: 9930 continue 9931 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9932
9933 9934 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
9935 """OS parameters validation. 9936 9937 @type lu: L{LogicalUnit} 9938 @param lu: the logical unit for which we check 9939 @type required: boolean 9940 @param required: whether the validation should fail if the OS is not 9941 found 9942 @type nodenames: list 9943 @param nodenames: the list of nodes on which we should check 9944 @type osname: string 9945 @param osname: the name of the hypervisor we should use 9946 @type osparams: dict 9947 @param osparams: the parameters which we need to check 9948 @raise errors.OpPrereqError: if the parameters are not valid 9949 9950 """ 9951 nodenames = _FilterVmNodes(lu, nodenames) 9952 result = lu.rpc.call_os_validate(nodenames, required, osname, 9953 [constants.OS_VALIDATE_PARAMETERS], 9954 osparams) 9955 for node, nres in result.items(): 9956 # we don't check for offline cases since this should be run only 9957 # against the master node and/or an instance's nodes 9958 nres.Raise("OS Parameters validation failed on node %s" % node) 9959 if not nres.payload: 9960 lu.LogInfo("OS %s not found on node %s, validation skipped", 9961 osname, node)
9962
9963 9964 -def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9965 """Wrapper around IAReqInstanceAlloc. 9966 9967 @param op: The instance opcode 9968 @param disks: The computed disks 9969 @param nics: The computed nics 9970 @param beparams: The full filled beparams 9971 @param node_whitelist: List of nodes which should appear as online to the 9972 allocator (unless the node is already marked offline) 9973 9974 @returns: A filled L{iallocator.IAReqInstanceAlloc} 9975 9976 """ 9977 spindle_use = beparams[constants.BE_SPINDLE_USE] 9978 return iallocator.IAReqInstanceAlloc(name=op.instance_name, 9979 disk_template=op.disk_template, 9980 tags=op.tags, 9981 os=op.os_type, 9982 vcpus=beparams[constants.BE_VCPUS], 9983 memory=beparams[constants.BE_MAXMEM], 9984 spindle_use=spindle_use, 9985 disks=disks, 9986 nics=[n.ToDict() for n in nics], 9987 hypervisor=op.hypervisor, 9988 node_whitelist=node_whitelist)
9989
9990 9991 -def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9992 """Computes the nics. 9993 9994 @param op: The instance opcode 9995 @param cluster: Cluster configuration object 9996 @param default_ip: The default ip to assign 9997 @param cfg: An instance of the configuration object 9998 @param ec_id: Execution context ID 9999 10000 @returns: The build up nics 10001 10002 """ 10003 nics = [] 10004 for nic in op.nics: 10005 nic_mode_req = nic.get(constants.INIC_MODE, None) 10006 nic_mode = nic_mode_req 10007 if nic_mode is None or nic_mode == constants.VALUE_AUTO: 10008 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] 10009 10010 net = nic.get(constants.INIC_NETWORK, None) 10011 link = nic.get(constants.NIC_LINK, None) 10012 ip = nic.get(constants.INIC_IP, None) 10013 10014 if net is None or net.lower() == constants.VALUE_NONE: 10015 net = None 10016 else: 10017 if nic_mode_req is not None or link is not None: 10018 raise errors.OpPrereqError("If network is given, no mode or link" 10019 " is allowed to be passed", 10020 errors.ECODE_INVAL) 10021 10022 # ip validity checks 10023 if ip is None or ip.lower() == constants.VALUE_NONE: 10024 nic_ip = None 10025 elif ip.lower() == constants.VALUE_AUTO: 10026 if not op.name_check: 10027 raise errors.OpPrereqError("IP address set to auto but name checks" 10028 " have been skipped", 10029 errors.ECODE_INVAL) 10030 nic_ip = default_ip 10031 else: 10032 # We defer pool operations until later, so that the iallocator has 10033 # filled in the instance's node(s) dimara 10034 if ip.lower() == constants.NIC_IP_POOL: 10035 if net is None: 10036 raise errors.OpPrereqError("if ip=pool, parameter network" 10037 " must be passed too", 10038 errors.ECODE_INVAL) 10039 10040 elif not netutils.IPAddress.IsValid(ip): 10041 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 10042 errors.ECODE_INVAL) 10043 10044 nic_ip = ip 10045 10046 # TODO: check the ip address for uniqueness 10047 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: 10048 raise errors.OpPrereqError("Routed nic mode requires an ip address", 10049 errors.ECODE_INVAL) 10050 10051 # MAC address verification 10052 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO) 10053 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 10054 mac = utils.NormalizeAndValidateMac(mac) 10055 10056 try: 10057 # TODO: We need to factor this out 10058 cfg.ReserveMAC(mac, ec_id) 10059 except errors.ReservationError: 10060 raise errors.OpPrereqError("MAC address %s already in use" 10061 " in cluster" % mac, 10062 errors.ECODE_NOTUNIQUE) 10063 10064 # Build nic parameters 10065 nicparams = {} 10066 if nic_mode_req: 10067 nicparams[constants.NIC_MODE] = nic_mode 10068 if link: 10069 nicparams[constants.NIC_LINK] = link 10070 10071 check_params = cluster.SimpleFillNIC(nicparams) 10072 objects.NIC.CheckParameterSyntax(check_params) 10073 net_uuid = cfg.LookupNetwork(net) 10074 nics.append(objects.NIC(mac=mac, ip=nic_ip, 10075 network=net_uuid, nicparams=nicparams)) 10076 10077 return nics
10078
10079 10080 -def _ComputeDisks(op, default_vg):
10081 """Computes the instance disks. 10082 10083 @param op: The instance opcode 10084 @param default_vg: The default_vg to assume 10085 10086 @return: The computed disks 10087 10088 """ 10089 disks = [] 10090 for disk in op.disks: 10091 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR) 10092 if mode not in constants.DISK_ACCESS_SET: 10093 raise errors.OpPrereqError("Invalid disk access mode '%s'" % 10094 mode, errors.ECODE_INVAL) 10095 size = disk.get(constants.IDISK_SIZE, None) 10096 if size is None: 10097 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) 10098 try: 10099 size = int(size) 10100 except (TypeError, ValueError): 10101 raise errors.OpPrereqError("Invalid disk size '%s'" % size, 10102 errors.ECODE_INVAL) 10103 10104 ext_provider = disk.get(constants.IDISK_PROVIDER, None) 10105 if ext_provider and op.disk_template != constants.DT_EXT: 10106 raise errors.OpPrereqError("The '%s' option is only valid for the %s" 10107 " disk template, not %s" % 10108 (constants.IDISK_PROVIDER, constants.DT_EXT, 10109 op.disk_template), errors.ECODE_INVAL) 10110 10111 data_vg = disk.get(constants.IDISK_VG, default_vg) 10112 new_disk = { 10113 constants.IDISK_SIZE: size, 10114 constants.IDISK_MODE: mode, 10115 constants.IDISK_VG: data_vg, 10116 } 10117 10118 if constants.IDISK_METAVG in disk: 10119 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG] 10120 if constants.IDISK_ADOPT in disk: 10121 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT] 10122 10123 # For extstorage, demand the `provider' option and add any 10124 # additional parameters (ext-params) to the dict 10125 if op.disk_template == constants.DT_EXT: 10126 if ext_provider: 10127 new_disk[constants.IDISK_PROVIDER] = ext_provider 10128 for key in disk: 10129 if key not in constants.IDISK_PARAMS: 10130 new_disk[key] = disk[key] 10131 else: 10132 raise errors.OpPrereqError("Missing provider for template '%s'" % 10133 constants.DT_EXT, errors.ECODE_INVAL) 10134 10135 disks.append(new_disk) 10136 10137 return disks
10138
10139 10140 -def _ComputeFullBeParams(op, cluster):
10141 """Computes the full beparams. 10142 10143 @param op: The instance opcode 10144 @param cluster: The cluster config object 10145 10146 @return: The fully filled beparams 10147 10148 """ 10149 default_beparams = cluster.beparams[constants.PP_DEFAULT] 10150 for param, value in op.beparams.iteritems(): 10151 if value == constants.VALUE_AUTO: 10152 op.beparams[param] = default_beparams[param] 10153 objects.UpgradeBeParams(op.beparams) 10154 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES) 10155 return cluster.SimpleFillBE(op.beparams)
10156
10157 10158 -def _CheckOpportunisticLocking(op):
10159 """Generate error if opportunistic locking is not possible. 10160 10161 """ 10162 if op.opportunistic_locking and not op.iallocator: 10163 raise errors.OpPrereqError("Opportunistic locking is only available in" 10164 " combination with an instance allocator", 10165 errors.ECODE_INVAL)
10166
10167 10168 -class LUInstanceCreate(LogicalUnit):
10169 """Create an instance. 10170 10171 """ 10172 HPATH = "instance-add" 10173 HTYPE = constants.HTYPE_INSTANCE 10174 REQ_BGL = False 10175
10176 - def CheckArguments(self):
10177 """Check arguments. 10178 10179 """ 10180 # do not require name_check to ease forward/backward compatibility 10181 # for tools 10182 if self.op.no_install and self.op.start: 10183 self.LogInfo("No-installation mode selected, disabling startup") 10184 self.op.start = False 10185 # validate/normalize the instance name 10186 self.op.instance_name = \ 10187 netutils.Hostname.GetNormalizedName(self.op.instance_name) 10188 10189 if self.op.ip_check and not self.op.name_check: 10190 # TODO: make the ip check more flexible and not depend on the name check 10191 raise errors.OpPrereqError("Cannot do IP address check without a name" 10192 " check", errors.ECODE_INVAL) 10193 10194 # check nics' parameter names 10195 for nic in self.op.nics: 10196 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) 10197 10198 # check disks. parameter names and consistent adopt/no-adopt strategy 10199 has_adopt = has_no_adopt = False 10200 for disk in self.op.disks: 10201 if self.op.disk_template != constants.DT_EXT: 10202 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) 10203 if constants.IDISK_ADOPT in disk: 10204 has_adopt = True 10205 else: 10206 has_no_adopt = True 10207 if has_adopt and has_no_adopt: 10208 raise errors.OpPrereqError("Either all disks are adopted or none is", 10209 errors.ECODE_INVAL) 10210 if has_adopt: 10211 if self.op.disk_template not in constants.DTS_MAY_ADOPT: 10212 raise errors.OpPrereqError("Disk adoption is not supported for the" 10213 " '%s' disk template" % 10214 self.op.disk_template, 10215 errors.ECODE_INVAL) 10216 if self.op.iallocator is not None: 10217 raise errors.OpPrereqError("Disk adoption not allowed with an" 10218 " iallocator script", errors.ECODE_INVAL) 10219 if self.op.mode == constants.INSTANCE_IMPORT: 10220 raise errors.OpPrereqError("Disk adoption not allowed for" 10221 " instance import", errors.ECODE_INVAL) 10222 else: 10223 if self.op.disk_template in constants.DTS_MUST_ADOPT: 10224 raise errors.OpPrereqError("Disk template %s requires disk adoption," 10225 " but no 'adopt' parameter given" % 10226 self.op.disk_template, 10227 errors.ECODE_INVAL) 10228 10229 self.adopt_disks = has_adopt 10230 10231 # instance name verification 10232 if self.op.name_check: 10233 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name) 10234 self.op.instance_name = self.hostname1.name 10235 # used in CheckPrereq for ip ping check 10236 self.check_ip = self.hostname1.ip 10237 else: 10238 self.check_ip = None 10239 10240 # file storage checks 10241 if (self.op.file_driver and 10242 not self.op.file_driver in constants.FILE_DRIVER): 10243 raise errors.OpPrereqError("Invalid file driver name '%s'" % 10244 self.op.file_driver, errors.ECODE_INVAL) 10245 10246 if self.op.disk_template == constants.DT_FILE: 10247 opcodes.RequireFileStorage() 10248 elif self.op.disk_template == constants.DT_SHARED_FILE: 10249 opcodes.RequireSharedFileStorage() 10250 10251 ### Node/iallocator related checks 10252 _CheckIAllocatorOrNode(self, "iallocator", "pnode") 10253 10254 if self.op.pnode is not None: 10255 if self.op.disk_template in constants.DTS_INT_MIRROR: 10256 if self.op.snode is None: 10257 raise errors.OpPrereqError("The networked disk templates need" 10258 " a mirror node", errors.ECODE_INVAL) 10259 elif self.op.snode: 10260 self.LogWarning("Secondary node will be ignored on non-mirrored disk" 10261 " template") 10262 self.op.snode = None 10263 10264 _CheckOpportunisticLocking(self.op) 10265 10266 self._cds = _GetClusterDomainSecret() 10267 10268 if self.op.mode == 
constants.INSTANCE_IMPORT: 10269 # On import force_variant must be True, because if we forced it at 10270 # initial install, our only chance when importing it back is that it 10271 # works again! 10272 self.op.force_variant = True 10273 10274 if self.op.no_install: 10275 self.LogInfo("No-installation mode has no effect during import") 10276 10277 elif self.op.mode == constants.INSTANCE_CREATE: 10278 if self.op.os_type is None: 10279 raise errors.OpPrereqError("No guest OS specified", 10280 errors.ECODE_INVAL) 10281 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: 10282 raise errors.OpPrereqError("Guest OS '%s' is not allowed for" 10283 " installation" % self.op.os_type, 10284 errors.ECODE_STATE) 10285 if self.op.disk_template is None: 10286 raise errors.OpPrereqError("No disk template specified", 10287 errors.ECODE_INVAL) 10288 10289 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 10290 # Check handshake to ensure both clusters have the same domain secret 10291 src_handshake = self.op.source_handshake 10292 if not src_handshake: 10293 raise errors.OpPrereqError("Missing source handshake", 10294 errors.ECODE_INVAL) 10295 10296 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, 10297 src_handshake) 10298 if errmsg: 10299 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, 10300 errors.ECODE_INVAL) 10301 10302 # Load and check source CA 10303 self.source_x509_ca_pem = self.op.source_x509_ca 10304 if not self.source_x509_ca_pem: 10305 raise errors.OpPrereqError("Missing source X509 CA", 10306 errors.ECODE_INVAL) 10307 10308 try: 10309 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, 10310 self._cds) 10311 except OpenSSL.crypto.Error, err: 10312 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % 10313 (err, ), errors.ECODE_INVAL) 10314 10315 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 10316 if errcode is not None: 10317 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), 10318 errors.ECODE_INVAL) 10319 10320 self.source_x509_ca = cert 10321 10322 src_instance_name = self.op.source_instance_name 10323 if not src_instance_name: 10324 raise errors.OpPrereqError("Missing source instance name", 10325 errors.ECODE_INVAL) 10326 10327 self.source_instance_name = \ 10328 netutils.GetHostname(name=src_instance_name).name 10329 10330 else: 10331 raise errors.OpPrereqError("Invalid instance creation mode %r" % 10332 self.op.mode, errors.ECODE_INVAL)
10333
10334 - def ExpandNames(self):
10335 """ExpandNames for CreateInstance. 10336 10337 Figure out the right locks for instance creation. 10338 10339 """ 10340 self.needed_locks = {} 10341 10342 instance_name = self.op.instance_name 10343 # this is just a preventive check, but someone might still add this 10344 # instance in the meantime, and creation will fail at lock-add time 10345 if instance_name in self.cfg.GetInstanceList(): 10346 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 10347 instance_name, errors.ECODE_EXISTS) 10348 10349 self.add_locks[locking.LEVEL_INSTANCE] = instance_name 10350 10351 if self.op.iallocator: 10352 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by 10353 # specifying a group on instance creation and then selecting nodes from 10354 # that group 10355 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 10356 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET 10357 10358 if self.op.opportunistic_locking: 10359 self.opportunistic_locks[locking.LEVEL_NODE] = True 10360 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True 10361 else: 10362 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode) 10363 nodelist = [self.op.pnode] 10364 if self.op.snode is not None: 10365 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode) 10366 nodelist.append(self.op.snode) 10367 self.needed_locks[locking.LEVEL_NODE] = nodelist 10368 10369 # in case of import lock the source node too 10370 if self.op.mode == constants.INSTANCE_IMPORT: 10371 src_node = self.op.src_node 10372 src_path = self.op.src_path 10373 10374 if src_path is None: 10375 self.op.src_path = src_path = self.op.instance_name 10376 10377 if src_node is None: 10378 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 10379 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET 10380 self.op.src_node = None 10381 if os.path.isabs(src_path): 10382 raise errors.OpPrereqError("Importing an instance from a path" 10383 " requires a source node option", 10384 errors.ECODE_INVAL) 10385 else: 10386 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node) 10387 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 10388 self.needed_locks[locking.LEVEL_NODE].append(src_node) 10389 if not os.path.isabs(src_path): 10390 self.op.src_path = src_path = \ 10391 utils.PathJoin(pathutils.EXPORT_DIR, src_path) 10392 10393 self.needed_locks[locking.LEVEL_NODE_RES] = \ 10394 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10395
10396 - def _RunAllocator(self):
10397 """Run the allocator based on input opcode. 10398 10399 """ 10400 if self.op.opportunistic_locking: 10401 # Only consider nodes for which a lock is held 10402 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE)) 10403 else: 10404 node_whitelist = None 10405 10406 #TODO Export network to iallocator so that it chooses a pnode 10407 # in a nodegroup that has the desired network connected to 10408 req = _CreateInstanceAllocRequest(self.op, self.disks, 10409 self.nics, self.be_full, 10410 node_whitelist) 10411 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 10412 10413 ial.Run(self.op.iallocator) 10414 10415 if not ial.success: 10416 # When opportunistic locks are used only a temporary failure is generated 10417 if self.op.opportunistic_locking: 10418 ecode = errors.ECODE_TEMP_NORES 10419 else: 10420 ecode = errors.ECODE_NORES 10421 10422 raise errors.OpPrereqError("Can't compute nodes using" 10423 " iallocator '%s': %s" % 10424 (self.op.iallocator, ial.info), 10425 ecode) 10426 10427 self.op.pnode = ial.result[0] 10428 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 10429 self.op.instance_name, self.op.iallocator, 10430 utils.CommaJoin(ial.result)) 10431 10432 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator" 10433 10434 if req.RequiredNodes() == 2: 10435 self.op.snode = ial.result[1]
10436
10437 - def BuildHooksEnv(self):
10438 """Build hooks env. 10439 10440 This runs on master, primary and secondary nodes of the instance. 10441 10442 """ 10443 env = { 10444 "ADD_MODE": self.op.mode, 10445 } 10446 if self.op.mode == constants.INSTANCE_IMPORT: 10447 env["SRC_NODE"] = self.op.src_node 10448 env["SRC_PATH"] = self.op.src_path 10449 env["SRC_IMAGES"] = self.src_images 10450 10451 env.update(_BuildInstanceHookEnv( 10452 name=self.op.instance_name, 10453 primary_node=self.op.pnode, 10454 secondary_nodes=self.secondaries, 10455 status=self.op.start, 10456 os_type=self.op.os_type, 10457 minmem=self.be_full[constants.BE_MINMEM], 10458 maxmem=self.be_full[constants.BE_MAXMEM], 10459 vcpus=self.be_full[constants.BE_VCPUS], 10460 nics=_NICListToTuple(self, self.nics), 10461 disk_template=self.op.disk_template, 10462 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE]) 10463 for d in self.disks], 10464 bep=self.be_full, 10465 hvp=self.hv_full, 10466 hypervisor_name=self.op.hypervisor, 10467 tags=self.op.tags, 10468 )) 10469 10470 return env
10471
10472 - def BuildHooksNodes(self):
10473 """Build hooks nodes. 10474 10475 """ 10476 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries 10477 return nl, nl
10478
10479 - def _ReadExportInfo(self):
10480 """Reads the export information from disk. 10481 10482 It will override the opcode source node and path with the actual 10483 information, if these two were not specified before. 10484 10485 @return: the export information 10486 10487 """ 10488 assert self.op.mode == constants.INSTANCE_IMPORT 10489 10490 src_node = self.op.src_node 10491 src_path = self.op.src_path 10492 10493 if src_node is None: 10494 locked_nodes = self.owned_locks(locking.LEVEL_NODE) 10495 exp_list = self.rpc.call_export_list(locked_nodes) 10496 found = False 10497 for node in exp_list: 10498 if exp_list[node].fail_msg: 10499 continue 10500 if src_path in exp_list[node].payload: 10501 found = True 10502 self.op.src_node = src_node = node 10503 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR, 10504 src_path) 10505 break 10506 if not found: 10507 raise errors.OpPrereqError("No export found for relative path %s" % 10508 src_path, errors.ECODE_INVAL) 10509 10510 _CheckNodeOnline(self, src_node) 10511 result = self.rpc.call_export_info(src_node, src_path) 10512 result.Raise("No export or invalid export found in dir %s" % src_path) 10513 10514 export_info = objects.SerializableConfigParser.Loads(str(result.payload)) 10515 if not export_info.has_section(constants.INISECT_EXP): 10516 raise errors.ProgrammerError("Corrupted export config", 10517 errors.ECODE_ENVIRON) 10518 10519 ei_version = export_info.get(constants.INISECT_EXP, "version") 10520 if (int(ei_version) != constants.EXPORT_VERSION): 10521 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % 10522 (ei_version, constants.EXPORT_VERSION), 10523 errors.ECODE_ENVIRON) 10524 return export_info
10525
10526 - def _ReadExportParams(self, einfo):
10527 """Use export parameters as defaults. 10528 10529 In case the opcode doesn't specify (as in override) some instance 10530 parameters, then try to use them from the export information, if 10531 that declares them. 10532 10533 """ 10534 self.op.os_type = einfo.get(constants.INISECT_EXP, "os") 10535 10536 if self.op.disk_template is None: 10537 if einfo.has_option(constants.INISECT_INS, "disk_template"): 10538 self.op.disk_template = einfo.get(constants.INISECT_INS, 10539 "disk_template") 10540 if self.op.disk_template not in constants.DISK_TEMPLATES: 10541 raise errors.OpPrereqError("Disk template specified in configuration" 10542 " file is not one of the allowed values:" 10543 " %s" % 10544 " ".join(constants.DISK_TEMPLATES), 10545 errors.ECODE_INVAL) 10546 else: 10547 raise errors.OpPrereqError("No disk template specified and the export" 10548 " is missing the disk_template information", 10549 errors.ECODE_INVAL) 10550 10551 if not self.op.disks: 10552 disks = [] 10553 # TODO: import the disk iv_name too 10554 for idx in range(constants.MAX_DISKS): 10555 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx): 10556 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) 10557 disks.append({constants.IDISK_SIZE: disk_sz}) 10558 self.op.disks = disks 10559 if not disks and self.op.disk_template != constants.DT_DISKLESS: 10560 raise errors.OpPrereqError("No disk info specified and the export" 10561 " is missing the disk information", 10562 errors.ECODE_INVAL) 10563 10564 if not self.op.nics: 10565 nics = [] 10566 for idx in range(constants.MAX_NICS): 10567 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx): 10568 ndict = {} 10569 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: 10570 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name)) 10571 ndict[name] = v 10572 nics.append(ndict) 10573 else: 10574 break 10575 self.op.nics = nics 10576 10577 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"): 10578 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split() 10579 10580 if (self.op.hypervisor is None and 10581 einfo.has_option(constants.INISECT_INS, "hypervisor")): 10582 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") 10583 10584 if einfo.has_section(constants.INISECT_HYP): 10585 # use the export parameters but do not override the ones 10586 # specified by the user 10587 for name, value in einfo.items(constants.INISECT_HYP): 10588 if name not in self.op.hvparams: 10589 self.op.hvparams[name] = value 10590 10591 if einfo.has_section(constants.INISECT_BEP): 10592 # use the parameters, without overriding 10593 for name, value in einfo.items(constants.INISECT_BEP): 10594 if name not in self.op.beparams: 10595 self.op.beparams[name] = value 10596 # Compatibility for the old "memory" be param 10597 if name == constants.BE_MEMORY: 10598 if constants.BE_MAXMEM not in self.op.beparams: 10599 self.op.beparams[constants.BE_MAXMEM] = value 10600 if constants.BE_MINMEM not in self.op.beparams: 10601 self.op.beparams[constants.BE_MINMEM] = value 10602 else: 10603 # try to read the parameters old style, from the main section 10604 for name in constants.BES_PARAMETERS: 10605 if (name not in self.op.beparams and 10606 einfo.has_option(constants.INISECT_INS, name)): 10607 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) 10608 10609 if einfo.has_section(constants.INISECT_OSP): 10610 # use the parameters, without overriding 10611 for name, value in einfo.items(constants.INISECT_OSP): 10612 if 
name not in self.op.osparams: 10613 self.op.osparams[name] = value
10614
10615 - def _RevertToDefaults(self, cluster):
10616 """Revert the instance parameters to the default values. 10617 10618 """ 10619 # hvparams 10620 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) 10621 for name in self.op.hvparams.keys(): 10622 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: 10623 del self.op.hvparams[name] 10624 # beparams 10625 be_defs = cluster.SimpleFillBE({}) 10626 for name in self.op.beparams.keys(): 10627 if name in be_defs and be_defs[name] == self.op.beparams[name]: 10628 del self.op.beparams[name] 10629 # nic params 10630 nic_defs = cluster.SimpleFillNIC({}) 10631 for nic in self.op.nics: 10632 for name in constants.NICS_PARAMETERS: 10633 if name in nic and name in nic_defs and nic[name] == nic_defs[name]: 10634 del nic[name] 10635 # osparams 10636 os_defs = cluster.SimpleFillOS(self.op.os_type, {}) 10637 for name in self.op.osparams.keys(): 10638 if name in os_defs and os_defs[name] == self.op.osparams[name]: 10639 del self.op.osparams[name]
10640
10641 - def _CalculateFileStorageDir(self):
10642 """Calculate final instance file storage dir. 10643 10644 """ 10645 # file storage dir calculation/check 10646 self.instance_file_storage_dir = None 10647 if self.op.disk_template in constants.DTS_FILEBASED: 10648 # build the full file storage dir path 10649 joinargs = [] 10650 10651 if self.op.disk_template == constants.DT_SHARED_FILE: 10652 get_fsd_fn = self.cfg.GetSharedFileStorageDir 10653 else: 10654 get_fsd_fn = self.cfg.GetFileStorageDir 10655 10656 cfg_storagedir = get_fsd_fn() 10657 if not cfg_storagedir: 10658 raise errors.OpPrereqError("Cluster file storage dir not defined", 10659 errors.ECODE_STATE) 10660 joinargs.append(cfg_storagedir) 10661 10662 if self.op.file_storage_dir is not None: 10663 joinargs.append(self.op.file_storage_dir) 10664 10665 joinargs.append(self.op.instance_name) 10666 10667 # pylint: disable=W0142 10668 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10669
10670 - def CheckPrereq(self): # pylint: disable=R0914
10671 """Check prerequisites. 10672 10673 """ 10674 self._CalculateFileStorageDir() 10675 10676 if self.op.mode == constants.INSTANCE_IMPORT: 10677 export_info = self._ReadExportInfo() 10678 self._ReadExportParams(export_info) 10679 self._old_instance_name = export_info.get(constants.INISECT_INS, "name") 10680 else: 10681 self._old_instance_name = None 10682 10683 if (not self.cfg.GetVGName() and 10684 self.op.disk_template not in constants.DTS_NOT_LVM): 10685 raise errors.OpPrereqError("Cluster does not support lvm-based" 10686 " instances", errors.ECODE_STATE) 10687 10688 if (self.op.hypervisor is None or 10689 self.op.hypervisor == constants.VALUE_AUTO): 10690 self.op.hypervisor = self.cfg.GetHypervisorType() 10691 10692 cluster = self.cfg.GetClusterInfo() 10693 enabled_hvs = cluster.enabled_hypervisors 10694 if self.op.hypervisor not in enabled_hvs: 10695 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" 10696 " cluster (%s)" % 10697 (self.op.hypervisor, ",".join(enabled_hvs)), 10698 errors.ECODE_STATE) 10699 10700 # Check tag validity 10701 for tag in self.op.tags: 10702 objects.TaggableObject.ValidateTag(tag) 10703 10704 # check hypervisor parameter syntax (locally) 10705 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 10706 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, 10707 self.op.hvparams) 10708 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor) 10709 hv_type.CheckParameterSyntax(filled_hvp) 10710 self.hv_full = filled_hvp 10711 # check that we don't specify global parameters on an instance 10712 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor", 10713 "instance", "cluster") 10714 10715 # fill and remember the beparams dict 10716 self.be_full = _ComputeFullBeParams(self.op, cluster) 10717 10718 # build os parameters 10719 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) 10720 10721 # now that hvp/bep are in final format, let's reset to defaults, 10722 # if told to do so 10723 if self.op.identify_defaults: 10724 self._RevertToDefaults(cluster) 10725 10726 # NIC buildup 10727 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg, 10728 self.proc.GetECId()) 10729 10730 # disk checks/pre-build 10731 default_vg = self.cfg.GetVGName() 10732 self.disks = _ComputeDisks(self.op, default_vg) 10733 10734 if self.op.mode == constants.INSTANCE_IMPORT: 10735 disk_images = [] 10736 for idx in range(len(self.disks)): 10737 option = "disk%d_dump" % idx 10738 if export_info.has_option(constants.INISECT_INS, option): 10739 # FIXME: are the old os-es, disk sizes, etc. useful? 
10740 export_name = export_info.get(constants.INISECT_INS, option) 10741 image = utils.PathJoin(self.op.src_path, export_name) 10742 disk_images.append(image) 10743 else: 10744 disk_images.append(False) 10745 10746 self.src_images = disk_images 10747 10748 if self.op.instance_name == self._old_instance_name: 10749 for idx, nic in enumerate(self.nics): 10750 if nic.mac == constants.VALUE_AUTO: 10751 nic_mac_ini = "nic%d_mac" % idx 10752 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) 10753 10754 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT 10755 10756 # ip ping checks (we use the same ip that was resolved in ExpandNames) 10757 if self.op.ip_check: 10758 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): 10759 raise errors.OpPrereqError("IP %s of instance %s already in use" % 10760 (self.check_ip, self.op.instance_name), 10761 errors.ECODE_NOTUNIQUE) 10762 10763 #### mac address generation 10764 # By generating here the mac address both the allocator and the hooks get 10765 # the real final mac address rather than the 'auto' or 'generate' value. 10766 # There is a race condition between the generation and the instance object 10767 # creation, which means that we know the mac is valid now, but we're not 10768 # sure it will be when we actually add the instance. If things go bad 10769 # adding the instance will abort because of a duplicate mac, and the 10770 # creation job will fail. 10771 for nic in self.nics: 10772 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 10773 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId()) 10774 10775 #### allocator run 10776 10777 if self.op.iallocator is not None: 10778 self._RunAllocator() 10779 10780 # Release all unneeded node locks 10781 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node]) 10782 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks) 10783 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks) 10784 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC) 10785 10786 assert (self.owned_locks(locking.LEVEL_NODE) == 10787 self.owned_locks(locking.LEVEL_NODE_RES)), \ 10788 "Node locks differ from node resource locks" 10789 10790 #### node related checks 10791 10792 # check primary node 10793 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) 10794 assert self.pnode is not None, \ 10795 "Cannot retrieve locked node %s" % self.op.pnode 10796 if pnode.offline: 10797 raise errors.OpPrereqError("Cannot use offline primary node '%s'" % 10798 pnode.name, errors.ECODE_STATE) 10799 if pnode.drained: 10800 raise errors.OpPrereqError("Cannot use drained primary node '%s'" % 10801 pnode.name, errors.ECODE_STATE) 10802 if not pnode.vm_capable: 10803 raise errors.OpPrereqError("Cannot use non-vm_capable primary node" 10804 " '%s'" % pnode.name, errors.ECODE_STATE) 10805 10806 self.secondaries = [] 10807 10808 # Fill in any IPs from IP pools. This must happen here, because we need to 10809 # know the nic's primary node, as specified by the iallocator 10810 for idx, nic in enumerate(self.nics): 10811 net_uuid = nic.network 10812 if net_uuid is not None: 10813 nobj = self.cfg.GetNetwork(net_uuid) 10814 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name) 10815 if netparams is None: 10816 raise errors.OpPrereqError("No netparams found for network" 10817 " %s. 
Probably not connected to" 10818 " the nodegroup of node %s" % 10819 (nobj.name, self.pnode.name), 10820 errors.ECODE_INVAL) 10821 self.LogInfo("NIC/%d inherits netparams %s" % 10822 (idx, netparams.values())) 10823 nic.nicparams = dict(netparams) 10824 if nic.ip is not None: 10825 if nic.ip.lower() == constants.NIC_IP_POOL: 10826 try: 10827 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId()) 10828 except errors.ReservationError: 10829 raise errors.OpPrereqError("Unable to get a free IP for NIC %d" 10830 " from the address pool" % idx, 10831 errors.ECODE_STATE) 10832 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name) 10833 else: 10834 try: 10835 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId()) 10836 except errors.ReservationError: 10837 raise errors.OpPrereqError("IP address %s already in use" 10838 " or does not belong to network %s" % 10839 (nic.ip, nobj.name), 10840 errors.ECODE_NOTUNIQUE) 10841 10842 # net is None, ip None or given 10843 elif self.op.conflicts_check: 10844 _CheckForConflictingIp(self, nic.ip, self.pnode.name) 10845 10846 # mirror node verification 10847 if self.op.disk_template in constants.DTS_INT_MIRROR: 10848 if self.op.snode == pnode.name: 10849 raise errors.OpPrereqError("The secondary node cannot be the" 10850 " primary node", errors.ECODE_INVAL) 10851 _CheckNodeOnline(self, self.op.snode) 10852 _CheckNodeNotDrained(self, self.op.snode) 10853 _CheckNodeVmCapable(self, self.op.snode) 10854 self.secondaries.append(self.op.snode) 10855 10856 snode = self.cfg.GetNodeInfo(self.op.snode) 10857 if pnode.group != snode.group: 10858 self.LogWarning("The primary and secondary nodes are in two" 10859 " different node groups; the disk parameters" 10860 " from the first disk's node group will be" 10861 " used") 10862 10863 if not self.op.disk_template in constants.DTS_EXCL_STORAGE: 10864 nodes = [pnode] 10865 if self.op.disk_template in constants.DTS_INT_MIRROR: 10866 nodes.append(snode) 10867 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n) 10868 if compat.any(map(has_es, nodes)): 10869 raise errors.OpPrereqError("Disk template %s not supported with" 10870 " exclusive storage" % self.op.disk_template, 10871 errors.ECODE_STATE) 10872 10873 nodenames = [pnode.name] + self.secondaries 10874 10875 if not self.adopt_disks: 10876 if self.op.disk_template == constants.DT_RBD: 10877 # _CheckRADOSFreeSpace() is just a placeholder. 10878 # Any function that checks prerequisites can be placed here. 10879 # Check if there is enough space on the RADOS cluster.
10880 _CheckRADOSFreeSpace() 10881 elif self.op.disk_template == constants.DT_EXT: 10882 # FIXME: Function that checks prereqs if needed 10883 pass 10884 else: 10885 # Check lv size requirements, if not adopting 10886 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks) 10887 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes) 10888 10889 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data 10890 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG], 10891 disk[constants.IDISK_ADOPT]) 10892 for disk in self.disks]) 10893 if len(all_lvs) != len(self.disks): 10894 raise errors.OpPrereqError("Duplicate volume names given for adoption", 10895 errors.ECODE_INVAL) 10896 for lv_name in all_lvs: 10897 try: 10898 # FIXME: lv_name here is "vg/lv" need to ensure that other calls 10899 # to ReserveLV uses the same syntax 10900 self.cfg.ReserveLV(lv_name, self.proc.GetECId()) 10901 except errors.ReservationError: 10902 raise errors.OpPrereqError("LV named %s used by another instance" % 10903 lv_name, errors.ECODE_NOTUNIQUE) 10904 10905 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name] 10906 vg_names.Raise("Cannot get VG information from node %s" % pnode.name) 10907 10908 node_lvs = self.rpc.call_lv_list([pnode.name], 10909 vg_names.payload.keys())[pnode.name] 10910 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) 10911 node_lvs = node_lvs.payload 10912 10913 delta = all_lvs.difference(node_lvs.keys()) 10914 if delta: 10915 raise errors.OpPrereqError("Missing logical volume(s): %s" % 10916 utils.CommaJoin(delta), 10917 errors.ECODE_INVAL) 10918 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] 10919 if online_lvs: 10920 raise errors.OpPrereqError("Online logical volumes found, cannot" 10921 " adopt: %s" % utils.CommaJoin(online_lvs), 10922 errors.ECODE_STATE) 10923 # update the size of disk based on what is found 10924 for dsk in self.disks: 10925 dsk[constants.IDISK_SIZE] = \ 10926 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG], 10927 dsk[constants.IDISK_ADOPT])][0])) 10928 10929 elif self.op.disk_template == constants.DT_BLOCK: 10930 # Normalize and de-duplicate device paths 10931 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT]) 10932 for disk in self.disks]) 10933 if len(all_disks) != len(self.disks): 10934 raise errors.OpPrereqError("Duplicate disk names given for adoption", 10935 errors.ECODE_INVAL) 10936 baddisks = [d for d in all_disks 10937 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)] 10938 if baddisks: 10939 raise errors.OpPrereqError("Device node(s) %s lie outside %s and" 10940 " cannot be adopted" % 10941 (utils.CommaJoin(baddisks), 10942 constants.ADOPTABLE_BLOCKDEV_ROOT), 10943 errors.ECODE_INVAL) 10944 10945 node_disks = self.rpc.call_bdev_sizes([pnode.name], 10946 list(all_disks))[pnode.name] 10947 node_disks.Raise("Cannot get block device information from node %s" % 10948 pnode.name) 10949 node_disks = node_disks.payload 10950 delta = all_disks.difference(node_disks.keys()) 10951 if delta: 10952 raise errors.OpPrereqError("Missing block device(s): %s" % 10953 utils.CommaJoin(delta), 10954 errors.ECODE_INVAL) 10955 for dsk in self.disks: 10956 dsk[constants.IDISK_SIZE] = \ 10957 int(float(node_disks[dsk[constants.IDISK_ADOPT]])) 10958 10959 # Verify instance specs 10960 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None) 10961 ispec = { 10962 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None), 10963 constants.ISPEC_CPU_COUNT: 
self.be_full.get(constants.BE_VCPUS, None), 10964 constants.ISPEC_DISK_COUNT: len(self.disks), 10965 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] 10966 for disk in self.disks], 10967 constants.ISPEC_NIC_COUNT: len(self.nics), 10968 constants.ISPEC_SPINDLE_USE: spindle_use, 10969 } 10970 10971 group_info = self.cfg.GetNodeGroup(pnode.group) 10972 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info) 10973 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec, 10974 self.op.disk_template) 10975 if not self.op.ignore_ipolicy and res: 10976 msg = ("Instance allocation to group %s (%s) violates policy: %s" % 10977 (pnode.group, group_info.name, utils.CommaJoin(res))) 10978 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 10979 10980 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) 10981 10982 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) 10983 # check OS parameters (remotely) 10984 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full) 10985 10986 _CheckNicsBridgesExist(self, self.nics, self.pnode.name) 10987 10988 #TODO: _CheckExtParams (remotely) 10989 # Check parameters for extstorage 10990 10991 # memory check on primary node 10992 #TODO(dynmem): use MINMEM for checking 10993 if self.op.start: 10994 _CheckNodeFreeMemory(self, self.pnode.name, 10995 "creating instance %s" % self.op.instance_name, 10996 self.be_full[constants.BE_MAXMEM], 10997 self.op.hypervisor) 10998 10999 self.dry_run_result = list(nodenames)
11000
11001 - def Exec(self, feedback_fn):
11002 """Create and add the instance to the cluster. 11003 11004 """ 11005 instance = self.op.instance_name 11006 pnode_name = self.pnode.name 11007 11008 assert not (self.owned_locks(locking.LEVEL_NODE_RES) - 11009 self.owned_locks(locking.LEVEL_NODE)), \ 11010 "Node locks differ from node resource locks" 11011 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC) 11012 11013 ht_kind = self.op.hypervisor 11014 if ht_kind in constants.HTS_REQ_PORT: 11015 network_port = self.cfg.AllocatePort() 11016 else: 11017 network_port = None 11018 11019 # This is ugly but we got a chicken-egg problem here 11020 # We can only take the group disk parameters, as the instance 11021 # has no disks yet (we are generating them right here). 11022 node = self.cfg.GetNodeInfo(pnode_name) 11023 nodegroup = self.cfg.GetNodeGroup(node.group) 11024 disks = _GenerateDiskTemplate(self, 11025 self.op.disk_template, 11026 instance, pnode_name, 11027 self.secondaries, 11028 self.disks, 11029 self.instance_file_storage_dir, 11030 self.op.file_driver, 11031 0, 11032 feedback_fn, 11033 self.cfg.GetGroupDiskParams(nodegroup)) 11034 11035 iobj = objects.Instance(name=instance, os=self.op.os_type, 11036 primary_node=pnode_name, 11037 nics=self.nics, disks=disks, 11038 disk_template=self.op.disk_template, 11039 admin_state=constants.ADMINST_DOWN, 11040 network_port=network_port, 11041 beparams=self.op.beparams, 11042 hvparams=self.op.hvparams, 11043 hypervisor=self.op.hypervisor, 11044 osparams=self.op.osparams, 11045 ) 11046 11047 if self.op.tags: 11048 for tag in self.op.tags: 11049 iobj.AddTag(tag) 11050 11051 if self.adopt_disks: 11052 if self.op.disk_template == constants.DT_PLAIN: 11053 # rename LVs to the newly-generated names; we need to construct 11054 # 'fake' LV disks with the old data, plus the new unique_id 11055 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] 11056 rename_to = [] 11057 for t_dsk, a_dsk in zip(tmp_disks, self.disks): 11058 rename_to.append(t_dsk.logical_id) 11059 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT]) 11060 self.cfg.SetDiskID(t_dsk, pnode_name) 11061 result = self.rpc.call_blockdev_rename(pnode_name, 11062 zip(tmp_disks, rename_to)) 11063 result.Raise("Failed to rename adoped LVs") 11064 else: 11065 feedback_fn("* creating instance disks...") 11066 try: 11067 _CreateDisks(self, iobj) 11068 except errors.OpExecError: 11069 self.LogWarning("Device creation failed") 11070 self.cfg.ReleaseDRBDMinors(instance) 11071 raise 11072 11073 feedback_fn("adding instance %s to cluster config" % instance) 11074 11075 self.cfg.AddInstance(iobj, self.proc.GetECId()) 11076 11077 # Declare that we don't want to remove the instance lock anymore, as we've 11078 # added the instance to the config 11079 del self.remove_locks[locking.LEVEL_INSTANCE] 11080 11081 if self.op.mode == constants.INSTANCE_IMPORT: 11082 # Release unused nodes 11083 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node]) 11084 else: 11085 # Release all nodes 11086 _ReleaseLocks(self, locking.LEVEL_NODE) 11087 11088 disk_abort = False 11089 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks: 11090 feedback_fn("* wiping instance disks...") 11091 try: 11092 _WipeDisks(self, iobj) 11093 except errors.OpExecError, err: 11094 logging.exception("Wiping disks failed") 11095 self.LogWarning("Wiping instance disks failed (%s)", err) 11096 disk_abort = True 11097 11098 if disk_abort: 11099 # Something is already wrong with the disks, don't do anything else 11100 pass 11101 elif 
self.op.wait_for_sync: 11102 disk_abort = not _WaitForSync(self, iobj) 11103 elif iobj.disk_template in constants.DTS_INT_MIRROR: 11104 # make sure the disks are not degraded (still sync-ing is ok) 11105 feedback_fn("* checking mirrors status") 11106 disk_abort = not _WaitForSync(self, iobj, oneshot=True) 11107 else: 11108 disk_abort = False 11109 11110 if disk_abort: 11111 _RemoveDisks(self, iobj) 11112 self.cfg.RemoveInstance(iobj.name) 11113 # Make sure the instance lock gets removed 11114 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name 11115 raise errors.OpExecError("There are some degraded disks for" 11116 " this instance") 11117 11118 # Release all node resource locks 11119 _ReleaseLocks(self, locking.LEVEL_NODE_RES) 11120 11121 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: 11122 # we need to set the disks ID to the primary node, since the 11123 # preceding code might or might have not done it, depending on 11124 # disk template and other options 11125 for disk in iobj.disks: 11126 self.cfg.SetDiskID(disk, pnode_name) 11127 if self.op.mode == constants.INSTANCE_CREATE: 11128 if not self.op.no_install: 11129 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and 11130 not self.op.wait_for_sync) 11131 if pause_sync: 11132 feedback_fn("* pausing disk sync to install instance OS") 11133 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name, 11134 (iobj.disks, 11135 iobj), True) 11136 for idx, success in enumerate(result.payload): 11137 if not success: 11138 logging.warn("pause-sync of instance %s for disk %d failed", 11139 instance, idx) 11140 11141 feedback_fn("* running the instance OS create scripts...") 11142 # FIXME: pass debug option from opcode to backend 11143 os_add_result = \ 11144 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False, 11145 self.op.debug_level) 11146 if pause_sync: 11147 feedback_fn("* resuming disk sync") 11148 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name, 11149 (iobj.disks, 11150 iobj), False) 11151 for idx, success in enumerate(result.payload): 11152 if not success: 11153 logging.warn("resume-sync of instance %s for disk %d failed", 11154 instance, idx) 11155 11156 os_add_result.Raise("Could not add os for instance %s" 11157 " on node %s" % (instance, pnode_name)) 11158 11159 else: 11160 if self.op.mode == constants.INSTANCE_IMPORT: 11161 feedback_fn("* running the instance OS import scripts...") 11162 11163 transfers = [] 11164 11165 for idx, image in enumerate(self.src_images): 11166 if not image: 11167 continue 11168 11169 # FIXME: pass debug option from opcode to backend 11170 dt = masterd.instance.DiskTransfer("disk/%s" % idx, 11171 constants.IEIO_FILE, (image, ), 11172 constants.IEIO_SCRIPT, 11173 (iobj.disks[idx], idx), 11174 None) 11175 transfers.append(dt) 11176 11177 import_result = \ 11178 masterd.instance.TransferInstanceData(self, feedback_fn, 11179 self.op.src_node, pnode_name, 11180 self.pnode.secondary_ip, 11181 iobj, transfers) 11182 if not compat.all(import_result): 11183 self.LogWarning("Some disks for instance %s on node %s were not" 11184 " imported successfully" % (instance, pnode_name)) 11185 11186 rename_from = self._old_instance_name 11187 11188 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 11189 feedback_fn("* preparing remote import...") 11190 # The source cluster will stop the instance before attempting to make 11191 # a connection. 
In some cases stopping an instance can take a long 11192 # time, hence the shutdown timeout is added to the connection 11193 # timeout. 11194 connect_timeout = (constants.RIE_CONNECT_TIMEOUT + 11195 self.op.source_shutdown_timeout) 11196 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 11197 11198 assert iobj.primary_node == self.pnode.name 11199 disk_results = \ 11200 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode, 11201 self.source_x509_ca, 11202 self._cds, timeouts) 11203 if not compat.all(disk_results): 11204 # TODO: Should the instance still be started, even if some disks 11205 # failed to import (valid for local imports, too)? 11206 self.LogWarning("Some disks for instance %s on node %s were not" 11207 " imported successfully" % (instance, pnode_name)) 11208 11209 rename_from = self.source_instance_name 11210 11211 else: 11212 # also checked in the prereq part 11213 raise errors.ProgrammerError("Unknown OS initialization mode '%s'" 11214 % self.op.mode) 11215 11216 # Run rename script on newly imported instance 11217 assert iobj.name == instance 11218 feedback_fn("Running rename script for %s" % instance) 11219 result = self.rpc.call_instance_run_rename(pnode_name, iobj, 11220 rename_from, 11221 self.op.debug_level) 11222 if result.fail_msg: 11223 self.LogWarning("Failed to run rename script for %s on node" 11224 " %s: %s" % (instance, pnode_name, result.fail_msg)) 11225 11226 assert not self.owned_locks(locking.LEVEL_NODE_RES) 11227 11228 if self.op.start: 11229 iobj.admin_state = constants.ADMINST_UP 11230 self.cfg.Update(iobj, feedback_fn) 11231 logging.info("Starting instance %s on node %s", instance, pnode_name) 11232 feedback_fn("* starting instance...") 11233 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None), 11234 False) 11235 result.Raise("Could not start instance") 11236 11237 return list(iobj.all_nodes)
11238
11239 11240 -class LUInstanceMultiAlloc(NoHooksLU):
11241 """Allocates multiple instances at the same time. 11242 11243 """ 11244 REQ_BGL = False 11245
11246 - def CheckArguments(self):
11247 """Check arguments. 11248 11249 """ 11250 nodes = [] 11251 for inst in self.op.instances: 11252 if inst.iallocator is not None: 11253 raise errors.OpPrereqError("iallocator are not allowed to be set on" 11254 " instance objects", errors.ECODE_INVAL) 11255 nodes.append(bool(inst.pnode)) 11256 if inst.disk_template in constants.DTS_INT_MIRROR: 11257 nodes.append(bool(inst.snode)) 11258 11259 has_nodes = compat.any(nodes) 11260 if compat.all(nodes) ^ has_nodes: 11261 raise errors.OpPrereqError("There are instance objects providing" 11262 " pnode/snode while others do not", 11263 errors.ECODE_INVAL) 11264 11265 if not has_nodes and self.op.iallocator is None: 11266 default_iallocator = self.cfg.GetDefaultIAllocator() 11267 if default_iallocator: 11268 self.op.iallocator = default_iallocator 11269 else: 11270 raise errors.OpPrereqError("No iallocator or nodes on the instances" 11271 " given and no cluster-wide default" 11272 " iallocator found; please specify either" 11273 " an iallocator or nodes on the instances" 11274 " or set a cluster-wide default iallocator", 11275 errors.ECODE_INVAL) 11276 11277 _CheckOpportunisticLocking(self.op) 11278 11279 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances]) 11280 if dups: 11281 raise errors.OpPrereqError("There are duplicate instance names: %s" % 11282 utils.CommaJoin(dups), errors.ECODE_INVAL)
11283
11284 - def ExpandNames(self):
11285 """Calculate the locks. 11286 11287 """ 11288 self.share_locks = _ShareAll() 11289 self.needed_locks = { 11290 # iallocator will select nodes and even if no iallocator is used, 11291 # collisions with LUInstanceCreate should be avoided 11292 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 11293 } 11294 11295 if self.op.iallocator: 11296 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 11297 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET 11298 11299 if self.op.opportunistic_locking: 11300 self.opportunistic_locks[locking.LEVEL_NODE] = True 11301 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True 11302 else: 11303 nodeslist = [] 11304 for inst in self.op.instances: 11305 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode) 11306 nodeslist.append(inst.pnode) 11307 if inst.snode is not None: 11308 inst.snode = _ExpandNodeName(self.cfg, inst.snode) 11309 nodeslist.append(inst.snode) 11310 11311 self.needed_locks[locking.LEVEL_NODE] = nodeslist 11312 # Lock resources of instance's primary and secondary nodes (copy to 11313 # prevent accidential modification) 11314 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11315
11316 - def CheckPrereq(self):
11317 """Check prerequisite. 11318 11319 """ 11320 if self.op.iallocator: 11321 cluster = self.cfg.GetClusterInfo() 11322 default_vg = self.cfg.GetVGName() 11323 ec_id = self.proc.GetECId() 11324 11325 if self.op.opportunistic_locking: 11326 # Only consider nodes for which a lock is held 11327 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE)) 11328 else: 11329 node_whitelist = None 11330 11331 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg), 11332 _ComputeNics(op, cluster, None, 11333 self.cfg, ec_id), 11334 _ComputeFullBeParams(op, cluster), 11335 node_whitelist) 11336 for op in self.op.instances] 11337 11338 req = iallocator.IAReqMultiInstanceAlloc(instances=insts) 11339 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 11340 11341 ial.Run(self.op.iallocator) 11342 11343 if not ial.success: 11344 raise errors.OpPrereqError("Can't compute nodes using" 11345 " iallocator '%s': %s" % 11346 (self.op.iallocator, ial.info), 11347 errors.ECODE_NORES) 11348 11349 self.ia_result = ial.result 11350 11351 if self.op.dry_run: 11352 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), { 11353 constants.JOB_IDS_KEY: [], 11354 })
11355
11356 - def _ConstructPartialResult(self):
11357 """Contructs the partial result. 11358 11359 """ 11360 if self.op.iallocator: 11361 (allocatable, failed_insts) = self.ia_result 11362 allocatable_insts = map(compat.fst, allocatable) 11363 else: 11364 allocatable_insts = [op.instance_name for op in self.op.instances] 11365 failed_insts = [] 11366 11367 return { 11368 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: allocatable_insts, 11369 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed_insts, 11370 }
11371
11372 - def Exec(self, feedback_fn):
11373 """Executes the opcode. 11374 11375 """ 11376 jobs = [] 11377 if self.op.iallocator: 11378 op2inst = dict((op.instance_name, op) for op in self.op.instances) 11379 (allocatable, failed) = self.ia_result 11380 11381 for (name, nodes) in allocatable: 11382 op = op2inst.pop(name) 11383 11384 if len(nodes) > 1: 11385 (op.pnode, op.snode) = nodes 11386 else: 11387 (op.pnode,) = nodes 11388 11389 jobs.append([op]) 11390 11391 missing = set(op2inst.keys()) - set(failed) 11392 assert not missing, \ 11393 "Iallocator did return incomplete result: %s" % \ 11394 utils.CommaJoin(missing) 11395 else: 11396 jobs.extend([op] for op in self.op.instances) 11397 11398 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11399
11400 11401 -def _CheckRADOSFreeSpace():
11402 """Compute disk size requirements inside the RADOS cluster. 11403 11404 """ 11405 # For the RADOS cluster we assume there is always enough space. 11406 pass
11407
11408 11409 -class LUInstanceConsole(NoHooksLU):
11410 """Connect to an instance's console. 11411 11412 This is somewhat special in that it returns the command line that 11413 you need to run on the master node in order to connect to the 11414 console. 11415 11416 """ 11417 REQ_BGL = False 11418
11419 - def ExpandNames(self):
11420 self.share_locks = _ShareAll() 11421 self._ExpandAndLockInstance()
11422
11423 - def CheckPrereq(self):
11424 """Check prerequisites. 11425 11426 This checks that the instance is in the cluster. 11427 11428 """ 11429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 11430 assert self.instance is not None, \ 11431 "Cannot retrieve locked instance %s" % self.op.instance_name 11432 _CheckNodeOnline(self, self.instance.primary_node)
11433
11434 - def Exec(self, feedback_fn):
11435 """Connect to the console of an instance 11436 11437 """ 11438 instance = self.instance 11439 node = instance.primary_node 11440 11441 node_insts = self.rpc.call_instance_list([node], 11442 [instance.hypervisor])[node] 11443 node_insts.Raise("Can't get node information from %s" % node) 11444 11445 if instance.name not in node_insts.payload: 11446 if instance.admin_state == constants.ADMINST_UP: 11447 state = constants.INSTST_ERRORDOWN 11448 elif instance.admin_state == constants.ADMINST_DOWN: 11449 state = constants.INSTST_ADMINDOWN 11450 else: 11451 state = constants.INSTST_ADMINOFFLINE 11452 raise errors.OpExecError("Instance %s is not running (state %s)" % 11453 (instance.name, state)) 11454 11455 logging.debug("Connecting to console of %s on %s", instance.name, node) 11456 11457 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11458
11459 11460 -def _GetInstanceConsole(cluster, instance):
11461 """Returns console information for an instance. 11462 11463 @type cluster: L{objects.Cluster} 11464 @type instance: L{objects.Instance} 11465 @rtype: dict 11466 11467 """ 11468 hyper = hypervisor.GetHypervisorClass(instance.hypervisor) 11469 # beparams and hvparams are passed separately, to avoid editing the 11470 # instance and then saving the defaults in the instance itself. 11471 hvparams = cluster.FillHV(instance) 11472 beparams = cluster.FillBE(instance) 11473 console = hyper.GetInstanceConsole(instance, hvparams, beparams) 11474 11475 assert console.instance == instance.name 11476 assert console.Validate() 11477 11478 return console.ToDict()
11479
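# --- Illustrative sketch (editorial, not part of the module) ----------------
# The contract used by _GetInstanceConsole() above: the hypervisor class hands
# back a console object that must name the right instance and pass Validate()
# before it is serialized with ToDict().  The tiny container below is only a
# stand-in for objects.InstanceConsole; its fields and values are hypothetical
# examples, not the real attribute set.

class _SketchConsole(object):
  def __init__(self, instance, kind, command):
    self.instance = instance
    self.kind = kind          # e.g. "ssh" or "vnc" (illustrative only)
    self.command = command

  def Validate(self):
    return bool(self.instance and self.kind)

  def ToDict(self):
    return {
      "instance": self.instance,
      "kind": self.kind,
      "command": self.command,
      }

_console = _SketchConsole("inst1.example.com", "ssh",
                          ["ssh", "-t", "node1", "console-wrapper"])
assert _console.instance == "inst1.example.com"
assert _console.Validate()
_console_dict = _console.ToDict()
# ----------------------------------------------------------------------------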
11480 11481 -class LUInstanceReplaceDisks(LogicalUnit):
11482 """Replace the disks of an instance. 11483 11484 """ 11485 HPATH = "mirrors-replace" 11486 HTYPE = constants.HTYPE_INSTANCE 11487 REQ_BGL = False 11488
11489 - def CheckArguments(self):
11490 """Check arguments. 11491 11492 """ 11493 remote_node = self.op.remote_node 11494 ialloc = self.op.iallocator 11495 if self.op.mode == constants.REPLACE_DISK_CHG: 11496 if remote_node is None and ialloc is None: 11497 raise errors.OpPrereqError("When changing the secondary either an" 11498 " iallocator script must be used or the" 11499 " new node given", errors.ECODE_INVAL) 11500 else: 11501 _CheckIAllocatorOrNode(self, "iallocator", "remote_node") 11502 11503 elif remote_node is not None or ialloc is not None: 11504 # Not replacing the secondary 11505 raise errors.OpPrereqError("The iallocator and new node options can" 11506 " only be used when changing the" 11507 " secondary node", errors.ECODE_INVAL)
11508
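# --- Illustrative sketch (editorial, not part of the module) ----------------
# The rule enforced by CheckArguments() above, restated standalone: when
# changing the secondary (REPLACE_DISK_CHG) exactly one of "iallocator" or
# "remote_node" must be given (the _CheckIAllocatorOrNode helper rejects
# giving both); for the other modes neither may be used.  Plain strings
# replace the constants module.

def _SketchCheckReplaceArgs(mode, remote_node, iallocator):
  if mode == "replace_new_secondary":     # stands in for REPLACE_DISK_CHG
    if remote_node is None and iallocator is None:
      raise ValueError("need an iallocator script or a new node")
    if remote_node is not None and iallocator is not None:
      raise ValueError("give either an iallocator or a node, not both")
  elif remote_node is not None or iallocator is not None:
    raise ValueError("iallocator/new node only apply when changing"
                     " the secondary")

_SketchCheckReplaceArgs("replace_on_primary", None, None)          # ok
_SketchCheckReplaceArgs("replace_new_secondary", "node4", None)    # ok
# ----------------------------------------------------------------------------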
11509 - def ExpandNames(self):
11510 self._ExpandAndLockInstance() 11511 11512 assert locking.LEVEL_NODE not in self.needed_locks 11513 assert locking.LEVEL_NODE_RES not in self.needed_locks 11514 assert locking.LEVEL_NODEGROUP not in self.needed_locks 11515 11516 assert self.op.iallocator is None or self.op.remote_node is None, \ 11517 "Conflicting options" 11518 11519 if self.op.remote_node is not None: 11520 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 11521 11522 # Warning: do not remove the locking of the new secondary here 11523 # unless DRBD8.AddChildren is changed to work in parallel; 11524 # currently it doesn't since parallel invocations of 11525 # FindUnusedMinor will conflict 11526 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node] 11527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 11528 else: 11529 self.needed_locks[locking.LEVEL_NODE] = [] 11530 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 11531 11532 if self.op.iallocator is not None: 11533 # iallocator will select a new node in the same group 11534 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 11535 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET 11536 11537 self.needed_locks[locking.LEVEL_NODE_RES] = [] 11538 11539 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, 11540 self.op.iallocator, self.op.remote_node, 11541 self.op.disks, self.op.early_release, 11542 self.op.ignore_ipolicy) 11543 11544 self.tasklets = [self.replacer]
11545
11546 - def DeclareLocks(self, level):
11547 if level == locking.LEVEL_NODEGROUP: 11548 assert self.op.remote_node is None 11549 assert self.op.iallocator is not None 11550 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 11551 11552 self.share_locks[locking.LEVEL_NODEGROUP] = 1 11553 # Lock all groups used by instance optimistically; this requires going 11554 # via the node before it's locked, requiring verification later on 11555 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 11556 self.cfg.GetInstanceNodeGroups(self.op.instance_name) 11557 11558 elif level == locking.LEVEL_NODE: 11559 if self.op.iallocator is not None: 11560 assert self.op.remote_node is None 11561 assert not self.needed_locks[locking.LEVEL_NODE] 11562 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC) 11563 11564 # Lock member nodes of all locked groups 11565 self.needed_locks[locking.LEVEL_NODE] = \ 11566 [node_name 11567 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 11568 for node_name in self.cfg.GetNodeGroup(group_uuid).members] 11569 else: 11570 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC) 11571 11572 self._LockInstancesNodes() 11573 11574 elif level == locking.LEVEL_NODE_RES: 11575 # Reuse node locks 11576 self.needed_locks[locking.LEVEL_NODE_RES] = \ 11577 self.needed_locks[locking.LEVEL_NODE]
11578
11579 - def BuildHooksEnv(self):
11580 """Build hooks env. 11581 11582 This runs on the master, the primary and all the secondaries. 11583 11584 """ 11585 instance = self.replacer.instance 11586 env = { 11587 "MODE": self.op.mode, 11588 "NEW_SECONDARY": self.op.remote_node, 11589 "OLD_SECONDARY": instance.secondary_nodes[0], 11590 } 11591 env.update(_BuildInstanceHookEnvByObject(self, instance)) 11592 return env
11593
11594 - def BuildHooksNodes(self):
11595 """Build hooks nodes. 11596 11597 """ 11598 instance = self.replacer.instance 11599 nl = [ 11600 self.cfg.GetMasterNode(), 11601 instance.primary_node, 11602 ] 11603 if self.op.remote_node is not None: 11604 nl.append(self.op.remote_node) 11605 return nl, nl
11606
11607 - def CheckPrereq(self):
11608 """Check prerequisites. 11609 11610 """ 11611 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or 11612 self.op.iallocator is None) 11613 11614 # Verify if node group locks are still correct 11615 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) 11616 if owned_groups: 11617 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups) 11618 11619 return LogicalUnit.CheckPrereq(self)
11620
11621 11622 -class TLReplaceDisks(Tasklet):
11623 """Replaces disks for an instance. 11624 11625 Note: Locking is not within the scope of this class. 11626 11627 """
11628 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, 11629 disks, early_release, ignore_ipolicy):
11630 """Initializes this class. 11631 11632 """ 11633 Tasklet.__init__(self, lu) 11634 11635 # Parameters 11636 self.instance_name = instance_name 11637 self.mode = mode 11638 self.iallocator_name = iallocator_name 11639 self.remote_node = remote_node 11640 self.disks = disks 11641 self.early_release = early_release 11642 self.ignore_ipolicy = ignore_ipolicy 11643 11644 # Runtime data 11645 self.instance = None 11646 self.new_node = None 11647 self.target_node = None 11648 self.other_node = None 11649 self.remote_node_info = None 11650 self.node_secondary_ip = None
11651 11652 @staticmethod
11653 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11654 """Compute a new secondary node using an IAllocator. 11655 11656 """ 11657 req = iallocator.IAReqRelocate(name=instance_name, 11658 relocate_from=list(relocate_from)) 11659 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req) 11660 11661 ial.Run(iallocator_name) 11662 11663 if not ial.success: 11664 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 11665 " %s" % (iallocator_name, ial.info), 11666 errors.ECODE_NORES) 11667 11668 remote_node_name = ial.result[0] 11669 11670 lu.LogInfo("Selected new secondary for instance '%s': %s", 11671 instance_name, remote_node_name) 11672 11673 return remote_node_name
11674
11675 - def _FindFaultyDisks(self, node_name):
11676 """Wrapper for L{_FindFaultyInstanceDisks}. 11677 11678 """ 11679 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, 11680 node_name, True)
11681
11682 - def _CheckDisksActivated(self, instance):
11683 """Checks if the instance disks are activated. 11684 11685 @param instance: The instance to check disks 11686 @return: True if they are activated, False otherwise 11687 11688 """ 11689 nodes = instance.all_nodes 11690 11691 for idx, dev in enumerate(instance.disks): 11692 for node in nodes: 11693 self.lu.LogInfo("Checking disk/%d on %s", idx, node) 11694 self.cfg.SetDiskID(dev, node) 11695 11696 result = _BlockdevFind(self, node, dev, instance) 11697 11698 if result.offline: 11699 continue 11700 elif result.fail_msg or not result.payload: 11701 return False 11702 11703 return True
11704
11705 - def CheckPrereq(self):
11706 """Check prerequisites. 11707 11708 This checks that the instance is in the cluster. 11709 11710 """ 11711 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name) 11712 assert instance is not None, \ 11713 "Cannot retrieve locked instance %s" % self.instance_name 11714 11715 if instance.disk_template != constants.DT_DRBD8: 11716 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" 11717 " instances", errors.ECODE_INVAL) 11718 11719 if len(instance.secondary_nodes) != 1: 11720 raise errors.OpPrereqError("The instance has a strange layout," 11721 " expected one secondary but found %d" % 11722 len(instance.secondary_nodes), 11723 errors.ECODE_FAULT) 11724 11725 instance = self.instance 11726 secondary_node = instance.secondary_nodes[0] 11727 11728 if self.iallocator_name is None: 11729 remote_node = self.remote_node 11730 else: 11731 remote_node = self._RunAllocator(self.lu, self.iallocator_name, 11732 instance.name, instance.secondary_nodes) 11733 11734 if remote_node is None: 11735 self.remote_node_info = None 11736 else: 11737 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \ 11738 "Remote node '%s' is not locked" % remote_node 11739 11740 self.remote_node_info = self.cfg.GetNodeInfo(remote_node) 11741 assert self.remote_node_info is not None, \ 11742 "Cannot retrieve locked node %s" % remote_node 11743 11744 if remote_node == self.instance.primary_node: 11745 raise errors.OpPrereqError("The specified node is the primary node of" 11746 " the instance", errors.ECODE_INVAL) 11747 11748 if remote_node == secondary_node: 11749 raise errors.OpPrereqError("The specified node is already the" 11750 " secondary node of the instance", 11751 errors.ECODE_INVAL) 11752 11753 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, 11754 constants.REPLACE_DISK_CHG): 11755 raise errors.OpPrereqError("Cannot specify disks to be replaced", 11756 errors.ECODE_INVAL) 11757 11758 if self.mode == constants.REPLACE_DISK_AUTO: 11759 if not self._CheckDisksActivated(instance): 11760 raise errors.OpPrereqError("Please run activate-disks on instance %s" 11761 " first" % self.instance_name, 11762 errors.ECODE_STATE) 11763 faulty_primary = self._FindFaultyDisks(instance.primary_node) 11764 faulty_secondary = self._FindFaultyDisks(secondary_node) 11765 11766 if faulty_primary and faulty_secondary: 11767 raise errors.OpPrereqError("Instance %s has faulty disks on more than" 11768 " one node and can not be repaired" 11769 " automatically" % self.instance_name, 11770 errors.ECODE_STATE) 11771 11772 if faulty_primary: 11773 self.disks = faulty_primary 11774 self.target_node = instance.primary_node 11775 self.other_node = secondary_node 11776 check_nodes = [self.target_node, self.other_node] 11777 elif faulty_secondary: 11778 self.disks = faulty_secondary 11779 self.target_node = secondary_node 11780 self.other_node = instance.primary_node 11781 check_nodes = [self.target_node, self.other_node] 11782 else: 11783 self.disks = [] 11784 check_nodes = [] 11785 11786 else: 11787 # Non-automatic modes 11788 if self.mode == constants.REPLACE_DISK_PRI: 11789 self.target_node = instance.primary_node 11790 self.other_node = secondary_node 11791 check_nodes = [self.target_node, self.other_node] 11792 11793 elif self.mode == constants.REPLACE_DISK_SEC: 11794 self.target_node = secondary_node 11795 self.other_node = instance.primary_node 11796 check_nodes = [self.target_node, self.other_node] 11797 11798 elif self.mode == constants.REPLACE_DISK_CHG: 11799 self.new_node = 
remote_node 11800 self.other_node = instance.primary_node 11801 self.target_node = secondary_node 11802 check_nodes = [self.new_node, self.other_node] 11803 11804 _CheckNodeNotDrained(self.lu, remote_node) 11805 _CheckNodeVmCapable(self.lu, remote_node) 11806 11807 old_node_info = self.cfg.GetNodeInfo(secondary_node) 11808 assert old_node_info is not None 11809 if old_node_info.offline and not self.early_release: 11810 # doesn't make sense to delay the release 11811 self.early_release = True 11812 self.lu.LogInfo("Old secondary %s is offline, automatically enabling" 11813 " early-release mode", secondary_node) 11814 11815 else: 11816 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % 11817 self.mode) 11818 11819 # If not specified all disks should be replaced 11820 if not self.disks: 11821 self.disks = range(len(self.instance.disks)) 11822 11823 # TODO: This is ugly, but right now we can't distinguish between internal 11824 # submitted opcode and external one. We should fix that. 11825 if self.remote_node_info: 11826 # We change the node, lets verify it still meets instance policy 11827 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group) 11828 cluster = self.cfg.GetClusterInfo() 11829 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 11830 new_group_info) 11831 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info, 11832 self.cfg, ignore=self.ignore_ipolicy) 11833 11834 for node in check_nodes: 11835 _CheckNodeOnline(self.lu, node) 11836 11837 touched_nodes = frozenset(node_name for node_name in [self.new_node, 11838 self.other_node, 11839 self.target_node] 11840 if node_name is not None) 11841 11842 # Release unneeded node and node resource locks 11843 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes) 11844 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes) 11845 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC) 11846 11847 # Release any owned node group 11848 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP) 11849 11850 # Check whether disks are valid 11851 for disk_idx in self.disks: 11852 instance.FindDisk(disk_idx) 11853 11854 # Get secondary node IP addresses 11855 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node) 11856 in self.cfg.GetMultiNodeInfo(touched_nodes))
11857
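# --- Illustrative sketch (editorial, not part of the module) ----------------
# The node roles picked by TLReplaceDisks.CheckPrereq() above, condensed into
# a small table.  "primary"/"secondary" refer to the instance's current nodes
# and "remote" to the node chosen via the new-node option or the iallocator;
# the AUTO mode instead targets whichever side reports faulty disks.  The
# mode names are readable stand-ins for the REPLACE_DISK_* constants.

_SKETCH_REPLACE_ROLES = {
  # mode                    (target node, other node,  new node)
  "replace_on_primary":    ("primary",    "secondary", None),
  "replace_on_secondary":  ("secondary",  "primary",   None),
  "replace_new_secondary": ("secondary",  "primary",   "remote"),
  # "replace_auto": target/other depend on where faulty disks are found
  }
# ----------------------------------------------------------------------------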
11858 - def Exec(self, feedback_fn):
11859    """Execute disk replacement.
11860
11861    This dispatches the disk replacement to the appropriate handler.
11862
11863    """
11864    if __debug__:
11865      # Verify owned locks before starting operation
11866      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11867      assert set(owned_nodes) == set(self.node_secondary_ip), \
11868          ("Incorrect node locks, owning %s, expected %s" %
11869           (owned_nodes, self.node_secondary_ip.keys()))
11870      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11871              self.lu.owned_locks(locking.LEVEL_NODE_RES))
11872      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11873
11874      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11875      assert list(owned_instances) == [self.instance_name], \
11876          "Instance '%s' not locked" % self.instance_name
11877
11878      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11879          "Should not own any node group lock at this point"
11880
11881    if not self.disks:
11882      feedback_fn("No disks need replacement for instance '%s'" %
11883                  self.instance.name)
11884      return
11885
11886    feedback_fn("Replacing disk(s) %s for instance '%s'" %
11887                (utils.CommaJoin(self.disks), self.instance.name))
11888    feedback_fn("Current primary node: %s" % self.instance.primary_node)
11889    feedback_fn("Current secondary node: %s" %
11890                utils.CommaJoin(self.instance.secondary_nodes))
11891
11892    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11893
11894    # Activate the instance disks if we're replacing them on a down instance
11895    if activate_disks:
11896      _StartInstanceDisks(self.lu, self.instance, True)
11897
11898    try:
11899      # Should we replace the secondary node?
11900      if self.new_node is not None:
11901        fn = self._ExecDrbd8Secondary
11902      else:
11903        fn = self._ExecDrbd8DiskOnly
11904
11905      result = fn(feedback_fn)
11906    finally:
11907      # Deactivate the instance disks if we're replacing them on a
11908      # down instance
11909      if activate_disks:
11910        _SafeShutdownInstanceDisks(self.lu, self.instance)
11911
11912    assert not self.lu.owned_locks(locking.LEVEL_NODE)
11913
11914    if __debug__:
11915      # Verify owned locks
11916      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11917      nodes = frozenset(self.node_secondary_ip)
11918      assert ((self.early_release and not owned_nodes) or
11919              (not self.early_release and not (set(owned_nodes) - nodes))), \
11920          ("Not owning the correct locks, early_release=%s, owned=%r,"
11921           " nodes=%r" % (self.early_release, owned_nodes, nodes))
11922
11923    return result
11924
11925 - def _CheckVolumeGroup(self, nodes):
11926 self.lu.LogInfo("Checking volume groups") 11927 11928 vgname = self.cfg.GetVGName() 11929 11930 # Make sure volume group exists on all involved nodes 11931 results = self.rpc.call_vg_list(nodes) 11932 if not results: 11933 raise errors.OpExecError("Can't list volume groups on the nodes") 11934 11935 for node in nodes: 11936 res = results[node] 11937 res.Raise("Error checking node %s" % node) 11938 if vgname not in res.payload: 11939 raise errors.OpExecError("Volume group '%s' not found on node %s" % 11940 (vgname, node))
11941
11942 - def _CheckDisksExistence(self, nodes):
11943 # Check disk existence 11944 for idx, dev in enumerate(self.instance.disks): 11945 if idx not in self.disks: 11946 continue 11947 11948 for node in nodes: 11949 self.lu.LogInfo("Checking disk/%d on %s", idx, node) 11950 self.cfg.SetDiskID(dev, node) 11951 11952 result = _BlockdevFind(self, node, dev, self.instance) 11953 11954 msg = result.fail_msg 11955 if msg or not result.payload: 11956 if not msg: 11957 msg = "disk not found" 11958 raise errors.OpExecError("Can't find disk/%d on node %s: %s" % 11959 (idx, node, msg))
11960
11961 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11962 for idx, dev in enumerate(self.instance.disks): 11963 if idx not in self.disks: 11964 continue 11965 11966 self.lu.LogInfo("Checking disk/%d consistency on node %s" % 11967 (idx, node_name)) 11968 11969 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name, 11970 on_primary, ldisk=ldisk): 11971 raise errors.OpExecError("Node %s has degraded storage, unsafe to" 11972 " replace disks for instance %s" % 11973 (node_name, self.instance.name))
11974
11975 - def _CreateNewStorage(self, node_name):
11976 """Create new storage on the primary or secondary node. 11977 11978 This is only used for same-node replaces, not for changing the 11979 secondary node, hence we don't want to modify the existing disk. 11980 11981 """ 11982 iv_names = {} 11983 11984 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) 11985 for idx, dev in enumerate(disks): 11986 if idx not in self.disks: 11987 continue 11988 11989 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx) 11990 11991 self.cfg.SetDiskID(dev, node_name) 11992 11993 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] 11994 names = _GenerateUniqueNames(self.lu, lv_names) 11995 11996 (data_disk, meta_disk) = dev.children 11997 vg_data = data_disk.logical_id[0] 11998 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size, 11999 logical_id=(vg_data, names[0]), 12000 params=data_disk.params) 12001 vg_meta = meta_disk.logical_id[0] 12002 lv_meta = objects.Disk(dev_type=constants.LD_LV, 12003 size=constants.DRBD_META_SIZE, 12004 logical_id=(vg_meta, names[1]), 12005 params=meta_disk.params) 12006 12007 new_lvs = [lv_data, lv_meta] 12008 old_lvs = [child.Copy() for child in dev.children] 12009 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) 12010 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name) 12011 12012 # we pass force_create=True to force the LVM creation 12013 for new_lv in new_lvs: 12014 try: 12015 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, 12016 True, _GetInstanceInfoText(self.instance), 12017 False, excl_stor) 12018 except errors.DeviceCreationError, e: 12019 raise errors.OpExecError("Can't create block device: %s" % e.message) 12020 12021 return iv_names
12022
12023 - def _CheckDevices(self, node_name, iv_names):
12024 for name, (dev, _, _) in iv_names.iteritems(): 12025 self.cfg.SetDiskID(dev, node_name) 12026 12027 result = _BlockdevFind(self, node_name, dev, self.instance) 12028 12029 msg = result.fail_msg 12030 if msg or not result.payload: 12031 if not msg: 12032 msg = "disk not found" 12033 raise errors.OpExecError("Can't find DRBD device %s: %s" % 12034 (name, msg)) 12035 12036 if result.payload.is_degraded: 12037 raise errors.OpExecError("DRBD device %s is degraded!" % name)
12038
12039 - def _RemoveOldStorage(self, node_name, iv_names):
12040 for name, (_, old_lvs, _) in iv_names.iteritems(): 12041 self.lu.LogInfo("Remove logical volumes for %s", name) 12042 12043 for lv in old_lvs: 12044 self.cfg.SetDiskID(lv, node_name) 12045 12046 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg 12047 if msg: 12048 self.lu.LogWarning("Can't remove old LV: %s", msg, 12049 hint="remove unused LVs manually")
12050
12051 - def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
12052 """Replace a disk on the primary or secondary for DRBD 8. 12053 12054 The algorithm for replace is quite complicated: 12055 12056 1. for each disk to be replaced: 12057 12058 1. create new LVs on the target node with unique names 12059 1. detach old LVs from the drbd device 12060 1. rename old LVs to name_replaced.<time_t> 12061 1. rename new LVs to old LVs 12062 1. attach the new LVs (with the old names now) to the drbd device 12063 12064 1. wait for sync across all devices 12065 12066 1. for each modified disk: 12067 12068 1. remove old LVs (which have the name name_replaces.<time_t>) 12069 12070 Failures are not very well handled. 12071 12072 """ 12073 steps_total = 6 12074 12075 # Step: check device activation 12076 self.lu.LogStep(1, steps_total, "Check device existence") 12077 self._CheckDisksExistence([self.other_node, self.target_node]) 12078 self._CheckVolumeGroup([self.target_node, self.other_node]) 12079 12080 # Step: check other node consistency 12081 self.lu.LogStep(2, steps_total, "Check peer consistency") 12082 self._CheckDisksConsistency(self.other_node, 12083 self.other_node == self.instance.primary_node, 12084 False) 12085 12086 # Step: create new storage 12087 self.lu.LogStep(3, steps_total, "Allocate new storage") 12088 iv_names = self._CreateNewStorage(self.target_node) 12089 12090 # Step: for each lv, detach+rename*2+attach 12091 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 12092 for dev, old_lvs, new_lvs in iv_names.itervalues(): 12093 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name) 12094 12095 result = self.rpc.call_blockdev_removechildren(self.target_node, dev, 12096 old_lvs) 12097 result.Raise("Can't detach drbd from local storage on node" 12098 " %s for device %s" % (self.target_node, dev.iv_name)) 12099 #dev.children = [] 12100 #cfg.Update(instance) 12101 12102 # ok, we created the new LVs, so now we know we have the needed 12103 # storage; as such, we proceed on the target node to rename 12104 # old_lv to _old, and new_lv to old_lv; note that we rename LVs 12105 # using the assumption that logical_id == physical_id (which in 12106 # turn is the unique_id on that node) 12107 12108 # FIXME(iustin): use a better name for the replaced LVs 12109 temp_suffix = int(time.time()) 12110 ren_fn = lambda d, suff: (d.physical_id[0], 12111 d.physical_id[1] + "_replaced-%s" % suff) 12112 12113 # Build the rename list based on what LVs exist on the node 12114 rename_old_to_new = [] 12115 for to_ren in old_lvs: 12116 result = self.rpc.call_blockdev_find(self.target_node, to_ren) 12117 if not result.fail_msg and result.payload: 12118 # device exists 12119 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) 12120 12121 self.lu.LogInfo("Renaming the old LVs on the target node") 12122 result = self.rpc.call_blockdev_rename(self.target_node, 12123 rename_old_to_new) 12124 result.Raise("Can't rename old LVs on node %s" % self.target_node) 12125 12126 # Now we rename the new LVs to the old LVs 12127 self.lu.LogInfo("Renaming the new LVs on the target node") 12128 rename_new_to_old = [(new, old.physical_id) 12129 for old, new in zip(old_lvs, new_lvs)] 12130 result = self.rpc.call_blockdev_rename(self.target_node, 12131 rename_new_to_old) 12132 result.Raise("Can't rename new LVs on node %s" % self.target_node) 12133 12134 # Intermediate steps of in memory modifications 12135 for old, new in zip(old_lvs, new_lvs): 12136 new.logical_id = old.logical_id 12137 self.cfg.SetDiskID(new, self.target_node) 12138 12139 # We need to modify 
old_lvs so that removal later removes the 12140 # right LVs, not the newly added ones; note that old_lvs is a 12141 # copy here 12142 for disk in old_lvs: 12143 disk.logical_id = ren_fn(disk, temp_suffix) 12144 self.cfg.SetDiskID(disk, self.target_node) 12145 12146 # Now that the new lvs have the old name, we can add them to the device 12147 self.lu.LogInfo("Adding new mirror component on %s", self.target_node) 12148 result = self.rpc.call_blockdev_addchildren(self.target_node, 12149 (dev, self.instance), new_lvs) 12150 msg = result.fail_msg 12151 if msg: 12152 for new_lv in new_lvs: 12153 msg2 = self.rpc.call_blockdev_remove(self.target_node, 12154 new_lv).fail_msg 12155 if msg2: 12156 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, 12157 hint=("cleanup manually the unused logical" 12158 "volumes")) 12159 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) 12160 12161 cstep = itertools.count(5) 12162 12163 if self.early_release: 12164 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 12165 self._RemoveOldStorage(self.target_node, iv_names) 12166 # TODO: Check if releasing locks early still makes sense 12167 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) 12168 else: 12169 # Release all resource locks except those used by the instance 12170 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, 12171 keep=self.node_secondary_ip.keys()) 12172 12173 # Release all node locks while waiting for sync 12174 _ReleaseLocks(self.lu, locking.LEVEL_NODE) 12175 12176 # TODO: Can the instance lock be downgraded here? Take the optional disk 12177 # shutdown in the caller into consideration. 12178 12179 # Wait for sync 12180 # This can fail as the old devices are degraded and _WaitForSync 12181 # does a combined result over all disks, so we don't check its return value 12182 self.lu.LogStep(cstep.next(), steps_total, "Sync devices") 12183 _WaitForSync(self.lu, self.instance) 12184 12185 # Check all devices manually 12186 self._CheckDevices(self.instance.primary_node, iv_names) 12187 12188 # Step: remove old storage 12189 if not self.early_release: 12190 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 12191 self._RemoveOldStorage(self.target_node, iv_names)
12192
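# --- Illustrative sketch (editorial, not part of the module) ----------------
# The detach/rename/attach dance from _ExecDrbd8DiskOnly() above, reduced to
# the name handling: the old LVs are renamed to "<name>_replaced-<timestamp>"
# and the freshly created LVs take over the old names, so the DRBD device ends
# up attached to children carrying the original names.  Plain (vg, lv) tuples
# stand in for Disk objects and their physical_id.

import time

def _SketchRenamePlan(old_lvs, new_lvs):
  suffix = int(time.time())
  rename_old_to_new = [((vg, lv), (vg, lv + "_replaced-%s" % suffix))
                       for (vg, lv) in old_lvs]
  rename_new_to_old = list(zip(new_lvs, old_lvs))
  return (rename_old_to_new, rename_new_to_old)

_plan = _SketchRenamePlan(
  [("xenvg", ".disk0_data"), ("xenvg", ".disk0_meta")],
  [("xenvg", ".disk0_data_new"), ("xenvg", ".disk0_meta_new")])
# ----------------------------------------------------------------------------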
12193 - def _ExecDrbd8Secondary(self, feedback_fn):
12194 """Replace the secondary node for DRBD 8. 12195 12196 The algorithm for replace is quite complicated: 12197 - for all disks of the instance: 12198 - create new LVs on the new node with same names 12199 - shutdown the drbd device on the old secondary 12200 - disconnect the drbd network on the primary 12201 - create the drbd device on the new secondary 12202 - network attach the drbd on the primary, using an artifice: 12203 the drbd code for Attach() will connect to the network if it 12204 finds a device which is connected to the good local disks but 12205 not network enabled 12206 - wait for sync across all devices 12207 - remove all disks from the old secondary 12208 12209 Failures are not very well handled. 12210 12211 """ 12212 steps_total = 6 12213 12214 pnode = self.instance.primary_node 12215 12216 # Step: check device activation 12217 self.lu.LogStep(1, steps_total, "Check device existence") 12218 self._CheckDisksExistence([self.instance.primary_node]) 12219 self._CheckVolumeGroup([self.instance.primary_node]) 12220 12221 # Step: check other node consistency 12222 self.lu.LogStep(2, steps_total, "Check peer consistency") 12223 self._CheckDisksConsistency(self.instance.primary_node, True, True) 12224 12225 # Step: create new storage 12226 self.lu.LogStep(3, steps_total, "Allocate new storage") 12227 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) 12228 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node) 12229 for idx, dev in enumerate(disks): 12230 self.lu.LogInfo("Adding new local storage on %s for disk/%d" % 12231 (self.new_node, idx)) 12232 # we pass force_create=True to force LVM creation 12233 for new_lv in dev.children: 12234 try: 12235 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv, 12236 True, _GetInstanceInfoText(self.instance), 12237 False, excl_stor) 12238 except errors.DeviceCreationError, e: 12239 raise errors.OpExecError("Can't create block device: %s" % e.message) 12240 12241 # Step 4: dbrd minors and drbd setups changes 12242 # after this, we must manually remove the drbd minors on both the 12243 # error and the success paths 12244 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 12245 minors = self.cfg.AllocateDRBDMinor([self.new_node 12246 for dev in self.instance.disks], 12247 self.instance.name) 12248 logging.debug("Allocated minors %r", minors) 12249 12250 iv_names = {} 12251 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): 12252 self.lu.LogInfo("activating a new drbd on %s for disk/%d" % 12253 (self.new_node, idx)) 12254 # create new devices on new_node; note that we create two IDs: 12255 # one without port, so the drbd will be activated without 12256 # networking information on the new node at this stage, and one 12257 # with network, for the latter activation in step 4 12258 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id 12259 if self.instance.primary_node == o_node1: 12260 p_minor = o_minor1 12261 else: 12262 assert self.instance.primary_node == o_node2, "Three-node instance?" 
12263 p_minor = o_minor2 12264 12265 new_alone_id = (self.instance.primary_node, self.new_node, None, 12266 p_minor, new_minor, o_secret) 12267 new_net_id = (self.instance.primary_node, self.new_node, o_port, 12268 p_minor, new_minor, o_secret) 12269 12270 iv_names[idx] = (dev, dev.children, new_net_id) 12271 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, 12272 new_net_id) 12273 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, 12274 logical_id=new_alone_id, 12275 children=dev.children, 12276 size=dev.size, 12277 params={}) 12278 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd], 12279 self.cfg) 12280 try: 12281 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, 12282 anno_new_drbd, 12283 _GetInstanceInfoText(self.instance), False, 12284 excl_stor) 12285 except errors.GenericError: 12286 self.cfg.ReleaseDRBDMinors(self.instance.name) 12287 raise 12288 12289 # We have new devices, shutdown the drbd on the old secondary 12290 for idx, dev in enumerate(self.instance.disks): 12291 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx) 12292 self.cfg.SetDiskID(dev, self.target_node) 12293 msg = self.rpc.call_blockdev_shutdown(self.target_node, 12294 (dev, self.instance)).fail_msg 12295 if msg: 12296 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" 12297 "node: %s" % (idx, msg), 12298 hint=("Please cleanup this device manually as" 12299 " soon as possible")) 12300 12301 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") 12302 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip, 12303 self.instance.disks)[pnode] 12304 12305 msg = result.fail_msg 12306 if msg: 12307 # detaches didn't succeed (unlikely) 12308 self.cfg.ReleaseDRBDMinors(self.instance.name) 12309 raise errors.OpExecError("Can't detach the disks from the network on" 12310 " old node: %s" % (msg,)) 12311 12312 # if we managed to detach at least one, we update all the disks of 12313 # the instance to point to the new secondary 12314 self.lu.LogInfo("Updating instance configuration") 12315 for dev, _, new_logical_id in iv_names.itervalues(): 12316 dev.logical_id = new_logical_id 12317 self.cfg.SetDiskID(dev, self.instance.primary_node) 12318 12319 self.cfg.Update(self.instance, feedback_fn) 12320 12321 # Release all node locks (the configuration has been updated) 12322 _ReleaseLocks(self.lu, locking.LEVEL_NODE) 12323 12324 # and now perform the drbd attach 12325 self.lu.LogInfo("Attaching primary drbds to new secondary" 12326 " (standalone => connected)") 12327 result = self.rpc.call_drbd_attach_net([self.instance.primary_node, 12328 self.new_node], 12329 self.node_secondary_ip, 12330 (self.instance.disks, self.instance), 12331 self.instance.name, 12332 False) 12333 for to_node, to_result in result.items(): 12334 msg = to_result.fail_msg 12335 if msg: 12336 self.lu.LogWarning("Can't attach drbd disks on node %s: %s", 12337 to_node, msg, 12338 hint=("please do a gnt-instance info to see the" 12339 " status of disks")) 12340 12341 cstep = itertools.count(5) 12342 12343 if self.early_release: 12344 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 12345 self._RemoveOldStorage(self.target_node, iv_names) 12346 # TODO: Check if releasing locks early still makes sense 12347 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) 12348 else: 12349 # Release all resource locks except those used by the instance 12350 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, 12351 keep=self.node_secondary_ip.keys()) 12352 
12353 # TODO: Can the instance lock be downgraded here? Take the optional disk 12354 # shutdown in the caller into consideration. 12355 12356 # Wait for sync 12357 # This can fail as the old devices are degraded and _WaitForSync 12358 # does a combined result over all disks, so we don't check its return value 12359 self.lu.LogStep(cstep.next(), steps_total, "Sync devices") 12360 _WaitForSync(self.lu, self.instance) 12361 12362 # Check all devices manually 12363 self._CheckDevices(self.instance.primary_node, iv_names) 12364 12365 # Step: remove old storage 12366 if not self.early_release: 12367 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 12368 self._RemoveOldStorage(self.target_node, iv_names)
12369
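# --- Illustrative sketch (editorial, not part of the module) ----------------
# The logical_id rewrite performed in _ExecDrbd8Secondary() above: a DRBD8
# logical_id is (nodeA, nodeB, port, minorA, minorB, secret).  The old
# secondary is swapped for the new node with a freshly allocated minor; the
# "standalone" variant used for the initial creation carries no port, while
# the networked variant keeps the original port.  Values below are examples.

def _SketchNewDrbdIds(old_id, primary, new_node, new_minor):
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_id
  p_minor = o_minor1 if primary == o_node1 else o_minor2
  new_alone_id = (primary, new_node, None, p_minor, new_minor, o_secret)
  new_net_id = (primary, new_node, o_port, p_minor, new_minor, o_secret)
  return (new_alone_id, new_net_id)

_ids = _SketchNewDrbdIds(("node1", "node2", 11000, 0, 1, "secret"),
                         "node1", "node3", 2)
# ----------------------------------------------------------------------------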
12370 12371 -class LURepairNodeStorage(NoHooksLU):
12372 """Repairs the volume group on a node. 12373 12374 """ 12375 REQ_BGL = False 12376
12377 - def CheckArguments(self):
12378 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 12379 12380 storage_type = self.op.storage_type 12381 12382 if (constants.SO_FIX_CONSISTENCY not in 12383 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): 12384 raise errors.OpPrereqError("Storage units of type '%s' can not be" 12385 " repaired" % storage_type, 12386 errors.ECODE_INVAL)
12387
12388 - def ExpandNames(self):
12389 self.needed_locks = { 12390 locking.LEVEL_NODE: [self.op.node_name], 12391 }
12392
12393 - def _CheckFaultyDisks(self, instance, node_name):
12394 """Ensure faulty disks abort the opcode or at least warn.""" 12395 try: 12396 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, 12397 node_name, True): 12398 raise errors.OpPrereqError("Instance '%s' has faulty disks on" 12399 " node '%s'" % (instance.name, node_name), 12400 errors.ECODE_STATE) 12401 except errors.OpPrereqError, err: 12402 if self.op.ignore_consistency: 12403 self.LogWarning(str(err.args[0])) 12404 else: 12405 raise
12406
12407 - def CheckPrereq(self):
12408 """Check prerequisites. 12409 12410 """ 12411 # Check whether any instance on this node has faulty disks 12412 for inst in _GetNodeInstances(self.cfg, self.op.node_name): 12413 if inst.admin_state != constants.ADMINST_UP: 12414 continue 12415 check_nodes = set(inst.all_nodes) 12416 check_nodes.discard(self.op.node_name) 12417 for inst_node_name in check_nodes: 12418 self._CheckFaultyDisks(inst, inst_node_name)
12419
12420 - def Exec(self, feedback_fn):
12421 feedback_fn("Repairing storage unit '%s' on %s ..." % 12422 (self.op.name, self.op.node_name)) 12423 12424 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 12425 result = self.rpc.call_storage_execute(self.op.node_name, 12426 self.op.storage_type, st_args, 12427 self.op.name, 12428 constants.SO_FIX_CONSISTENCY) 12429 result.Raise("Failed to repair storage unit '%s' on %s" % 12430 (self.op.name, self.op.node_name))
12431
12432 12433 -class LUNodeEvacuate(NoHooksLU):
12434 """Evacuates instances off a list of nodes. 12435 12436 """ 12437 REQ_BGL = False 12438 12439 _MODE2IALLOCATOR = { 12440 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI, 12441 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC, 12442 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL, 12443 } 12444 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES 12445 assert (frozenset(_MODE2IALLOCATOR.values()) == 12446 constants.IALLOCATOR_NEVAC_MODES) 12447
12448 - def CheckArguments(self):
12449 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12450
12451 - def ExpandNames(self):
12452 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 12453 12454 if self.op.remote_node is not None: 12455 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 12456 assert self.op.remote_node 12457 12458 if self.op.remote_node == self.op.node_name: 12459 raise errors.OpPrereqError("Can not use evacuated node as a new" 12460 " secondary node", errors.ECODE_INVAL) 12461 12462 if self.op.mode != constants.NODE_EVAC_SEC: 12463 raise errors.OpPrereqError("Without the use of an iallocator only" 12464 " secondary instances can be evacuated", 12465 errors.ECODE_INVAL) 12466 12467 # Declare locks 12468 self.share_locks = _ShareAll() 12469 self.needed_locks = { 12470 locking.LEVEL_INSTANCE: [], 12471 locking.LEVEL_NODEGROUP: [], 12472 locking.LEVEL_NODE: [], 12473 } 12474 12475 # Determine nodes (via group) optimistically, needs verification once locks 12476 # have been acquired 12477 self.lock_nodes = self._DetermineNodes()
12478
12479 - def _DetermineNodes(self):
12480 """Gets the list of nodes to operate on. 12481 12482 """ 12483 if self.op.remote_node is None: 12484 # Iallocator will choose any node(s) in the same group 12485 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name]) 12486 else: 12487 group_nodes = frozenset([self.op.remote_node]) 12488 12489 # Determine nodes to be locked 12490 return set([self.op.node_name]) | group_nodes
12491
12492 - def _DetermineInstances(self):
12493 """Builds list of instances to operate on. 12494 12495 """ 12496 assert self.op.mode in constants.NODE_EVAC_MODES 12497 12498 if self.op.mode == constants.NODE_EVAC_PRI: 12499 # Primary instances only 12500 inst_fn = _GetNodePrimaryInstances 12501 assert self.op.remote_node is None, \ 12502 "Evacuating primary instances requires iallocator" 12503 elif self.op.mode == constants.NODE_EVAC_SEC: 12504 # Secondary instances only 12505 inst_fn = _GetNodeSecondaryInstances 12506 else: 12507 # All instances 12508 assert self.op.mode == constants.NODE_EVAC_ALL 12509 inst_fn = _GetNodeInstances 12510 # TODO: In 2.6, change the iallocator interface to take an evacuation mode 12511 # per instance 12512 raise errors.OpPrereqError("Due to an issue with the iallocator" 12513 " interface it is not possible to evacuate" 12514 " all instances at once; specify explicitly" 12515 " whether to evacuate primary or secondary" 12516 " instances", 12517 errors.ECODE_INVAL) 12518 12519 return inst_fn(self.cfg, self.op.node_name)
12520
12521 - def DeclareLocks(self, level):
12522 if level == locking.LEVEL_INSTANCE: 12523 # Lock instances optimistically, needs verification once node and group 12524 # locks have been acquired 12525 self.needed_locks[locking.LEVEL_INSTANCE] = \ 12526 set(i.name for i in self._DetermineInstances()) 12527 12528 elif level == locking.LEVEL_NODEGROUP: 12529 # Lock node groups for all potential target nodes optimistically, needs 12530 # verification once nodes have been acquired 12531 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 12532 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes) 12533 12534 elif level == locking.LEVEL_NODE: 12535 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12536
12537 - def CheckPrereq(self):
12538    # Verify locks
12539    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12540    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12541    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12542
12543    need_nodes = self._DetermineNodes()
12544
12545    if not owned_nodes.issuperset(need_nodes):
12546      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12547                                 " locks were acquired, current nodes are"
12548                                 " '%s', used to be '%s'; retry the"
12549                                 " operation" %
12550                                 (self.op.node_name,
12551                                  utils.CommaJoin(need_nodes),
12552                                  utils.CommaJoin(owned_nodes)),
12553                                 errors.ECODE_STATE)
12554
12555    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12556    if owned_groups != wanted_groups:
12557      raise errors.OpExecError("Node groups changed since locks were acquired,"
12558                               " current groups are '%s', used to be '%s';"
12559                               " retry the operation" %
12560                               (utils.CommaJoin(wanted_groups),
12561                                utils.CommaJoin(owned_groups)))
12562
12563    # Determine affected instances
12564    self.instances = self._DetermineInstances()
12565    self.instance_names = [i.name for i in self.instances]
12566
12567    if set(self.instance_names) != owned_instances:
12568      raise errors.OpExecError("Instances on node '%s' changed since locks"
12569                               " were acquired, current instances are '%s',"
12570                               " used to be '%s'; retry the operation" %
12571                               (self.op.node_name,
12572                                utils.CommaJoin(self.instance_names),
12573                                utils.CommaJoin(owned_instances)))
12574
12575    if self.instance_names:
12576      self.LogInfo("Evacuating instances from node '%s': %s",
12577                   self.op.node_name,
12578                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
12579    else:
12580      self.LogInfo("No instances to evacuate from node '%s'",
12581                   self.op.node_name)
12582
12583    if self.op.remote_node is not None:
12584      for i in self.instances:
12585        if i.primary_node == self.op.remote_node:
12586          raise errors.OpPrereqError("Node %s is the primary node of"
12587                                     " instance %s, cannot use it as"
12588                                     " secondary" %
12589                                     (self.op.remote_node, i.name),
12590                                     errors.ECODE_INVAL)
12591
12592 - def Exec(self, feedback_fn):
12593 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None) 12594 12595 if not self.instance_names: 12596 # No instances to evacuate 12597 jobs = [] 12598 12599 elif self.op.iallocator is not None: 12600 # TODO: Implement relocation to other group 12601 evac_mode = self._MODE2IALLOCATOR[self.op.mode] 12602 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode, 12603 instances=list(self.instance_names)) 12604 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 12605 12606 ial.Run(self.op.iallocator) 12607 12608 if not ial.success: 12609 raise errors.OpPrereqError("Can't compute node evacuation using" 12610 " iallocator '%s': %s" % 12611 (self.op.iallocator, ial.info), 12612 errors.ECODE_NORES) 12613 12614 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True) 12615 12616 elif self.op.remote_node is not None: 12617 assert self.op.mode == constants.NODE_EVAC_SEC 12618 jobs = [ 12619 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name, 12620 remote_node=self.op.remote_node, 12621 disks=[], 12622 mode=constants.REPLACE_DISK_CHG, 12623 early_release=self.op.early_release)] 12624 for instance_name in self.instance_names] 12625 12626 else: 12627 raise errors.ProgrammerError("No iallocator or remote node") 12628 12629 return ResultWithJobs(jobs)
12630
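# --- Illustrative sketch (editorial, not part of the module) ----------------
# What Exec() above submits when a remote node is given (secondary-only
# evacuation): one job per instance, each containing a single replace-disks
# opcode that switches the secondary to the chosen node.  Plain dicts stand
# in for opcodes.OpInstanceReplaceDisks; field names mirror the call above.

def _SketchSecondaryEvacJobs(instance_names, remote_node, early_release):
  return [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS",
            "instance_name": name,
            "remote_node": remote_node,
            "disks": [],
            "mode": "replace_new_secondary",
            "early_release": early_release}]
          for name in instance_names]

_jobs = _SketchSecondaryEvacJobs(["inst1", "inst2"], "node4", False)
# ----------------------------------------------------------------------------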
12631 12632 -def _SetOpEarlyRelease(early_release, op):
12633 """Sets C{early_release} flag on opcodes if available. 12634 12635 """ 12636 try: 12637 op.early_release = early_release 12638 except AttributeError: 12639 assert not isinstance(op, opcodes.OpInstanceReplaceDisks) 12640 12641 return op
12642
12643 12644 -def _NodeEvacDest(use_nodes, group, nodes):
12645 """Returns group or nodes depending on caller's choice. 12646 12647 """ 12648 if use_nodes: 12649 return utils.CommaJoin(nodes) 12650 else: 12651 return group
12652
12653 12654 -def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12655 """Unpacks the result of change-group and node-evacuate iallocator requests. 12656 12657 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and 12658 L{constants.IALLOCATOR_MODE_CHG_GROUP}. 12659 12660 @type lu: L{LogicalUnit} 12661 @param lu: Logical unit instance 12662 @type alloc_result: tuple/list 12663 @param alloc_result: Result from iallocator 12664 @type early_release: bool 12665 @param early_release: Whether to release locks early if possible 12666 @type use_nodes: bool 12667 @param use_nodes: Whether to display node names instead of groups 12668 12669 """ 12670 (moved, failed, jobs) = alloc_result 12671 12672 if failed: 12673 failreason = utils.CommaJoin("%s (%s)" % (name, reason) 12674 for (name, reason) in failed) 12675 lu.LogWarning("Unable to evacuate instances %s", failreason) 12676 raise errors.OpExecError("Unable to evacuate instances %s" % failreason) 12677 12678 if moved: 12679 lu.LogInfo("Instances to be moved: %s", 12680 utils.CommaJoin("%s (to %s)" % 12681 (name, _NodeEvacDest(use_nodes, group, nodes)) 12682 for (name, group, nodes) in moved)) 12683 12684 return [map(compat.partial(_SetOpEarlyRelease, early_release), 12685 map(opcodes.OpCode.LoadOpCode, ops)) 12686 for ops in jobs]
12687
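# --- Illustrative sketch (editorial, not part of the module) ----------------
# Shape of the iallocator answer consumed by _LoadNodeEvacResult() above:
# (moved, failed, jobs), where "moved" lists (instance, group, nodes) tuples,
# "failed" lists (instance, reason) pairs, and "jobs" is a list of job
# definitions, each a list of serialized opcodes (loaded with
# opcodes.OpCode.LoadOpCode in the real code).  Any non-empty "failed" list
# makes the real function raise.  The opcode contents below are examples.

_sketch_alloc_result = (
  # moved
  [("inst1.example.com", "default", ["node2", "node3"])],
  # failed (would abort the operation)
  [],
  # jobs: one job whose opcodes relocate inst1
  [[{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": "inst1.example.com"},
    {"OP_ID": "OP_INSTANCE_REPLACE_DISKS",
     "instance_name": "inst1.example.com"}]],
  )
(_moved, _failed, _evac_jobs) = _sketch_alloc_result
# ----------------------------------------------------------------------------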
12688 12689 -def _DiskSizeInBytesToMebibytes(lu, size):
12690 """Converts a disk size in bytes to mebibytes. 12691 12692 Warns and rounds up if the size isn't an even multiple of 1 MiB. 12693 12694 """ 12695 (mib, remainder) = divmod(size, 1024 * 1024) 12696 12697 if remainder != 0: 12698 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up" 12699 " to not overwrite existing data (%s bytes will not be" 12700 " wiped)", (1024 * 1024) - remainder) 12701 mib += 1 12702 12703 return mib
12704
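# --- Illustrative sketch (editorial, not part of the module) ----------------
# The arithmetic in _DiskSizeInBytesToMebibytes() above, worked through:
# 1 GiB plus one byte is not an even number of MiB, so the result is rounded
# up and the warning reports how many bytes of the last MiB stay unwiped.

_size = 1024 * 1024 * 1024 + 1
(_mib, _remainder) = divmod(_size, 1024 * 1024)   # (1024, 1)
if _remainder != 0:
  _unwiped = (1024 * 1024) - _remainder           # 1048575 bytes not wiped
  _mib += 1                                       # rounded up to 1025 MiB
# ----------------------------------------------------------------------------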
12705 12706 -class LUInstanceGrowDisk(LogicalUnit):
12707 """Grow a disk of an instance. 12708 12709 """ 12710 HPATH = "disk-grow" 12711 HTYPE = constants.HTYPE_INSTANCE 12712 REQ_BGL = False 12713
12714 - def ExpandNames(self):
12715 self._ExpandAndLockInstance() 12716 self.needed_locks[locking.LEVEL_NODE] = [] 12717 self.needed_locks[locking.LEVEL_NODE_RES] = [] 12718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 12719 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12720
12721 - def DeclareLocks(self, level):
12722 if level == locking.LEVEL_NODE: 12723 self._LockInstancesNodes() 12724 elif level == locking.LEVEL_NODE_RES: 12725 # Copy node locks 12726 self.needed_locks[locking.LEVEL_NODE_RES] = \ 12727 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12728
12729 - def BuildHooksEnv(self):
12730 """Build hooks env. 12731 12732 This runs on the master, the primary and all the secondaries. 12733 12734 """ 12735 env = { 12736 "DISK": self.op.disk, 12737 "AMOUNT": self.op.amount, 12738 "ABSOLUTE": self.op.absolute, 12739 } 12740 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 12741 return env
12742
12743 - def BuildHooksNodes(self):
12744 """Build hooks nodes. 12745 12746 """ 12747 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 12748 return (nl, nl)
12749
12750 - def CheckPrereq(self):
12751 """Check prerequisites. 12752 12753 This checks that the instance is in the cluster. 12754 12755 """ 12756 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 12757 assert instance is not None, \ 12758 "Cannot retrieve locked instance %s" % self.op.instance_name 12759 nodenames = list(instance.all_nodes) 12760 for node in nodenames: 12761 _CheckNodeOnline(self, node) 12762 12763 self.instance = instance 12764 12765 if instance.disk_template not in constants.DTS_GROWABLE: 12766 raise errors.OpPrereqError("Instance's disk layout does not support" 12767 " growing", errors.ECODE_INVAL) 12768 12769 self.disk = instance.FindDisk(self.op.disk) 12770 12771 if self.op.absolute: 12772 self.target = self.op.amount 12773 self.delta = self.target - self.disk.size 12774 if self.delta < 0: 12775 raise errors.OpPrereqError("Requested size (%s) is smaller than " 12776 "current disk size (%s)" % 12777 (utils.FormatUnit(self.target, "h"), 12778 utils.FormatUnit(self.disk.size, "h")), 12779 errors.ECODE_STATE) 12780 else: 12781 self.delta = self.op.amount 12782 self.target = self.disk.size + self.delta 12783 if self.delta < 0: 12784 raise errors.OpPrereqError("Requested increment (%s) is negative" % 12785 utils.FormatUnit(self.delta, "h"), 12786 errors.ECODE_INVAL) 12787 12788 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12789
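# --- Illustrative sketch (editorial, not part of the module) ----------------
# The two sizing modes accepted by CheckPrereq() above: with "absolute" the
# amount is the new total size and the delta is derived from it; otherwise the
# amount is the increment and the target is derived.  Sizes are in MiB, as
# elsewhere in this module; the helper name is hypothetical.

def _SketchGrowTarget(current, amount, absolute):
  if absolute:
    target = amount
    delta = target - current
    if delta < 0:
      raise ValueError("requested size is smaller than the current size")
  else:
    delta = amount
    target = current + delta
    if delta < 0:
      raise ValueError("requested increment is negative")
  return (delta, target)

assert _SketchGrowTarget(10240, 2048, False) == (2048, 12288)
assert _SketchGrowTarget(10240, 20480, True) == (10240, 20480)
# ----------------------------------------------------------------------------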
12790 - def _CheckDiskSpace(self, nodenames, req_vgspace):
12791    template = self.instance.disk_template
12792    if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
12793      # TODO: check the free disk space for file, when that feature will be
12794      # supported
12795      nodes = map(self.cfg.GetNodeInfo, nodenames)
12796      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12797                        nodes)
12798      if es_nodes:
12799        # With exclusive storage we need to do something smarter than just
12800        # looking at free space; for now, let's simply abort the operation.
12801        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12802                                   " is enabled", errors.ECODE_STATE)
12803      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12804
12805 - def Exec(self, feedback_fn):
12806 """Execute disk grow. 12807 12808 """ 12809 instance = self.instance 12810 disk = self.disk 12811 12812 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE) 12813 assert (self.owned_locks(locking.LEVEL_NODE) == 12814 self.owned_locks(locking.LEVEL_NODE_RES)) 12815 12816 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks 12817 12818 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) 12819 if not disks_ok: 12820 raise errors.OpExecError("Cannot activate block device to grow") 12821 12822 feedback_fn("Growing disk %s of instance '%s' by %s to %s" % 12823 (self.op.disk, instance.name, 12824 utils.FormatUnit(self.delta, "h"), 12825 utils.FormatUnit(self.target, "h"))) 12826 12827 # First run all grow ops in dry-run mode 12828 for node in instance.all_nodes: 12829 self.cfg.SetDiskID(disk, node) 12830 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta, 12831 True, True) 12832 result.Raise("Dry-run grow request failed to node %s" % node) 12833 12834 if wipe_disks: 12835 # Get disk size from primary node for wiping 12836 self.cfg.SetDiskID(disk, instance.primary_node) 12837 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk]) 12838 result.Raise("Failed to retrieve disk size from node '%s'" % 12839 instance.primary_node) 12840 12841 (disk_size_in_bytes, ) = result.payload 12842 12843 if disk_size_in_bytes is None: 12844 raise errors.OpExecError("Failed to retrieve disk size from primary" 12845 " node '%s'" % instance.primary_node) 12846 12847 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes) 12848 12849 assert old_disk_size >= disk.size, \ 12850 ("Retrieved disk size too small (got %s, should be at least %s)" % 12851 (old_disk_size, disk.size)) 12852 else: 12853 old_disk_size = None 12854 12855 # We know that (as far as we can test) operations across different 12856 # nodes will succeed, time to run it for real on the backing storage 12857 for node in instance.all_nodes: 12858 self.cfg.SetDiskID(disk, node) 12859 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta, 12860 False, True) 12861 result.Raise("Grow request failed to node %s" % node) 12862 12863 # And now execute it for logical storage, on the primary node 12864 node = instance.primary_node 12865 self.cfg.SetDiskID(disk, node) 12866 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta, 12867 False, False) 12868 result.Raise("Grow request failed to node %s" % node) 12869 12870 disk.RecordGrow(self.delta) 12871 self.cfg.Update(instance, feedback_fn) 12872 12873 # Changes have been recorded, release node lock 12874 _ReleaseLocks(self, locking.LEVEL_NODE) 12875 12876 # Downgrade lock while waiting for sync 12877 self.glm.downgrade(locking.LEVEL_INSTANCE) 12878 12879 assert wipe_disks ^ (old_disk_size is None) 12880 12881 if wipe_disks: 12882 assert instance.disks[self.op.disk] == disk 12883 12884 # Wipe newly added disk space 12885 _WipeDisks(self, instance, 12886 disks=[(self.op.disk, disk, old_disk_size)]) 12887 12888 if self.op.wait_for_sync: 12889 disk_abort = not _WaitForSync(self, instance, disks=[disk]) 12890 if disk_abort: 12891 self.LogWarning("Disk syncing has not returned a good status; check" 12892 " the instance") 12893 if instance.admin_state != constants.ADMINST_UP: 12894 _SafeShutdownInstanceDisks(self, instance, disks=[disk]) 12895 elif instance.admin_state != constants.ADMINST_UP: 12896 self.LogWarning("Not shutting down the disk even if the instance is" 12897 " not supposed to 
be running because no wait for" 12898 " sync mode was requested") 12899 12900 assert self.owned_locks(locking.LEVEL_NODE_RES) 12901 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12902
12903 12904 -class LUInstanceQueryData(NoHooksLU):
12905 """Query runtime instance data. 12906 12907 """ 12908 REQ_BGL = False 12909
12910 - def ExpandNames(self):
12911 self.needed_locks = {} 12912 12913 # Use locking if requested or when non-static information is wanted 12914 if not (self.op.static or self.op.use_locking): 12915 self.LogWarning("Non-static data requested, locks need to be acquired") 12916 self.op.use_locking = True 12917 12918 if self.op.instances or not self.op.use_locking: 12919 # Expand instance names right here 12920 self.wanted_names = _GetWantedInstances(self, self.op.instances) 12921 else: 12922 # Will use acquired locks 12923 self.wanted_names = None 12924 12925 if self.op.use_locking: 12926 self.share_locks = _ShareAll() 12927 12928 if self.wanted_names is None: 12929 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 12930 else: 12931 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names 12932 12933 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 12934 self.needed_locks[locking.LEVEL_NODE] = [] 12935 self.needed_locks[locking.LEVEL_NETWORK] = [] 12936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12937
12938 - def DeclareLocks(self, level):
12939 if self.op.use_locking: 12940 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE) 12941 if level == locking.LEVEL_NODEGROUP: 12942 12943 # Lock all groups used by instances optimistically; this requires going 12944 # via the node before it's locked, requiring verification later on 12945 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 12946 frozenset(group_uuid 12947 for instance_name in owned_instances 12948 for group_uuid in 12949 self.cfg.GetInstanceNodeGroups(instance_name)) 12950 12951 elif level == locking.LEVEL_NODE: 12952 self._LockInstancesNodes() 12953 12954 elif level == locking.LEVEL_NETWORK: 12955 self.needed_locks[locking.LEVEL_NETWORK] = \ 12956 frozenset(net_uuid 12957 for instance_name in owned_instances 12958 for net_uuid in 12959 self.cfg.GetInstanceNetworks(instance_name))
12960
12961 - def CheckPrereq(self):
12962 """Check prerequisites. 12963 12964 This only checks the optional instance list against the existing names. 12965 12966 """ 12967 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 12968 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 12969 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 12970 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK)) 12971 12972 if self.wanted_names is None: 12973 assert self.op.use_locking, "Locking was not used" 12974 self.wanted_names = owned_instances 12975 12976 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names)) 12977 12978 if self.op.use_locking: 12979 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes, 12980 None) 12981 else: 12982 assert not (owned_instances or owned_groups or 12983 owned_nodes or owned_networks) 12984 12985 self.wanted_instances = instances.values()
12986
12987 - def _ComputeBlockdevStatus(self, node, instance, dev):
12988 """Returns the status of a block device 12989 12990 """ 12991 if self.op.static or not node: 12992 return None 12993 12994 self.cfg.SetDiskID(dev, node) 12995 12996 result = self.rpc.call_blockdev_find(node, dev) 12997 if result.offline: 12998 return None 12999 13000 result.Raise("Can't compute disk status for %s" % instance.name) 13001 13002 status = result.payload 13003 if status is None: 13004 return None 13005 13006 return (status.dev_path, status.major, status.minor, 13007 status.sync_percent, status.estimated_time, 13008 status.is_degraded, status.ldisk_status)
13009
13010 - def _ComputeDiskStatus(self, instance, snode, dev):
13011 """Compute block device status. 13012 13013 """ 13014 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg) 13015 13016 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
13017
13018 - def _ComputeDiskStatusInner(self, instance, snode, dev):
13019 """Compute block device status. 13020 13021 @attention: The device has to be annotated already. 13022 13023 """ 13024 if dev.dev_type in constants.LDS_DRBD: 13025 # we change the snode then (otherwise we use the one passed in) 13026 if dev.logical_id[0] == instance.primary_node: 13027 snode = dev.logical_id[1] 13028 else: 13029 snode = dev.logical_id[0] 13030 13031 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, 13032 instance, dev) 13033 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev) 13034 13035 if dev.children: 13036 dev_children = map(compat.partial(self._ComputeDiskStatusInner, 13037 instance, snode), 13038 dev.children) 13039 else: 13040 dev_children = [] 13041 13042 return { 13043 "iv_name": dev.iv_name, 13044 "dev_type": dev.dev_type, 13045 "logical_id": dev.logical_id, 13046 "physical_id": dev.physical_id, 13047 "pstatus": dev_pstatus, 13048 "sstatus": dev_sstatus, 13049 "children": dev_children, 13050 "mode": dev.mode, 13051 "size": dev.size, 13052 }
13053
13054 - def Exec(self, feedback_fn):
13055 """Gather and return data""" 13056 result = {} 13057 13058 cluster = self.cfg.GetClusterInfo() 13059 13060 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances)) 13061 nodes = dict(self.cfg.GetMultiNodeInfo(node_names)) 13062 13063 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group 13064 for node in nodes.values())) 13065 13066 group2name_fn = lambda uuid: groups[uuid].name 13067 for instance in self.wanted_instances: 13068 pnode = nodes[instance.primary_node] 13069 13070 if self.op.static or pnode.offline: 13071 remote_state = None 13072 if pnode.offline: 13073 self.LogWarning("Primary node %s is marked offline, returning static" 13074 " information only for instance %s" % 13075 (pnode.name, instance.name)) 13076 else: 13077 remote_info = self.rpc.call_instance_info(instance.primary_node, 13078 instance.name, 13079 instance.hypervisor) 13080 remote_info.Raise("Error checking node %s" % instance.primary_node) 13081 remote_info = remote_info.payload 13082 if remote_info and "state" in remote_info: 13083 remote_state = "up" 13084 else: 13085 if instance.admin_state == constants.ADMINST_UP: 13086 remote_state = "down" 13087 else: 13088 remote_state = instance.admin_state 13089 13090 disks = map(compat.partial(self._ComputeDiskStatus, instance, None), 13091 instance.disks) 13092 13093 snodes_group_uuids = [nodes[snode_name].group 13094 for snode_name in instance.secondary_nodes] 13095 13096 result[instance.name] = { 13097 "name": instance.name, 13098 "config_state": instance.admin_state, 13099 "run_state": remote_state, 13100 "pnode": instance.primary_node, 13101 "pnode_group_uuid": pnode.group, 13102 "pnode_group_name": group2name_fn(pnode.group), 13103 "snodes": instance.secondary_nodes, 13104 "snodes_group_uuids": snodes_group_uuids, 13105 "snodes_group_names": map(group2name_fn, snodes_group_uuids), 13106 "os": instance.os, 13107 # this happens to be the same format used for hooks 13108 "nics": _NICListToTuple(self, instance.nics), 13109 "disk_template": instance.disk_template, 13110 "disks": disks, 13111 "hypervisor": instance.hypervisor, 13112 "network_port": instance.network_port, 13113 "hv_instance": instance.hvparams, 13114 "hv_actual": cluster.FillHV(instance, skip_globals=True), 13115 "be_instance": instance.beparams, 13116 "be_actual": cluster.FillBE(instance), 13117 "os_instance": instance.osparams, 13118 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), 13119 "serial_no": instance.serial_no, 13120 "mtime": instance.mtime, 13121 "ctime": instance.ctime, 13122 "uuid": instance.uuid, 13123 } 13124 13125 return result
13126
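# A minimal, hypothetical helper sketching how the dictionary returned by
# LUInstanceQueryData.Exec above could be consumed; the key names
# ("config_state", "run_state", "pnode", "disks", "iv_name", "size", "mode")
# are taken from the result construction in Exec, while the helper itself is
# not part of this module.
def _SummarizeInstanceData(query_result):
  lines = []
  for name in sorted(query_result):
    data = query_result[name]
    lines.append("%s: config=%s, run=%s, primary node=%s" %
                 (name, data["config_state"], data["run_state"],
                  data["pnode"]))
    for disk in data["disks"]:
      lines.append("  %s: %s MiB, mode %s" %
                   (disk["iv_name"], disk["size"], disk["mode"]))
  return lines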
13127 13128 -def PrepareContainerMods(mods, private_fn):
13129 """Prepares a list of container modifications by adding a private data field. 13130 13131 @type mods: list of tuples; (operation, index, parameters) 13132 @param mods: List of modifications 13133 @type private_fn: callable or None 13134 @param private_fn: Callable for constructing a private data field for a 13135 modification 13136 @rtype: list 13137 13138 """ 13139 if private_fn is None: 13140 fn = lambda: None 13141 else: 13142 fn = private_fn 13143 13144 return [(op, idx, params, fn()) for (op, idx, params) in mods]
13145 13146 13147 #: Type description for changes as returned by L{ApplyContainerMods}'s 13148 #: callbacks 13149 _TApplyContModsCbChanges = \ 13150 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ 13151 ht.TNonEmptyString, 13152 ht.TAny, 13153 ])))
13154 13155 13156 -def ApplyContainerMods(kind, container, chgdesc, mods, 13157 create_fn, modify_fn, remove_fn):
13158 """Applies descriptions in C{mods} to C{container}. 13159 13160 @type kind: string 13161 @param kind: One-word item description 13162 @type container: list 13163 @param container: Container to modify 13164 @type chgdesc: None or list 13165 @param chgdesc: List of applied changes 13166 @type mods: list 13167 @param mods: Modifications as returned by L{PrepareContainerMods} 13168 @type create_fn: callable 13169 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD}); 13170 receives absolute item index, parameters and private data object as added 13171 by L{PrepareContainerMods}, returns tuple containing new item and changes 13172 as list 13173 @type modify_fn: callable 13174 @param modify_fn: Callback for modifying an existing item 13175 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters 13176 and private data object as added by L{PrepareContainerMods}, returns 13177 changes as list 13178 @type remove_fn: callable 13179 @param remove_fn: Callback on removing item; receives absolute item index, 13180 item and private data object as added by L{PrepareContainerMods} 13181 13182 """ 13183 for (op, idx, params, private) in mods: 13184 if idx == -1: 13185 # Append 13186 absidx = len(container) - 1 13187 elif idx < 0: 13188 raise IndexError("Not accepting negative indices other than -1") 13189 elif idx > len(container): 13190 raise IndexError("Got %s index %s, but there are only %s" % 13191 (kind, idx, len(container))) 13192 else: 13193 absidx = idx 13194 13195 changes = None 13196 13197 if op == constants.DDM_ADD: 13198 # Calculate where item will be added 13199 if idx == -1: 13200 addidx = len(container) 13201 else: 13202 addidx = idx 13203 13204 if create_fn is None: 13205 item = params 13206 else: 13207 (item, changes) = create_fn(addidx, params, private) 13208 13209 if idx == -1: 13210 container.append(item) 13211 else: 13212 assert idx >= 0 13213 assert idx <= len(container) 13214 # list.insert does so before the specified index 13215 container.insert(idx, item) 13216 else: 13217 # Retrieve existing item 13218 try: 13219 item = container[absidx] 13220 except IndexError: 13221 raise IndexError("Invalid %s index %s" % (kind, idx)) 13222 13223 if op == constants.DDM_REMOVE: 13224 assert not params 13225 13226 if remove_fn is not None: 13227 remove_fn(absidx, item, private) 13228 13229 changes = [("%s/%s" % (kind, absidx), "remove")] 13230 13231 assert container[absidx] == item 13232 del container[absidx] 13233 elif op == constants.DDM_MODIFY: 13234 if modify_fn is not None: 13235 changes = modify_fn(absidx, item, params, private) 13236 else: 13237 raise errors.ProgrammerError("Unhandled operation '%s'" % op) 13238 13239 assert _TApplyContModsCbChanges(changes) 13240 13241 if not (chgdesc is None or changes is None): 13242 chgdesc.extend(changes)
13243
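# A small usage sketch of the two helpers above, assuming a Ganeti
# installation where this module is importable as ganeti.cmdlib. With
# create_fn/modify_fn/remove_fn left as None the parameters themselves are
# treated as the new items, which keeps the example self-contained.
from ganeti import constants
from ganeti.cmdlib import ApplyContainerMods, PrepareContainerMods

container = ["a", "b"]
chgdesc = []
mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c"),
                             (constants.DDM_REMOVE, 0, None)], None)
ApplyContainerMods("demo", container, chgdesc, mods, None, None, None)
# container is now ["b", "c"] and chgdesc records [("demo/0", "remove")]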
13244 13245 -def _UpdateIvNames(base_index, disks):
13246 """Updates the C{iv_name} attribute of disks. 13247 13248 @type disks: list of L{objects.Disk} 13249 13250 """ 13251 for (idx, disk) in enumerate(disks): 13252 disk.iv_name = "disk/%s" % (base_index + idx, )
13253
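# A quick sketch of the renumbering performed above, assuming ganeti.objects
# and ganeti.cmdlib are importable; with base_index=2 the two disks are named
# "disk/2" and "disk/3".
from ganeti import objects
from ganeti.cmdlib import _UpdateIvNames

disks = [objects.Disk(), objects.Disk()]
_UpdateIvNames(2, disks)
# disks[0].iv_name == "disk/2", disks[1].iv_name == "disk/3"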
13254 13255 -class _InstNicModPrivate:
13256 """Data structure for network interface modifications. 13257 13258 Used by L{LUInstanceSetParams}. 13259 13260 """
13261 - def __init__(self):
13262 self.params = None 13263 self.filled = None
13264
13265 13266 -class LUInstanceSetParams(LogicalUnit):
13267 """Modifies an instances's parameters. 13268 13269 """ 13270 HPATH = "instance-modify" 13271 HTYPE = constants.HTYPE_INSTANCE 13272 REQ_BGL = False 13273 13274 @staticmethod
13275 - def _UpgradeDiskNicMods(kind, mods, verify_fn):
13276 assert ht.TList(mods) 13277 assert not mods or len(mods[0]) in (2, 3) 13278 13279 if mods and len(mods[0]) == 2: 13280 result = [] 13281 13282 addremove = 0 13283 for op, params in mods: 13284 if op in (constants.DDM_ADD, constants.DDM_REMOVE): 13285 result.append((op, -1, params)) 13286 addremove += 1 13287 13288 if addremove > 1: 13289 raise errors.OpPrereqError("Only one %s add or remove operation is" 13290 " supported at a time" % kind, 13291 errors.ECODE_INVAL) 13292 else: 13293 result.append((constants.DDM_MODIFY, op, params)) 13294 13295 assert verify_fn(result) 13296 else: 13297 result = mods 13298 13299 return result
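# A hedged sketch of the legacy-format upgrade performed above: a two-element
# (operation, params) modification is rewritten into the three-element
# (operation, index, params) form, with index -1 meaning "append". The verify
# function used here is the same opcode-level check that CheckArguments below
# passes in.
from ganeti import constants, opcodes
from ganeti.cmdlib import LUInstanceSetParams

mods = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
upgraded = LUInstanceSetParams._UpgradeDiskNicMods(
  "disk", mods, opcodes.OpInstanceSetParams.TestDiskModifications)
# upgraded == [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]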
13300 13301 @staticmethod
13302 - def _CheckMods(kind, mods, key_types, item_fn):
13303 """Ensures requested disk/NIC modifications are valid. 13304 13305 """ 13306 for (op, _, params) in mods: 13307 assert ht.TDict(params) 13308 13309 # If 'key_types' is an empty dict, we assume we have an 13310 # 'ext' template and thus do not ForceDictType 13311 if key_types: 13312 utils.ForceDictType(params, key_types) 13313 13314 if op == constants.DDM_REMOVE: 13315 if params: 13316 raise errors.OpPrereqError("No settings should be passed when" 13317 " removing a %s" % kind, 13318 errors.ECODE_INVAL) 13319 elif op in (constants.DDM_ADD, constants.DDM_MODIFY): 13320 item_fn(op, params) 13321 else: 13322 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13323 13324 @staticmethod
13325 - def _VerifyDiskModification(op, params):
13326 """Verifies a disk modification. 13327 13328 """ 13329 if op == constants.DDM_ADD: 13330 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR) 13331 if mode not in constants.DISK_ACCESS_SET: 13332 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, 13333 errors.ECODE_INVAL) 13334 13335 size = params.get(constants.IDISK_SIZE, None) 13336 if size is None: 13337 raise errors.OpPrereqError("Required disk parameter '%s' missing" % 13338 constants.IDISK_SIZE, errors.ECODE_INVAL) 13339 13340 try: 13341 size = int(size) 13342 except (TypeError, ValueError), err: 13343 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err, 13344 errors.ECODE_INVAL) 13345 13346 params[constants.IDISK_SIZE] = size 13347 13348 elif op == constants.DDM_MODIFY: 13349 if constants.IDISK_SIZE in params: 13350 raise errors.OpPrereqError("Disk size change not possible, use" 13351 " grow-disk", errors.ECODE_INVAL) 13352 if constants.IDISK_MODE not in params: 13353 raise errors.OpPrereqError("Disk 'mode' is the only kind of" 13354 " modification supported, but missing", 13355 errors.ECODE_NOENT) 13356 if len(params) > 1: 13357 raise errors.OpPrereqError("Disk modification doesn't support" 13358 " additional arbitrary parameters", 13359 errors.ECODE_INVAL)
13360 13361 @staticmethod
13362 - def _VerifyNicModification(op, params):
13363 """Verifies a network interface modification. 13364 13365 """ 13366 if op in (constants.DDM_ADD, constants.DDM_MODIFY): 13367 ip = params.get(constants.INIC_IP, None) 13368 req_net = params.get(constants.INIC_NETWORK, None) 13369 link = params.get(constants.NIC_LINK, None) 13370 mode = params.get(constants.NIC_MODE, None) 13371 if req_net is not None: 13372 if req_net.lower() == constants.VALUE_NONE: 13373 params[constants.INIC_NETWORK] = None 13374 req_net = None 13375 elif link is not None or mode is not None: 13376 raise errors.OpPrereqError("If network is given" 13377 " mode or link should not", 13378 errors.ECODE_INVAL) 13379 13380 if op == constants.DDM_ADD: 13381 macaddr = params.get(constants.INIC_MAC, None) 13382 if macaddr is None: 13383 params[constants.INIC_MAC] = constants.VALUE_AUTO 13384 13385 if ip is not None: 13386 if ip.lower() == constants.VALUE_NONE: 13387 params[constants.INIC_IP] = None 13388 else: 13389 if ip.lower() == constants.NIC_IP_POOL: 13390 if op == constants.DDM_ADD and req_net is None: 13391 raise errors.OpPrereqError("If ip=pool, parameter network" 13392 " cannot be none", 13393 errors.ECODE_INVAL) 13394 else: 13395 if not netutils.IPAddress.IsValid(ip): 13396 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 13397 errors.ECODE_INVAL) 13398 13399 if constants.INIC_MAC in params: 13400 macaddr = params[constants.INIC_MAC] 13401 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 13402 macaddr = utils.NormalizeAndValidateMac(macaddr) 13403 13404 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO: 13405 raise errors.OpPrereqError("'auto' is not a valid MAC address when" 13406 " modifying an existing NIC", 13407 errors.ECODE_INVAL)
13408
13409 - def CheckArguments(self):
13410 if not (self.op.nics or self.op.disks or self.op.disk_template or 13411 self.op.hvparams or self.op.beparams or self.op.os_name or 13412 self.op.offline is not None or self.op.runtime_mem): 13413 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) 13414 13415 if self.op.hvparams: 13416 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, 13417 "hypervisor", "instance", "cluster") 13418 13419 self.op.disks = self._UpgradeDiskNicMods( 13420 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications) 13421 self.op.nics = self._UpgradeDiskNicMods( 13422 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications) 13423 13424 if self.op.disks and self.op.disk_template is not None: 13425 raise errors.OpPrereqError("Disk template conversion and other disk" 13426 " changes not supported at the same time", 13427 errors.ECODE_INVAL) 13428 13429 if (self.op.disk_template and 13430 self.op.disk_template in constants.DTS_INT_MIRROR and 13431 self.op.remote_node is None): 13432 raise errors.OpPrereqError("Changing the disk template to a mirrored" 13433 " one requires specifying a secondary node", 13434 errors.ECODE_INVAL) 13435 13436 # Check NIC modifications 13437 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES, 13438 self._VerifyNicModification)
13439
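# A hedged example of the modification format accepted after the upgrade in
# CheckArguments above (instance and link names are placeholders): one disk
# is appended and NIC 0 is re-linked.
from ganeti import constants, opcodes

op = opcodes.OpInstanceSetParams(
  instance_name="instance1.example.com",
  disks=[(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})],
  nics=[(constants.DDM_MODIFY, 0, {constants.NIC_LINK: "br0"})])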
13440 - def ExpandNames(self):
13441 self._ExpandAndLockInstance() 13442 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 13443 # Can't even acquire node locks in shared mode as upcoming changes in 13444 # Ganeti 2.6 will start to modify the node object on disk conversion 13445 self.needed_locks[locking.LEVEL_NODE] = [] 13446 self.needed_locks[locking.LEVEL_NODE_RES] = [] 13447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 13448 # Lock node group to look up the ipolicy 13449 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13450
13451 - def DeclareLocks(self, level):
13452 if level == locking.LEVEL_NODEGROUP: 13453 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 13454 # Acquire locks for the instance's nodegroups optimistically. Needs 13455 # to be verified in CheckPrereq 13456 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 13457 self.cfg.GetInstanceNodeGroups(self.op.instance_name) 13458 elif level == locking.LEVEL_NODE: 13459 self._LockInstancesNodes() 13460 if self.op.disk_template and self.op.remote_node: 13461 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 13462 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node) 13463 elif level == locking.LEVEL_NODE_RES and self.op.disk_template: 13464 # Copy node locks 13465 self.needed_locks[locking.LEVEL_NODE_RES] = \ 13466 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13467
13468 - def BuildHooksEnv(self):
13469 """Build hooks env. 13470 13471 This runs on the master, primary and secondaries. 13472 13473 """ 13474 args = {} 13475 if constants.BE_MINMEM in self.be_new: 13476 args["minmem"] = self.be_new[constants.BE_MINMEM] 13477 if constants.BE_MAXMEM in self.be_new: 13478 args["maxmem"] = self.be_new[constants.BE_MAXMEM] 13479 if constants.BE_VCPUS in self.be_new: 13480 args["vcpus"] = self.be_new[constants.BE_VCPUS] 13481 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk 13482 # information at all. 13483 13484 if self._new_nics is not None: 13485 nics = [] 13486 13487 for nic in self._new_nics: 13488 n = copy.deepcopy(nic) 13489 nicparams = self.cluster.SimpleFillNIC(n.nicparams) 13490 n.nicparams = nicparams 13491 nics.append(_NICToTuple(self, n)) 13492 13493 args["nics"] = nics 13494 13495 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) 13496 if self.op.disk_template: 13497 env["NEW_DISK_TEMPLATE"] = self.op.disk_template 13498 if self.op.runtime_mem: 13499 env["RUNTIME_MEMORY"] = self.op.runtime_mem 13500 13501 return env
13502
13503 - def BuildHooksNodes(self):
13504 """Build hooks nodes. 13505 13506 """ 13507 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 13508 return (nl, nl)
13509
13510 - def _PrepareNicModification(self, params, private, old_ip, old_net_uuid, 13511 old_params, cluster, pnode):
13512 13513 update_params_dict = dict([(key, params[key]) 13514 for key in constants.NICS_PARAMETERS 13515 if key in params]) 13516 13517 req_link = update_params_dict.get(constants.NIC_LINK, None) 13518 req_mode = update_params_dict.get(constants.NIC_MODE, None) 13519 13520 new_net_uuid = None 13521 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid) 13522 if new_net_uuid_or_name: 13523 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name) 13524 new_net_obj = self.cfg.GetNetwork(new_net_uuid) 13525 13526 if old_net_uuid: 13527 old_net_obj = self.cfg.GetNetwork(old_net_uuid) 13528 13529 if new_net_uuid: 13530 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode) 13531 if not netparams: 13532 raise errors.OpPrereqError("No netparams found for the network" 13533 " %s, probably not connected" % 13534 new_net_obj.name, errors.ECODE_INVAL) 13535 new_params = dict(netparams) 13536 else: 13537 new_params = _GetUpdatedParams(old_params, update_params_dict) 13538 13539 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES) 13540 13541 new_filled_params = cluster.SimpleFillNIC(new_params) 13542 objects.NIC.CheckParameterSyntax(new_filled_params) 13543 13544 new_mode = new_filled_params[constants.NIC_MODE] 13545 if new_mode == constants.NIC_MODE_BRIDGED: 13546 bridge = new_filled_params[constants.NIC_LINK] 13547 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg 13548 if msg: 13549 msg = "Error checking bridges on node '%s': %s" % (pnode, msg) 13550 if self.op.force: 13551 self.warn.append(msg) 13552 else: 13553 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 13554 13555 elif new_mode == constants.NIC_MODE_ROUTED: 13556 ip = params.get(constants.INIC_IP, old_ip) 13557 if ip is None: 13558 raise errors.OpPrereqError("Cannot set the NIC IP address to None" 13559 " on a routed NIC", errors.ECODE_INVAL) 13560 13561 elif new_mode == constants.NIC_MODE_OVS: 13562 # TODO: check OVS link 13563 self.LogInfo("OVS links are currently not checked for correctness") 13564 13565 if constants.INIC_MAC in params: 13566 mac = params[constants.INIC_MAC] 13567 if mac is None: 13568 raise errors.OpPrereqError("Cannot unset the NIC MAC address", 13569 errors.ECODE_INVAL) 13570 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 13571 # otherwise generate the MAC address 13572 params[constants.INIC_MAC] = \ 13573 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId()) 13574 else: 13575 # or validate/reserve the current one 13576 try: 13577 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 13578 except errors.ReservationError: 13579 raise errors.OpPrereqError("MAC address '%s' already in use" 13580 " in cluster" % mac, 13581 errors.ECODE_NOTUNIQUE) 13582 elif new_net_uuid != old_net_uuid: 13583 13584 def get_net_prefix(net_uuid): 13585 mac_prefix = None 13586 if net_uuid: 13587 nobj = self.cfg.GetNetwork(net_uuid) 13588 mac_prefix = nobj.mac_prefix 13589 13590 return mac_prefix
13591 13592 new_prefix = get_net_prefix(new_net_uuid) 13593 old_prefix = get_net_prefix(old_net_uuid) 13594 if old_prefix != new_prefix: 13595 params[constants.INIC_MAC] = \ 13596 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId()) 13597 13598 # if there is a change in (ip, network) tuple 13599 new_ip = params.get(constants.INIC_IP, old_ip) 13600 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid): 13601 if new_ip: 13602 # if IP is pool then require a network and generate one IP 13603 if new_ip.lower() == constants.NIC_IP_POOL: 13604 if new_net_uuid: 13605 try: 13606 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId()) 13607 except errors.ReservationError: 13608 raise errors.OpPrereqError("Unable to get a free IP" 13609 " from the address pool", 13610 errors.ECODE_STATE) 13611 self.LogInfo("Chose IP %s from network %s", 13612 new_ip, 13613 new_net_obj.name) 13614 params[constants.INIC_IP] = new_ip 13615 else: 13616 raise errors.OpPrereqError("ip=pool, but no network found", 13617 errors.ECODE_INVAL) 13618 # Reserve new IP if in the new network if any 13619 elif new_net_uuid: 13620 try: 13621 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId()) 13622 self.LogInfo("Reserving IP %s in network %s", 13623 new_ip, new_net_obj.name) 13624 except errors.ReservationError: 13625 raise errors.OpPrereqError("IP %s not available in network %s" % 13626 (new_ip, new_net_obj.name), 13627 errors.ECODE_NOTUNIQUE) 13628 # new network is None so check if new IP is a conflicting IP 13629 elif self.op.conflicts_check: 13630 _CheckForConflictingIp(self, new_ip, pnode) 13631 13632 # release old IP if old network is not None 13633 if old_ip and old_net_uuid: 13634 try: 13635 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId()) 13636 except errors.AddressPoolError: 13637 logging.warning("Release IP %s not contained in network %s", 13638 old_ip, old_net_obj.name) 13639 13640 # there are no changes in (ip, network) tuple and old network is not None 13641 elif (old_net_uuid is not None and 13642 (req_link is not None or req_mode is not None)): 13643 raise errors.OpPrereqError("Not allowed to change link or mode of" 13644 " a NIC that is connected to a network", 13645 errors.ECODE_INVAL) 13646 13647 private.params = new_params 13648 private.filled = new_filled_params
13649
13650 - def _PreCheckDiskTemplate(self, pnode_info):
13651 """CheckPrereq checks related to a new disk template.""" 13652 # Arguments are passed to avoid configuration lookups 13653 instance = self.instance 13654 pnode = instance.primary_node 13655 cluster = self.cluster 13656 if instance.disk_template == self.op.disk_template: 13657 raise errors.OpPrereqError("Instance already has disk template %s" % 13658 instance.disk_template, errors.ECODE_INVAL) 13659 13660 if (instance.disk_template, 13661 self.op.disk_template) not in self._DISK_CONVERSIONS: 13662 raise errors.OpPrereqError("Unsupported disk template conversion from" 13663 " %s to %s" % (instance.disk_template, 13664 self.op.disk_template), 13665 errors.ECODE_INVAL) 13666 _CheckInstanceState(self, instance, INSTANCE_DOWN, 13667 msg="cannot change disk template") 13668 if self.op.disk_template in constants.DTS_INT_MIRROR: 13669 if self.op.remote_node == pnode: 13670 raise errors.OpPrereqError("Given new secondary node %s is the same" 13671 " as the primary node of the instance" % 13672 self.op.remote_node, errors.ECODE_STATE) 13673 _CheckNodeOnline(self, self.op.remote_node) 13674 _CheckNodeNotDrained(self, self.op.remote_node) 13675 # FIXME: here we assume that the old instance type is DT_PLAIN 13676 assert instance.disk_template == constants.DT_PLAIN 13677 disks = [{constants.IDISK_SIZE: d.size, 13678 constants.IDISK_VG: d.logical_id[0]} 13679 for d in instance.disks] 13680 required = _ComputeDiskSizePerVG(self.op.disk_template, disks) 13681 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required) 13682 13683 snode_info = self.cfg.GetNodeInfo(self.op.remote_node) 13684 snode_group = self.cfg.GetNodeGroup(snode_info.group) 13685 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 13686 snode_group) 13687 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg, 13688 ignore=self.op.ignore_ipolicy) 13689 if pnode_info.group != snode_info.group: 13690 self.LogWarning("The primary and secondary nodes are in two" 13691 " different node groups; the disk parameters" 13692 " from the first disk's node group will be" 13693 " used") 13694 13695 if not self.op.disk_template in constants.DTS_EXCL_STORAGE: 13696 # Make sure none of the nodes require exclusive storage 13697 nodes = [pnode_info] 13698 if self.op.disk_template in constants.DTS_INT_MIRROR: 13699 assert snode_info 13700 nodes.append(snode_info) 13701 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n) 13702 if compat.any(map(has_es, nodes)): 13703 errmsg = ("Cannot convert disk template from %s to %s when exclusive" 13704 " storage is enabled" % (instance.disk_template, 13705 self.op.disk_template)) 13706 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13707
13708 - def CheckPrereq(self):
13709 """Check prerequisites. 13710 13711 This only checks the instance list against the existing names. 13712 13713 """ 13714 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE) 13715 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 13716 13717 cluster = self.cluster = self.cfg.GetClusterInfo() 13718 assert self.instance is not None, \ 13719 "Cannot retrieve locked instance %s" % self.op.instance_name 13720 13721 pnode = instance.primary_node 13722 assert pnode in self.owned_locks(locking.LEVEL_NODE) 13723 nodelist = list(instance.all_nodes) 13724 pnode_info = self.cfg.GetNodeInfo(pnode) 13725 self.diskparams = self.cfg.GetInstanceDiskParams(instance) 13726 13727 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups) 13728 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP) 13729 group_info = self.cfg.GetNodeGroup(pnode_info.group) 13730 13731 # dictionary with instance information after the modification 13732 ispec = {} 13733 13734 # Check disk modifications. This is done here and not in CheckArguments 13735 # (as with NICs), because we need to know the instance's disk template 13736 if instance.disk_template == constants.DT_EXT: 13737 self._CheckMods("disk", self.op.disks, {}, 13738 self._VerifyDiskModification) 13739 else: 13740 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES, 13741 self._VerifyDiskModification) 13742 13743 # Prepare disk/NIC modifications 13744 self.diskmod = PrepareContainerMods(self.op.disks, None) 13745 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate) 13746 13747 # Check the validity of the `provider' parameter 13748 if instance.disk_template in constants.DT_EXT: 13749 for mod in self.diskmod: 13750 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None) 13751 if mod[0] == constants.DDM_ADD: 13752 if ext_provider is None: 13753 raise errors.OpPrereqError("Instance template is '%s' and parameter" 13754 " '%s' missing, during disk add" % 13755 (constants.DT_EXT, 13756 constants.IDISK_PROVIDER), 13757 errors.ECODE_NOENT) 13758 elif mod[0] == constants.DDM_MODIFY: 13759 if ext_provider: 13760 raise errors.OpPrereqError("Parameter '%s' is invalid during disk" 13761 " modification" % 13762 constants.IDISK_PROVIDER, 13763 errors.ECODE_INVAL) 13764 else: 13765 for mod in self.diskmod: 13766 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None) 13767 if ext_provider is not None: 13768 raise errors.OpPrereqError("Parameter '%s' is only valid for" 13769 " instances of type '%s'" % 13770 (constants.IDISK_PROVIDER, 13771 constants.DT_EXT), 13772 errors.ECODE_INVAL) 13773 13774 # OS change 13775 if self.op.os_name and not self.op.force: 13776 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name, 13777 self.op.force_variant) 13778 instance_os = self.op.os_name 13779 else: 13780 instance_os = instance.os 13781 13782 assert not (self.op.disk_template and self.op.disks), \ 13783 "Can't modify disk template and apply disk changes at the same time" 13784 13785 if self.op.disk_template: 13786 self._PreCheckDiskTemplate(pnode_info) 13787 13788 # hvparams processing 13789 if self.op.hvparams: 13790 hv_type = instance.hypervisor 13791 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams) 13792 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) 13793 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict) 13794 13795 # local check 13796 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new) 13797 
_CheckHVParams(self, nodelist, instance.hypervisor, hv_new) 13798 self.hv_proposed = self.hv_new = hv_new # the new actual values 13799 self.hv_inst = i_hvdict # the new dict (without defaults) 13800 else: 13801 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os, 13802 instance.hvparams) 13803 self.hv_new = self.hv_inst = {} 13804 13805 # beparams processing 13806 if self.op.beparams: 13807 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams, 13808 use_none=True) 13809 objects.UpgradeBeParams(i_bedict) 13810 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) 13811 be_new = cluster.SimpleFillBE(i_bedict) 13812 self.be_proposed = self.be_new = be_new # the new actual values 13813 self.be_inst = i_bedict # the new dict (without defaults) 13814 else: 13815 self.be_new = self.be_inst = {} 13816 self.be_proposed = cluster.SimpleFillBE(instance.beparams) 13817 be_old = cluster.FillBE(instance) 13818 13819 # CPU param validation -- checking every time a parameter is 13820 # changed to cover all cases where either CPU mask or vcpus have 13821 # changed 13822 if (constants.BE_VCPUS in self.be_proposed and 13823 constants.HV_CPU_MASK in self.hv_proposed): 13824 cpu_list = \ 13825 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK]) 13826 # Verify mask is consistent with number of vCPUs. Can skip this 13827 # test if only 1 entry in the CPU mask, which means same mask 13828 # is applied to all vCPUs. 13829 if (len(cpu_list) > 1 and 13830 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]): 13831 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the" 13832 " CPU mask [%s]" % 13833 (self.be_proposed[constants.BE_VCPUS], 13834 self.hv_proposed[constants.HV_CPU_MASK]), 13835 errors.ECODE_INVAL) 13836 13837 # Only perform this test if a new CPU mask is given 13838 if constants.HV_CPU_MASK in self.hv_new: 13839 # Calculate the largest CPU number requested 13840 max_requested_cpu = max(map(max, cpu_list)) 13841 # Check that all of the instance's nodes have enough physical CPUs to 13842 # satisfy the requested CPU mask 13843 _CheckNodesPhysicalCPUs(self, instance.all_nodes, 13844 max_requested_cpu + 1, instance.hypervisor) 13845 13846 # osparams processing 13847 if self.op.osparams: 13848 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 13849 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 13850 self.os_inst = i_osdict # the new dict (without defaults) 13851 else: 13852 self.os_inst = {} 13853 13854 self.warn = [] 13855 13856 #TODO(dynmem): do the appropriate check involving MINMEM 13857 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and 13858 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]): 13859 mem_check_list = [pnode] 13860 if be_new[constants.BE_AUTO_BALANCE]: 13861 # either we changed auto_balance to yes or it was from before 13862 mem_check_list.extend(instance.secondary_nodes) 13863 instance_info = self.rpc.call_instance_info(pnode, instance.name, 13864 instance.hypervisor) 13865 nodeinfo = self.rpc.call_node_info(mem_check_list, None, 13866 [instance.hypervisor], False) 13867 pninfo = nodeinfo[pnode] 13868 msg = pninfo.fail_msg 13869 if msg: 13870 # Assume the primary node is unreachable and go ahead 13871 self.warn.append("Can't get info from primary node %s: %s" % 13872 (pnode, msg)) 13873 else: 13874 (_, _, (pnhvinfo, )) = pninfo.payload 13875 if not isinstance(pnhvinfo.get("memory_free", None), int): 13876 self.warn.append("Node data from primary node %s doesn't 
contain" 13877 " free memory information" % pnode) 13878 elif instance_info.fail_msg: 13879 self.warn.append("Can't get instance runtime information: %s" % 13880 instance_info.fail_msg) 13881 else: 13882 if instance_info.payload: 13883 current_mem = int(instance_info.payload["memory"]) 13884 else: 13885 # Assume instance not running 13886 # (there is a slight race condition here, but it's not very 13887 # probable, and we have no other way to check) 13888 # TODO: Describe race condition 13889 current_mem = 0 13890 #TODO(dynmem): do the appropriate check involving MINMEM 13891 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem - 13892 pnhvinfo["memory_free"]) 13893 if miss_mem > 0: 13894 raise errors.OpPrereqError("This change will prevent the instance" 13895 " from starting, due to %d MB of memory" 13896 " missing on its primary node" % 13897 miss_mem, errors.ECODE_NORES) 13898 13899 if be_new[constants.BE_AUTO_BALANCE]: 13900 for node, nres in nodeinfo.items(): 13901 if node not in instance.secondary_nodes: 13902 continue 13903 nres.Raise("Can't get info from secondary node %s" % node, 13904 prereq=True, ecode=errors.ECODE_STATE) 13905 (_, _, (nhvinfo, )) = nres.payload 13906 if not isinstance(nhvinfo.get("memory_free", None), int): 13907 raise errors.OpPrereqError("Secondary node %s didn't return free" 13908 " memory information" % node, 13909 errors.ECODE_STATE) 13910 #TODO(dynmem): do the appropriate check involving MINMEM 13911 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]: 13912 raise errors.OpPrereqError("This change will prevent the instance" 13913 " from failover to its secondary node" 13914 " %s, due to not enough memory" % node, 13915 errors.ECODE_STATE) 13916 13917 if self.op.runtime_mem: 13918 remote_info = self.rpc.call_instance_info(instance.primary_node, 13919 instance.name, 13920 instance.hypervisor) 13921 remote_info.Raise("Error checking node %s" % instance.primary_node) 13922 if not remote_info.payload: # not running already 13923 raise errors.OpPrereqError("Instance %s is not running" % 13924 instance.name, errors.ECODE_STATE) 13925 13926 current_memory = remote_info.payload["memory"] 13927 if (not self.op.force and 13928 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or 13929 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])): 13930 raise errors.OpPrereqError("Instance %s must have memory between %d" 13931 " and %d MB of memory unless --force is" 13932 " given" % 13933 (instance.name, 13934 self.be_proposed[constants.BE_MINMEM], 13935 self.be_proposed[constants.BE_MAXMEM]), 13936 errors.ECODE_INVAL) 13937 13938 delta = self.op.runtime_mem - current_memory 13939 if delta > 0: 13940 _CheckNodeFreeMemory(self, instance.primary_node, 13941 "ballooning memory for instance %s" % 13942 instance.name, delta, instance.hypervisor) 13943 13944 if self.op.disks and instance.disk_template == constants.DT_DISKLESS: 13945 raise errors.OpPrereqError("Disk operations not supported for" 13946 " diskless instances", errors.ECODE_INVAL) 13947 13948 def _PrepareNicCreate(_, params, private): 13949 self._PrepareNicModification(params, private, None, None, 13950 {}, cluster, pnode) 13951 return (None, None)
13952 13953 def _PrepareNicMod(_, nic, params, private): 13954 self._PrepareNicModification(params, private, nic.ip, nic.network, 13955 nic.nicparams, cluster, pnode) 13956 return None 13957 13958 def _PrepareNicRemove(_, params, __): 13959 ip = params.ip 13960 net = params.network 13961 if net is not None and ip is not None: 13962 self.cfg.ReleaseIp(net, ip, self.proc.GetECId()) 13963 13964 # Verify NIC changes (operating on copy) 13965 nics = instance.nics[:] 13966 ApplyContainerMods("NIC", nics, None, self.nicmod, 13967 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove) 13968 if len(nics) > constants.MAX_NICS: 13969 raise errors.OpPrereqError("Instance has too many network interfaces" 13970 " (%d), cannot add more" % constants.MAX_NICS, 13971 errors.ECODE_STATE) 13972 13973 # Verify disk changes (operating on a copy) 13974 disks = instance.disks[:] 13975 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None) 13976 if len(disks) > constants.MAX_DISKS: 13977 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add" 13978 " more" % constants.MAX_DISKS, 13979 errors.ECODE_STATE) 13980 disk_sizes = [disk.size for disk in instance.disks] 13981 disk_sizes.extend(params["size"] for (op, idx, params, private) in 13982 self.diskmod if op == constants.DDM_ADD) 13983 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes) 13984 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes 13985 13986 if self.op.offline is not None and self.op.offline: 13987 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, 13988 msg="can't change to offline") 13989 13990 # Pre-compute NIC changes (necessary to use result in hooks) 13991 self._nic_chgdesc = [] 13992 if self.nicmod: 13993 # Operate on copies as this is still in prereq 13994 nics = [nic.Copy() for nic in instance.nics] 13995 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod, 13996 self._CreateNewNic, self._ApplyNicMods, None) 13997 self._new_nics = nics 13998 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics) 13999 else: 14000 self._new_nics = None 14001 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics) 14002 14003 if not self.op.ignore_ipolicy: 14004 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 14005 group_info) 14006 14007 # Fill ispec with backend parameters 14008 ispec[constants.ISPEC_SPINDLE_USE] = \ 14009 self.be_new.get(constants.BE_SPINDLE_USE, None) 14010 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS, 14011 None) 14012 14013 # Copy ispec to verify parameters with min/max values separately 14014 if self.op.disk_template: 14015 new_disk_template = self.op.disk_template 14016 else: 14017 new_disk_template = instance.disk_template 14018 ispec_max = ispec.copy() 14019 ispec_max[constants.ISPEC_MEM_SIZE] = \ 14020 self.be_new.get(constants.BE_MAXMEM, None) 14021 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max, 14022 new_disk_template) 14023 ispec_min = ispec.copy() 14024 ispec_min[constants.ISPEC_MEM_SIZE] = \ 14025 self.be_new.get(constants.BE_MINMEM, None) 14026 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min, 14027 new_disk_template) 14028 14029 if (res_max or res_min): 14030 # FIXME: Improve error message by including information about whether 14031 # the upper or lower limit of the parameter fails the ipolicy. 
14032 msg = ("Instance allocation to group %s (%s) violates policy: %s" % 14033 (group_info, group_info.name, 14034 utils.CommaJoin(set(res_max + res_min)))) 14035 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 14036
14037 - def _ConvertPlainToDrbd(self, feedback_fn):
14038 """Converts an instance from plain to drbd. 14039 14040 """ 14041 feedback_fn("Converting template to drbd") 14042 instance = self.instance 14043 pnode = instance.primary_node 14044 snode = self.op.remote_node 14045 14046 assert instance.disk_template == constants.DT_PLAIN 14047 14048 # create a fake disk info for _GenerateDiskTemplate 14049 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode, 14050 constants.IDISK_VG: d.logical_id[0]} 14051 for d in instance.disks] 14052 new_disks = _GenerateDiskTemplate(self, self.op.disk_template, 14053 instance.name, pnode, [snode], 14054 disk_info, None, None, 0, feedback_fn, 14055 self.diskparams) 14056 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks, 14057 self.diskparams) 14058 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode) 14059 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode) 14060 info = _GetInstanceInfoText(instance) 14061 feedback_fn("Creating additional volumes...") 14062 # first, create the missing data and meta devices 14063 for disk in anno_disks: 14064 # unfortunately this is... not too nice 14065 _CreateSingleBlockDev(self, pnode, instance, disk.children[1], 14066 info, True, p_excl_stor) 14067 for child in disk.children: 14068 _CreateSingleBlockDev(self, snode, instance, child, info, True, 14069 s_excl_stor) 14070 # at this stage, all new LVs have been created, we can rename the 14071 # old ones 14072 feedback_fn("Renaming original volumes...") 14073 rename_list = [(o, n.children[0].logical_id) 14074 for (o, n) in zip(instance.disks, new_disks)] 14075 result = self.rpc.call_blockdev_rename(pnode, rename_list) 14076 result.Raise("Failed to rename original LVs") 14077 14078 feedback_fn("Initializing DRBD devices...") 14079 # all child devices are in place, we can now create the DRBD devices 14080 try: 14081 for disk in anno_disks: 14082 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]: 14083 f_create = node == pnode 14084 _CreateSingleBlockDev(self, node, instance, disk, info, f_create, 14085 excl_stor) 14086 except errors.GenericError, e: 14087 feedback_fn("Initializing of DRBD devices failed;" 14088 " renaming back original volumes...") 14089 for disk in new_disks: 14090 self.cfg.SetDiskID(disk, pnode) 14091 rename_back_list = [(n.children[0], o.logical_id) 14092 for (n, o) in zip(new_disks, instance.disks)] 14093 result = self.rpc.call_blockdev_rename(pnode, rename_back_list) 14094 result.Raise("Failed to rename LVs back after error %s" % str(e)) 14095 raise 14096 14097 # at this point, the instance has been modified 14098 instance.disk_template = constants.DT_DRBD8 14099 instance.disks = new_disks 14100 self.cfg.Update(instance, feedback_fn) 14101 14102 # Release node locks while waiting for sync 14103 _ReleaseLocks(self, locking.LEVEL_NODE) 14104 14105 # disks are created, waiting for sync 14106 disk_abort = not _WaitForSync(self, instance, 14107 oneshot=not self.op.wait_for_sync) 14108 if disk_abort: 14109 raise errors.OpExecError("There are some degraded disks for" 14110 " this instance, please cleanup manually")
14111 14112 # Node resource locks will be released by caller 14113
14114 - def _ConvertDrbdToPlain(self, feedback_fn):
14115 """Converts an instance from drbd to plain. 14116 14117 """ 14118 instance = self.instance 14119 14120 assert len(instance.secondary_nodes) == 1 14121 assert instance.disk_template == constants.DT_DRBD8 14122 14123 pnode = instance.primary_node 14124 snode = instance.secondary_nodes[0] 14125 feedback_fn("Converting template to plain") 14126 14127 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg) 14128 new_disks = [d.children[0] for d in instance.disks] 14129 14130 # copy over size and mode 14131 for parent, child in zip(old_disks, new_disks): 14132 child.size = parent.size 14133 child.mode = parent.mode 14134 14135 # this is a DRBD disk, return its port to the pool 14136 # NOTE: this must be done right before the call to cfg.Update! 14137 for disk in old_disks: 14138 tcp_port = disk.logical_id[2] 14139 self.cfg.AddTcpUdpPort(tcp_port) 14140 14141 # update instance structure 14142 instance.disks = new_disks 14143 instance.disk_template = constants.DT_PLAIN 14144 _UpdateIvNames(0, instance.disks) 14145 self.cfg.Update(instance, feedback_fn) 14146 14147 # Release locks in case removing disks takes a while 14148 _ReleaseLocks(self, locking.LEVEL_NODE) 14149 14150 feedback_fn("Removing volumes on the secondary node...") 14151 for disk in old_disks: 14152 self.cfg.SetDiskID(disk, snode) 14153 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg 14154 if msg: 14155 self.LogWarning("Could not remove block device %s on node %s," 14156 " continuing anyway: %s", disk.iv_name, snode, msg) 14157 14158 feedback_fn("Removing unneeded volumes on the primary node...") 14159 for idx, disk in enumerate(old_disks): 14160 meta = disk.children[1] 14161 self.cfg.SetDiskID(meta, pnode) 14162 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg 14163 if msg: 14164 self.LogWarning("Could not remove metadata for disk %d on node %s," 14165 " continuing anyway: %s", idx, pnode, msg)
14166
14167 - def _CreateNewDisk(self, idx, params, _):
14168 """Creates a new disk. 14169 14170 """ 14171 instance = self.instance 14172 14173 # add a new disk 14174 if instance.disk_template in constants.DTS_FILEBASED: 14175 (file_driver, file_path) = instance.disks[0].logical_id 14176 file_path = os.path.dirname(file_path) 14177 else: 14178 file_driver = file_path = None 14179 14180 disk = \ 14181 _GenerateDiskTemplate(self, instance.disk_template, instance.name, 14182 instance.primary_node, instance.secondary_nodes, 14183 [params], file_path, file_driver, idx, 14184 self.Log, self.diskparams)[0] 14185 14186 new_disks = _CreateDisks(self, instance, disks=[disk]) 14187 14188 if self.cluster.prealloc_wipe_disks: 14189 # Wipe new disk 14190 _WipeOrCleanupDisks(self, instance, 14191 disks=[(idx, disk, 0)], 14192 cleanup=new_disks) 14193 14194 return (disk, [ 14195 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)), 14196 ])
14197 14198 @staticmethod
14199 - def _ModifyDisk(idx, disk, params, _):
14200 """Modifies a disk. 14201 14202 """ 14203 disk.mode = params[constants.IDISK_MODE] 14204 14205 return [ 14206 ("disk.mode/%d" % idx, disk.mode), 14207 ]
14208
14209 - def _RemoveDisk(self, idx, root, _):
14210 """Removes a disk. 14211 14212 """ 14213 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg) 14214 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node): 14215 self.cfg.SetDiskID(disk, node) 14216 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg 14217 if msg: 14218 self.LogWarning("Could not remove disk/%d on node '%s': %s," 14219 " continuing anyway", idx, node, msg) 14220 14221 # if this is a DRBD disk, return its port to the pool 14222 if root.dev_type in constants.LDS_DRBD: 14223 self.cfg.AddTcpUdpPort(root.logical_id[2])
14224
14225 - def _CreateNewNic(self, idx, params, private):
14226 """Creates data structure for a new network interface. 14227 14228 """ 14229 mac = params[constants.INIC_MAC] 14230 ip = params.get(constants.INIC_IP, None) 14231 net = params.get(constants.INIC_NETWORK, None) 14232 net_uuid = self.cfg.LookupNetwork(net) 14233 #TODO: not private.filled?? can a nic have no nicparams?? 14234 nicparams = private.filled 14235 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams) 14236 14237 return (nobj, [ 14238 ("nic.%d" % idx, 14239 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" % 14240 (mac, ip, private.filled[constants.NIC_MODE], 14241 private.filled[constants.NIC_LINK], 14242 net)), 14243 ])
14244
14245 - def _ApplyNicMods(self, idx, nic, params, private):
14246 """Modifies a network interface. 14247 14248 """ 14249 changes = [] 14250 14251 for key in [constants.INIC_MAC, constants.INIC_IP]: 14252 if key in params: 14253 changes.append(("nic.%s/%d" % (key, idx), params[key])) 14254 setattr(nic, key, params[key]) 14255 14256 new_net = params.get(constants.INIC_NETWORK, nic.network) 14257 new_net_uuid = self.cfg.LookupNetwork(new_net) 14258 if new_net_uuid != nic.network: 14259 changes.append(("nic.network/%d" % idx, new_net)) 14260 nic.network = new_net_uuid 14261 14262 if private.filled: 14263 nic.nicparams = private.filled 14264 14265 for (key, val) in nic.nicparams.items(): 14266 changes.append(("nic.%s/%d" % (key, idx), val)) 14267 14268 return changes
14269
14270 - def Exec(self, feedback_fn):
14271 """Modifies an instance. 14272 14273 All parameters take effect only at the next restart of the instance. 14274 14275 """ 14276 # Process here the warnings from CheckPrereq, as we don't have a 14277 # feedback_fn there. 14278 # TODO: Replace with self.LogWarning 14279 for warn in self.warn: 14280 feedback_fn("WARNING: %s" % warn) 14281 14282 assert ((self.op.disk_template is None) ^ 14283 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \ 14284 "Not owning any node resource locks" 14285 14286 result = [] 14287 instance = self.instance 14288 14289 # runtime memory 14290 if self.op.runtime_mem: 14291 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node, 14292 instance, 14293 self.op.runtime_mem) 14294 rpcres.Raise("Cannot modify instance runtime memory") 14295 result.append(("runtime_memory", self.op.runtime_mem)) 14296 14297 # Apply disk changes 14298 ApplyContainerMods("disk", instance.disks, result, self.diskmod, 14299 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk) 14300 _UpdateIvNames(0, instance.disks) 14301 14302 if self.op.disk_template: 14303 if __debug__: 14304 check_nodes = set(instance.all_nodes) 14305 if self.op.remote_node: 14306 check_nodes.add(self.op.remote_node) 14307 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: 14308 owned = self.owned_locks(level) 14309 assert not (check_nodes - owned), \ 14310 ("Not owning the correct locks, owning %r, expected at least %r" % 14311 (owned, check_nodes)) 14312 14313 r_shut = _ShutdownInstanceDisks(self, instance) 14314 if not r_shut: 14315 raise errors.OpExecError("Cannot shutdown instance disks, unable to" 14316 " proceed with disk template conversion") 14317 mode = (instance.disk_template, self.op.disk_template) 14318 try: 14319 self._DISK_CONVERSIONS[mode](self, feedback_fn) 14320 except: 14321 self.cfg.ReleaseDRBDMinors(instance.name) 14322 raise 14323 result.append(("disk_template", self.op.disk_template)) 14324 14325 assert instance.disk_template == self.op.disk_template, \ 14326 ("Expected disk template '%s', found '%s'" % 14327 (self.op.disk_template, instance.disk_template)) 14328 14329 # Release node and resource locks if there are any (they might already have 14330 # been released during disk conversion) 14331 _ReleaseLocks(self, locking.LEVEL_NODE) 14332 _ReleaseLocks(self, locking.LEVEL_NODE_RES) 14333 14334 # Apply NIC changes 14335 if self._new_nics is not None: 14336 instance.nics = self._new_nics 14337 result.extend(self._nic_chgdesc) 14338 14339 # hvparams changes 14340 if self.op.hvparams: 14341 instance.hvparams = self.hv_inst 14342 for key, val in self.op.hvparams.iteritems(): 14343 result.append(("hv/%s" % key, val)) 14344 14345 # beparams changes 14346 if self.op.beparams: 14347 instance.beparams = self.be_inst 14348 for key, val in self.op.beparams.iteritems(): 14349 result.append(("be/%s" % key, val)) 14350 14351 # OS change 14352 if self.op.os_name: 14353 instance.os = self.op.os_name 14354 14355 # osparams changes 14356 if self.op.osparams: 14357 instance.osparams = self.os_inst 14358 for key, val in self.op.osparams.iteritems(): 14359 result.append(("os/%s" % key, val)) 14360 14361 if self.op.offline is None: 14362 # Ignore 14363 pass 14364 elif self.op.offline: 14365 # Mark instance as offline 14366 self.cfg.MarkInstanceOffline(instance.name) 14367 result.append(("admin_state", constants.ADMINST_OFFLINE)) 14368 else: 14369 # Mark instance as online, but stopped 14370 self.cfg.MarkInstanceDown(instance.name) 14371 result.append(("admin_state", 
constants.ADMINST_DOWN)) 14372 14373 self.cfg.Update(instance, feedback_fn, self.proc.GetECId()) 14374 14375 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or 14376 self.owned_locks(locking.LEVEL_NODE)), \ 14377 "All node locks should have been released by now" 14378 14379 return result
14380 14381 _DISK_CONVERSIONS = { 14382 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, 14383 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, 14384 } 14385
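# A hedged sketch of triggering one of the two conversions registered in
# _DISK_CONVERSIONS above (instance and node names are placeholders);
# converting to DRBD additionally requires a secondary node, as enforced in
# CheckArguments and _PreCheckDiskTemplate.
from ganeti import constants, opcodes

op = opcodes.OpInstanceSetParams(instance_name="instance1.example.com",
                                 disk_template=constants.DT_DRBD8,
                                 remote_node="node2.example.com")
# Roughly what "gnt-instance modify -t drbd -n node2.example.com instance1"
# submits through the job queue.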
14386 14387 -class LUInstanceChangeGroup(LogicalUnit):
14388 HPATH = "instance-change-group" 14389 HTYPE = constants.HTYPE_INSTANCE 14390 REQ_BGL = False 14391
14392 - def ExpandNames(self):
14393 self.share_locks = _ShareAll() 14394 14395 self.needed_locks = { 14396 locking.LEVEL_NODEGROUP: [], 14397 locking.LEVEL_NODE: [], 14398 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 14399 } 14400 14401 self._ExpandAndLockInstance() 14402 14403 if self.op.target_groups: 14404 self.req_target_uuids = map(self.cfg.LookupNodeGroup, 14405 self.op.target_groups) 14406 else: 14407 self.req_target_uuids = None 14408 14409 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14410
14411 - def DeclareLocks(self, level):
14412 if level == locking.LEVEL_NODEGROUP: 14413 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 14414 14415 if self.req_target_uuids: 14416 lock_groups = set(self.req_target_uuids) 14417 14418 # Lock all groups used by instance optimistically; this requires going 14419 # via the node before it's locked, requiring verification later on 14420 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name) 14421 lock_groups.update(instance_groups) 14422 else: 14423 # No target groups, need to lock all of them 14424 lock_groups = locking.ALL_SET 14425 14426 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups 14427 14428 elif level == locking.LEVEL_NODE: 14429 if self.req_target_uuids: 14430 # Lock all nodes used by instances 14431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 14432 self._LockInstancesNodes() 14433 14434 # Lock all nodes in all potential target groups 14435 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) - 14436 self.cfg.GetInstanceNodeGroups(self.op.instance_name)) 14437 member_nodes = [node_name 14438 for group in lock_groups 14439 for node_name in self.cfg.GetNodeGroup(group).members] 14440 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes) 14441 else: 14442 # Lock all nodes as all groups are potential targets 14443 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14444
14445 - def CheckPrereq(self):
14446 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 14447 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 14448 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 14449 14450 assert (self.req_target_uuids is None or 14451 owned_groups.issuperset(self.req_target_uuids)) 14452 assert owned_instances == set([self.op.instance_name]) 14453 14454 # Get instance information 14455 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 14456 14457 # Check if node groups for locked instance are still correct 14458 assert owned_nodes.issuperset(self.instance.all_nodes), \ 14459 ("Instance %s's nodes changed while we kept the lock" % 14460 self.op.instance_name) 14461 14462 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, 14463 owned_groups) 14464 14465 if self.req_target_uuids: 14466 # User requested specific target groups 14467 self.target_uuids = frozenset(self.req_target_uuids) 14468 else: 14469 # All groups except those used by the instance are potential targets 14470 self.target_uuids = owned_groups - inst_groups 14471 14472 conflicting_groups = self.target_uuids & inst_groups 14473 if conflicting_groups: 14474 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are" 14475 " used by the instance '%s'" % 14476 (utils.CommaJoin(conflicting_groups), 14477 self.op.instance_name), 14478 errors.ECODE_INVAL) 14479 14480 if not self.target_uuids: 14481 raise errors.OpPrereqError("There are no possible target groups", 14482 errors.ECODE_INVAL)
14483
14484 - def BuildHooksEnv(self):
14485 """Build hooks env. 14486 14487 """ 14488 assert self.target_uuids 14489 14490 env = { 14491 "TARGET_GROUPS": " ".join(self.target_uuids), 14492 } 14493 14494 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 14495 14496 return env
14497
14498 - def BuildHooksNodes(self):
14499 """Build hooks nodes. 14500 14501 """ 14502 mn = self.cfg.GetMasterNode() 14503 return ([mn], [mn])
14504
14505 - def Exec(self, feedback_fn):
14506 instances = list(self.owned_locks(locking.LEVEL_INSTANCE)) 14507 14508 assert instances == [self.op.instance_name], "Instance not locked" 14509 14510 req = iallocator.IAReqGroupChange(instances=instances, 14511 target_groups=list(self.target_uuids)) 14512 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 14513 14514 ial.Run(self.op.iallocator) 14515 14516 if not ial.success: 14517 raise errors.OpPrereqError("Can't compute solution for changing group of" 14518 " instance '%s' using iallocator '%s': %s" % 14519 (self.op.instance_name, self.op.iallocator, 14520 ial.info), errors.ECODE_NORES) 14521 14522 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False) 14523 14524 self.LogInfo("Iallocator returned %s job(s) for changing group of" 14525 " instance '%s'", len(jobs), self.op.instance_name) 14526 14527 return ResultWithJobs(jobs)
14528
14529 14530 -class LUBackupQuery(NoHooksLU):
14531 """Query the exports list 14532 14533 """ 14534 REQ_BGL = False 14535
14536 - def CheckArguments(self):
14537 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes), 14538 ["node", "export"], self.op.use_locking)
14539
14540 - def ExpandNames(self):
14541 self.expq.ExpandNames(self)
14542
14543 - def DeclareLocks(self, level):
14544 self.expq.DeclareLocks(self, level)
14545
14546 - def Exec(self, feedback_fn):
14547 result = {} 14548 14549 for (node, expname) in self.expq.OldStyleQuery(self): 14550 if expname is None: 14551 result[node] = False 14552 else: 14553 result.setdefault(node, []).append(expname) 14554 14555 return result
14556
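# Sketch of the old-style result built by LUBackupQuery.Exec above: a node
# that failed to answer maps to False, every export found adds its name to
# that node's list, and reachable nodes without exports do not appear at all.
# The rows below are invented sample data.
def _BuildExportResult(rows):
  result = {}
  for (node, expname) in rows:
    if expname is None:
      result[node] = False
    else:
      result.setdefault(node, []).append(expname)
  return result

_rows = [("node1", "inst1.example.com"), ("node2", None)]
assert _BuildExportResult(_rows) == \
  {"node1": ["inst1.example.com"], "node2": False}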
14557 14558 -class _ExportQuery(_QueryBase):
14559 FIELDS = query.EXPORT_FIELDS 14560 14561 #: The node name is not a unique key for this query 14562 SORT_FIELD = "node" 14563
14564 - def ExpandNames(self, lu):
14565 lu.needed_locks = {} 14566 14567 # The following variables interact with _QueryBase._GetNames 14568 if self.names: 14569 self.wanted = _GetWantedNodes(lu, self.names) 14570 else: 14571 self.wanted = locking.ALL_SET 14572 14573 self.do_locking = self.use_locking 14574 14575 if self.do_locking: 14576 lu.share_locks = _ShareAll() 14577 lu.needed_locks = { 14578 locking.LEVEL_NODE: self.wanted, 14579 } 14580 14581 if not self.names: 14582 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14583
14584 - def DeclareLocks(self, lu, level):
14585 pass
14586
14587 - def _GetQueryData(self, lu):
14588 """Computes the list of nodes and their attributes. 14589 14590 """ 14591 # Locking is not used 14592 # TODO 14593 assert not (compat.any(lu.glm.is_owned(level) 14594 for level in locking.LEVELS 14595 if level != locking.LEVEL_CLUSTER) or 14596 self.do_locking or self.use_locking) 14597 14598 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE) 14599 14600 result = [] 14601 14602 for (node, nres) in lu.rpc.call_export_list(nodes).items(): 14603 if nres.fail_msg: 14604 result.append((node, None)) 14605 else: 14606 result.extend((node, expname) for expname in nres.payload) 14607 14608 return result
14609
14610 14611 -class LUBackupPrepare(NoHooksLU):
14612 """Prepares an instance for an export and returns useful information. 14613 14614 """ 14615 REQ_BGL = False 14616
14617 - def ExpandNames(self):
14618 self._ExpandAndLockInstance()
14619
14620 - def CheckPrereq(self):
14621 """Check prerequisites. 14622 14623 """ 14624 instance_name = self.op.instance_name 14625 14626 self.instance = self.cfg.GetInstanceInfo(instance_name) 14627 assert self.instance is not None, \ 14628 "Cannot retrieve locked instance %s" % self.op.instance_name 14629 _CheckNodeOnline(self, self.instance.primary_node) 14630 14631 self._cds = _GetClusterDomainSecret()
14632
14633 - def Exec(self, feedback_fn):
14634 """Prepares an instance for an export. 14635 14636 """ 14637 instance = self.instance 14638 14639 if self.op.mode == constants.EXPORT_MODE_REMOTE: 14640 salt = utils.GenerateSecret(8) 14641 14642 feedback_fn("Generating X509 certificate on %s" % instance.primary_node) 14643 result = self.rpc.call_x509_cert_create(instance.primary_node, 14644 constants.RIE_CERT_VALIDITY) 14645 result.Raise("Can't create X509 key and certificate on %s" % result.node) 14646 14647 (name, cert_pem) = result.payload 14648 14649 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 14650 cert_pem) 14651 14652 return { 14653 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), 14654 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), 14655 salt), 14656 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), 14657 } 14658 14659 return None
14660
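# The remote-export preparation above ties the X509 key name to the cluster
# domain secret with a salted HMAC (utils.Sha1Hmac/utils.VerifySha1Hmac).  The
# standalone sketch below shows the general idea using only the standard
# library; how the real helpers combine salt and message is an assumption
# here, and hmac.compare_digest needs Python 2.7.7 or newer.
import hashlib
import hmac

def _SaltedHmac(secret, text, salt):
  return hmac.new(secret, salt + text, hashlib.sha1).hexdigest()

def _VerifySaltedHmac(secret, text, salt, digest):
  return hmac.compare_digest(_SaltedHmac(secret, text, salt), digest)

_secret = b"cluster-domain-secret"
_digest = _SaltedHmac(_secret, b"x509-key-name", b"abcd1234")
assert _VerifySaltedHmac(_secret, b"x509-key-name", b"abcd1234", _digest)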
14661 14662 -class LUBackupExport(LogicalUnit):
14663 """Export an instance to an image in the cluster. 14664 14665 """ 14666 HPATH = "instance-export" 14667 HTYPE = constants.HTYPE_INSTANCE 14668 REQ_BGL = False 14669
14670 - def CheckArguments(self):
14671 """Check the arguments. 14672 14673 """ 14674 self.x509_key_name = self.op.x509_key_name 14675 self.dest_x509_ca_pem = self.op.destination_x509_ca 14676 14677 if self.op.mode == constants.EXPORT_MODE_REMOTE: 14678 if not self.x509_key_name: 14679 raise errors.OpPrereqError("Missing X509 key name for encryption", 14680 errors.ECODE_INVAL) 14681 14682 if not self.dest_x509_ca_pem: 14683 raise errors.OpPrereqError("Missing destination X509 CA", 14684 errors.ECODE_INVAL)
14685
14686 - def ExpandNames(self):
14687 self._ExpandAndLockInstance() 14688 14689 # Lock all nodes for local exports 14690 if self.op.mode == constants.EXPORT_MODE_LOCAL: 14691 # FIXME: lock only instance primary and destination node 14692 # 14693 # Sad but true, for now we have to lock all nodes, as we don't know where 14694 # the previous export might be, and in this LU we search for it and 14695 # remove it from its current node. In the future we could fix this by: 14696 # - making a tasklet to search (share-lock all), then create the 14697 # new one, then one to remove, after 14698 # - removing the removal operation altogether 14699 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 14700 14701 # Allocations should be stopped while this LU runs with node locks, but 14702 # it doesn't have to be exclusive 14703 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 14704 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14705
14706 - def DeclareLocks(self, level):
14707 """Last minute lock declaration."""
14708 # All nodes are locked anyway, so nothing to do here. 14709
14710 - def BuildHooksEnv(self):
14711 """Build hooks env. 14712 14713 This will run on the master, primary node and target node. 14714 14715 """ 14716 env = { 14717 "EXPORT_MODE": self.op.mode, 14718 "EXPORT_NODE": self.op.target_node, 14719 "EXPORT_DO_SHUTDOWN": self.op.shutdown, 14720 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 14721 # TODO: Generic function for boolean env variables 14722 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), 14723 } 14724 14725 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 14726 14727 return env
14728
14729 - def BuildHooksNodes(self):
14730 """Build hooks nodes. 14731 14732 """ 14733 nl = [self.cfg.GetMasterNode(), self.instance.primary_node] 14734 14735 if self.op.mode == constants.EXPORT_MODE_LOCAL: 14736 nl.append(self.op.target_node) 14737 14738 return (nl, nl)
14739
14740 - def CheckPrereq(self):
14741 """Check prerequisites. 14742 14743 This checks that the instance and node names are valid. 14744 14745 """ 14746 instance_name = self.op.instance_name 14747 14748 self.instance = self.cfg.GetInstanceInfo(instance_name) 14749 assert self.instance is not None, \ 14750 "Cannot retrieve locked instance %s" % self.op.instance_name 14751 _CheckNodeOnline(self, self.instance.primary_node) 14752 14753 if (self.op.remove_instance and 14754 self.instance.admin_state == constants.ADMINST_UP and 14755 not self.op.shutdown): 14756 raise errors.OpPrereqError("Can not remove instance without shutting it" 14757 " down before", errors.ECODE_STATE) 14758 14759 if self.op.mode == constants.EXPORT_MODE_LOCAL: 14760 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 14761 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) 14762 assert self.dst_node is not None 14763 14764 _CheckNodeOnline(self, self.dst_node.name) 14765 _CheckNodeNotDrained(self, self.dst_node.name) 14766 14767 self._cds = None 14768 self.dest_disk_info = None 14769 self.dest_x509_ca = None 14770 14771 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 14772 self.dst_node = None 14773 14774 if len(self.op.target_node) != len(self.instance.disks): 14775 raise errors.OpPrereqError(("Received destination information for %s" 14776 " disks, but instance %s has %s disks") % 14777 (len(self.op.target_node), instance_name, 14778 len(self.instance.disks)), 14779 errors.ECODE_INVAL) 14780 14781 cds = _GetClusterDomainSecret() 14782 14783 # Check X509 key name 14784 try: 14785 (key_name, hmac_digest, hmac_salt) = self.x509_key_name 14786 except (TypeError, ValueError), err: 14787 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err, 14788 errors.ECODE_INVAL) 14789 14790 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): 14791 raise errors.OpPrereqError("HMAC for X509 key name is wrong", 14792 errors.ECODE_INVAL) 14793 14794 # Load and verify CA 14795 try: 14796 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) 14797 except OpenSSL.crypto.Error, err: 14798 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % 14799 (err, ), errors.ECODE_INVAL) 14800 14801 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 14802 if errcode is not None: 14803 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % 14804 (msg, ), errors.ECODE_INVAL) 14805 14806 self.dest_x509_ca = cert 14807 14808 # Verify target information 14809 disk_info = [] 14810 for idx, disk_data in enumerate(self.op.target_node): 14811 try: 14812 (host, port, magic) = \ 14813 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) 14814 except errors.GenericError, err: 14815 raise errors.OpPrereqError("Target info for disk %s: %s" % 14816 (idx, err), errors.ECODE_INVAL) 14817 14818 disk_info.append((host, port, magic)) 14819 14820 assert len(disk_info) == len(self.op.target_node) 14821 self.dest_disk_info = disk_info 14822 14823 else: 14824 raise errors.ProgrammerError("Unhandled export mode %r" % 14825 self.op.mode) 14826 14827 # instance disk type verification 14828 # TODO: Implement export support for file-based disks 14829 for disk in self.instance.disks: 14830 if disk.dev_type == constants.LD_FILE: 14831 raise errors.OpPrereqError("Export not supported for instances with" 14832 " file-based disks", errors.ECODE_INVAL)
14833
14834 - def _CleanupExports(self, feedback_fn):
14835 """Removes exports of current instance from all other nodes. 14836 14837 If an instance in a cluster with nodes A..D was exported to node C, its 14838 exports will be removed from the nodes A, B and D. 14839 14840 """ 14841 assert self.op.mode != constants.EXPORT_MODE_REMOTE 14842 14843 nodelist = self.cfg.GetNodeList() 14844 nodelist.remove(self.dst_node.name) 14845 14846 # on one-node clusters nodelist will be empty after the removal 14847 # if we proceed the backup would be removed because OpBackupQuery 14848 # substitutes an empty list with the full cluster node list. 14849 iname = self.instance.name 14850 if nodelist: 14851 feedback_fn("Removing old exports for instance %s" % iname) 14852 exportlist = self.rpc.call_export_list(nodelist) 14853 for node in exportlist: 14854 if exportlist[node].fail_msg: 14855 continue 14856 if iname in exportlist[node].payload: 14857 msg = self.rpc.call_export_remove(node, iname).fail_msg 14858 if msg: 14859 self.LogWarning("Could not remove older export for instance %s" 14860 " on node %s: %s", iname, node, msg)
14861
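# Sketch of the guard described in _CleanupExports above: after dropping the
# destination node, an empty node list must not be passed to the removal RPC,
# because an empty list would be treated as "all nodes" and delete the export
# that was just written.  Node names are invented.
def _CleanupTargets(all_nodes, dst_node):
  rest = [name for name in all_nodes if name != dst_node]
  return rest  # callers skip the cleanup entirely when this is empty

assert _CleanupTargets(["nodeA"], "nodeA") == []
assert _CleanupTargets(["nodeA", "nodeB", "nodeC", "nodeD"], "nodeC") == \
  ["nodeA", "nodeB", "nodeD"]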
14862 - def Exec(self, feedback_fn):
14863 """Export an instance to an image in the cluster. 14864 14865 """ 14866 assert self.op.mode in constants.EXPORT_MODES 14867 14868 instance = self.instance 14869 src_node = instance.primary_node 14870 14871 if self.op.shutdown: 14872 # shutdown the instance, but not the disks 14873 feedback_fn("Shutting down instance %s" % instance.name) 14874 result = self.rpc.call_instance_shutdown(src_node, instance, 14875 self.op.shutdown_timeout) 14876 # TODO: Maybe ignore failures if ignore_remove_failures is set 14877 result.Raise("Could not shutdown instance %s on" 14878 " node %s" % (instance.name, src_node)) 14879 14880 # set the disks ID correctly since call_instance_start needs the 14881 # correct drbd minor to create the symlinks 14882 for disk in instance.disks: 14883 self.cfg.SetDiskID(disk, src_node) 14884 14885 activate_disks = (instance.admin_state != constants.ADMINST_UP) 14886 14887 if activate_disks: 14888 # Activate the instance disks if we'exporting a stopped instance 14889 feedback_fn("Activating disks for %s" % instance.name) 14890 _StartInstanceDisks(self, instance, None) 14891 14892 try: 14893 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, 14894 instance) 14895 14896 helper.CreateSnapshots() 14897 try: 14898 if (self.op.shutdown and 14899 instance.admin_state == constants.ADMINST_UP and 14900 not self.op.remove_instance): 14901 assert not activate_disks 14902 feedback_fn("Starting instance %s" % instance.name) 14903 result = self.rpc.call_instance_start(src_node, 14904 (instance, None, None), False) 14905 msg = result.fail_msg 14906 if msg: 14907 feedback_fn("Failed to start instance: %s" % msg) 14908 _ShutdownInstanceDisks(self, instance) 14909 raise errors.OpExecError("Could not start instance: %s" % msg) 14910 14911 if self.op.mode == constants.EXPORT_MODE_LOCAL: 14912 (fin_resu, dresults) = helper.LocalExport(self.dst_node) 14913 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 14914 connect_timeout = constants.RIE_CONNECT_TIMEOUT 14915 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 14916 14917 (key_name, _, _) = self.x509_key_name 14918 14919 dest_ca_pem = \ 14920 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, 14921 self.dest_x509_ca) 14922 14923 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info, 14924 key_name, dest_ca_pem, 14925 timeouts) 14926 finally: 14927 helper.Cleanup() 14928 14929 # Check for backwards compatibility 14930 assert len(dresults) == len(instance.disks) 14931 assert compat.all(isinstance(i, bool) for i in dresults), \ 14932 "Not all results are boolean: %r" % dresults 14933 14934 finally: 14935 if activate_disks: 14936 feedback_fn("Deactivating disks for %s" % instance.name) 14937 _ShutdownInstanceDisks(self, instance) 14938 14939 if not (compat.all(dresults) and fin_resu): 14940 failures = [] 14941 if not fin_resu: 14942 failures.append("export finalization") 14943 if not compat.all(dresults): 14944 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) 14945 if not dsk) 14946 failures.append("disk export: disk(s) %s" % fdsk) 14947 14948 raise errors.OpExecError("Export failed, errors in %s" % 14949 utils.CommaJoin(failures)) 14950 14951 # At this point, the export was successful, we can cleanup/finish 14952 14953 # Remove instance if requested 14954 if self.op.remove_instance: 14955 feedback_fn("Removing instance %s" % instance.name) 14956 _RemoveInstance(self, feedback_fn, instance, 14957 self.op.ignore_remove_failures) 14958 14959 if self.op.mode == 
constants.EXPORT_MODE_LOCAL: 14960 self._CleanupExports(feedback_fn) 14961 14962 return fin_resu, dresults
14963
14964 14965 -class LUBackupRemove(NoHooksLU):
14966 """Remove exports related to the named instance. 14967 14968 """ 14969 REQ_BGL = False 14970
14971 - def ExpandNames(self):
14972 self.needed_locks = { 14973 # We need all nodes to be locked in order for RemoveExport to work, but 14974 # we don't need to lock the instance itself, as nothing will happen to it 14975 # (and we can remove exports also for a removed instance) 14976 locking.LEVEL_NODE: locking.ALL_SET, 14977 14978 # Removing backups is quick, so blocking allocations is justified 14979 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 14980 } 14981 14982 # Allocations should be stopped while this LU runs with node locks, but it 14983 # doesn't have to be exclusive 14984 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14985
14986 - def Exec(self, feedback_fn):
14987 """Remove any export. 14988 14989 """ 14990 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) 14991 # If the instance was not found we'll try with the name that was passed in. 14992 # This will only work if it was an FQDN, though. 14993 fqdn_warn = False 14994 if not instance_name: 14995 fqdn_warn = True 14996 instance_name = self.op.instance_name 14997 14998 locked_nodes = self.owned_locks(locking.LEVEL_NODE) 14999 exportlist = self.rpc.call_export_list(locked_nodes) 15000 found = False 15001 for node in exportlist: 15002 msg = exportlist[node].fail_msg 15003 if msg: 15004 self.LogWarning("Failed to query node %s (continuing): %s", node, msg) 15005 continue 15006 if instance_name in exportlist[node].payload: 15007 found = True 15008 result = self.rpc.call_export_remove(node, instance_name) 15009 msg = result.fail_msg 15010 if msg: 15011 logging.error("Could not remove export for instance %s" 15012 " on node %s: %s", instance_name, node, msg) 15013 15014 if fqdn_warn and not found: 15015 feedback_fn("Export not found. If trying to remove an export belonging" 15016 " to a deleted instance please use its Fully Qualified" 15017 " Domain Name.")
15018
15019 15020 -class LUGroupAdd(LogicalUnit):
15021 """Logical unit for creating node groups. 15022 15023 """ 15024 HPATH = "group-add" 15025 HTYPE = constants.HTYPE_GROUP 15026 REQ_BGL = False 15027
15028 - def ExpandNames(self):
15029 # We need the new group's UUID here so that we can create and acquire the 15030 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup 15031 # that it should not check whether the UUID exists in the configuration. 15032 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) 15033 self.needed_locks = {} 15034 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15035
15036 - def CheckPrereq(self):
15037 """Check prerequisites. 15038 15039 This checks that the given group name is not an existing node group 15040 already. 15041 15042 """ 15043 try: 15044 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15045 except errors.OpPrereqError: 15046 pass 15047 else: 15048 raise errors.OpPrereqError("Desired group name '%s' already exists as a" 15049 " node group (UUID: %s)" % 15050 (self.op.group_name, existing_uuid), 15051 errors.ECODE_EXISTS) 15052 15053 if self.op.ndparams: 15054 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 15055 15056 if self.op.hv_state: 15057 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None) 15058 else: 15059 self.new_hv_state = None 15060 15061 if self.op.disk_state: 15062 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None) 15063 else: 15064 self.new_disk_state = None 15065 15066 if self.op.diskparams: 15067 for templ in constants.DISK_TEMPLATES: 15068 if templ in self.op.diskparams: 15069 utils.ForceDictType(self.op.diskparams[templ], 15070 constants.DISK_DT_TYPES) 15071 self.new_diskparams = self.op.diskparams 15072 try: 15073 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) 15074 except errors.OpPrereqError, err: 15075 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 15076 errors.ECODE_INVAL) 15077 else: 15078 self.new_diskparams = {} 15079 15080 if self.op.ipolicy: 15081 cluster = self.cfg.GetClusterInfo() 15082 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy) 15083 try: 15084 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False) 15085 except errors.ConfigurationError, err: 15086 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 15087 errors.ECODE_INVAL)
15088
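# The name-collision check above relies on the try/except/else idiom: the
# lookup raising means the name is still free, while a successful lookup is
# the error case.  A generic standalone sketch with an invented lookup table
# and ValueError standing in for errors.OpPrereqError:
def _EnsureNameIsFree(lookup, name):
  try:
    existing = lookup(name)
  except KeyError:
    return
  raise ValueError("Name '%s' already exists (UUID: %s)" % (name, existing))

_groups = {"default": "uuid-1"}
_EnsureNameIsFree(lambda name: _groups[name], "group2")   # free, no error
try:
  _EnsureNameIsFree(lambda name: _groups[name], "default")
except ValueError:
  pass                                                    # collision detected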
15089 - def BuildHooksEnv(self):
15090 """Build hooks env. 15091 15092 """ 15093 return { 15094 "GROUP_NAME": self.op.group_name, 15095 }
15096
15097 - def BuildHooksNodes(self):
15098 """Build hooks nodes. 15099 15100 """ 15101 mn = self.cfg.GetMasterNode() 15102 return ([mn], [mn])
15103
15104 - def Exec(self, feedback_fn):
15105 """Add the node group to the cluster. 15106 15107 """ 15108 group_obj = objects.NodeGroup(name=self.op.group_name, members=[], 15109 uuid=self.group_uuid, 15110 alloc_policy=self.op.alloc_policy, 15111 ndparams=self.op.ndparams, 15112 diskparams=self.new_diskparams, 15113 ipolicy=self.op.ipolicy, 15114 hv_state_static=self.new_hv_state, 15115 disk_state_static=self.new_disk_state) 15116 15117 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False) 15118 del self.remove_locks[locking.LEVEL_NODEGROUP]
15119
15120 15121 -class LUGroupAssignNodes(NoHooksLU):
15122 """Logical unit for assigning nodes to groups. 15123 15124 """ 15125 REQ_BGL = False 15126
15127 - def ExpandNames(self):
15128 # These raise errors.OpPrereqError on their own: 15129 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15130 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 15131 15132 # We want to lock all the affected nodes and groups. We have readily 15133 # available the list of nodes, and the *destination* group. To gather the 15134 # list of "source" groups, we need to fetch node information later on. 15135 self.needed_locks = { 15136 locking.LEVEL_NODEGROUP: set([self.group_uuid]), 15137 locking.LEVEL_NODE: self.op.nodes, 15138 }
15139
15140 - def DeclareLocks(self, level):
15141 if level == locking.LEVEL_NODEGROUP: 15142 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1 15143 15144 # Try to get all affected nodes' groups without having the group or node 15145 # lock yet. Needs verification later in the code flow. 15146 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes) 15147 15148 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15149
15150 - def CheckPrereq(self):
15151 """Check prerequisites. 15152 15153 """ 15154 assert self.needed_locks[locking.LEVEL_NODEGROUP] 15155 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == 15156 frozenset(self.op.nodes)) 15157 15158 expected_locks = (set([self.group_uuid]) | 15159 self.cfg.GetNodeGroupsFromNodes(self.op.nodes)) 15160 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP) 15161 if actual_locks != expected_locks: 15162 raise errors.OpExecError("Nodes changed groups since locks were acquired," 15163 " current groups are '%s', used to be '%s'" % 15164 (utils.CommaJoin(expected_locks), 15165 utils.CommaJoin(actual_locks))) 15166 15167 self.node_data = self.cfg.GetAllNodesInfo() 15168 self.group = self.cfg.GetNodeGroup(self.group_uuid) 15169 instance_data = self.cfg.GetAllInstancesInfo() 15170 15171 if self.group is None: 15172 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 15173 (self.op.group_name, self.group_uuid)) 15174 15175 (new_splits, previous_splits) = \ 15176 self.CheckAssignmentForSplitInstances([(node, self.group_uuid) 15177 for node in self.op.nodes], 15178 self.node_data, instance_data) 15179 15180 if new_splits: 15181 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits)) 15182 15183 if not self.op.force: 15184 raise errors.OpExecError("The following instances get split by this" 15185 " change and --force was not given: %s" % 15186 fmt_new_splits) 15187 else: 15188 self.LogWarning("This operation will split the following instances: %s", 15189 fmt_new_splits) 15190 15191 if previous_splits: 15192 self.LogWarning("In addition, these already-split instances continue" 15193 " to be split across groups: %s", 15194 utils.CommaJoin(utils.NiceSort(previous_splits)))
15195
15196 - def Exec(self, feedback_fn):
15197 """Assign nodes to a new group. 15198 15199 """ 15200 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes] 15201 15202 self.cfg.AssignGroupNodes(mods)
15203 15204 @staticmethod
15205 - def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15206 """Check for split instances after a node assignment. 15207 15208 This method considers a series of node assignments as an atomic operation, 15209 and returns information about split instances after applying the set of 15210 changes. 15211 15212 In particular, it returns information about newly split instances, and 15213 instances that were already split, and remain so after the change. 15214 15215 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are 15216 considered. 15217 15218 @type changes: list of (node_name, new_group_uuid) pairs. 15219 @param changes: list of node assignments to consider. 15220 @param node_data: a dict with data for all nodes 15221 @param instance_data: a dict with all instances to consider 15222 @rtype: a two-tuple 15223 @return: a list of instances that were previously okay and result split as a 15224 consequence of this change, and a list of instances that were previously 15225 split and this change does not fix. 15226 15227 """ 15228 changed_nodes = dict((node, group) for node, group in changes 15229 if node_data[node].group != group) 15230 15231 all_split_instances = set() 15232 previously_split_instances = set() 15233 15234 def InstanceNodes(instance): 15235 return [instance.primary_node] + list(instance.secondary_nodes)
15236 15237 for inst in instance_data.values(): 15238 if inst.disk_template not in constants.DTS_INT_MIRROR: 15239 continue 15240 15241 instance_nodes = InstanceNodes(inst) 15242 15243 if len(set(node_data[node].group for node in instance_nodes)) > 1: 15244 previously_split_instances.add(inst.name) 15245 15246 if len(set(changed_nodes.get(node, node_data[node].group) 15247 for node in instance_nodes)) > 1: 15248 all_split_instances.add(inst.name) 15249 15250 return (list(all_split_instances - previously_split_instances), 15251 list(previously_split_instances & all_split_instances))
15252
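# A self-contained rerun of the split-detection logic documented above, using
# plain dictionaries instead of the configuration objects; node, group and
# instance names are invented sample data.
def _FindSplitInstances(changes, node_group, instance_nodes):
  changed = dict((node, group) for (node, group) in changes
                 if node_group[node] != group)
  all_splits = set()
  previous_splits = set()
  for (name, nodes) in instance_nodes.items():
    if len(set(node_group[node] for node in nodes)) > 1:
      previous_splits.add(name)
    if len(set(changed.get(node, node_group[node]) for node in nodes)) > 1:
      all_splits.add(name)
  return (sorted(all_splits - previous_splits),
          sorted(previous_splits & all_splits))

_node_group = {"n1": "g1", "n2": "g1", "n3": "g2"}
_instances = {"inst1": ["n1", "n2"], "inst2": ["n1", "n3"]}
# Moving n2 to g2 newly splits inst1; inst2 already spans g1/g2 and stays so.
assert _FindSplitInstances([("n2", "g2")], _node_group, _instances) == \
  (["inst1"], ["inst2"])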
15253 15254 -class _GroupQuery(_QueryBase):
15255 FIELDS = query.GROUP_FIELDS 15256
15257 - def ExpandNames(self, lu):
15258 lu.needed_locks = {} 15259 15260 self._all_groups = lu.cfg.GetAllNodeGroupsInfo() 15261 self._cluster = lu.cfg.GetClusterInfo() 15262 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values()) 15263 15264 if not self.names: 15265 self.wanted = [name_to_uuid[name] 15266 for name in utils.NiceSort(name_to_uuid.keys())] 15267 else: 15268 # Accept names to be either names or UUIDs. 15269 missing = [] 15270 self.wanted = [] 15271 all_uuid = frozenset(self._all_groups.keys()) 15272 15273 for name in self.names: 15274 if name in all_uuid: 15275 self.wanted.append(name) 15276 elif name in name_to_uuid: 15277 self.wanted.append(name_to_uuid[name]) 15278 else: 15279 missing.append(name) 15280 15281 if missing: 15282 raise errors.OpPrereqError("Some groups do not exist: %s" % 15283 utils.CommaJoin(missing), 15284 errors.ECODE_NOENT)
15285
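# Sketch of the name-or-UUID resolution used in _GroupQuery.ExpandNames above:
# each requested identifier may be either a group name or a group UUID, and
# all unknown identifiers are reported together.  The mapping is invented.
def _ResolveGroups(requested, name_to_uuid):
  all_uuids = frozenset(name_to_uuid.values())
  wanted = []
  missing = []
  for name in requested:
    if name in all_uuids:
      wanted.append(name)
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)

assert _ResolveGroups(["default", "uuid-2", "nosuch"],
                      {"default": "uuid-1", "other": "uuid-2"}) == \
  (["uuid-1", "uuid-2"], ["nosuch"])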
15286 - def DeclareLocks(self, lu, level):
15287 pass
15288
15289 - def _GetQueryData(self, lu):
15290 """Computes the list of node groups and their attributes. 15291 15292 """ 15293 do_nodes = query.GQ_NODE in self.requested_data 15294 do_instances = query.GQ_INST in self.requested_data 15295 15296 group_to_nodes = None 15297 group_to_instances = None 15298 15299 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for 15300 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the 15301 # latter GetAllInstancesInfo() is not enough, for we have to go through 15302 # instance->node. Hence, we will need to process nodes even if we only need 15303 # instance information. 15304 if do_nodes or do_instances: 15305 all_nodes = lu.cfg.GetAllNodesInfo() 15306 group_to_nodes = dict((uuid, []) for uuid in self.wanted) 15307 node_to_group = {} 15308 15309 for node in all_nodes.values(): 15310 if node.group in group_to_nodes: 15311 group_to_nodes[node.group].append(node.name) 15312 node_to_group[node.name] = node.group 15313 15314 if do_instances: 15315 all_instances = lu.cfg.GetAllInstancesInfo() 15316 group_to_instances = dict((uuid, []) for uuid in self.wanted) 15317 15318 for instance in all_instances.values(): 15319 node = instance.primary_node 15320 if node in node_to_group: 15321 group_to_instances[node_to_group[node]].append(instance.name) 15322 15323 if not do_nodes: 15324 # Do not pass on node information if it was not requested. 15325 group_to_nodes = None 15326 15327 return query.GroupQueryData(self._cluster, 15328 [self._all_groups[uuid] 15329 for uuid in self.wanted], 15330 group_to_nodes, group_to_instances, 15331 query.GQ_DISKPARAMS in self.requested_data)
15332
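# A standalone sketch of the two reverse mappings built in _GetQueryData
# above: group UUID to member node names, and group UUID to the instances
# whose primary node lives in that group.  All names below are invented.
def _MapGroups(wanted, node_to_group, instance_to_pnode):
  group_to_nodes = dict((uuid, []) for uuid in wanted)
  for (node, group) in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
  group_to_instances = dict((uuid, []) for uuid in wanted)
  for (inst, pnode) in instance_to_pnode.items():
    group = node_to_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(inst)
  return (group_to_nodes, group_to_instances)

assert _MapGroups(["g1"], {"n1": "g1", "n2": "g2"}, {"inst1": "n1"}) == \
  ({"g1": ["n1"]}, {"g1": ["inst1"]})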
15333 15334 -class LUGroupQuery(NoHooksLU):
15335 """Logical unit for querying node groups. 15336 15337 """ 15338 REQ_BGL = False 15339
15340 - def CheckArguments(self):
15341 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names), 15342 self.op.output_fields, False)
15343
15344 - def ExpandNames(self):
15345 self.gq.ExpandNames(self)
15346
15347 - def DeclareLocks(self, level):
15348 self.gq.DeclareLocks(self, level)
15349
15350 - def Exec(self, feedback_fn):
15351 return self.gq.OldStyleQuery(self)
15352
15353 15354 -class LUGroupSetParams(LogicalUnit):
15355 """Modifies the parameters of a node group. 15356 15357 """ 15358 HPATH = "group-modify" 15359 HTYPE = constants.HTYPE_GROUP 15360 REQ_BGL = False 15361
15362 - def CheckArguments(self):
15363 all_changes = [ 15364 self.op.ndparams, 15365 self.op.diskparams, 15366 self.op.alloc_policy, 15367 self.op.hv_state, 15368 self.op.disk_state, 15369 self.op.ipolicy, 15370 ] 15371 15372 if all_changes.count(None) == len(all_changes): 15373 raise errors.OpPrereqError("Please pass at least one modification", 15374 errors.ECODE_INVAL)
15375
15376 - def ExpandNames(self):
15377 # This raises errors.OpPrereqError on its own: 15378 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15379 15380 self.needed_locks = { 15381 locking.LEVEL_INSTANCE: [], 15382 locking.LEVEL_NODEGROUP: [self.group_uuid], 15383 } 15384 15385 self.share_locks[locking.LEVEL_INSTANCE] = 1
15386
15387 - def DeclareLocks(self, level):
15388 if level == locking.LEVEL_INSTANCE: 15389 assert not self.needed_locks[locking.LEVEL_INSTANCE] 15390 15391 # Lock instances optimistically, needs verification once group lock has 15392 # been acquired 15393 self.needed_locks[locking.LEVEL_INSTANCE] = \ 15394 self.cfg.GetNodeGroupInstances(self.group_uuid)
15395 15396 @staticmethod
15397 - def _UpdateAndVerifyDiskParams(old, new):
15398 """Updates and verifies disk parameters. 15399 15400 """ 15401 new_params = _GetUpdatedParams(old, new) 15402 utils.ForceDictType(new_params, constants.DISK_DT_TYPES) 15403 return new_params
15404
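# Rough sketch of the per-template parameter update performed by the helper
# above, treating the update as a plain dictionary overlay; the real
# _GetUpdatedParams additionally understands special "default"/"none" value
# markers, which are left out here, and the parameter names are illustrative.
def _OverlayParams(old, new):
  merged = dict(old)
  merged.update(new)
  return merged

assert _OverlayParams({"resync-rate": 1024}, {"metavg": "xenvg"}) == \
  {"resync-rate": 1024, "metavg": "xenvg"}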
15405 - def CheckPrereq(self):
15406 """Check prerequisites. 15407 15408 """ 15409 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 15410 15411 # Check if locked instances are still correct 15412 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 15413 15414 self.group = self.cfg.GetNodeGroup(self.group_uuid) 15415 cluster = self.cfg.GetClusterInfo() 15416 15417 if self.group is None: 15418 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 15419 (self.op.group_name, self.group_uuid)) 15420 15421 if self.op.ndparams: 15422 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams) 15423 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) 15424 self.new_ndparams = new_ndparams 15425 15426 if self.op.diskparams: 15427 diskparams = self.group.diskparams 15428 uavdp = self._UpdateAndVerifyDiskParams 15429 # For each disktemplate subdict update and verify the values 15430 new_diskparams = dict((dt, 15431 uavdp(diskparams.get(dt, {}), 15432 self.op.diskparams[dt])) 15433 for dt in constants.DISK_TEMPLATES 15434 if dt in self.op.diskparams) 15435 # As we've all subdicts of diskparams ready, lets merge the actual 15436 # dict with all updated subdicts 15437 self.new_diskparams = objects.FillDict(diskparams, new_diskparams) 15438 try: 15439 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) 15440 except errors.OpPrereqError, err: 15441 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 15442 errors.ECODE_INVAL) 15443 15444 if self.op.hv_state: 15445 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 15446 self.group.hv_state_static) 15447 15448 if self.op.disk_state: 15449 self.new_disk_state = \ 15450 _MergeAndVerifyDiskState(self.op.disk_state, 15451 self.group.disk_state_static) 15452 15453 if self.op.ipolicy: 15454 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy, 15455 self.op.ipolicy, 15456 group_policy=True) 15457 15458 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy) 15459 inst_filter = lambda inst: inst.name in owned_instances 15460 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values() 15461 gmi = ganeti.masterd.instance 15462 violations = \ 15463 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster, 15464 self.group), 15465 new_ipolicy, instances, self.cfg) 15466 15467 if violations: 15468 self.LogWarning("After the ipolicy change the following instances" 15469 " violate them: %s", 15470 utils.CommaJoin(violations))
15471
15472 - def BuildHooksEnv(self):
15473 """Build hooks env. 15474 15475 """ 15476 return { 15477 "GROUP_NAME": self.op.group_name, 15478 "NEW_ALLOC_POLICY": self.op.alloc_policy, 15479 }
15480
15481 - def BuildHooksNodes(self):
15482 """Build hooks nodes. 15483 15484 """ 15485 mn = self.cfg.GetMasterNode() 15486 return ([mn], [mn])
15487
15488 - def Exec(self, feedback_fn):
15489 """Modifies the node group. 15490 15491 """ 15492 result = [] 15493 15494 if self.op.ndparams: 15495 self.group.ndparams = self.new_ndparams 15496 result.append(("ndparams", str(self.group.ndparams))) 15497 15498 if self.op.diskparams: 15499 self.group.diskparams = self.new_diskparams 15500 result.append(("diskparams", str(self.group.diskparams))) 15501 15502 if self.op.alloc_policy: 15503 self.group.alloc_policy = self.op.alloc_policy 15504 15505 if self.op.hv_state: 15506 self.group.hv_state_static = self.new_hv_state 15507 15508 if self.op.disk_state: 15509 self.group.disk_state_static = self.new_disk_state 15510 15511 if self.op.ipolicy: 15512 self.group.ipolicy = self.new_ipolicy 15513 15514 self.cfg.Update(self.group, feedback_fn) 15515 return result
15516
15517 15518 -class LUGroupRemove(LogicalUnit):
15519 HPATH = "group-remove" 15520 HTYPE = constants.HTYPE_GROUP 15521 REQ_BGL = False 15522
15523 - def ExpandNames(self):
15524 # This raises errors.OpPrereqError on its own: 15525 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15526 self.needed_locks = { 15527 locking.LEVEL_NODEGROUP: [self.group_uuid], 15528 }
15529
15530 - def CheckPrereq(self):
15531 """Check prerequisites. 15532 15533 This checks that the given group name exists as a node group, that is 15534 empty (i.e., contains no nodes), and that is not the last group of the 15535 cluster. 15536 15537 """ 15538 # Verify that the group is empty. 15539 group_nodes = [node.name 15540 for node in self.cfg.GetAllNodesInfo().values() 15541 if node.group == self.group_uuid] 15542 15543 if group_nodes: 15544 raise errors.OpPrereqError("Group '%s' not empty, has the following" 15545 " nodes: %s" % 15546 (self.op.group_name, 15547 utils.CommaJoin(utils.NiceSort(group_nodes))), 15548 errors.ECODE_STATE) 15549 15550 # Verify the cluster would not be left group-less. 15551 if len(self.cfg.GetNodeGroupList()) == 1: 15552 raise errors.OpPrereqError("Group '%s' is the only group, cannot be" 15553 " removed" % self.op.group_name, 15554 errors.ECODE_STATE)
15555
15556 - def BuildHooksEnv(self):
15557 """Build hooks env. 15558 15559 """ 15560 return { 15561 "GROUP_NAME": self.op.group_name, 15562 }
15563
15564 - def BuildHooksNodes(self):
15565 """Build hooks nodes. 15566 15567 """ 15568 mn = self.cfg.GetMasterNode() 15569 return ([mn], [mn])
15570
15571 - def Exec(self, feedback_fn):
15572 """Remove the node group. 15573 15574 """ 15575 try: 15576 self.cfg.RemoveNodeGroup(self.group_uuid) 15577 except errors.ConfigurationError: 15578 raise errors.OpExecError("Group '%s' with UUID %s disappeared" % 15579 (self.op.group_name, self.group_uuid)) 15580 15581 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15582
15583 15584 -class LUGroupRename(LogicalUnit):
15585 HPATH = "group-rename" 15586 HTYPE = constants.HTYPE_GROUP 15587 REQ_BGL = False 15588
15589 - def ExpandNames(self):
15590 # This raises errors.OpPrereqError on its own: 15591 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15592 15593 self.needed_locks = { 15594 locking.LEVEL_NODEGROUP: [self.group_uuid], 15595 }
15596
15597 - def CheckPrereq(self):
15598 """Check prerequisites. 15599 15600 Ensures requested new name is not yet used. 15601 15602 """ 15603 try: 15604 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name) 15605 except errors.OpPrereqError: 15606 pass 15607 else: 15608 raise errors.OpPrereqError("Desired new name '%s' clashes with existing" 15609 " node group (UUID: %s)" % 15610 (self.op.new_name, new_name_uuid), 15611 errors.ECODE_EXISTS)
15612
15613 - def BuildHooksEnv(self):
15614 """Build hooks env. 15615 15616 """ 15617 return { 15618 "OLD_NAME": self.op.group_name, 15619 "NEW_NAME": self.op.new_name, 15620 }
15621
15622 - def BuildHooksNodes(self):
15623 """Build hooks nodes. 15624 15625 """ 15626 mn = self.cfg.GetMasterNode() 15627 15628 all_nodes = self.cfg.GetAllNodesInfo() 15629 all_nodes.pop(mn, None) 15630 15631 run_nodes = [mn] 15632 run_nodes.extend(node.name for node in all_nodes.values() 15633 if node.group == self.group_uuid) 15634 15635 return (run_nodes, run_nodes)
15636
15637 - def Exec(self, feedback_fn):
15638 """Rename the node group. 15639 15640 """ 15641 group = self.cfg.GetNodeGroup(self.group_uuid) 15642 15643 if group is None: 15644 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 15645 (self.op.group_name, self.group_uuid)) 15646 15647 group.name = self.op.new_name 15648 self.cfg.Update(group, feedback_fn) 15649 15650 return self.op.new_name
15651
15652 15653 -class LUGroupEvacuate(LogicalUnit):
15654 HPATH = "group-evacuate" 15655 HTYPE = constants.HTYPE_GROUP 15656 REQ_BGL = False 15657
15658 - def ExpandNames(self):
15659 # This raises errors.OpPrereqError on its own: 15660 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 15661 15662 if self.op.target_groups: 15663 self.req_target_uuids = map(self.cfg.LookupNodeGroup, 15664 self.op.target_groups) 15665 else: 15666 self.req_target_uuids = [] 15667 15668 if self.group_uuid in self.req_target_uuids: 15669 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used" 15670 " as a target group (targets are %s)" % 15671 (self.group_uuid, 15672 utils.CommaJoin(self.req_target_uuids)), 15673 errors.ECODE_INVAL) 15674 15675 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator) 15676 15677 self.share_locks = _ShareAll() 15678 self.needed_locks = { 15679 locking.LEVEL_INSTANCE: [], 15680 locking.LEVEL_NODEGROUP: [], 15681 locking.LEVEL_NODE: [], 15682 }
15683
15684 - def DeclareLocks(self, level):
15685 if level == locking.LEVEL_INSTANCE: 15686 assert not self.needed_locks[locking.LEVEL_INSTANCE] 15687 15688 # Lock instances optimistically, needs verification once node and group 15689 # locks have been acquired 15690 self.needed_locks[locking.LEVEL_INSTANCE] = \ 15691 self.cfg.GetNodeGroupInstances(self.group_uuid) 15692 15693 elif level == locking.LEVEL_NODEGROUP: 15694 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 15695 15696 if self.req_target_uuids: 15697 lock_groups = set([self.group_uuid] + self.req_target_uuids) 15698 15699 # Lock all groups used by instances optimistically; this requires going 15700 # via the node before it's locked, requiring verification later on 15701 lock_groups.update(group_uuid 15702 for instance_name in 15703 self.owned_locks(locking.LEVEL_INSTANCE) 15704 for group_uuid in 15705 self.cfg.GetInstanceNodeGroups(instance_name)) 15706 else: 15707 # No target groups, need to lock all of them 15708 lock_groups = locking.ALL_SET 15709 15710 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups 15711 15712 elif level == locking.LEVEL_NODE: 15713 # This will only lock the nodes in the group to be evacuated which 15714 # contain actual instances 15715 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 15716 self._LockInstancesNodes() 15717 15718 # Lock all nodes in group to be evacuated and target groups 15719 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 15720 assert self.group_uuid in owned_groups 15721 member_nodes = [node_name 15722 for group in owned_groups 15723 for node_name in self.cfg.GetNodeGroup(group).members] 15724 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15725
15726 - def CheckPrereq(self):
15727 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 15728 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 15729 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 15730 15731 assert owned_groups.issuperset(self.req_target_uuids) 15732 assert self.group_uuid in owned_groups 15733 15734 # Check if locked instances are still correct 15735 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 15736 15737 # Get instance information 15738 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances)) 15739 15740 # Check if node groups for locked instances are still correct 15741 _CheckInstancesNodeGroups(self.cfg, self.instances, 15742 owned_groups, owned_nodes, self.group_uuid) 15743 15744 if self.req_target_uuids: 15745 # User requested specific target groups 15746 self.target_uuids = self.req_target_uuids 15747 else: 15748 # All groups except the one to be evacuated are potential targets 15749 self.target_uuids = [group_uuid for group_uuid in owned_groups 15750 if group_uuid != self.group_uuid] 15751 15752 if not self.target_uuids: 15753 raise errors.OpPrereqError("There are no possible target groups", 15754 errors.ECODE_INVAL)
15755
15756 - def BuildHooksEnv(self):
15757 """Build hooks env. 15758 15759 """ 15760 return { 15761 "GROUP_NAME": self.op.group_name, 15762 "TARGET_GROUPS": " ".join(self.target_uuids), 15763 }
15764
15765 - def BuildHooksNodes(self):
15766 """Build hooks nodes. 15767 15768 """ 15769 mn = self.cfg.GetMasterNode() 15770 15771 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 15772 15773 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members 15774 15775 return (run_nodes, run_nodes)
15776
15777 - def Exec(self, feedback_fn):
15778 instances = list(self.owned_locks(locking.LEVEL_INSTANCE)) 15779 15780 assert self.group_uuid not in self.target_uuids 15781 15782 req = iallocator.IAReqGroupChange(instances=instances, 15783 target_groups=self.target_uuids) 15784 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 15785 15786 ial.Run(self.op.iallocator) 15787 15788 if not ial.success: 15789 raise errors.OpPrereqError("Can't compute group evacuation using" 15790 " iallocator '%s': %s" % 15791 (self.op.iallocator, ial.info), 15792 errors.ECODE_NORES) 15793 15794 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False) 15795 15796 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s", 15797 len(jobs), self.op.group_name) 15798 15799 return ResultWithJobs(jobs)
15800
15801 15802 -class TagsLU(NoHooksLU): # pylint: disable=W0223
15803 """Generic tags LU. 15804 15805 This is an abstract class which is the parent of all the other tags LUs. 15806 15807 """
15808 - def ExpandNames(self):
15809 self.group_uuid = None 15810 self.needed_locks = {} 15811 15812 if self.op.kind == constants.TAG_NODE: 15813 self.op.name = _ExpandNodeName(self.cfg, self.op.name) 15814 lock_level = locking.LEVEL_NODE 15815 lock_name = self.op.name 15816 elif self.op.kind == constants.TAG_INSTANCE: 15817 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) 15818 lock_level = locking.LEVEL_INSTANCE 15819 lock_name = self.op.name 15820 elif self.op.kind == constants.TAG_NODEGROUP: 15821 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name) 15822 lock_level = locking.LEVEL_NODEGROUP 15823 lock_name = self.group_uuid 15824 elif self.op.kind == constants.TAG_NETWORK: 15825 self.network_uuid = self.cfg.LookupNetwork(self.op.name) 15826 lock_level = locking.LEVEL_NETWORK 15827 lock_name = self.network_uuid 15828 else: 15829 lock_level = None 15830 lock_name = None 15831 15832 if lock_level and getattr(self.op, "use_locking", True): 15833 self.needed_locks[lock_level] = lock_name
15834 15835 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's 15836 # not possible to acquire the BGL based on opcode parameters) 15837
15838 - def CheckPrereq(self):
15839 """Check prerequisites. 15840 15841 """ 15842 if self.op.kind == constants.TAG_CLUSTER: 15843 self.target = self.cfg.GetClusterInfo() 15844 elif self.op.kind == constants.TAG_NODE: 15845 self.target = self.cfg.GetNodeInfo(self.op.name) 15846 elif self.op.kind == constants.TAG_INSTANCE: 15847 self.target = self.cfg.GetInstanceInfo(self.op.name) 15848 elif self.op.kind == constants.TAG_NODEGROUP: 15849 self.target = self.cfg.GetNodeGroup(self.group_uuid) 15850 elif self.op.kind == constants.TAG_NETWORK: 15851 self.target = self.cfg.GetNetwork(self.network_uuid) 15852 else: 15853 raise errors.OpPrereqError("Wrong tag type requested (%s)" % 15854 str(self.op.kind), errors.ECODE_INVAL)
15855
15856 15857 -class LUTagsGet(TagsLU):
15858 """Returns the tags of a given object. 15859 15860 """ 15861 REQ_BGL = False 15862
15863 - def ExpandNames(self):
15864 TagsLU.ExpandNames(self) 15865 15866 # Share locks as this is only a read operation 15867 self.share_locks = _ShareAll()
15868
15869 - def Exec(self, feedback_fn):
15870 """Returns the tag list. 15871 15872 """ 15873 return list(self.target.GetTags())
15874
15875 15876 -class LUTagsSearch(NoHooksLU):
15877 """Searches the tags for a given pattern. 15878 15879 """ 15880 REQ_BGL = False 15881
15882 - def ExpandNames(self):
15883 self.needed_locks = {}
15884
15885 - def CheckPrereq(self):
15886 """Check prerequisites. 15887 15888 This checks the pattern passed for validity by compiling it. 15889 15890 """ 15891 try: 15892 self.re = re.compile(self.op.pattern) 15893 except re.error, err: 15894 raise errors.OpPrereqError("Invalid search pattern '%s': %s" % 15895 (self.op.pattern, err), errors.ECODE_INVAL)
15896
15897 - def Exec(self, feedback_fn):
15898 """Returns the tag list. 15899 15900 """ 15901 cfg = self.cfg 15902 tgts = [("/cluster", cfg.GetClusterInfo())] 15903 ilist = cfg.GetAllInstancesInfo().values() 15904 tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) 15905 nlist = cfg.GetAllNodesInfo().values() 15906 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) 15907 tgts.extend(("/nodegroup/%s" % n.name, n) 15908 for n in cfg.GetAllNodeGroupsInfo().values()) 15909 results = [] 15910 for path, target in tgts: 15911 for tag in target.GetTags(): 15912 if self.re.search(tag): 15913 results.append((path, tag)) 15914 return results
15915
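# A small standalone version of the tag search above: every (path, object)
# pair contributes one result row per tag matching the compiled pattern.  The
# paths and tags below are invented sample data.
import re

def _SearchTags(pattern, tagged):
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged
          for tag in tags
          if rx.search(tag)]

_tagged = [("/cluster", ["env:prod"]), ("/instances/inst1", ["env:test"])]
assert _SearchTags(r"^env:", _tagged) == \
  [("/cluster", "env:prod"), ("/instances/inst1", "env:test")]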
15916 15917 -class LUTagsSet(TagsLU):
15918 """Sets a tag on a given object. 15919 15920 """ 15921 REQ_BGL = False 15922
15923 - def CheckPrereq(self):
15924 """Check prerequisites. 15925 15926 This checks the type and length of the tag name and value. 15927 15928 """ 15929 TagsLU.CheckPrereq(self) 15930 for tag in self.op.tags: 15931 objects.TaggableObject.ValidateTag(tag)
15932
15933 - def Exec(self, feedback_fn):
15934 """Sets the tag. 15935 15936 """ 15937 try: 15938 for tag in self.op.tags: 15939 self.target.AddTag(tag) 15940 except errors.TagError, err: 15941 raise errors.OpExecError("Error while setting tag: %s" % str(err)) 15942 self.cfg.Update(self.target, feedback_fn)
15943
15944 15945 -class LUTagsDel(TagsLU):
15946 """Delete a list of tags from a given object. 15947 15948 """ 15949 REQ_BGL = False 15950
15951 - def CheckPrereq(self):
15952 """Check prerequisites. 15953 15954 This checks that we have the given tag. 15955 15956 """ 15957 TagsLU.CheckPrereq(self) 15958 for tag in self.op.tags: 15959 objects.TaggableObject.ValidateTag(tag) 15960 del_tags = frozenset(self.op.tags) 15961 cur_tags = self.target.GetTags() 15962 15963 diff_tags = del_tags - cur_tags 15964 if diff_tags: 15965 diff_names = ("'%s'" % i for i in sorted(diff_tags)) 15966 raise errors.OpPrereqError("Tag(s) %s not found" % 15967 (utils.CommaJoin(diff_names), ), 15968 errors.ECODE_NOENT)
15969
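# Sketch of the precondition enforced by LUTagsDel.CheckPrereq above: every
# tag requested for removal must currently be present, and all missing tags
# are reported in one message.  ValueError stands in for errors.OpPrereqError
# and the tag values are invented.
def _CheckTagsPresent(requested, current):
  missing = frozenset(requested) - frozenset(current)
  if missing:
    raise ValueError("Tag(s) %s not found" %
                     ", ".join("'%s'" % tag for tag in sorted(missing)))

_CheckTagsPresent(["env:prod"], ["env:prod", "owner:ops"])   # passes
try:
  _CheckTagsPresent(["env:qa"], ["env:prod"])
except ValueError:
  pass                                                       # reported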
15970 - def Exec(self, feedback_fn):
15971 """Remove the tag from the object. 15972 15973 """ 15974 for tag in self.op.tags: 15975 self.target.RemoveTag(tag) 15976 self.cfg.Update(self.target, feedback_fn)
15977
15978 15979 -class LUTestDelay(NoHooksLU):
15980 """Sleep for a specified amount of time. 15981 15982 This LU sleeps on the master and/or nodes for a specified amount of 15983 time. 15984 15985 """ 15986 REQ_BGL = False 15987
15988 - def ExpandNames(self):
15989 """Expand names and set required locks. 15990 15991 This expands the node list, if any. 15992 15993 """ 15994 self.needed_locks = {} 15995 if self.op.on_nodes: 15996 # _GetWantedNodes can be used here, but is not always appropriate to use 15997 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for 15998 # more information. 15999 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes) 16000 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
16001
16002 - def _TestDelay(self):
16003 """Do the actual sleep. 16004 16005 """ 16006 if self.op.on_master: 16007 if not utils.TestDelay(self.op.duration): 16008 raise errors.OpExecError("Error during master delay test") 16009 if self.op.on_nodes: 16010 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration) 16011 for node, node_result in result.items(): 16012 node_result.Raise("Failure during rpc call to node %s" % node)
16013
16014 - def Exec(self, feedback_fn):
16015 """Execute the test delay opcode, with the wanted repetitions. 16016 16017 """ 16018 if self.op.repeat == 0: 16019 self._TestDelay() 16020 else: 16021 top_value = self.op.repeat - 1 16022 for i in range(self.op.repeat): 16023 self.LogInfo("Test delay iteration %d/%d", i, top_value) 16024 self._TestDelay()
16025
16026 16027 -class LURestrictedCommand(NoHooksLU):
16028 """Logical unit for executing restricted commands. 16029 16030 """ 16031 REQ_BGL = False 16032
16033 - def ExpandNames(self):
16034 if self.op.nodes: 16035 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 16036 16037 self.needed_locks = { 16038 locking.LEVEL_NODE: self.op.nodes, 16039 } 16040 self.share_locks = { 16041 locking.LEVEL_NODE: not self.op.use_locking, 16042 }
16043
16044 - def CheckPrereq(self):
16045 """Check prerequisites. 16046 16047 """
16048
16049 - def Exec(self, feedback_fn):
16050 """Execute restricted command and return output. 16051 16052 """ 16053 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 16054 16055 # Check if correct locks are held 16056 assert set(self.op.nodes).issubset(owned_nodes) 16057 16058 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command) 16059 16060 result = [] 16061 16062 for node_name in self.op.nodes: 16063 nres = rpcres[node_name] 16064 if nres.fail_msg: 16065 msg = ("Command '%s' on node '%s' failed: %s" % 16066 (self.op.command, node_name, nres.fail_msg)) 16067 result.append((False, msg)) 16068 else: 16069 result.append((True, nres.payload)) 16070 16071 return result
16072
16073 16074 -class LUTestJqueue(NoHooksLU):
16075 """Utility LU to test some aspects of the job queue. 16076 16077 """ 16078 REQ_BGL = False 16079 16080 # Must be lower than default timeout for WaitForJobChange to see whether it 16081 # notices changed jobs 16082 _CLIENT_CONNECT_TIMEOUT = 20.0 16083 _CLIENT_CONFIRM_TIMEOUT = 60.0 16084 16085 @classmethod
16086 - def _NotifyUsingSocket(cls, cb, errcls):
16087 """Opens a Unix socket and waits for another program to connect. 16088 16089 @type cb: callable 16090 @param cb: Callback to send socket name to client 16091 @type errcls: class 16092 @param errcls: Exception class to use for errors 16093 16094 """ 16095 # Using a temporary directory as there's no easy way to create temporary 16096 # sockets without writing a custom loop around tempfile.mktemp and 16097 # socket.bind 16098 tmpdir = tempfile.mkdtemp() 16099 try: 16100 tmpsock = utils.PathJoin(tmpdir, "sock") 16101 16102 logging.debug("Creating temporary socket at %s", tmpsock) 16103 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 16104 try: 16105 sock.bind(tmpsock) 16106 sock.listen(1) 16107 16108 # Send details to client 16109 cb(tmpsock) 16110 16111 # Wait for client to connect before continuing 16112 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) 16113 try: 16114 (conn, _) = sock.accept() 16115 except socket.error, err: 16116 raise errcls("Client didn't connect in time (%s)" % err) 16117 finally: 16118 sock.close() 16119 finally: 16120 # Remove as soon as client is connected 16121 shutil.rmtree(tmpdir) 16122 16123 # Wait for client to close 16124 try: 16125 try: 16126 # pylint: disable=E1101 16127 # Instance of '_socketobject' has no ... member 16128 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) 16129 conn.recv(1) 16130 except socket.error, err: 16131 raise errcls("Client failed to confirm notification (%s)" % err) 16132 finally: 16133 conn.close()
16134
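# The client-side counterpart of the helper above can be sketched as follows:
# the test program connects to the Unix socket path it was given and writes a
# single byte so that the conn.recv(1) call unblocks.  The socket path, the
# timeout and the confirmation byte are assumptions used for illustration.
import socket

def _ConfirmNotification(sockname, timeout=10.0):
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)   # unblocks the accept() in _NotifyUsingSocket
    sock.sendall(b"\0")      # one byte confirms the notification
  finally:
    sock.close()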
16135 - def _SendNotification(self, test, arg, sockname):
16136 """Sends a notification to the client. 16137 16138 @type test: string 16139 @param test: Test name 16140 @param arg: Test argument (depends on test) 16141 @type sockname: string 16142 @param sockname: Socket path 16143 16144 """ 16145 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16146
16147 - def _Notify(self, prereq, test, arg):
16148 """Notifies the client of a test. 16149 16150 @type prereq: bool 16151 @param prereq: Whether this is a prereq-phase test 16152 @type test: string 16153 @param test: Test name 16154 @param arg: Test argument (depends on test) 16155 16156 """ 16157 if prereq: 16158 errcls = errors.OpPrereqError 16159 else: 16160 errcls = errors.OpExecError 16161 16162 return self._NotifyUsingSocket(compat.partial(self._SendNotification, 16163 test, arg), 16164 errcls)
16165
16166 - def CheckArguments(self):
16167 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 16168 self.expandnames_calls = 0
16169
16170 - def ExpandNames(self):
16171 checkargs_calls = getattr(self, "checkargs_calls", 0) 16172 if checkargs_calls < 1: 16173 raise errors.ProgrammerError("CheckArguments was not called") 16174 16175 self.expandnames_calls += 1 16176 16177 if self.op.notify_waitlock: 16178 self._Notify(True, constants.JQT_EXPANDNAMES, None) 16179 16180 self.LogInfo("Expanding names") 16181 16182 # Get lock on master node (just to get a lock, not for a particular reason) 16183 self.needed_locks = { 16184 locking.LEVEL_NODE: self.cfg.GetMasterNode(), 16185 }
16186
16187 - def Exec(self, feedback_fn):
16188 if self.expandnames_calls < 1: 16189 raise errors.ProgrammerError("ExpandNames was not called") 16190 16191 if self.op.notify_exec: 16192 self._Notify(False, constants.JQT_EXEC, None) 16193 16194 self.LogInfo("Executing") 16195 16196 if self.op.log_messages: 16197 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) 16198 for idx, msg in enumerate(self.op.log_messages): 16199 self.LogInfo("Sending log message %s", idx + 1) 16200 feedback_fn(constants.JQT_MSGPREFIX + msg) 16201 # Report how many test messages have been sent 16202 self._Notify(False, constants.JQT_LOGMSG, idx + 1) 16203 16204 if self.op.fail: 16205 raise errors.OpExecError("Opcode failure was requested") 16206 16207 return True
16208
16209 16210 -class LUTestAllocator(NoHooksLU):
16211 """Run allocator tests. 16212 16213 This LU runs the allocator tests 16214 16215 """
16216 - def CheckPrereq(self):
16217 """Check prerequisites. 16218 16219 This checks the opcode parameters depending on the director and mode test. 16220 16221 """ 16222 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC, 16223 constants.IALLOCATOR_MODE_MULTI_ALLOC): 16224 for attr in ["memory", "disks", "disk_template", 16225 "os", "tags", "nics", "vcpus"]: 16226 if not hasattr(self.op, attr): 16227 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" % 16228 attr, errors.ECODE_INVAL) 16229 iname = self.cfg.ExpandInstanceName(self.op.name) 16230 if iname is not None: 16231 raise errors.OpPrereqError("Instance '%s' already in the cluster" % 16232 iname, errors.ECODE_EXISTS) 16233 if not isinstance(self.op.nics, list): 16234 raise errors.OpPrereqError("Invalid parameter 'nics'", 16235 errors.ECODE_INVAL) 16236 if not isinstance(self.op.disks, list): 16237 raise errors.OpPrereqError("Invalid parameter 'disks'", 16238 errors.ECODE_INVAL) 16239 for row in self.op.disks: 16240 if (not isinstance(row, dict) or 16241 constants.IDISK_SIZE not in row or 16242 not isinstance(row[constants.IDISK_SIZE], int) or 16243 constants.IDISK_MODE not in row or 16244 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET): 16245 raise errors.OpPrereqError("Invalid contents of the 'disks'" 16246 " parameter", errors.ECODE_INVAL) 16247 if self.op.hypervisor is None: 16248 self.op.hypervisor = self.cfg.GetHypervisorType() 16249 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 16250 fname = _ExpandInstanceName(self.cfg, self.op.name) 16251 self.op.name = fname 16252 self.relocate_from = \ 16253 list(self.cfg.GetInstanceInfo(fname).secondary_nodes) 16254 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP, 16255 constants.IALLOCATOR_MODE_NODE_EVAC): 16256 if not self.op.instances: 16257 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL) 16258 self.op.instances = _GetWantedInstances(self, self.op.instances) 16259 else: 16260 raise errors.OpPrereqError("Invalid test allocator mode '%s'" % 16261 self.op.mode, errors.ECODE_INVAL) 16262 16263 if self.op.direction == constants.IALLOCATOR_DIR_OUT: 16264 if self.op.iallocator is None: 16265 raise errors.OpPrereqError("Missing allocator name", 16266 errors.ECODE_INVAL) 16267 elif self.op.direction != constants.IALLOCATOR_DIR_IN: 16268 raise errors.OpPrereqError("Wrong allocator test '%s'" % 16269 self.op.direction, errors.ECODE_INVAL)
16270
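As a reading aid for the checks above: the allocation modes expect "disks" to be a list of dicts keyed by C{constants.IDISK_SIZE} (an integer) and C{constants.IDISK_MODE} (a member of C{constants.DISK_ACCESS_SET}), while "nics" merely has to be a list. A minimal sketch of values that would pass these checks (illustrative only; the concrete size and the use of C{constants.DISK_RDWR} are assumptions, not taken from this module):

    # Hypothetical opcode input satisfying the isinstance/key checks above;
    # sizes are in MiB and the access mode must come from DISK_ACCESS_SET.
    disks = [{constants.IDISK_SIZE: 1024,
              constants.IDISK_MODE: constants.DISK_RDWR}]
    nics = [{}]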
16271 - def Exec(self, feedback_fn):
16272 """Run the allocator test. 16273 16274 """ 16275 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 16276 req = iallocator.IAReqInstanceAlloc(name=self.op.name, 16277 memory=self.op.memory, 16278 disks=self.op.disks, 16279 disk_template=self.op.disk_template, 16280 os=self.op.os, 16281 tags=self.op.tags, 16282 nics=self.op.nics, 16283 vcpus=self.op.vcpus, 16284 spindle_use=self.op.spindle_use, 16285 hypervisor=self.op.hypervisor, 16286 node_whitelist=None) 16287 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 16288 req = iallocator.IAReqRelocate(name=self.op.name, 16289 relocate_from=list(self.relocate_from)) 16290 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP: 16291 req = iallocator.IAReqGroupChange(instances=self.op.instances, 16292 target_groups=self.op.target_groups) 16293 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC: 16294 req = iallocator.IAReqNodeEvac(instances=self.op.instances, 16295 evac_mode=self.op.evac_mode) 16296 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC: 16297 disk_template = self.op.disk_template 16298 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx), 16299 memory=self.op.memory, 16300 disks=self.op.disks, 16301 disk_template=disk_template, 16302 os=self.op.os, 16303 tags=self.op.tags, 16304 nics=self.op.nics, 16305 vcpus=self.op.vcpus, 16306 spindle_use=self.op.spindle_use, 16307 hypervisor=self.op.hypervisor) 16308 for idx in range(self.op.count)] 16309 req = iallocator.IAReqMultiInstanceAlloc(instances=insts) 16310 else: 16311 raise errors.ProgrammerError("Uncatched mode %s in" 16312 " LUTestAllocator.Exec", self.op.mode) 16313 16314 ial = iallocator.IAllocator(self.cfg, self.rpc, req) 16315 if self.op.direction == constants.IALLOCATOR_DIR_IN: 16316 result = ial.in_text 16317 else: 16318 ial.Run(self.op.iallocator, validate=False) 16319 result = ial.out_text 16320 return result
16321
16322 16323 -class LUNetworkAdd(LogicalUnit):
16324 """Logical unit for creating networks. 16325 16326 """ 16327 HPATH = "network-add" 16328 HTYPE = constants.HTYPE_NETWORK 16329 REQ_BGL = False 16330
16331 - def BuildHooksNodes(self):
16332 """Build hooks nodes. 16333 16334 """ 16335 mn = self.cfg.GetMasterNode() 16336 return ([mn], [mn])
16337
16338 - def CheckArguments(self):
16339 if self.op.mac_prefix: 16340 self.op.mac_prefix = \ 16341 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16342
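For orientation only: the helper invoked above lives in utils and presumably canonicalizes a three-octet prefix (for example lower-casing it) while rejecting malformed values; the concrete input/output pair below is an assumption, not behaviour documented in this module.

    # Assumed behaviour of utils.NormalizeAndValidateThreeOctetMacPrefix:
    prefix = utils.NormalizeAndValidateThreeOctetMacPrefix("AA:0B:CC")
    # prefix == "aa:0b:cc"; a malformed prefix presumably raises an
    # errors.OpPrereqError, which is why it is called from CheckArguments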
16343 - def ExpandNames(self):
16344 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) 16345 16346 if self.op.conflicts_check: 16347 self.share_locks[locking.LEVEL_NODE] = 1 16348 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1 16349 self.needed_locks = { 16350 locking.LEVEL_NODE: locking.ALL_SET, 16351 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 16352 } 16353 else: 16354 self.needed_locks = {} 16355 16356 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16357
16358 - def CheckPrereq(self):
16359 if self.op.network is None: 16360 raise errors.OpPrereqError("Network must be given", 16361 errors.ECODE_INVAL) 16362 16363 try: 16364 existing_uuid = self.cfg.LookupNetwork(self.op.network_name) 16365 except errors.OpPrereqError: 16366 pass 16367 else: 16368 raise errors.OpPrereqError("Desired network name '%s' already exists as a" 16369 " network (UUID: %s)" % 16370 (self.op.network_name, existing_uuid), 16371 errors.ECODE_EXISTS) 16372 16373 # Check tag validity 16374 for tag in self.op.tags: 16375 objects.TaggableObject.ValidateTag(tag)
16376
16377 - def BuildHooksEnv(self):
16378 """Build hooks env. 16379 16380 """ 16381 args = { 16382 "name": self.op.network_name, 16383 "subnet": self.op.network, 16384 "gateway": self.op.gateway, 16385 "network6": self.op.network6, 16386 "gateway6": self.op.gateway6, 16387 "mac_prefix": self.op.mac_prefix, 16388 "tags": self.op.tags, 16389 } 16390 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16391
16392 - def Exec(self, feedback_fn):
16393 """Add the ip pool to the cluster. 16394 16395 """ 16396 nobj = objects.Network(name=self.op.network_name, 16397 network=self.op.network, 16398 gateway=self.op.gateway, 16399 network6=self.op.network6, 16400 gateway6=self.op.gateway6, 16401 mac_prefix=self.op.mac_prefix, 16402 uuid=self.network_uuid) 16403 # Initialize the associated address pool 16404 try: 16405 pool = network.AddressPool.InitializeNetwork(nobj) 16406 except errors.AddressPoolError, err: 16407 raise errors.OpExecError("Cannot create IP address pool for network" 16408 " '%s': %s" % (self.op.network_name, err)) 16409 16410 # Check if we need to reserve the nodes and the cluster master IP 16411 # These may not be allocated to any instances in routed mode, as 16412 # they wouldn't function anyway. 16413 if self.op.conflicts_check: 16414 for node in self.cfg.GetAllNodesInfo().values(): 16415 for ip in [node.primary_ip, node.secondary_ip]: 16416 try: 16417 if pool.Contains(ip): 16418 pool.Reserve(ip) 16419 self.LogInfo("Reserved IP address of node '%s' (%s)", 16420 node.name, ip) 16421 except errors.AddressPoolError, err: 16422 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s", 16423 ip, node.name, err) 16424 16425 master_ip = self.cfg.GetClusterInfo().master_ip 16426 try: 16427 if pool.Contains(master_ip): 16428 pool.Reserve(master_ip) 16429 self.LogInfo("Reserved cluster master IP address (%s)", master_ip) 16430 except errors.AddressPoolError, err: 16431 self.LogWarning("Cannot reserve cluster master IP address (%s): %s", 16432 master_ip, err) 16433 16434 if self.op.add_reserved_ips: 16435 for ip in self.op.add_reserved_ips: 16436 try: 16437 pool.Reserve(ip, external=True) 16438 except errors.AddressPoolError, err: 16439 raise errors.OpExecError("Cannot reserve IP address '%s': %s" % 16440 (ip, err)) 16441 16442 if self.op.tags: 16443 for tag in self.op.tags: 16444 nobj.AddTag(tag) 16445 16446 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False) 16447 del self.remove_locks[locking.LEVEL_NETWORK]
16448
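The node-IP and master-IP handling in Exec above repeats one small pattern: reserve an address only if the pool contains it, and downgrade failures to warnings. A standalone sketch of that pattern, using a hypothetical helper name and only the AddressPool methods already used above:

    def _ReserveIfContained(pool, ip, owner, info_fn, warn_fn):
      # Mirrors the logic above: addresses outside the pool are ignored and
      # reservation errors are reported as warnings rather than aborting.
      try:
        if pool.Contains(ip):
          pool.Reserve(ip)
          info_fn("Reserved IP address of %s (%s)", owner, ip)
      except errors.AddressPoolError, err:
        warn_fn("Cannot reserve IP address '%s' of %s: %s", ip, owner, err)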
16449 16450 -class LUNetworkRemove(LogicalUnit):
16451 HPATH = "network-remove" 16452 HTYPE = constants.HTYPE_NETWORK 16453 REQ_BGL = False 16454
16455 - def ExpandNames(self):
16456 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name) 16457 16458 self.share_locks[locking.LEVEL_NODEGROUP] = 1 16459 self.needed_locks = { 16460 locking.LEVEL_NETWORK: [self.network_uuid], 16461 locking.LEVEL_NODEGROUP: locking.ALL_SET, 16462 }
16463
16464 - def CheckPrereq(self):
16465 """Check prerequisites. 16466 16467 This checks that the given network name exists as a network, that is 16468 empty (i.e., contains no nodes), and that is not the last group of the 16469 cluster. 16470 16471 """ 16472 # Verify that the network is not conncted. 16473 node_groups = [group.name 16474 for group in self.cfg.GetAllNodeGroupsInfo().values() 16475 if self.network_uuid in group.networks] 16476 16477 if node_groups: 16478 self.LogWarning("Network '%s' is connected to the following" 16479 " node groups: %s" % 16480 (self.op.network_name, 16481 utils.CommaJoin(utils.NiceSort(node_groups)))) 16482 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16483
16484 - def BuildHooksEnv(self):
16485 """Build hooks env. 16486 16487 """ 16488 return { 16489 "NETWORK_NAME": self.op.network_name, 16490 }
16491
16492 - def BuildHooksNodes(self):
16493 """Build hooks nodes. 16494 16495 """ 16496 mn = self.cfg.GetMasterNode() 16497 return ([mn], [mn])
16498
16499 - def Exec(self, feedback_fn):
16500 """Remove the network. 16501 16502 """ 16503 try: 16504 self.cfg.RemoveNetwork(self.network_uuid) 16505 except errors.ConfigurationError: 16506 raise errors.OpExecError("Network '%s' with UUID %s disappeared" % 16507 (self.op.network_name, self.network_uuid))
16508
16509 16510 -class LUNetworkSetParams(LogicalUnit):
16511 """Modifies the parameters of a network. 16512 16513 """ 16514 HPATH = "network-modify" 16515 HTYPE = constants.HTYPE_NETWORK 16516 REQ_BGL = False 16517
16518 - def CheckArguments(self):
16519 if (self.op.gateway and 16520 (self.op.add_reserved_ips or self.op.remove_reserved_ips)): 16521 raise errors.OpPrereqError("Cannot modify gateway and reserved ips" 16522 " at once", errors.ECODE_INVAL)
16523
16524 - def ExpandNames(self):
16525 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name) 16526 16527 self.needed_locks = { 16528 locking.LEVEL_NETWORK: [self.network_uuid], 16529 }
16530
16531 - def CheckPrereq(self):
16532 """Check prerequisites. 16533 16534 """ 16535 self.network = self.cfg.GetNetwork(self.network_uuid) 16536 self.gateway = self.network.gateway 16537 self.mac_prefix = self.network.mac_prefix 16538 self.network6 = self.network.network6 16539 self.gateway6 = self.network.gateway6 16540 self.tags = self.network.tags 16541 16542 self.pool = network.AddressPool(self.network) 16543 16544 if self.op.gateway: 16545 if self.op.gateway == constants.VALUE_NONE: 16546 self.gateway = None 16547 else: 16548 self.gateway = self.op.gateway 16549 if self.pool.IsReserved(self.gateway): 16550 raise errors.OpPrereqError("Gateway IP address '%s' is already" 16551 " reserved" % self.gateway, 16552 errors.ECODE_STATE) 16553 16554 if self.op.mac_prefix: 16555 if self.op.mac_prefix == constants.VALUE_NONE: 16556 self.mac_prefix = None 16557 else: 16558 self.mac_prefix = \ 16559 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix) 16560 16561 if self.op.gateway6: 16562 if self.op.gateway6 == constants.VALUE_NONE: 16563 self.gateway6 = None 16564 else: 16565 self.gateway6 = self.op.gateway6 16566 16567 if self.op.network6: 16568 if self.op.network6 == constants.VALUE_NONE: 16569 self.network6 = None 16570 else: 16571 self.network6 = self.op.network6
16572
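Each optional field above follows the same three-way convention: an unset parameter keeps the current value, the literal C{constants.VALUE_NONE} clears it, and anything else replaces it. A compact restatement of that rule (illustrative helper, not part of the module):

    def _EffectiveValue(op_value, current_value):
      # Restates the convention used in CheckPrereq above.
      if not op_value:
        return current_value            # parameter not given: keep as-is
      if op_value == constants.VALUE_NONE:
        return None                     # explicit "none": clear the field
      return op_value                   # otherwise: replace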
16573 - def BuildHooksEnv(self):
16574 """Build hooks env. 16575 16576 """ 16577 args = { 16578 "name": self.op.network_name, 16579 "subnet": self.network.network, 16580 "gateway": self.gateway, 16581 "network6": self.network6, 16582 "gateway6": self.gateway6, 16583 "mac_prefix": self.mac_prefix, 16584 "tags": self.tags, 16585 } 16586 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16587
16588 - def BuildHooksNodes(self):
16589 """Build hooks nodes. 16590 16591 """ 16592 mn = self.cfg.GetMasterNode() 16593 return ([mn], [mn])
16594
16595 - def Exec(self, feedback_fn):
16596 """Modifies the network. 16597 16598 """ 16599 #TODO: reserve/release via temporary reservation manager 16600 # extend cfg.ReserveIp/ReleaseIp with the external flag 16601 if self.op.gateway: 16602 if self.gateway == self.network.gateway: 16603 self.LogWarning("Gateway is already %s", self.gateway) 16604 else: 16605 if self.gateway: 16606 self.pool.Reserve(self.gateway, external=True) 16607 if self.network.gateway: 16608 self.pool.Release(self.network.gateway, external=True) 16609 self.network.gateway = self.gateway 16610 16611 if self.op.add_reserved_ips: 16612 for ip in self.op.add_reserved_ips: 16613 try: 16614 if self.pool.IsReserved(ip): 16615 self.LogWarning("IP address %s is already reserved", ip) 16616 else: 16617 self.pool.Reserve(ip, external=True) 16618 except errors.AddressPoolError, err: 16619 self.LogWarning("Cannot reserve IP address %s: %s", ip, err) 16620 16621 if self.op.remove_reserved_ips: 16622 for ip in self.op.remove_reserved_ips: 16623 if ip == self.network.gateway: 16624 self.LogWarning("Cannot unreserve Gateway's IP") 16625 continue 16626 try: 16627 if not self.pool.IsReserved(ip): 16628 self.LogWarning("IP address %s is already unreserved", ip) 16629 else: 16630 self.pool.Release(ip, external=True) 16631 except errors.AddressPoolError, err: 16632 self.LogWarning("Cannot release IP address %s: %s", ip, err) 16633 16634 if self.op.mac_prefix: 16635 self.network.mac_prefix = self.mac_prefix 16636 16637 if self.op.network6: 16638 self.network.network6 = self.network6 16639 16640 if self.op.gateway6: 16641 self.network.gateway6 = self.gateway6 16642 16643 self.pool.Validate() 16644 16645 self.cfg.Update(self.network, feedback_fn)
16646
16647 16648 -class _NetworkQuery(_QueryBase):
16649 FIELDS = query.NETWORK_FIELDS 16650
16651 - def ExpandNames(self, lu):
16652 lu.needed_locks = {} 16653 lu.share_locks = _ShareAll() 16654 16655 self.do_locking = self.use_locking 16656 16657 all_networks = lu.cfg.GetAllNetworksInfo() 16658 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values()) 16659 16660 if self.names: 16661 missing = [] 16662 self.wanted = [] 16663 16664 for name in self.names: 16665 if name in name_to_uuid: 16666 self.wanted.append(name_to_uuid[name]) 16667 else: 16668 missing.append(name) 16669 16670 if missing: 16671 raise errors.OpPrereqError("Some networks do not exist: %s" % missing, 16672 errors.ECODE_NOENT) 16673 else: 16674 self.wanted = locking.ALL_SET 16675 16676 if self.do_locking: 16677 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted 16678 if query.NETQ_INST in self.requested_data: 16679 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 16680 if query.NETQ_GROUP in self.requested_data: 16681 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16682
16683 - def DeclareLocks(self, lu, level):
16684 pass
16685
16686 - def _GetQueryData(self, lu):
16687 """Computes the list of networks and their attributes. 16688 16689 """ 16690 all_networks = lu.cfg.GetAllNetworksInfo() 16691 16692 network_uuids = self._GetNames(lu, all_networks.keys(), 16693 locking.LEVEL_NETWORK) 16694 16695 do_instances = query.NETQ_INST in self.requested_data 16696 do_groups = query.NETQ_GROUP in self.requested_data 16697 16698 network_to_instances = None 16699 network_to_groups = None 16700 16701 # For NETQ_GROUP, we need to map network->[groups] 16702 if do_groups: 16703 all_groups = lu.cfg.GetAllNodeGroupsInfo() 16704 network_to_groups = dict((uuid, []) for uuid in network_uuids) 16705 for _, group in all_groups.iteritems(): 16706 for net_uuid in network_uuids: 16707 netparams = group.networks.get(net_uuid, None) 16708 if netparams: 16709 info = (group.name, netparams[constants.NIC_MODE], 16710 netparams[constants.NIC_LINK]) 16711 16712 network_to_groups[net_uuid].append(info) 16713 16714 if do_instances: 16715 all_instances = lu.cfg.GetAllInstancesInfo() 16716 network_to_instances = dict((uuid, []) for uuid in network_uuids) 16717 for instance in all_instances.values(): 16718 for nic in instance.nics: 16719 if nic.network in network_uuids: 16720 network_to_instances[nic.network].append(instance.name) 16721 break 16722 16723 if query.NETQ_STATS in self.requested_data: 16724 stats = \ 16725 dict((uuid, 16726 self._GetStats(network.AddressPool(all_networks[uuid]))) 16727 for uuid in network_uuids) 16728 else: 16729 stats = None 16730 16731 return query.NetworkQueryData([all_networks[uuid] 16732 for uuid in network_uuids], 16733 network_to_groups, 16734 network_to_instances, 16735 stats)
16736 16737 @staticmethod
16738 - def _GetStats(pool):
16739 """Returns statistics for a network address pool. 16740 16741 """ 16742 return { 16743 "free_count": pool.GetFreeCount(), 16744 "reserved_count": pool.GetReservedCount(), 16745 "map": pool.GetMap(), 16746 "external_reservations": 16747 utils.CommaJoin(pool.GetExternalReservations()), 16748 }
16749
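A possible return value of _GetStats above for a small pool; the numbers and the exact map characters are made up, since the map string comes from network.AddressPool.GetMap():

    # Hypothetical statistics for an 8-address pool with 5 reservations:
    # {
    #   "free_count": 3,
    #   "reserved_count": 5,
    #   "map": "XX...XXX",
    #   "external_reservations": "10.0.0.0, 10.0.0.1, 10.0.0.7",
    # }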
16750 16751 -class LUNetworkQuery(NoHooksLU):
16752 """Logical unit for querying networks. 16753 16754 """ 16755 REQ_BGL = False 16756
16757 - def CheckArguments(self):
16758 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names), 16759 self.op.output_fields, self.op.use_locking)
16760
16761 - def ExpandNames(self):
16762 self.nq.ExpandNames(self)
16763
16764 - def Exec(self, feedback_fn):
16765 return self.nq.OldStyleQuery(self)
16766
16767 16768 -class LUNetworkConnect(LogicalUnit):
16769 """Connect a network to a nodegroup 16770 16771 """ 16772 HPATH = "network-connect" 16773 HTYPE = constants.HTYPE_NETWORK 16774 REQ_BGL = False 16775
16776 - def ExpandNames(self):
16777 self.network_name = self.op.network_name 16778 self.group_name = self.op.group_name 16779 self.network_mode = self.op.network_mode 16780 self.network_link = self.op.network_link 16781 16782 self.network_uuid = self.cfg.LookupNetwork(self.network_name) 16783 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name) 16784 16785 self.needed_locks = { 16786 locking.LEVEL_INSTANCE: [], 16787 locking.LEVEL_NODEGROUP: [self.group_uuid], 16788 } 16789 self.share_locks[locking.LEVEL_INSTANCE] = 1 16790 16791 if self.op.conflicts_check: 16792 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid] 16793 self.share_locks[locking.LEVEL_NETWORK] = 1
16794
16795 - def DeclareLocks(self, level):
16796 if level == locking.LEVEL_INSTANCE: 16797 assert not self.needed_locks[locking.LEVEL_INSTANCE] 16798 16799 # Lock instances optimistically, needs verification once group lock has 16800 # been acquired 16801 if self.op.conflicts_check: 16802 self.needed_locks[locking.LEVEL_INSTANCE] = \ 16803 self.cfg.GetNodeGroupInstances(self.group_uuid)
16804
16805 - def BuildHooksEnv(self):
16806 ret = { 16807 "GROUP_NAME": self.group_name, 16808 "GROUP_NETWORK_MODE": self.network_mode, 16809 "GROUP_NETWORK_LINK": self.network_link, 16810 } 16811 return ret
16812
16813 - def BuildHooksNodes(self):
16814 nodes = self.cfg.GetNodeGroup(self.group_uuid).members 16815 return (nodes, nodes)
16816
16817 - def CheckPrereq(self):
16818 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 16819 16820 assert self.group_uuid in owned_groups 16821 16822 # Check if locked instances are still correct 16823 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 16824 if self.op.conflicts_check: 16825 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 16826 16827 self.netparams = { 16828 constants.NIC_MODE: self.network_mode, 16829 constants.NIC_LINK: self.network_link, 16830 } 16831 objects.NIC.CheckParameterSyntax(self.netparams) 16832 16833 self.group = self.cfg.GetNodeGroup(self.group_uuid) 16834 #if self.network_mode == constants.NIC_MODE_BRIDGED: 16835 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid) 16836 self.connected = False 16837 if self.network_uuid in self.group.networks: 16838 self.LogWarning("Network '%s' is already mapped to group '%s'" % 16839 (self.network_name, self.group.name)) 16840 self.connected = True 16841 16842 # check only if not already connected 16843 elif self.op.conflicts_check: 16844 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid)) 16845 16846 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip), 16847 "connect to", owned_instances)
16848
16849 - def Exec(self, feedback_fn):
16850 # Connect the network and update the group only if not already connected 16851 if not self.connected: 16852 self.group.networks[self.network_uuid] = self.netparams 16853 self.cfg.Update(self.group, feedback_fn)
16854
16855 16856 -def _NetworkConflictCheck(lu, check_fn, action, instances):
16857 """Checks for network interface conflicts with a network. 16858 16859 @type lu: L{LogicalUnit} 16860 @type check_fn: callable receiving one parameter (L{objects.NIC}) and 16861 returning boolean 16862 @param check_fn: Function checking for conflict 16863 @type action: string 16864 @param action: Part of error message (see code) 16865 @raise errors.OpPrereqError: If conflicting IP addresses are found. 16866 16867 """ 16868 conflicts = [] 16869 16870 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances): 16871 instconflicts = [(idx, nic.ip) 16872 for (idx, nic) in enumerate(instance.nics) 16873 if check_fn(nic)] 16874 16875 if instconflicts: 16876 conflicts.append((instance.name, instconflicts)) 16877 16878 if conflicts: 16879 lu.LogWarning("IP addresses from network '%s', which is about to %s" 16880 " node group '%s', are in use: %s" % 16881 (lu.network_name, action, lu.group.name, 16882 utils.CommaJoin(("%s: %s" % 16883 (name, _FmtNetworkConflict(details))) 16884 for (name, details) in conflicts))) 16885 16886 raise errors.OpPrereqError("Conflicting IP addresses found; " 16887 " remove/modify the corresponding network" 16888 " interfaces", errors.ECODE_STATE)
16889
16890 16891 -def _FmtNetworkConflict(details):
16892 """Utility for L{_NetworkConflictCheck}. 16893 16894 """ 16895 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr) 16896 for (idx, ipaddr) in details)
16897
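For example, given the (NIC index, IP) pairs collected per instance by _NetworkConflictCheck, the formatter above yields a compact summary (assuming utils.CommaJoin joins with a comma and a space):

    # _FmtNetworkConflict([(0, "192.0.2.10"), (2, "192.0.2.12")])
    # -> "nic0/192.0.2.10, nic2/192.0.2.12"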
16898 16899 -class LUNetworkDisconnect(LogicalUnit):
16900 """Disconnect a network to a nodegroup 16901 16902 """ 16903 HPATH = "network-disconnect" 16904 HTYPE = constants.HTYPE_NETWORK 16905 REQ_BGL = False 16906
16907 - def ExpandNames(self):
16908 self.network_name = self.op.network_name 16909 self.group_name = self.op.group_name 16910 16911 self.network_uuid = self.cfg.LookupNetwork(self.network_name) 16912 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name) 16913 16914 self.needed_locks = { 16915 locking.LEVEL_INSTANCE: [], 16916 locking.LEVEL_NODEGROUP: [self.group_uuid], 16917 } 16918 self.share_locks[locking.LEVEL_INSTANCE] = 1
16919
16920 - def DeclareLocks(self, level):
16921 if level == locking.LEVEL_INSTANCE: 16922 assert not self.needed_locks[locking.LEVEL_INSTANCE] 16923 16924 # Lock instances optimistically, needs verification once group lock has 16925 # been acquired 16926 self.needed_locks[locking.LEVEL_INSTANCE] = \ 16927 self.cfg.GetNodeGroupInstances(self.group_uuid)
16928
16929 - def BuildHooksEnv(self):
16930 ret = { 16931 "GROUP_NAME": self.group_name, 16932 } 16933 return ret
16934
16935 - def BuildHooksNodes(self):
16936 nodes = self.cfg.GetNodeGroup(self.group_uuid).members 16937 return (nodes, nodes)
16938
16939 - def CheckPrereq(self):
16940 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 16941 16942 assert self.group_uuid in owned_groups 16943 16944 # Check if locked instances are still correct 16945 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 16946 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 16947 16948 self.group = self.cfg.GetNodeGroup(self.group_uuid) 16949 self.connected = True 16950 if self.network_uuid not in self.group.networks: 16951 self.LogWarning("Network '%s' is not mapped to group '%s'", 16952 self.network_name, self.group.name) 16953 self.connected = False 16954 16955 # We need this check only if network is not already connected 16956 else: 16957 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid, 16958 "disconnect from", owned_instances)
16959
16960 - def Exec(self, feedback_fn):
16961 # Disconnect the network and update the group only if network is connected 16962 if self.connected: 16963 del self.group.networks[self.network_uuid] 16964 self.cfg.Update(self.group, feedback_fn)
16965 16966 16967 #: Query type implementations 16968 _QUERY_IMPL = { 16969 constants.QR_CLUSTER: _ClusterQuery, 16970 constants.QR_INSTANCE: _InstanceQuery, 16971 constants.QR_NODE: _NodeQuery, 16972 constants.QR_GROUP: _GroupQuery, 16973 constants.QR_NETWORK: _NetworkQuery, 16974 constants.QR_OS: _OsQuery, 16975 constants.QR_EXTSTORAGE: _ExtStorageQuery, 16976 constants.QR_EXPORT: _ExportQuery, 16977 } 16978 16979 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16980 16981 16982 -def _GetQueryImplementation(name):
16983 """Returns the implemtnation for a query type. 16984 16985 @param name: Query type, must be one of L{constants.QR_VIA_OP} 16986 16987 """ 16988 try: 16989 return _QUERY_IMPL[name] 16990 except KeyError: 16991 raise errors.OpPrereqError("Unknown query resource '%s'" % name, 16992 errors.ECODE_INVAL)
16993
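A usage sketch of the lookup above, tying it back to the _QUERY_IMPL table defined just before it:

    # Resolves a query resource constant to its _QueryBase implementation;
    # unknown names raise errors.OpPrereqError with errors.ECODE_INVAL.
    query_cls = _GetQueryImplementation(constants.QR_NETWORK)
    assert query_cls is _NetworkQuery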
16994 16995 -def _CheckForConflictingIp(lu, ip, node):
16996 """In case of conflicting IP address raise error. 16997 16998 @type ip: string 16999 @param ip: IP address 17000 @type node: string 17001 @param node: node name 17002 17003 """ 17004 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node) 17005 if conf_net is not None: 17006 raise errors.OpPrereqError(("The requested IP address (%s) belongs to" 17007 " network %s, but the target NIC does not." % 17008 (ip, conf_net)), 17009 errors.ECODE_STATE) 17010 17011 return (None, None)
17012