Package ganeti :: Module cmdlib
[hide private]
[frames] | no frames]

Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import logging 
   36  import copy 
   37  import OpenSSL 
   38  import socket 
   39  import tempfile 
   40  import shutil 
   41  import itertools 
   42  import operator 
   43   
   44  from ganeti import ssh 
   45  from ganeti import utils 
   46  from ganeti import errors 
   47  from ganeti import hypervisor 
   48  from ganeti import locking 
   49  from ganeti import constants 
   50  from ganeti import objects 
   51  from ganeti import serializer 
   52  from ganeti import ssconf 
   53  from ganeti import uidpool 
   54  from ganeti import compat 
   55  from ganeti import masterd 
   56  from ganeti import netutils 
   57  from ganeti import query 
   58  from ganeti import qlang 
   59  from ganeti import opcodes 
   60  from ganeti import ht 
   61  from ganeti import rpc 
   62  from ganeti import runtime 
   63   
   64  import ganeti.masterd.instance # pylint: disable=W0611 
   65   
   66   
   67  #: Size of DRBD meta block device 
   68  DRBD_META_SIZE = 128 
   69   
   70  # States of instance 
   71  INSTANCE_DOWN = [constants.ADMINST_DOWN] 
   72  INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP] 
   73  INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE] 
   74   
   75  #: Instance status in which an instance can be marked as offline/online 
   76  CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([ 
   77    constants.ADMINST_OFFLINE, 
   78    ])) 
79 80 81 -class ResultWithJobs:
82 """Data container for LU results with jobs. 83 84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized 85 by L{mcpu._ProcessResult}. The latter will then submit the jobs 86 contained in the C{jobs} attribute and include the job IDs in the opcode 87 result. 88 89 """
90 - def __init__(self, jobs, **kwargs):
91 """Initializes this class. 92 93 Additional return values can be specified as keyword arguments. 94 95 @type jobs: list of lists of L{opcode.OpCode} 96 @param jobs: A list of lists of opcode objects 97 98 """ 99 self.jobs = jobs 100 self.other = kwargs
101
102 103 -class LogicalUnit(object):
104 """Logical Unit base class. 105 106 Subclasses must follow these rules: 107 - implement ExpandNames 108 - implement CheckPrereq (except when tasklets are used) 109 - implement Exec (except when tasklets are used) 110 - implement BuildHooksEnv 111 - implement BuildHooksNodes 112 - redefine HPATH and HTYPE 113 - optionally redefine their run requirements: 114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 115 116 Note that all commands require root permissions. 117 118 @ivar dry_run_result: the value (if any) that will be returned to the caller 119 in dry-run mode (signalled by opcode dry_run parameter) 120 121 """ 122 HPATH = None 123 HTYPE = None 124 REQ_BGL = True 125
126 - def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit. 128 129 This needs to be overridden in derived classes in order to check op 130 validity. 131 132 """ 133 self.proc = processor 134 self.op = op 135 self.cfg = context.cfg 136 self.glm = context.glm 137 # readability alias 138 self.owned_locks = context.glm.list_owned 139 self.context = context 140 self.rpc = rpc_runner 141 # Dicts used to declare locking needs to mcpu 142 self.needed_locks = None 143 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 144 self.add_locks = {} 145 self.remove_locks = {} 146 # Used to force good behavior when calling helper functions 147 self.recalculate_locks = {} 148 # logging 149 self.Log = processor.Log # pylint: disable=C0103 150 self.LogWarning = processor.LogWarning # pylint: disable=C0103 151 self.LogInfo = processor.LogInfo # pylint: disable=C0103 152 self.LogStep = processor.LogStep # pylint: disable=C0103 153 # support for dry-run 154 self.dry_run_result = None 155 # support for generic debug attribute 156 if (not hasattr(self.op, "debug_level") or 157 not isinstance(self.op.debug_level, int)): 158 self.op.debug_level = 0 159 160 # Tasklets 161 self.tasklets = None 162 163 # Validate opcode parameters and set defaults 164 self.op.Validate(True) 165 166 self.CheckArguments()
167
168 - def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments. 170 171 This method is for doing a simple syntactic check and ensure 172 validity of opcode parameters, without any cluster-related 173 checks. While the same can be accomplished in ExpandNames and/or 174 CheckPrereq, doing these separate is better because: 175 176 - ExpandNames is left as as purely a lock-related function 177 - CheckPrereq is run after we have acquired locks (and possible 178 waited for them) 179 180 The function is allowed to change the self.op attribute so that 181 later methods can no longer worry about missing parameters. 182 183 """ 184 pass
185
186 - def ExpandNames(self):
187 """Expand names for this LU. 188 189 This method is called before starting to execute the opcode, and it should 190 update all the parameters of the opcode to their canonical form (e.g. a 191 short node name must be fully expanded after this method has successfully 192 completed). This way locking, hooks, logging, etc. can work correctly. 193 194 LUs which implement this method must also populate the self.needed_locks 195 member, as a dict with lock levels as keys, and a list of needed lock names 196 as values. Rules: 197 198 - use an empty dict if you don't need any lock 199 - if you don't need any lock at a particular level omit that 200 level (note that in this case C{DeclareLocks} won't be called 201 at all for that level) 202 - if you need locks at a level, but you can't calculate it in 203 this function, initialise that level with an empty list and do 204 further processing in L{LogicalUnit.DeclareLocks} (see that 205 function's docstring) 206 - don't put anything for the BGL level 207 - if you want all locks at a level use L{locking.ALL_SET} as a value 208 209 If you need to share locks (rather than acquire them exclusively) at one 210 level you can modify self.share_locks, setting a true value (usually 1) for 211 that level. By default locks are not shared. 212 213 This function can also define a list of tasklets, which then will be 214 executed in order instead of the usual LU-level CheckPrereq and Exec 215 functions, if those are not defined by the LU. 216 217 Examples:: 218 219 # Acquire all nodes and one instance 220 self.needed_locks = { 221 locking.LEVEL_NODE: locking.ALL_SET, 222 locking.LEVEL_INSTANCE: ['instance1.example.com'], 223 } 224 # Acquire just two nodes 225 self.needed_locks = { 226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 227 } 228 # Acquire no locks 229 self.needed_locks = {} # No, you can't leave it to the default value None 230 231 """ 232 # The implementation of this method is mandatory only if the new LU is 233 # concurrent, so that old LUs don't need to be changed all at the same 234 # time. 235 if self.REQ_BGL: 236 self.needed_locks = {} # Exclusive LUs don't need locks. 237 else: 238 raise NotImplementedError
239
240 - def DeclareLocks(self, level):
241 """Declare LU locking needs for a level 242 243 While most LUs can just declare their locking needs at ExpandNames time, 244 sometimes there's the need to calculate some locks after having acquired 245 the ones before. This function is called just before acquiring locks at a 246 particular level, but after acquiring the ones at lower levels, and permits 247 such calculations. It can be used to modify self.needed_locks, and by 248 default it does nothing. 249 250 This function is only called if you have something already set in 251 self.needed_locks for the level. 252 253 @param level: Locking level which is going to be locked 254 @type level: member of L{ganeti.locking.LEVELS} 255 256 """
257
258 - def CheckPrereq(self):
259 """Check prerequisites for this LU. 260 261 This method should check that the prerequisites for the execution 262 of this LU are fulfilled. It can do internode communication, but 263 it should be idempotent - no cluster or system changes are 264 allowed. 265 266 The method should raise errors.OpPrereqError in case something is 267 not fulfilled. Its return value is ignored. 268 269 This method should also update all the parameters of the opcode to 270 their canonical form if it hasn't been done by ExpandNames before. 271 272 """ 273 if self.tasklets is not None: 274 for (idx, tl) in enumerate(self.tasklets): 275 logging.debug("Checking prerequisites for tasklet %s/%s", 276 idx + 1, len(self.tasklets)) 277 tl.CheckPrereq() 278 else: 279 pass
280
281 - def Exec(self, feedback_fn):
282 """Execute the LU. 283 284 This method should implement the actual work. It should raise 285 errors.OpExecError for failures that are somewhat dealt with in 286 code, or expected. 287 288 """ 289 if self.tasklets is not None: 290 for (idx, tl) in enumerate(self.tasklets): 291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 292 tl.Exec(feedback_fn) 293 else: 294 raise NotImplementedError
295
296 - def BuildHooksEnv(self):
297 """Build hooks environment for this LU. 298 299 @rtype: dict 300 @return: Dictionary containing the environment that will be used for 301 running the hooks for this LU. The keys of the dict must not be prefixed 302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner 303 will extend the environment with additional variables. If no environment 304 should be defined, an empty dictionary should be returned (not C{None}). 305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 306 will not be called. 307 308 """ 309 raise NotImplementedError
310
311 - def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks. 313 314 @rtype: tuple; (list, list) 315 @return: Tuple containing a list of node names on which the hook 316 should run before the execution and a list of node names on which the 317 hook should run after the execution. No nodes should be returned as an 318 empty list (and not None). 319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 320 will not be called. 321 322 """ 323 raise NotImplementedError
324
325 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks. 327 328 This method is called every time a hooks phase is executed, and notifies 329 the Logical Unit about the hooks' result. The LU can then use it to alter 330 its result based on the hooks. By default the method does nothing and the 331 previous result is passed back unchanged but any LU can define it if it 332 wants to use the local cluster hook-scripts somehow. 333 334 @param phase: one of L{constants.HOOKS_PHASE_POST} or 335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 336 @param hook_results: the results of the multi-node hooks rpc call 337 @param feedback_fn: function used send feedback back to the caller 338 @param lu_result: the previous Exec result this LU had, or None 339 in the PRE phase 340 @return: the new Exec result, based on the previous result 341 and hook results 342 343 """ 344 # API must be kept, thus we ignore the unused argument and could 345 # be a function warnings 346 # pylint: disable=W0613,R0201 347 return lu_result
348
349 - def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance. 351 352 Many LUs that work on an instance take its name in self.op.instance_name 353 and need to expand it and then declare the expanded name for locking. This 354 function does it, and then updates self.op.instance_name to the expanded 355 name. It also initializes needed_locks as a dict, if this hasn't been done 356 before. 357 358 """ 359 if self.needed_locks is None: 360 self.needed_locks = {} 361 else: 362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 363 "_ExpandAndLockInstance called with instance-level locks set" 364 self.op.instance_name = _ExpandInstanceName(self.cfg, 365 self.op.instance_name) 366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367
368 - def _LockInstancesNodes(self, primary_only=False, 369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking. 371 372 This function should be called after locking one or more instances to lock 373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 374 with all primary or secondary nodes for instances already locked and 375 present in self.needed_locks[locking.LEVEL_INSTANCE]. 376 377 It should be called from DeclareLocks, and for safety only works if 378 self.recalculate_locks[locking.LEVEL_NODE] is set. 379 380 In the future it may grow parameters to just lock some instance's nodes, or 381 to just lock primaries or secondary nodes, if needed. 382 383 If should be called in DeclareLocks in a way similar to:: 384 385 if level == locking.LEVEL_NODE: 386 self._LockInstancesNodes() 387 388 @type primary_only: boolean 389 @param primary_only: only lock primary nodes of locked instances 390 @param level: Which lock level to use for locking nodes 391 392 """ 393 assert level in self.recalculate_locks, \ 394 "_LockInstancesNodes helper function called with no nodes to recalculate" 395 396 # TODO: check if we're really been called with the instance locks held 397 398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 399 # future we might want to have different behaviors depending on the value 400 # of self.recalculate_locks[locking.LEVEL_NODE] 401 wanted_nodes = [] 402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE) 403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i): 404 wanted_nodes.append(instance.primary_node) 405 if not primary_only: 406 wanted_nodes.extend(instance.secondary_nodes) 407 408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE: 409 self.needed_locks[level] = wanted_nodes 410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND: 411 self.needed_locks[level].extend(wanted_nodes) 412 else: 413 raise errors.ProgrammerError("Unknown recalculation mode") 414 415 del self.recalculate_locks[level]
416
417 418 -class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks. 420 421 This LU is intended as a parent for other LogicalUnits which will 422 run no hooks, in order to reduce duplicate code. 423 424 """ 425 HPATH = None 426 HTYPE = None 427
428 - def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu. 430 431 This just raises an error. 432 433 """ 434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
435
436 - def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU. 438 439 """ 440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441
442 443 -class Tasklet:
444 """Tasklet base class. 445 446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 447 they can mix legacy code with tasklets. Locking needs to be done in the LU, 448 tasklets know nothing about locks. 449 450 Subclasses must follow these rules: 451 - Implement CheckPrereq 452 - Implement Exec 453 454 """
455 - def __init__(self, lu):
456 self.lu = lu 457 458 # Shortcuts 459 self.cfg = lu.cfg 460 self.rpc = lu.rpc
461
462 - def CheckPrereq(self):
463 """Check prerequisites for this tasklets. 464 465 This method should check whether the prerequisites for the execution of 466 this tasklet are fulfilled. It can do internode communication, but it 467 should be idempotent - no cluster or system changes are allowed. 468 469 The method should raise errors.OpPrereqError in case something is not 470 fulfilled. Its return value is ignored. 471 472 This method should also update all parameters to their canonical form if it 473 hasn't been done before. 474 475 """ 476 pass
477
478 - def Exec(self, feedback_fn):
479 """Execute the tasklet. 480 481 This method should implement the actual work. It should raise 482 errors.OpExecError for failures that are somewhat dealt with in code, or 483 expected. 484 485 """ 486 raise NotImplementedError
487
488 489 -class _QueryBase:
490 """Base for query utility classes. 491 492 """ 493 #: Attribute holding field definitions 494 FIELDS = None 495 496 #: Field to sort by 497 SORT_FIELD = "name" 498
499 - def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class. 501 502 """ 503 self.use_locking = use_locking 504 505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter, 506 namefield=self.SORT_FIELD) 507 self.requested_data = self.query.RequestedData() 508 self.names = self.query.RequestedNames() 509 510 # Sort only if no names were requested 511 self.sort_by_name = not self.names 512 513 self.do_locking = None 514 self.wanted = None
515
516 - def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query. 518 519 """ 520 if self.do_locking: 521 names = lu.owned_locks(lock_level) 522 else: 523 names = all_names 524 525 if self.wanted == locking.ALL_SET: 526 assert not self.names 527 # caller didn't specify names, so ordering is not important 528 return utils.NiceSort(names) 529 530 # caller specified names and we must keep the same order 531 assert self.names 532 assert not self.do_locking or lu.glm.is_owned(lock_level) 533 534 missing = set(self.wanted).difference(names) 535 if missing: 536 raise errors.OpExecError("Some items were removed before retrieving" 537 " their data: %s" % missing) 538 539 # Return expanded names 540 return self.wanted
541
542 - def ExpandNames(self, lu):
543 """Expand names for this query. 544 545 See L{LogicalUnit.ExpandNames}. 546 547 """ 548 raise NotImplementedError()
549
550 - def DeclareLocks(self, lu, level):
551 """Declare locks for this query. 552 553 See L{LogicalUnit.DeclareLocks}. 554 555 """ 556 raise NotImplementedError()
557
558 - def _GetQueryData(self, lu):
559 """Collects all data for this query. 560 561 @return: Query data object 562 563 """ 564 raise NotImplementedError()
565
566 - def NewStyleQuery(self, lu):
567 """Collect data and execute query. 568 569 """ 570 return query.GetQueryResponse(self.query, self._GetQueryData(lu), 571 sort_by_name=self.sort_by_name)
572
573 - def OldStyleQuery(self, lu):
574 """Collect data and execute query. 575 576 """ 577 return self.query.OldStyleQuery(self._GetQueryData(lu), 578 sort_by_name=self.sort_by_name)
579
580 581 -def _ShareAll():
582 """Returns a dict declaring all lock levels shared. 583 584 """ 585 return dict.fromkeys(locking.LEVELS, 1)
586
587 588 -def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}. 590 591 Converts the data into a single dictionary. This is fine for most use cases, 592 but some require information from more than one volume group or hypervisor. 593 594 """ 595 (bootid, (vg_info, ), (hv_info, )) = data 596 597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), { 598 "bootid": bootid, 599 })
600
601 602 -def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method. 604 605 @param instance: The instance object 606 @type devs: List of L{objects.Disk} 607 @param devs: The root devices (not any of its children!) 608 @param cfg: The config object 609 @returns The annotated disk copies 610 @see L{rpc.AnnotateDiskParams} 611 612 """ 613 return rpc.AnnotateDiskParams(instance.disk_template, devs, 614 cfg.GetInstanceDiskParams(instance))
615
616 617 -def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes, 618 cur_group_uuid):
619 """Checks if node groups for locked instances are still correct. 620 621 @type cfg: L{config.ConfigWriter} 622 @param cfg: Cluster configuration 623 @type instances: dict; string as key, L{objects.Instance} as value 624 @param instances: Dictionary, instance name as key, instance object as value 625 @type owned_groups: iterable of string 626 @param owned_groups: List of owned groups 627 @type owned_nodes: iterable of string 628 @param owned_nodes: List of owned nodes 629 @type cur_group_uuid: string or None 630 @param cur_group_uuid: Optional group UUID to check against instance's groups 631 632 """ 633 for (name, inst) in instances.items(): 634 assert owned_nodes.issuperset(inst.all_nodes), \ 635 "Instance %s's nodes changed while we kept the lock" % name 636 637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups) 638 639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \ 640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
641
642 643 -def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance. 645 646 @type cfg: L{config.ConfigWriter} 647 @param cfg: The cluster configuration 648 @type instance_name: string 649 @param instance_name: Instance name 650 @type owned_groups: set or frozenset 651 @param owned_groups: List of currently owned node groups 652 653 """ 654 inst_groups = cfg.GetInstanceNodeGroups(instance_name) 655 656 if not owned_groups.issuperset(inst_groups): 657 raise errors.OpPrereqError("Instance %s's node groups changed since" 658 " locks were acquired, current groups are" 659 " are '%s', owning groups '%s'; retry the" 660 " operation" % 661 (instance_name, 662 utils.CommaJoin(inst_groups), 663 utils.CommaJoin(owned_groups)), 664 errors.ECODE_STATE) 665 666 return inst_groups
667
668 669 -def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct. 671 672 @type cfg: L{config.ConfigWriter} 673 @param cfg: The cluster configuration 674 @type group_uuid: string 675 @param group_uuid: Node group UUID 676 @type owned_instances: set or frozenset 677 @param owned_instances: List of currently owned instances 678 679 """ 680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid) 681 if owned_instances != wanted_instances: 682 raise errors.OpPrereqError("Instances in node group '%s' changed since" 683 " locks were acquired, wanted '%s', have '%s';" 684 " retry the operation" % 685 (group_uuid, 686 utils.CommaJoin(wanted_instances), 687 utils.CommaJoin(owned_instances)), 688 errors.ECODE_STATE) 689 690 return wanted_instances
691
692 693 -def _SupportsOob(cfg, node):
694 """Tells if node supports OOB. 695 696 @type cfg: L{config.ConfigWriter} 697 @param cfg: The cluster configuration 698 @type node: L{objects.Node} 699 @param node: The node 700 @return: The OOB script if supported or an empty string otherwise 701 702 """ 703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
704
705 706 -def _CopyLockList(names):
707 """Makes a copy of a list of lock names. 708 709 Handles L{locking.ALL_SET} correctly. 710 711 """ 712 if names == locking.ALL_SET: 713 return locking.ALL_SET 714 else: 715 return names[:]
716
717 718 -def _GetWantedNodes(lu, nodes):
719 """Returns list of checked and expanded node names. 720 721 @type lu: L{LogicalUnit} 722 @param lu: the logical unit on whose behalf we execute 723 @type nodes: list 724 @param nodes: list of node names or None for all nodes 725 @rtype: list 726 @return: the list of nodes, sorted 727 @raise errors.ProgrammerError: if the nodes parameter is wrong type 728 729 """ 730 if nodes: 731 return [_ExpandNodeName(lu.cfg, name) for name in nodes] 732 733 return utils.NiceSort(lu.cfg.GetNodeList())
734
735 736 -def _GetWantedInstances(lu, instances):
737 """Returns list of checked and expanded instance names. 738 739 @type lu: L{LogicalUnit} 740 @param lu: the logical unit on whose behalf we execute 741 @type instances: list 742 @param instances: list of instance names or None for all instances 743 @rtype: list 744 @return: the list of instances, sorted 745 @raise errors.OpPrereqError: if the instances parameter is wrong type 746 @raise errors.OpPrereqError: if any of the passed instances is not found 747 748 """ 749 if instances: 750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 751 else: 752 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 753 return wanted
754
755 756 -def _GetUpdatedParams(old_params, update_dict, 757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary. 759 760 @type old_params: dict 761 @param old_params: old parameters 762 @type update_dict: dict 763 @param update_dict: dict containing new parameter values, or 764 constants.VALUE_DEFAULT to reset the parameter to its default 765 value 766 @param use_default: boolean 767 @type use_default: whether to recognise L{constants.VALUE_DEFAULT} 768 values as 'to be deleted' values 769 @param use_none: boolean 770 @type use_none: whether to recognise C{None} values as 'to be 771 deleted' values 772 @rtype: dict 773 @return: the new parameter dictionary 774 775 """ 776 params_copy = copy.deepcopy(old_params) 777 for key, val in update_dict.iteritems(): 778 if ((use_default and val == constants.VALUE_DEFAULT) or 779 (use_none and val is None)): 780 try: 781 del params_copy[key] 782 except KeyError: 783 pass 784 else: 785 params_copy[key] = val 786 return params_copy
787
788 789 -def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy. 791 792 @param group_policy: whether this policy applies to a group and thus 793 we should support removal of policy entries 794 795 """ 796 use_none = use_default = group_policy 797 ipolicy = copy.deepcopy(old_ipolicy) 798 for key, value in new_ipolicy.items(): 799 if key not in constants.IPOLICY_ALL_KEYS: 800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key, 801 errors.ECODE_INVAL) 802 if key in constants.IPOLICY_ISPECS: 803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES) 804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value, 805 use_none=use_none, 806 use_default=use_default) 807 else: 808 if (not value or value == [constants.VALUE_DEFAULT] or 809 value == constants.VALUE_DEFAULT): 810 if group_policy: 811 del ipolicy[key] 812 else: 813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'" 814 " on the cluster'" % key, 815 errors.ECODE_INVAL) 816 else: 817 if key in constants.IPOLICY_PARAMETERS: 818 # FIXME: we assume all such values are float 819 try: 820 ipolicy[key] = float(value) 821 except (TypeError, ValueError), err: 822 raise errors.OpPrereqError("Invalid value for attribute" 823 " '%s': '%s', error: %s" % 824 (key, value, err), errors.ECODE_INVAL) 825 else: 826 # FIXME: we assume all others are lists; this should be redone 827 # in a nicer way 828 ipolicy[key] = list(value) 829 try: 830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy) 831 except errors.ConfigurationError, err: 832 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 833 errors.ECODE_INVAL) 834 return ipolicy
835
836 837 -def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type. 839 840 @param base: The dict with the old data 841 @param updates: The dict with the new data 842 @param type_check: Dict suitable to ForceDictType to verify correct types 843 @returns: A new dict with updated and verified values 844 845 """ 846 def fn(old, value): 847 new = _GetUpdatedParams(old, value) 848 utils.ForceDictType(new, type_check) 849 return new
850 851 ret = copy.deepcopy(base) 852 ret.update(dict((key, fn(base.get(key, {}), value)) 853 for key, value in updates.items())) 854 return ret 855
856 857 -def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object 859 860 @param op_input: The input dict from the opcode 861 @param obj_input: The input dict from the objects 862 @return: The verified and updated dict 863 864 """ 865 if op_input: 866 invalid_hvs = set(op_input) - constants.HYPER_TYPES 867 if invalid_hvs: 868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:" 869 " %s" % utils.CommaJoin(invalid_hvs), 870 errors.ECODE_INVAL) 871 if obj_input is None: 872 obj_input = {} 873 type_check = constants.HVSTS_PARAMETER_TYPES 874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check) 875 876 return None
877
878 879 -def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object 881 882 @param op_input: The input dict from the opcode 883 @param obj_input: The input dict from the objects 884 @return: The verified and updated dict 885 """ 886 if op_input: 887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES 888 if invalid_dst: 889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" % 890 utils.CommaJoin(invalid_dst), 891 errors.ECODE_INVAL) 892 type_check = constants.DSS_PARAMETER_TYPES 893 if obj_input is None: 894 obj_input = {} 895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value, 896 type_check)) 897 for key, value in op_input.items()) 898 899 return None
900
901 902 -def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU. 904 905 @type lu: L{LogicalUnit} 906 @param level: Lock level 907 @type names: list or None 908 @param names: Names of locks to release 909 @type keep: list or None 910 @param keep: Names of locks to retain 911 912 """ 913 assert not (keep is not None and names is not None), \ 914 "Only one of the 'names' and the 'keep' parameters can be given" 915 916 if names is not None: 917 should_release = names.__contains__ 918 elif keep: 919 should_release = lambda name: name not in keep 920 else: 921 should_release = None 922 923 owned = lu.owned_locks(level) 924 if not owned: 925 # Not owning any lock at this level, do nothing 926 pass 927 928 elif should_release: 929 retain = [] 930 release = [] 931 932 # Determine which locks to release 933 for name in owned: 934 if should_release(name): 935 release.append(name) 936 else: 937 retain.append(name) 938 939 assert len(lu.owned_locks(level)) == (len(retain) + len(release)) 940 941 # Release just some locks 942 lu.glm.release(level, names=release) 943 944 assert frozenset(lu.owned_locks(level)) == frozenset(retain) 945 else: 946 # Release everything 947 lu.glm.release(level) 948 949 assert not lu.glm.is_owned(level), "No locks should be owned"
950
951 952 -def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name. 954 955 @type instances: list of L{objects.Instance} 956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value 957 958 """ 959 return dict(((node, vol), inst.name) 960 for inst in instances 961 for (node, vols) in inst.MapLVsByNode().items() 962 for vol in vols)
963
964 965 -def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node. 967 968 """ 969 hm = lu.proc.BuildHooksManager(lu) 970 try: 971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name]) 972 except Exception, err: # pylint: disable=W0703 973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
974
975 976 -def _CheckOutputFields(static, dynamic, selected):
977 """Checks whether all selected fields are valid. 978 979 @type static: L{utils.FieldSet} 980 @param static: static fields set 981 @type dynamic: L{utils.FieldSet} 982 @param dynamic: dynamic fields set 983 984 """ 985 f = utils.FieldSet() 986 f.Extend(static) 987 f.Extend(dynamic) 988 989 delta = f.NonMatching(selected) 990 if delta: 991 raise errors.OpPrereqError("Unknown output fields selected: %s" 992 % ",".join(delta), errors.ECODE_INVAL)
993
994 995 -def _CheckGlobalHvParams(params):
996 """Validates that given hypervisor params are not global ones. 997 998 This will ensure that instances don't get customised versions of 999 global params. 1000 1001 """ 1002 used_globals = constants.HVC_GLOBALS.intersection(params) 1003 if used_globals: 1004 msg = ("The following hypervisor parameters are global and cannot" 1005 " be customized at instance level, please modify them at" 1006 " cluster level: %s" % utils.CommaJoin(used_globals)) 1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1008
1009 1010 -def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online. 1012 1013 @param lu: the LU on behalf of which we make the check 1014 @param node: the node to check 1015 @param msg: if passed, should be a message to replace the default one 1016 @raise errors.OpPrereqError: if the node is offline 1017 1018 """ 1019 if msg is None: 1020 msg = "Can't use offline node" 1021 if lu.cfg.GetNodeInfo(node).offline: 1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1023
1024 1025 -def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained. 1027 1028 @param lu: the LU on behalf of which we make the check 1029 @param node: the node to check 1030 @raise errors.OpPrereqError: if the node is drained 1031 1032 """ 1033 if lu.cfg.GetNodeInfo(node).drained: 1034 raise errors.OpPrereqError("Can't use drained node %s" % node, 1035 errors.ECODE_STATE)
1036
1037 1038 -def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable. 1040 1041 @param lu: the LU on behalf of which we make the check 1042 @param node: the node to check 1043 @raise errors.OpPrereqError: if the node is not vm capable 1044 1045 """ 1046 if not lu.cfg.GetNodeInfo(node).vm_capable: 1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 1048 errors.ECODE_STATE)
1049
1050 1051 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
1052 """Ensure that a node supports a given OS. 1053 1054 @param lu: the LU on behalf of which we make the check 1055 @param node: the node to check 1056 @param os_name: the OS to query about 1057 @param force_variant: whether to ignore variant errors 1058 @raise errors.OpPrereqError: if the node is not supporting the OS 1059 1060 """ 1061 result = lu.rpc.call_os_get(node, os_name) 1062 result.Raise("OS '%s' not in supported OS list for node %s" % 1063 (os_name, node), 1064 prereq=True, ecode=errors.ECODE_INVAL) 1065 if not force_variant: 1066 _CheckOSVariant(result.payload, os_name)
1067
1068 1069 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1070 """Ensure that a node has the given secondary ip. 1071 1072 @type lu: L{LogicalUnit} 1073 @param lu: the LU on behalf of which we make the check 1074 @type node: string 1075 @param node: the node to check 1076 @type secondary_ip: string 1077 @param secondary_ip: the ip to check 1078 @type prereq: boolean 1079 @param prereq: whether to throw a prerequisite or an execute error 1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 1082 1083 """ 1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 1085 result.Raise("Failure checking secondary ip on node %s" % node, 1086 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1087 if not result.payload: 1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 1089 " please fix and re-run this command" % secondary_ip) 1090 if prereq: 1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 1092 else: 1093 raise errors.OpExecError(msg)
1094
1095 1096 -def _GetClusterDomainSecret():
1097 """Reads the cluster domain secret. 1098 1099 """ 1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 1101 strict=True)
1102
1103 1104 -def _CheckInstanceState(lu, instance, req_states, msg=None):
1105 """Ensure that an instance is in one of the required states. 1106 1107 @param lu: the LU on behalf of which we make the check 1108 @param instance: the instance to check 1109 @param msg: if passed, should be a message to replace the default one 1110 @raise errors.OpPrereqError: if the instance is not in the required state 1111 1112 """ 1113 if msg is None: 1114 msg = "can't use instance from outside %s states" % ", ".join(req_states) 1115 if instance.admin_state not in req_states: 1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" % 1117 (instance.name, instance.admin_state, msg), 1118 errors.ECODE_STATE) 1119 1120 if constants.ADMINST_UP not in req_states: 1121 pnode = instance.primary_node 1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 1123 ins_l.Raise("Can't contact node %s for instance information" % pnode, 1124 prereq=True, ecode=errors.ECODE_ENVIRON) 1125 1126 if instance.name in ins_l.payload: 1127 raise errors.OpPrereqError("Instance %s is running, %s" % 1128 (instance.name, msg), errors.ECODE_STATE)
1129
1130 1131 -def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1132 """Computes if value is in the desired range. 1133 1134 @param name: name of the parameter for which we perform the check 1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1', 1136 not just 'disk') 1137 @param ipolicy: dictionary containing min, max and std values 1138 @param value: actual value that we want to use 1139 @return: None or element not meeting the criteria 1140 1141 1142 """ 1143 if value in [None, constants.VALUE_AUTO]: 1144 return None 1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value) 1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value) 1147 if value > max_v or min_v > value: 1148 if qualifier: 1149 fqn = "%s/%s" % (name, qualifier) 1150 else: 1151 fqn = name 1152 return ("%s value %s is not in range [%s, %s]" % 1153 (fqn, value, min_v, max_v)) 1154 return None
1155
1156 1157 -def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, 1158 nic_count, disk_sizes, spindle_use, 1159 _compute_fn=_ComputeMinMaxSpec):
1160 """Verifies ipolicy against provided specs. 1161 1162 @type ipolicy: dict 1163 @param ipolicy: The ipolicy 1164 @type mem_size: int 1165 @param mem_size: The memory size 1166 @type cpu_count: int 1167 @param cpu_count: Used cpu cores 1168 @type disk_count: int 1169 @param disk_count: Number of disks used 1170 @type nic_count: int 1171 @param nic_count: Number of nics used 1172 @type disk_sizes: list of ints 1173 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count}) 1174 @type spindle_use: int 1175 @param spindle_use: The number of spindles this instance uses 1176 @param _compute_fn: The compute function (unittest only) 1177 @return: A list of violations, or an empty list of no violations are found 1178 1179 """ 1180 assert disk_count == len(disk_sizes) 1181 1182 test_settings = [ 1183 (constants.ISPEC_MEM_SIZE, "", mem_size), 1184 (constants.ISPEC_CPU_COUNT, "", cpu_count), 1185 (constants.ISPEC_DISK_COUNT, "", disk_count), 1186 (constants.ISPEC_NIC_COUNT, "", nic_count), 1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use), 1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d) 1189 for idx, d in enumerate(disk_sizes)] 1190 1191 return filter(None, 1192 (_compute_fn(name, qualifier, ipolicy, value) 1193 for (name, qualifier, value) in test_settings))
1194
1195 1196 -def _ComputeIPolicyInstanceViolation(ipolicy, instance, 1197 _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance meets the specs of ipolicy. 1199 1200 @type ipolicy: dict 1201 @param ipolicy: The ipolicy to verify against 1202 @type instance: L{objects.Instance} 1203 @param instance: The instance to verify 1204 @param _compute_fn: The function to verify ipolicy (unittest only) 1205 @see: L{_ComputeIPolicySpecViolation} 1206 1207 """ 1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None) 1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None) 1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None) 1211 disk_count = len(instance.disks) 1212 disk_sizes = [disk.size for disk in instance.disks] 1213 nic_count = len(instance.nics) 1214 1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1216 disk_sizes, spindle_use)
1217
1218 1219 -def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec, 1220 _compute_fn=_ComputeIPolicySpecViolation):
1221 """Compute if instance specs meets the specs of ipolicy. 1222 1223 @type ipolicy: dict 1224 @param ipolicy: The ipolicy to verify against 1225 @param instance_spec: dict 1226 @param instance_spec: The instance spec to verify 1227 @param _compute_fn: The function to verify ipolicy (unittest only) 1228 @see: L{_ComputeIPolicySpecViolation} 1229 1230 """ 1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None) 1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None) 1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0) 1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, []) 1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0) 1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None) 1237 1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1239 disk_sizes, spindle_use)
1240
1241 1242 -def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group, 1243 target_group, 1244 _compute_fn=_ComputeIPolicyInstanceViolation):
1245 """Compute if instance meets the specs of the new target group. 1246 1247 @param ipolicy: The ipolicy to verify 1248 @param instance: The instance object to verify 1249 @param current_group: The current group of the instance 1250 @param target_group: The new group of the instance 1251 @param _compute_fn: The function to verify ipolicy (unittest only) 1252 @see: L{_ComputeIPolicySpecViolation} 1253 1254 """ 1255 if current_group == target_group: 1256 return [] 1257 else: 1258 return _compute_fn(ipolicy, instance)
1259
1260 1261 -def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False, 1262 _compute_fn=_ComputeIPolicyNodeViolation):
1263 """Checks that the target node is correct in terms of instance policy. 1264 1265 @param ipolicy: The ipolicy to verify 1266 @param instance: The instance object to verify 1267 @param node: The new node to relocate 1268 @param ignore: Ignore violations of the ipolicy 1269 @param _compute_fn: The function to verify ipolicy (unittest only) 1270 @see: L{_ComputeIPolicySpecViolation} 1271 1272 """ 1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node) 1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group) 1275 1276 if res: 1277 msg = ("Instance does not meet target node group's (%s) instance" 1278 " policy: %s") % (node.group, utils.CommaJoin(res)) 1279 if ignore: 1280 lu.LogWarning(msg) 1281 else: 1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1283
1284 1285 -def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1286 """Computes a set of any instances that would violate the new ipolicy. 1287 1288 @param old_ipolicy: The current (still in-place) ipolicy 1289 @param new_ipolicy: The new (to become) ipolicy 1290 @param instances: List of instances to verify 1291 @return: A list of instances which violates the new ipolicy but 1292 did not before 1293 1294 """ 1295 return (_ComputeViolatingInstances(new_ipolicy, instances) - 1296 _ComputeViolatingInstances(old_ipolicy, instances))
1297
1298 1299 -def _ExpandItemName(fn, name, kind):
1300 """Expand an item name. 1301 1302 @param fn: the function to use for expansion 1303 @param name: requested item name 1304 @param kind: text description ('Node' or 'Instance') 1305 @return: the resolved (full) name 1306 @raise errors.OpPrereqError: if the item is not found 1307 1308 """ 1309 full_name = fn(name) 1310 if full_name is None: 1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 1312 errors.ECODE_NOENT) 1313 return full_name
1314
1315 1316 -def _ExpandNodeName(cfg, name):
1317 """Wrapper over L{_ExpandItemName} for nodes.""" 1318 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1319
1320 1321 -def _ExpandInstanceName(cfg, name):
1322 """Wrapper over L{_ExpandItemName} for instance.""" 1323 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1324
1325 1326 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 1327 minmem, maxmem, vcpus, nics, disk_template, disks, 1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks 1330 1331 This builds the hook environment from individual variables. 1332 1333 @type name: string 1334 @param name: the name of the instance 1335 @type primary_node: string 1336 @param primary_node: the name of the instance's primary node 1337 @type secondary_nodes: list 1338 @param secondary_nodes: list of secondary nodes as strings 1339 @type os_type: string 1340 @param os_type: the name of the instance's OS 1341 @type status: string 1342 @param status: the desired status of the instance 1343 @type minmem: string 1344 @param minmem: the minimum memory size of the instance 1345 @type maxmem: string 1346 @param maxmem: the maximum memory size of the instance 1347 @type vcpus: string 1348 @param vcpus: the count of VCPUs the instance has 1349 @type nics: list 1350 @param nics: list of tuples (ip, mac, mode, link) representing 1351 the NICs the instance has 1352 @type disk_template: string 1353 @param disk_template: the disk template of the instance 1354 @type disks: list 1355 @param disks: the list of (size, mode) pairs 1356 @type bep: dict 1357 @param bep: the backend parameters for the instance 1358 @type hvp: dict 1359 @param hvp: the hypervisor parameters for the instance 1360 @type hypervisor_name: string 1361 @param hypervisor_name: the hypervisor for the instance 1362 @type tags: list 1363 @param tags: list of instance tags as strings 1364 @rtype: dict 1365 @return: the hook environment for this instance 1366 1367 """ 1368 env = { 1369 "OP_TARGET": name, 1370 "INSTANCE_NAME": name, 1371 "INSTANCE_PRIMARY": primary_node, 1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 1373 "INSTANCE_OS_TYPE": os_type, 1374 "INSTANCE_STATUS": status, 1375 "INSTANCE_MINMEM": minmem, 1376 "INSTANCE_MAXMEM": maxmem, 1377 # TODO(2.7) remove deprecated "memory" value 1378 "INSTANCE_MEMORY": maxmem, 1379 "INSTANCE_VCPUS": vcpus, 1380 "INSTANCE_DISK_TEMPLATE": disk_template, 1381 "INSTANCE_HYPERVISOR": hypervisor_name, 1382 } 1383 if nics: 1384 nic_count = len(nics) 1385 for idx, (ip, mac, mode, link) in enumerate(nics): 1386 if ip is None: 1387 ip = "" 1388 env["INSTANCE_NIC%d_IP" % idx] = ip 1389 env["INSTANCE_NIC%d_MAC" % idx] = mac 1390 env["INSTANCE_NIC%d_MODE" % idx] = mode 1391 env["INSTANCE_NIC%d_LINK" % idx] = link 1392 if mode == constants.NIC_MODE_BRIDGED: 1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 1394 else: 1395 nic_count = 0 1396 1397 env["INSTANCE_NIC_COUNT"] = nic_count 1398 1399 if disks: 1400 disk_count = len(disks) 1401 for idx, (size, mode) in enumerate(disks): 1402 env["INSTANCE_DISK%d_SIZE" % idx] = size 1403 env["INSTANCE_DISK%d_MODE" % idx] = mode 1404 else: 1405 disk_count = 0 1406 1407 env["INSTANCE_DISK_COUNT"] = disk_count 1408 1409 if not tags: 1410 tags = [] 1411 1412 env["INSTANCE_TAGS"] = " ".join(tags) 1413 1414 for source, kind in [(bep, "BE"), (hvp, "HV")]: 1415 for key, value in source.items(): 1416 env["INSTANCE_%s_%s" % (kind, key)] = value 1417 1418 return env
1419
1420 1421 -def _NICListToTuple(lu, nics):
1422 """Build a list of nic information tuples. 1423 1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 1425 value in LUInstanceQueryData. 1426 1427 @type lu: L{LogicalUnit} 1428 @param lu: the logical unit on whose behalf we execute 1429 @type nics: list of L{objects.NIC} 1430 @param nics: list of nics to convert to hooks tuples 1431 1432 """ 1433 hooks_nics = [] 1434 cluster = lu.cfg.GetClusterInfo() 1435 for nic in nics: 1436 ip = nic.ip 1437 mac = nic.mac 1438 filled_params = cluster.SimpleFillNIC(nic.nicparams) 1439 mode = filled_params[constants.NIC_MODE] 1440 link = filled_params[constants.NIC_LINK] 1441 hooks_nics.append((ip, mac, mode, link)) 1442 return hooks_nics
1443
1444 1445 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1446 """Builds instance related env variables for hooks from an object. 1447 1448 @type lu: L{LogicalUnit} 1449 @param lu: the logical unit on whose behalf we execute 1450 @type instance: L{objects.Instance} 1451 @param instance: the instance for which we should build the 1452 environment 1453 @type override: dict 1454 @param override: dictionary with key/values that will override 1455 our values 1456 @rtype: dict 1457 @return: the hook environment dictionary 1458 1459 """ 1460 cluster = lu.cfg.GetClusterInfo() 1461 bep = cluster.FillBE(instance) 1462 hvp = cluster.FillHV(instance) 1463 args = { 1464 "name": instance.name, 1465 "primary_node": instance.primary_node, 1466 "secondary_nodes": instance.secondary_nodes, 1467 "os_type": instance.os, 1468 "status": instance.admin_state, 1469 "maxmem": bep[constants.BE_MAXMEM], 1470 "minmem": bep[constants.BE_MINMEM], 1471 "vcpus": bep[constants.BE_VCPUS], 1472 "nics": _NICListToTuple(lu, instance.nics), 1473 "disk_template": instance.disk_template, 1474 "disks": [(disk.size, disk.mode) for disk in instance.disks], 1475 "bep": bep, 1476 "hvp": hvp, 1477 "hypervisor_name": instance.hypervisor, 1478 "tags": instance.tags, 1479 } 1480 if override: 1481 args.update(override) 1482 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1483
1484 1485 -def _AdjustCandidatePool(lu, exceptions):
1486 """Adjust the candidate pool after node operations. 1487 1488 """ 1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 1490 if mod_list: 1491 lu.LogInfo("Promoted nodes to master candidate role: %s", 1492 utils.CommaJoin(node.name for node in mod_list)) 1493 for name in mod_list: 1494 lu.context.ReaddNode(name) 1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1496 if mc_now > mc_max: 1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 1498 (mc_now, mc_max))
1499
1500 1501 -def _DecideSelfPromotion(lu, exceptions=None):
1502 """Decide whether I should promote myself as a master candidate. 1503 1504 """ 1505 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 1506 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1507 # the new node will increase mc_max with one, so: 1508 mc_should = min(mc_should + 1, cp_size) 1509 return mc_now < mc_should
1510
1511 1512 -def _CalculateGroupIPolicy(cluster, group):
1513 """Calculate instance policy for group. 1514 1515 """ 1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1517
1518 1519 -def _ComputeViolatingInstances(ipolicy, instances):
1520 """Computes a set of instances who violates given ipolicy. 1521 1522 @param ipolicy: The ipolicy to verify 1523 @type instances: object.Instance 1524 @param instances: List of instances to verify 1525 @return: A frozenset of instance names violating the ipolicy 1526 1527 """ 1528 return frozenset([inst.name for inst in instances 1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1530
1531 1532 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
1533 """Check that the brigdes needed by a list of nics exist. 1534 1535 """ 1536 cluster = lu.cfg.GetClusterInfo() 1537 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 1538 brlist = [params[constants.NIC_LINK] for params in paramslist 1539 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 1540 if brlist: 1541 result = lu.rpc.call_bridges_exist(target_node, brlist) 1542 result.Raise("Error checking bridges on destination node '%s'" % 1543 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1544
1545 1546 -def _CheckInstanceBridgesExist(lu, instance, node=None):
1547 """Check that the brigdes needed by an instance exist. 1548 1549 """ 1550 if node is None: 1551 node = instance.primary_node 1552 _CheckNicsBridgesExist(lu, instance.nics, node)
1553
1554 1555 -def _CheckOSVariant(os_obj, name):
1556 """Check whether an OS name conforms to the os variants specification. 1557 1558 @type os_obj: L{objects.OS} 1559 @param os_obj: OS object to check 1560 @type name: string 1561 @param name: OS name passed by the user, to check for validity 1562 1563 """ 1564 variant = objects.OS.GetVariant(name) 1565 if not os_obj.supported_variants: 1566 if variant: 1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'" 1568 " passed)" % (os_obj.name, variant), 1569 errors.ECODE_INVAL) 1570 return 1571 if not variant: 1572 raise errors.OpPrereqError("OS name must include a variant", 1573 errors.ECODE_INVAL) 1574 1575 if variant not in os_obj.supported_variants: 1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1577
1578 1579 -def _GetNodeInstancesInner(cfg, fn):
1580 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1581
1582 1583 -def _GetNodeInstances(cfg, node_name):
1584 """Returns a list of all primary and secondary instances on a node. 1585 1586 """ 1587 1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1589
1590 1591 -def _GetNodePrimaryInstances(cfg, node_name):
1592 """Returns primary instances on a node. 1593 1594 """ 1595 return _GetNodeInstancesInner(cfg, 1596 lambda inst: node_name == inst.primary_node)
1597
1598 1599 -def _GetNodeSecondaryInstances(cfg, node_name):
1600 """Returns secondary instances on a node. 1601 1602 """ 1603 return _GetNodeInstancesInner(cfg, 1604 lambda inst: node_name in inst.secondary_nodes)
1605
1606 1607 -def _GetStorageTypeArgs(cfg, storage_type):
1608 """Returns the arguments for a storage type. 1609 1610 """ 1611 # Special case for file storage 1612 if storage_type == constants.ST_FILE: 1613 # storage.FileStorage wants a list of storage directories 1614 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]] 1615 1616 return []
1617
1618 1619 -def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1620 faulty = [] 1621 1622 for dev in instance.disks: 1623 cfg.SetDiskID(dev, node_name) 1624 1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks, 1626 instance)) 1627 result.Raise("Failed to get disk status from node %s" % node_name, 1628 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1629 1630 for idx, bdev_status in enumerate(result.payload): 1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1632 faulty.append(idx) 1633 1634 return faulty
1635
1636 1637 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1638 """Check the sanity of iallocator and node arguments and use the 1639 cluster-wide iallocator if appropriate. 1640 1641 Check that at most one of (iallocator, node) is specified. If none is 1642 specified, then the LU's opcode's iallocator slot is filled with the 1643 cluster-wide default iallocator. 1644 1645 @type iallocator_slot: string 1646 @param iallocator_slot: the name of the opcode iallocator slot 1647 @type node_slot: string 1648 @param node_slot: the name of the opcode target node slot 1649 1650 """ 1651 node = getattr(lu.op, node_slot, None) 1652 iallocator = getattr(lu.op, iallocator_slot, None) 1653 1654 if node is not None and iallocator is not None: 1655 raise errors.OpPrereqError("Do not specify both, iallocator and node", 1656 errors.ECODE_INVAL) 1657 elif node is None and iallocator is None: 1658 default_iallocator = lu.cfg.GetDefaultIAllocator() 1659 if default_iallocator: 1660 setattr(lu.op, iallocator_slot, default_iallocator) 1661 else: 1662 raise errors.OpPrereqError("No iallocator or node given and no" 1663 " cluster-wide default iallocator found;" 1664 " please specify either an iallocator or a" 1665 " node, or set a cluster-wide default" 1666 " iallocator")
1667
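A minimal sketch of the default-filling path above, with stand-in LU, opcode and config objects (the class names are made up for illustration; "hail" is just an example iallocator name):

class FakeOp(object):
  iallocator = None
  node = None

class FakeCfg(object):
  def GetDefaultIAllocator(self):
    return "hail"

class FakeLU(object):
  def __init__(self):
    self.op = FakeOp()
    self.cfg = FakeCfg()

lu = FakeLU()
# Neither an iallocator nor a node was given, so the cluster-wide
# default is written back into the opcode's iallocator slot.
_CheckIAllocatorOrNode(lu, "iallocator", "node")
assert lu.op.iallocator == "hail"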
1668 1669 -def _GetDefaultIAllocator(cfg, iallocator):
1670 """Decides on which iallocator to use. 1671 1672 @type cfg: L{config.ConfigWriter} 1673 @param cfg: Cluster configuration object 1674 @type iallocator: string or None 1675 @param iallocator: Iallocator specified in opcode 1676 @rtype: string 1677 @return: Iallocator name 1678 1679 """ 1680 if not iallocator: 1681 # Use default iallocator 1682 iallocator = cfg.GetDefaultIAllocator() 1683 1684 if not iallocator: 1685 raise errors.OpPrereqError("No iallocator was specified, neither in the" 1686 " opcode nor as a cluster-wide default", 1687 errors.ECODE_INVAL) 1688 1689 return iallocator
1690
1691 1692 -def _CheckHostnameSane(lu, name):
1693 """Ensures that a given hostname resolves to a 'sane' name. 1694 1695 The given name is required to be a prefix of the resolved hostname, 1696 to prevent accidental mismatches. 1697 1698 @param lu: the logical unit on behalf of which we're checking 1699 @param name: the name we should resolve and check 1700 @return: the resolved hostname object 1701 1702 """ 1703 hostname = netutils.GetHostname(name=name) 1704 if hostname.name != name: 1705 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) 1706 if not utils.MatchNameComponent(name, [hostname.name]): 1707 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" 1708 " same as given hostname '%s'") % 1709 (hostname.name, name), errors.ECODE_INVAL) 1710 return hostname
1711
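The rule enforced above: a short name given by the user must be the first label of the FQDN it resolves to. A standalone illustration of that rule (this is not the actual L{utils.MatchNameComponent} implementation, just its intent):

def name_matches_fqdn(name, fqdn):
  # the user-supplied name must equal the FQDN or its first dotted label
  return name == fqdn or fqdn.split(".")[0] == name

assert name_matches_fqdn("inst1", "inst1.example.com")
assert not name_matches_fqdn("inst2", "inst1.example.com")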
1712 1713 -class LUClusterPostInit(LogicalUnit):
1714 """Logical unit for running hooks after cluster initialization. 1715 1716 """ 1717 HPATH = "cluster-init" 1718 HTYPE = constants.HTYPE_CLUSTER 1719
1720 - def BuildHooksEnv(self):
1721 """Build hooks env. 1722 1723 """ 1724 return { 1725 "OP_TARGET": self.cfg.GetClusterName(), 1726 }
1727
1728 - def BuildHooksNodes(self):
1729 """Build hooks nodes. 1730 1731 """ 1732 return ([], [self.cfg.GetMasterNode()])
1733
1734 - def Exec(self, feedback_fn):
1735 """Nothing to do. 1736 1737 """ 1738 return True
1739
1740 1741 -class LUClusterDestroy(LogicalUnit):
1742 """Logical unit for destroying the cluster. 1743 1744 """ 1745 HPATH = "cluster-destroy" 1746 HTYPE = constants.HTYPE_CLUSTER 1747
1748 - def BuildHooksEnv(self):
1749 """Build hooks env. 1750 1751 """ 1752 return { 1753 "OP_TARGET": self.cfg.GetClusterName(), 1754 }
1755
1756 - def BuildHooksNodes(self):
1757 """Build hooks nodes. 1758 1759 """ 1760 return ([], [])
1761
1762 - def CheckPrereq(self):
1763 """Check prerequisites. 1764 1765 This checks whether the cluster is empty. 1766 1767 Any errors are signaled by raising errors.OpPrereqError. 1768 1769 """ 1770 master = self.cfg.GetMasterNode() 1771 1772 nodelist = self.cfg.GetNodeList() 1773 if len(nodelist) != 1 or nodelist[0] != master: 1774 raise errors.OpPrereqError("There are still %d node(s) in" 1775 " this cluster." % (len(nodelist) - 1), 1776 errors.ECODE_INVAL) 1777 instancelist = self.cfg.GetInstanceList() 1778 if instancelist: 1779 raise errors.OpPrereqError("There are still %d instance(s) in" 1780 " this cluster." % len(instancelist), 1781 errors.ECODE_INVAL)
1782
1783 - def Exec(self, feedback_fn):
1784 """Destroys the cluster. 1785 1786 """ 1787 master_params = self.cfg.GetMasterNetworkParameters() 1788 1789 # Run post hooks on master node before it's removed 1790 _RunPostHook(self, master_params.name) 1791 1792 ems = self.cfg.GetUseExternalMipScript() 1793 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 1794 master_params, ems) 1795 if result.fail_msg: 1796 self.LogWarning("Error disabling the master IP address: %s", 1797 result.fail_msg) 1798 1799 return master_params.name
1800
1801 1802 -def _VerifyCertificate(filename):
1803 """Verifies a certificate for L{LUClusterVerifyConfig}. 1804 1805 @type filename: string 1806 @param filename: Path to PEM file 1807 1808 """ 1809 try: 1810 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1811 utils.ReadFile(filename)) 1812 except Exception, err: # pylint: disable=W0703 1813 return (LUClusterVerifyConfig.ETYPE_ERROR, 1814 "Failed to load X509 certificate %s: %s" % (filename, err)) 1815 1816 (errcode, msg) = \ 1817 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1818 constants.SSL_CERT_EXPIRATION_ERROR) 1819 1820 if msg: 1821 fnamemsg = "While verifying %s: %s" % (filename, msg) 1822 else: 1823 fnamemsg = None 1824 1825 if errcode is None: 1826 return (None, fnamemsg) 1827 elif errcode == utils.CERT_WARNING: 1828 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) 1829 elif errcode == utils.CERT_ERROR: 1830 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) 1831 1832 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1833
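A hedged sketch of exercising the helper against a throwaway self-signed certificate (pyOpenSSL only; whether the result is C{(None, None)} or a warning depends on the cluster's C{constants.SSL_CERT_EXPIRATION_*} thresholds):

import os
import tempfile
import OpenSSL

key = OpenSSL.crypto.PKey()
key.generate_key(OpenSSL.crypto.TYPE_RSA, 2048)
cert = OpenSSL.crypto.X509()
cert.get_subject().CN = "cluster.example.com"
cert.set_serial_number(1)
cert.gmtime_adj_notBefore(0)
cert.gmtime_adj_notAfter(10 * 365 * 24 * 3600)  # far beyond the warning window
cert.set_issuer(cert.get_subject())
cert.set_pubkey(key)
cert.sign(key, "sha256")

(fd, path) = tempfile.mkstemp()
os.write(fd, OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert))
os.close(fd)
try:
  (errcode, msg) = _VerifyCertificate(path)  # expected (None, None) for a healthy cert
finally:
  os.remove(path)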
1834 1835 -def _GetAllHypervisorParameters(cluster, instances):
1836 """Compute the set of all hypervisor parameters. 1837 1838 @type cluster: L{objects.Cluster} 1839 @param cluster: the cluster object 1840 @param instances: list of L{objects.Instance} 1841 @param instances: additional instances from which to obtain parameters 1842 @rtype: list of (origin, hypervisor, parameters) 1843 @return: a list with all parameters found, indicating the hypervisor they 1844 apply to, and the origin (can be "cluster", "os X", or "instance Y") 1845 1846 """ 1847 hvp_data = [] 1848 1849 for hv_name in cluster.enabled_hypervisors: 1850 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 1851 1852 for os_name, os_hvp in cluster.os_hvp.items(): 1853 for hv_name, hv_params in os_hvp.items(): 1854 if hv_params: 1855 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 1856 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 1857 1858 # TODO: collapse identical parameter values in a single one 1859 for instance in instances: 1860 if instance.hvparams: 1861 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 1862 cluster.FillHV(instance))) 1863 1864 return hvp_data
1865
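The return value is a flat list of C{(origin, hypervisor, parameters)} triples, which the verification code below only needs to iterate over. An illustrative, made-up example of its shape for a small Xen cluster:

hvp_data = [
  ("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-3-xenU"}),
  ("os debootstrap", "xen-pvm", {"kernel_path": "/boot/vmlinuz-custom"}),
  ("instance inst1.example.com", "xen-pvm", {"kernel_path": "/boot/vmlinuz-3-xenU"}),
]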
1866 1867 -class _VerifyErrors(object):
1868 """Mix-in for cluster/group verify LUs. 1869 1870 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 1871 self.op and self._feedback_fn to be available.) 1872 1873 """ 1874 1875 ETYPE_FIELD = "code" 1876 ETYPE_ERROR = "ERROR" 1877 ETYPE_WARNING = "WARNING" 1878
1879 - def _Error(self, ecode, item, msg, *args, **kwargs):
1880 """Format an error message. 1881 1882 Based on the opcode's error_codes parameter, either format a 1883 parseable error code, or a simpler error string. 1884 1885 This must be called only from Exec and functions called from Exec. 1886 1887 """ 1888 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1889 itype, etxt, _ = ecode 1890 # first complete the msg 1891 if args: 1892 msg = msg % args 1893 # then format the whole message 1894 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 1895 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1896 else: 1897 if item: 1898 item = " " + item 1899 else: 1900 item = "" 1901 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1902 # and finally report it via the feedback_fn 1903 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1904
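The two output forms produced above, shown with made-up values (the error-code tuple contributes C{etxt} and C{itype}):

(ltype, item, msg) = ("ERROR", "node3.example.com", "unable to verify node")
(itype, etxt) = ("node", "ENODERPC")  # illustrative contents of an error-code tuple
print "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)  # machine-parseable form
print "%s: %s %s: %s" % (ltype, itype, item, msg)         # human-readable form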
1905 - def _ErrorIf(self, cond, ecode, *args, **kwargs):
1906 """Log an error message if the passed condition is True. 1907 1908 """ 1909 cond = (bool(cond) 1910 or self.op.debug_simulate_errors) # pylint: disable=E1101 1911 1912 # If the error code is in the list of ignored errors, demote the error to a 1913 # warning 1914 (_, etxt, _) = ecode 1915 if etxt in self.op.ignore_errors: # pylint: disable=E1101 1916 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING 1917 1918 if cond: 1919 self._Error(ecode, *args, **kwargs) 1920 1921 # do not mark the operation as failed for WARN cases only 1922 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1923 self.bad = self.bad or cond
1924
1925 1926 -class LUClusterVerify(NoHooksLU):
1927 """Submits all jobs necessary to verify the cluster. 1928 1929 """ 1930 REQ_BGL = False 1931
1932 - def ExpandNames(self):
1933 self.needed_locks = {}
1934
1935 - def Exec(self, feedback_fn):
1936 jobs = [] 1937 1938 if self.op.group_name: 1939 groups = [self.op.group_name] 1940 depends_fn = lambda: None 1941 else: 1942 groups = self.cfg.GetNodeGroupList() 1943 1944 # Verify global configuration 1945 jobs.append([ 1946 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors) 1947 ]) 1948 1949 # Always depend on global verification 1950 depends_fn = lambda: [(-len(jobs), [])] 1951 1952 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group, 1953 ignore_errors=self.op.ignore_errors, 1954 depends=depends_fn())] 1955 for group in groups) 1956 1957 # Fix up all parameters 1958 for op in itertools.chain(*jobs): # pylint: disable=W0142 1959 op.debug_simulate_errors = self.op.debug_simulate_errors 1960 op.verbose = self.op.verbose 1961 op.error_codes = self.op.error_codes 1962 try: 1963 op.skip_checks = self.op.skip_checks 1964 except AttributeError: 1965 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 1966 1967 return ResultWithJobs(jobs)
1968
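The C{depends_fn} trick above builds a relative job dependency: after the config-verify job has been appended, C{len(jobs)} is 1, so every per-group job declares a dependency on the job submitted one position earlier in the same submission. A tiny sketch of the arithmetic:

jobs = [["OpClusterVerifyConfig"]]       # stand-in for the list of opcode lists
depends_fn = lambda: [(-len(jobs), [])]
assert depends_fn() == [(-1, [])]        # -1: the previously submitted job (the config check)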
1969 1970 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1971 """Verifies the cluster config. 1972 1973 """ 1974 REQ_BGL = False 1975
1976 - def _VerifyHVP(self, hvp_data):
1977 """Verifies locally the syntax of the hypervisor parameters. 1978 1979 """ 1980 for item, hv_name, hv_params in hvp_data: 1981 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 1982 (item, hv_name)) 1983 try: 1984 hv_class = hypervisor.GetHypervisorClass(hv_name) 1985 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1986 hv_class.CheckParameterSyntax(hv_params) 1987 except errors.GenericError, err: 1988 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1989
1990 - def ExpandNames(self):
1991 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 1992 self.share_locks = _ShareAll()
1993
1994 - def CheckPrereq(self):
1995 """Check prerequisites. 1996 1997 """ 1998 # Retrieve all information 1999 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 2000 self.all_node_info = self.cfg.GetAllNodesInfo() 2001 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2002
2003 - def Exec(self, feedback_fn):
2004 """Verify integrity of cluster, performing various test on nodes. 2005 2006 """ 2007 self.bad = False 2008 self._feedback_fn = feedback_fn 2009 2010 feedback_fn("* Verifying cluster config") 2011 2012 for msg in self.cfg.VerifyConfig(): 2013 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 2014 2015 feedback_fn("* Verifying cluster certificate files") 2016 2017 for cert_filename in constants.ALL_CERT_FILES: 2018 (errcode, msg) = _VerifyCertificate(cert_filename) 2019 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 2020 2021 feedback_fn("* Verifying hypervisor parameters") 2022 2023 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 2024 self.all_inst_info.values())) 2025 2026 feedback_fn("* Verifying all nodes belong to an existing group") 2027 2028 # We do this verification here because, should this bogus circumstance 2029 # occur, it would never be caught by VerifyGroup, which only acts on 2030 # nodes/instances reachable from existing node groups. 2031 2032 dangling_nodes = set(node.name for node in self.all_node_info.values() 2033 if node.group not in self.all_group_info) 2034 2035 dangling_instances = {} 2036 no_node_instances = [] 2037 2038 for inst in self.all_inst_info.values(): 2039 if inst.primary_node in dangling_nodes: 2040 dangling_instances.setdefault(inst.primary_node, []).append(inst.name) 2041 elif inst.primary_node not in self.all_node_info: 2042 no_node_instances.append(inst.name) 2043 2044 pretty_dangling = [ 2045 "%s (%s)" % 2046 (node.name, 2047 utils.CommaJoin(dangling_instances.get(node.name, 2048 ["no instances"]))) 2049 for node in dangling_nodes] 2050 2051 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 2052 None, 2053 "the following nodes (and their instances) belong to a non" 2054 " existing group: %s", utils.CommaJoin(pretty_dangling)) 2055 2056 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 2057 None, 2058 "the following instances have a non-existing primary-node:" 2059 " %s", utils.CommaJoin(no_node_instances)) 2060 2061 return not self.bad
2062
2063 2064 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2065 """Verifies the status of a node group. 2066 2067 """ 2068 HPATH = "cluster-verify" 2069 HTYPE = constants.HTYPE_CLUSTER 2070 REQ_BGL = False 2071 2072 _HOOKS_INDENT_RE = re.compile("^", re.M) 2073
2074 - class NodeImage(object):
2075    """A class representing the logical and physical status of a node.
2076
2077    @type name: string
2078    @ivar name: the node name to which this object refers
2079    @ivar volumes: a structure as returned from
2080        L{ganeti.backend.GetVolumeList} (runtime)
2081    @ivar instances: a list of running instances (runtime)
2082    @ivar pinst: list of configured primary instances (config)
2083    @ivar sinst: list of configured secondary instances (config)
2084    @ivar sbp: dictionary of {primary-node: list of instances} for all
2085        instances for which this node is secondary (config)
2086    @ivar mfree: free memory, as reported by hypervisor (runtime)
2087    @ivar dfree: free disk, as reported by the node (runtime)
2088    @ivar offline: the offline status (config)
2089    @type rpc_fail: boolean
2090    @ivar rpc_fail: whether the RPC verify call was successful (overall,
2091        not whether the individual keys were correct) (runtime)
2092    @type lvm_fail: boolean
2093    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2094    @type hyp_fail: boolean
2095    @ivar hyp_fail: whether the RPC call didn't return the instance list
2096    @type ghost: boolean
2097    @ivar ghost: whether this node is unknown to the configuration (a ghost node)
2098    @type os_fail: boolean
2099    @ivar os_fail: whether the RPC call didn't return valid OS data
2100    @type oslist: list
2101    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2102    @type vm_capable: boolean
2103    @ivar vm_capable: whether the node can host instances
2104
2105    """
2106 - def __init__(self, offline=False, name=None, vm_capable=True):
2107 self.name = name 2108 self.volumes = {} 2109 self.instances = [] 2110 self.pinst = [] 2111 self.sinst = [] 2112 self.sbp = {} 2113 self.mfree = 0 2114 self.dfree = 0 2115 self.offline = offline 2116 self.vm_capable = vm_capable 2117 self.rpc_fail = False 2118 self.lvm_fail = False 2119 self.hyp_fail = False 2120 self.ghost = False 2121 self.os_fail = False 2122 self.oslist = {}
2123
2124 - def ExpandNames(self):
2125 # This raises errors.OpPrereqError on its own: 2126 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 2127 2128 # Get instances in node group; this is unsafe and needs verification later 2129 inst_names = \ 2130 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2131 2132 self.needed_locks = { 2133 locking.LEVEL_INSTANCE: inst_names, 2134 locking.LEVEL_NODEGROUP: [self.group_uuid], 2135 locking.LEVEL_NODE: [], 2136 } 2137 2138 self.share_locks = _ShareAll()
2139
2140 - def DeclareLocks(self, level):
2141 if level == locking.LEVEL_NODE: 2142 # Get members of node group; this is unsafe and needs verification later 2143 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 2144 2145 all_inst_info = self.cfg.GetAllInstancesInfo() 2146 2147 # In Exec(), we warn about mirrored instances that have primary and 2148 # secondary living in separate node groups. To fully verify that 2149 # volumes for these instances are healthy, we will need to do an 2150 # extra call to their secondaries. We ensure here those nodes will 2151 # be locked. 2152 for inst in self.owned_locks(locking.LEVEL_INSTANCE): 2153 # Important: access only the instances whose lock is owned 2154 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR: 2155 nodes.update(all_inst_info[inst].secondary_nodes) 2156 2157 self.needed_locks[locking.LEVEL_NODE] = nodes
2158
2159 - def CheckPrereq(self):
2160 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 2161 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 2162 2163 group_nodes = set(self.group_info.members) 2164 group_instances = \ 2165 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2166 2167 unlocked_nodes = \ 2168 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2169 2170 unlocked_instances = \ 2171 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) 2172 2173 if unlocked_nodes: 2174 raise errors.OpPrereqError("Missing lock for nodes: %s" % 2175 utils.CommaJoin(unlocked_nodes), 2176 errors.ECODE_STATE) 2177 2178 if unlocked_instances: 2179 raise errors.OpPrereqError("Missing lock for instances: %s" % 2180 utils.CommaJoin(unlocked_instances), 2181 errors.ECODE_STATE) 2182 2183 self.all_node_info = self.cfg.GetAllNodesInfo() 2184 self.all_inst_info = self.cfg.GetAllInstancesInfo() 2185 2186 self.my_node_names = utils.NiceSort(group_nodes) 2187 self.my_inst_names = utils.NiceSort(group_instances) 2188 2189 self.my_node_info = dict((name, self.all_node_info[name]) 2190 for name in self.my_node_names) 2191 2192 self.my_inst_info = dict((name, self.all_inst_info[name]) 2193 for name in self.my_inst_names) 2194 2195 # We detect here the nodes that will need the extra RPC calls for verifying 2196 # split LV volumes; they should be locked. 2197 extra_lv_nodes = set() 2198 2199 for inst in self.my_inst_info.values(): 2200 if inst.disk_template in constants.DTS_INT_MIRROR: 2201 for nname in inst.all_nodes: 2202 if self.all_node_info[nname].group != self.group_uuid: 2203 extra_lv_nodes.add(nname) 2204 2205 unlocked_lv_nodes = \ 2206 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2207 2208 if unlocked_lv_nodes: 2209 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 2210 utils.CommaJoin(unlocked_lv_nodes), 2211 errors.ECODE_STATE) 2212 self.extra_lv_nodes = list(extra_lv_nodes)
2213
2214 - def _VerifyNode(self, ninfo, nresult):
2215 """Perform some basic validation on data returned from a node. 2216 2217 - check the result data structure is well formed and has all the 2218 mandatory fields 2219 - check ganeti version 2220 2221 @type ninfo: L{objects.Node} 2222 @param ninfo: the node to check 2223 @param nresult: the results from the node 2224 @rtype: boolean 2225 @return: whether overall this call was successful (and we can expect 2226 reasonable values in the respose) 2227 2228 """ 2229 node = ninfo.name 2230 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2231 2232 # main result, nresult should be a non-empty dict 2233 test = not nresult or not isinstance(nresult, dict) 2234 _ErrorIf(test, constants.CV_ENODERPC, node, 2235 "unable to verify node: no data returned") 2236 if test: 2237 return False 2238 2239 # compares ganeti version 2240 local_version = constants.PROTOCOL_VERSION 2241 remote_version = nresult.get("version", None) 2242 test = not (remote_version and 2243 isinstance(remote_version, (list, tuple)) and 2244 len(remote_version) == 2) 2245 _ErrorIf(test, constants.CV_ENODERPC, node, 2246 "connection to node returned invalid data") 2247 if test: 2248 return False 2249 2250 test = local_version != remote_version[0] 2251 _ErrorIf(test, constants.CV_ENODEVERSION, node, 2252 "incompatible protocol versions: master %s," 2253 " node %s", local_version, remote_version[0]) 2254 if test: 2255 return False 2256 2257 # node seems compatible, we can actually try to look into its results 2258 2259 # full package version 2260 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2261 constants.CV_ENODEVERSION, node, 2262 "software version mismatch: master %s, node %s", 2263 constants.RELEASE_VERSION, remote_version[1], 2264 code=self.ETYPE_WARNING) 2265 2266 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2267 if ninfo.vm_capable and isinstance(hyp_result, dict): 2268 for hv_name, hv_result in hyp_result.iteritems(): 2269 test = hv_result is not None 2270 _ErrorIf(test, constants.CV_ENODEHV, node, 2271 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2272 2273 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2274 if ninfo.vm_capable and isinstance(hvp_result, list): 2275 for item, hv_name, hv_result in hvp_result: 2276 _ErrorIf(True, constants.CV_ENODEHV, node, 2277 "hypervisor %s parameter verify failure (source %s): %s", 2278 hv_name, item, hv_result) 2279 2280 test = nresult.get(constants.NV_NODESETUP, 2281 ["Missing NODESETUP results"]) 2282 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", 2283 "; ".join(test)) 2284 2285 return True
2286
2287 - def _VerifyNodeTime(self, ninfo, nresult, 2288 nvinfo_starttime, nvinfo_endtime):
2289 """Check the node time. 2290 2291 @type ninfo: L{objects.Node} 2292 @param ninfo: the node to check 2293 @param nresult: the remote results for the node 2294 @param nvinfo_starttime: the start time of the RPC call 2295 @param nvinfo_endtime: the end time of the RPC call 2296 2297 """ 2298 node = ninfo.name 2299 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2300 2301 ntime = nresult.get(constants.NV_TIME, None) 2302 try: 2303 ntime_merged = utils.MergeTime(ntime) 2304 except (ValueError, TypeError): 2305 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") 2306 return 2307 2308 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2309 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2310 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2311 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2312 else: 2313 ntime_diff = None 2314 2315 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, 2316 "Node time diverges by at least %s from master node time", 2317 ntime_diff)
2318
2319 - def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2320 """Check the node LVM results. 2321 2322 @type ninfo: L{objects.Node} 2323 @param ninfo: the node to check 2324 @param nresult: the remote results for the node 2325 @param vg_name: the configured VG name 2326 2327 """ 2328 if vg_name is None: 2329 return 2330 2331 node = ninfo.name 2332 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2333 2334 # checks vg existence and size > 20G 2335 vglist = nresult.get(constants.NV_VGLIST, None) 2336 test = not vglist 2337 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") 2338 if not test: 2339 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2340 constants.MIN_VG_SIZE) 2341 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) 2342 2343 # check pv names 2344 pvlist = nresult.get(constants.NV_PVLIST, None) 2345 test = pvlist is None 2346 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node") 2347 if not test: 2348 # check that ':' is not present in PV names, since it's a 2349 # special character for lvcreate (denotes the range of PEs to 2350 # use on the PV) 2351 for _, pvname, owner_vg in pvlist: 2352 test = ":" in pvname 2353 _ErrorIf(test, constants.CV_ENODELVM, node, 2354 "Invalid character ':' in PV '%s' of VG '%s'", 2355 pvname, owner_vg)
2356
2357 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2358 """Check the node bridges. 2359 2360 @type ninfo: L{objects.Node} 2361 @param ninfo: the node to check 2362 @param nresult: the remote results for the node 2363 @param bridges: the expected list of bridges 2364 2365 """ 2366 if not bridges: 2367 return 2368 2369 node = ninfo.name 2370 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2371 2372 missing = nresult.get(constants.NV_BRIDGES, None) 2373 test = not isinstance(missing, list) 2374 _ErrorIf(test, constants.CV_ENODENET, node, 2375 "did not return valid bridge information") 2376 if not test: 2377 _ErrorIf(bool(missing), constants.CV_ENODENET, node, 2378 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2379
2380 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2381    """Check for the presence and executability of user scripts on the node.
2382
2383    @type ninfo: L{objects.Node}
2384    @param ninfo: the node to check
2385    @param nresult: the remote results for the node
2386
2387    """
2388    node = ninfo.name
2389
2390    test = constants.NV_USERSCRIPTS not in nresult
2391    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2392                  "did not return user scripts information")
2393
2394    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2395    if not test:
2396      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2397                    "user scripts not present or not executable: %s" %
2398                    utils.CommaJoin(sorted(broken_scripts)))
2399
2400 - def _VerifyNodeNetwork(self, ninfo, nresult):
2401 """Check the node network connectivity results. 2402 2403 @type ninfo: L{objects.Node} 2404 @param ninfo: the node to check 2405 @param nresult: the remote results for the node 2406 2407 """ 2408 node = ninfo.name 2409 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2410 2411 test = constants.NV_NODELIST not in nresult 2412 _ErrorIf(test, constants.CV_ENODESSH, node, 2413 "node hasn't returned node ssh connectivity data") 2414 if not test: 2415 if nresult[constants.NV_NODELIST]: 2416 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2417 _ErrorIf(True, constants.CV_ENODESSH, node, 2418 "ssh communication with node '%s': %s", a_node, a_msg) 2419 2420 test = constants.NV_NODENETTEST not in nresult 2421 _ErrorIf(test, constants.CV_ENODENET, node, 2422 "node hasn't returned node tcp connectivity data") 2423 if not test: 2424 if nresult[constants.NV_NODENETTEST]: 2425 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2426 for anode in nlist: 2427 _ErrorIf(True, constants.CV_ENODENET, node, 2428 "tcp communication with node '%s': %s", 2429 anode, nresult[constants.NV_NODENETTEST][anode]) 2430 2431 test = constants.NV_MASTERIP not in nresult 2432 _ErrorIf(test, constants.CV_ENODENET, node, 2433 "node hasn't returned node master IP reachability data") 2434 if not test: 2435 if not nresult[constants.NV_MASTERIP]: 2436 if node == self.master_node: 2437 msg = "the master node cannot reach the master IP (not configured?)" 2438 else: 2439 msg = "cannot reach the master IP" 2440 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2441
2442 - def _VerifyInstance(self, instance, instanceconfig, node_image, 2443 diskstatus):
2444 """Verify an instance. 2445 2446 This function checks to see if the required block devices are 2447 available on the instance's node. 2448 2449 """ 2450 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2451 node_current = instanceconfig.primary_node 2452 2453 node_vol_should = {} 2454 instanceconfig.MapLVsByNode(node_vol_should) 2455 2456 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info) 2457 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig) 2458 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err)) 2459 2460 for node in node_vol_should: 2461 n_img = node_image[node] 2462 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2463 # ignore missing volumes on offline or broken nodes 2464 continue 2465 for volume in node_vol_should[node]: 2466 test = volume not in n_img.volumes 2467 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, 2468 "volume %s missing on node %s", volume, node) 2469 2470 if instanceconfig.admin_state == constants.ADMINST_UP: 2471 pri_img = node_image[node_current] 2472 test = instance not in pri_img.instances and not pri_img.offline 2473 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, 2474 "instance not running on its primary node %s", 2475 node_current) 2476 2477 diskdata = [(nname, success, status, idx) 2478 for (nname, disks) in diskstatus.items() 2479 for idx, (success, status) in enumerate(disks)] 2480 2481 for nname, success, bdev_status, idx in diskdata: 2482 # the 'ghost node' construction in Exec() ensures that we have a 2483 # node here 2484 snode = node_image[nname] 2485 bad_snode = snode.ghost or snode.offline 2486 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and 2487 not success and not bad_snode, 2488 constants.CV_EINSTANCEFAULTYDISK, instance, 2489 "couldn't retrieve status for disk/%s on %s: %s", 2490 idx, nname, bdev_status) 2491 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and 2492 success and bdev_status.ldisk_status == constants.LDS_FAULTY), 2493 constants.CV_EINSTANCEFAULTYDISK, instance, 2494 "disk/%s on %s is faulty", idx, nname)
2495
2496 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2497 """Verify if there are any unknown volumes in the cluster. 2498 2499 The .os, .swap and backup volumes are ignored. All other volumes are 2500 reported as unknown. 2501 2502 @type reserved: L{ganeti.utils.FieldSet} 2503 @param reserved: a FieldSet of reserved volume names 2504 2505 """ 2506 for node, n_img in node_image.items(): 2507 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2508 self.all_node_info[node].group != self.group_uuid): 2509 # skip non-healthy nodes 2510 continue 2511 for volume in n_img.volumes: 2512 test = ((node not in node_vol_should or 2513 volume not in node_vol_should[node]) and 2514 not reserved.Matches(volume)) 2515 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, 2516 "volume %s is unknown", volume)
2517
2518 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2519    """Verify N+1 Memory Resilience.
2520
2521    Check that if one single node dies we can still start all the
2522    instances it was primary for.
2523
2524    """
2525    cluster_info = self.cfg.GetClusterInfo()
2526    for node, n_img in node_image.items():
2527      # This code checks that every node which is now listed as
2528      # secondary has enough memory to host all instances it is
2529      # supposed to host, should a single other node in the cluster fail.
2530      # FIXME: not ready for failover to an arbitrary node
2531      # FIXME: does not support file-backed instances
2532      # WARNING: we currently take into account down instances as well
2533      # as up ones, considering that even if they're down someone
2534      # might want to start them even in the event of a node failure.
2535      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2536        # we're skipping nodes marked offline and nodes in other groups from
2537        # the N+1 warning, since most likely we don't have good memory
2538        # information from them; we already list instances living on such
2539        # nodes, and that's enough warning
2540        continue
2541      #TODO(dynmem): also consider ballooning out other instances
2542      for prinode, instances in n_img.sbp.items():
2543        needed_mem = 0
2544        for instance in instances:
2545          bep = cluster_info.FillBE(instance_cfg[instance])
2546          if bep[constants.BE_AUTO_BALANCE]:
2547            needed_mem += bep[constants.BE_MINMEM]
2548        test = n_img.mfree < needed_mem
2549        self._ErrorIf(test, constants.CV_ENODEN1, node,
2550                      "not enough memory to accommodate instance failovers"
2551                      " should node %s fail (%dMiB needed, %dMiB available)",
2552                      prinode, needed_mem, n_img.mfree)
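A worked example of the per-node arithmetic above, with made-up numbers: C{n_img.sbp} maps a primary node to the instances for which the inspected node is secondary, and only auto-balanced instances count towards the needed memory.

sbp = {"node1.example.com": ["inst1", "inst2", "inst3"]}   # illustrative only
be_params = {
  "inst1": {"auto_balance": True, "minmem": 1024},
  "inst2": {"auto_balance": True, "minmem": 2048},
  "inst3": {"auto_balance": False, "minmem": 512},         # ignored by the check
}
for prinode, instances in sbp.items():
  needed_mem = sum(be_params[i]["minmem"]
                   for i in instances if be_params[i]["auto_balance"])
  assert needed_mem == 3072  # compared against the secondary's n_img.mfree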
2553 2554 @classmethod
2555 - def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo, 2556 (files_all, files_opt, files_mc, files_vm)):
2557 """Verifies file checksums collected from all nodes. 2558 2559 @param errorif: Callback for reporting errors 2560 @param nodeinfo: List of L{objects.Node} objects 2561 @param master_node: Name of master node 2562 @param all_nvinfo: RPC results 2563 2564 """ 2565 # Define functions determining which nodes to consider for a file 2566 files2nodefn = [ 2567 (files_all, None), 2568 (files_mc, lambda node: (node.master_candidate or 2569 node.name == master_node)), 2570 (files_vm, lambda node: node.vm_capable), 2571 ] 2572 2573 # Build mapping from filename to list of nodes which should have the file 2574 nodefiles = {} 2575 for (files, fn) in files2nodefn: 2576 if fn is None: 2577 filenodes = nodeinfo 2578 else: 2579 filenodes = filter(fn, nodeinfo) 2580 nodefiles.update((filename, 2581 frozenset(map(operator.attrgetter("name"), filenodes))) 2582 for filename in files) 2583 2584 assert set(nodefiles) == (files_all | files_mc | files_vm) 2585 2586 fileinfo = dict((filename, {}) for filename in nodefiles) 2587 ignore_nodes = set() 2588 2589 for node in nodeinfo: 2590 if node.offline: 2591 ignore_nodes.add(node.name) 2592 continue 2593 2594 nresult = all_nvinfo[node.name] 2595 2596 if nresult.fail_msg or not nresult.payload: 2597 node_files = None 2598 else: 2599 node_files = nresult.payload.get(constants.NV_FILELIST, None) 2600 2601 test = not (node_files and isinstance(node_files, dict)) 2602 errorif(test, constants.CV_ENODEFILECHECK, node.name, 2603 "Node did not return file checksum data") 2604 if test: 2605 ignore_nodes.add(node.name) 2606 continue 2607 2608 # Build per-checksum mapping from filename to nodes having it 2609 for (filename, checksum) in node_files.items(): 2610 assert filename in nodefiles 2611 fileinfo[filename].setdefault(checksum, set()).add(node.name) 2612 2613 for (filename, checksums) in fileinfo.items(): 2614 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2615 2616 # Nodes having the file 2617 with_file = frozenset(node_name 2618 for nodes in fileinfo[filename].values() 2619 for node_name in nodes) - ignore_nodes 2620 2621 expected_nodes = nodefiles[filename] - ignore_nodes 2622 2623 # Nodes missing file 2624 missing_file = expected_nodes - with_file 2625 2626 if filename in files_opt: 2627 # All or no nodes 2628 errorif(missing_file and missing_file != expected_nodes, 2629 constants.CV_ECLUSTERFILECHECK, None, 2630 "File %s is optional, but it must exist on all or no" 2631 " nodes (not found on %s)", 2632 filename, utils.CommaJoin(utils.NiceSort(missing_file))) 2633 else: 2634 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2635 "File %s is missing from node(s) %s", filename, 2636 utils.CommaJoin(utils.NiceSort(missing_file))) 2637 2638 # Warn if a node has a file it shouldn't 2639 unexpected = with_file - expected_nodes 2640 errorif(unexpected, 2641 constants.CV_ECLUSTERFILECHECK, None, 2642 "File %s should not exist on node(s) %s", 2643 filename, utils.CommaJoin(utils.NiceSort(unexpected))) 2644 2645 # See if there are multiple versions of the file 2646 test = len(checksums) > 1 2647 if test: 2648 variants = ["variant %s on %s" % 2649 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes))) 2650 for (idx, (checksum, nodes)) in 2651 enumerate(sorted(checksums.items()))] 2652 else: 2653 variants = [] 2654 2655 errorif(test, constants.CV_ECLUSTERFILECHECK, None, 2656 "File %s found with %s different checksums (%s)", 2657 filename, len(checksums), "; ".join(variants))
2658
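The core data structure built above is C{fileinfo}: a mapping of filename to {checksum: set of nodes reporting that checksum}. A hedged illustration (paths and checksums are made up) of how more than one checksum per file signals an inconsistency:

fileinfo = {
  "/var/lib/ganeti/config.data": {
    "3da541559918a808c2402bba5012f6c60b27661c": set(["node1", "node2"]),
    "af5570f5a1810b7af78caf4bc70a660f0df51e42": set(["node3"]),
  },
}
for (filename, checksums) in fileinfo.items():
  if len(checksums) > 1:
    print "File %s found with %s different checksums" % (filename, len(checksums))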
2659 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2660 drbd_map):
2661 """Verifies and the node DRBD status. 2662 2663 @type ninfo: L{objects.Node} 2664 @param ninfo: the node to check 2665 @param nresult: the remote results for the node 2666 @param instanceinfo: the dict of instances 2667 @param drbd_helper: the configured DRBD usermode helper 2668 @param drbd_map: the DRBD map as returned by 2669 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 2670 2671 """ 2672 node = ninfo.name 2673 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2674 2675 if drbd_helper: 2676 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 2677 test = (helper_result == None) 2678 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2679 "no drbd usermode helper returned") 2680 if helper_result: 2681 status, payload = helper_result 2682 test = not status 2683 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2684 "drbd usermode helper check unsuccessful: %s", payload) 2685 test = status and (payload != drbd_helper) 2686 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2687 "wrong drbd usermode helper: %s", payload) 2688 2689 # compute the DRBD minors 2690 node_drbd = {} 2691 for minor, instance in drbd_map[node].items(): 2692 test = instance not in instanceinfo 2693 _ErrorIf(test, constants.CV_ECLUSTERCFG, None, 2694 "ghost instance '%s' in temporary DRBD map", instance) 2695 # ghost instance should not be running, but otherwise we 2696 # don't give double warnings (both ghost instance and 2697 # unallocated minor in use) 2698 if test: 2699 node_drbd[minor] = (instance, False) 2700 else: 2701 instance = instanceinfo[instance] 2702 node_drbd[minor] = (instance.name, 2703 instance.admin_state == constants.ADMINST_UP) 2704 2705 # and now check them 2706 used_minors = nresult.get(constants.NV_DRBDLIST, []) 2707 test = not isinstance(used_minors, (tuple, list)) 2708 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2709 "cannot parse drbd status file: %s", str(used_minors)) 2710 if test: 2711 # we cannot check drbd status 2712 return 2713 2714 for minor, (iname, must_exist) in node_drbd.items(): 2715 test = minor not in used_minors and must_exist 2716 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2717 "drbd minor %d of instance %s is not active", minor, iname) 2718 for minor in used_minors: 2719 test = minor not in node_drbd 2720 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2721 "unallocated drbd minor %d is in use", minor)
2722
2723 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2724 """Builds the node OS structures. 2725 2726 @type ninfo: L{objects.Node} 2727 @param ninfo: the node to check 2728 @param nresult: the remote results for the node 2729 @param nimg: the node image object 2730 2731 """ 2732 node = ninfo.name 2733 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2734 2735 remote_os = nresult.get(constants.NV_OSLIST, None) 2736 test = (not isinstance(remote_os, list) or 2737 not compat.all(isinstance(v, list) and len(v) == 7 2738 for v in remote_os)) 2739 2740 _ErrorIf(test, constants.CV_ENODEOS, node, 2741 "node hasn't returned valid OS data") 2742 2743 nimg.os_fail = test 2744 2745 if test: 2746 return 2747 2748 os_dict = {} 2749 2750 for (name, os_path, status, diagnose, 2751 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 2752 2753 if name not in os_dict: 2754 os_dict[name] = [] 2755 2756 # parameters is a list of lists instead of list of tuples due to 2757 # JSON lacking a real tuple type, fix it: 2758 parameters = [tuple(v) for v in parameters] 2759 os_dict[name].append((os_path, status, diagnose, 2760 set(variants), set(parameters), set(api_ver))) 2761 2762 nimg.oslist = os_dict
2763
2764 - def _VerifyNodeOS(self, ninfo, nimg, base):
2765 """Verifies the node OS list. 2766 2767 @type ninfo: L{objects.Node} 2768 @param ninfo: the node to check 2769 @param nimg: the node image object 2770 @param base: the 'template' node we match against (e.g. from the master) 2771 2772 """ 2773 node = ninfo.name 2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2775 2776 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 2777 2778 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 2779 for os_name, os_data in nimg.oslist.items(): 2780 assert os_data, "Empty OS status for OS %s?!" % os_name 2781 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 2782 _ErrorIf(not f_status, constants.CV_ENODEOS, node, 2783 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 2784 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node, 2785 "OS '%s' has multiple entries (first one shadows the rest): %s", 2786 os_name, utils.CommaJoin([v[0] for v in os_data])) 2787 # comparisons with the 'base' image 2788 test = os_name not in base.oslist 2789 _ErrorIf(test, constants.CV_ENODEOS, node, 2790 "Extra OS %s not present on reference node (%s)", 2791 os_name, base.name) 2792 if test: 2793 continue 2794 assert base.oslist[os_name], "Base node has empty OS status?" 2795 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 2796 if not b_status: 2797 # base OS is invalid, skipping 2798 continue 2799 for kind, a, b in [("API version", f_api, b_api), 2800 ("variants list", f_var, b_var), 2801 ("parameters", beautify_params(f_param), 2802 beautify_params(b_param))]: 2803 _ErrorIf(a != b, constants.CV_ENODEOS, node, 2804 "OS %s for %s differs from reference node %s: [%s] vs. [%s]", 2805 kind, os_name, base.name, 2806 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 2807 2808 # check any missing OSes 2809 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 2810 _ErrorIf(missing, constants.CV_ENODEOS, node, 2811 "OSes present on reference node %s but missing on this node: %s", 2812 base.name, utils.CommaJoin(missing))
2813
2814 - def _VerifyOob(self, ninfo, nresult):
2815 """Verifies out of band functionality of a node. 2816 2817 @type ninfo: L{objects.Node} 2818 @param ninfo: the node to check 2819 @param nresult: the remote results for the node 2820 2821 """ 2822 node = ninfo.name 2823 # We just have to verify the paths on master and/or master candidates 2824 # as the oob helper is invoked on the master 2825 if ((ninfo.master_candidate or ninfo.master_capable) and 2826 constants.NV_OOB_PATHS in nresult): 2827 for path_result in nresult[constants.NV_OOB_PATHS]: 2828 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2829
2830 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2831 """Verifies and updates the node volume data. 2832 2833 This function will update a L{NodeImage}'s internal structures 2834 with data from the remote call. 2835 2836 @type ninfo: L{objects.Node} 2837 @param ninfo: the node to check 2838 @param nresult: the remote results for the node 2839 @param nimg: the node image object 2840 @param vg_name: the configured VG name 2841 2842 """ 2843 node = ninfo.name 2844 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2845 2846 nimg.lvm_fail = True 2847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 2848 if vg_name is None: 2849 pass 2850 elif isinstance(lvdata, basestring): 2851 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s", 2852 utils.SafeEncode(lvdata)) 2853 elif not isinstance(lvdata, dict): 2854 _ErrorIf(True, constants.CV_ENODELVM, node, 2855 "rpc call to node failed (lvlist)") 2856 else: 2857 nimg.volumes = lvdata 2858 nimg.lvm_fail = False
2859
2860 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2861 """Verifies and updates the node instance list. 2862 2863 If the listing was successful, then updates this node's instance 2864 list. Otherwise, it marks the RPC call as failed for the instance 2865 list key. 2866 2867 @type ninfo: L{objects.Node} 2868 @param ninfo: the node to check 2869 @param nresult: the remote results for the node 2870 @param nimg: the node image object 2871 2872 """ 2873 idata = nresult.get(constants.NV_INSTANCELIST, None) 2874 test = not isinstance(idata, list) 2875 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2876 "rpc call to node failed (instancelist): %s", 2877 utils.SafeEncode(str(idata))) 2878 if test: 2879 nimg.hyp_fail = True 2880 else: 2881 nimg.instances = idata
2882
2883 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2884 """Verifies and computes a node information map 2885 2886 @type ninfo: L{objects.Node} 2887 @param ninfo: the node to check 2888 @param nresult: the remote results for the node 2889 @param nimg: the node image object 2890 @param vg_name: the configured VG name 2891 2892 """ 2893 node = ninfo.name 2894 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2895 2896 # try to read free memory (from the hypervisor) 2897 hv_info = nresult.get(constants.NV_HVINFO, None) 2898 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 2899 _ErrorIf(test, constants.CV_ENODEHV, node, 2900 "rpc call to node failed (hvinfo)") 2901 if not test: 2902 try: 2903 nimg.mfree = int(hv_info["memory_free"]) 2904 except (ValueError, TypeError): 2905 _ErrorIf(True, constants.CV_ENODERPC, node, 2906 "node returned invalid nodeinfo, check hypervisor") 2907 2908 # FIXME: devise a free space model for file based instances as well 2909 if vg_name is not None: 2910 test = (constants.NV_VGLIST not in nresult or 2911 vg_name not in nresult[constants.NV_VGLIST]) 2912 _ErrorIf(test, constants.CV_ENODELVM, node, 2913 "node didn't return data for the volume group '%s'" 2914 " - it is either missing or broken", vg_name) 2915 if not test: 2916 try: 2917 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 2918 except (ValueError, TypeError): 2919 _ErrorIf(True, constants.CV_ENODERPC, node, 2920 "node returned invalid LVM info, check LVM status")
2921
2922 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2923 """Gets per-disk status information for all instances. 2924 2925 @type nodelist: list of strings 2926 @param nodelist: Node names 2927 @type node_image: dict of (name, L{objects.Node}) 2928 @param node_image: Node objects 2929 @type instanceinfo: dict of (name, L{objects.Instance}) 2930 @param instanceinfo: Instance objects 2931 @rtype: {instance: {node: [(succes, payload)]}} 2932 @return: a dictionary of per-instance dictionaries with nodes as 2933 keys and disk information as values; the disk information is a 2934 list of tuples (success, payload) 2935 2936 """ 2937 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2938 2939 node_disks = {} 2940 node_disks_devonly = {} 2941 diskless_instances = set() 2942 diskless = constants.DT_DISKLESS 2943 2944 for nname in nodelist: 2945 node_instances = list(itertools.chain(node_image[nname].pinst, 2946 node_image[nname].sinst)) 2947 diskless_instances.update(inst for inst in node_instances 2948 if instanceinfo[inst].disk_template == diskless) 2949 disks = [(inst, disk) 2950 for inst in node_instances 2951 for disk in instanceinfo[inst].disks] 2952 2953 if not disks: 2954 # No need to collect data 2955 continue 2956 2957 node_disks[nname] = disks 2958 2959 # _AnnotateDiskParams makes already copies of the disks 2960 devonly = [] 2961 for (inst, dev) in disks: 2962 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg) 2963 self.cfg.SetDiskID(anno_disk, nname) 2964 devonly.append(anno_disk) 2965 2966 node_disks_devonly[nname] = devonly 2967 2968 assert len(node_disks) == len(node_disks_devonly) 2969 2970 # Collect data from all nodes with disks 2971 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 2972 node_disks_devonly) 2973 2974 assert len(result) == len(node_disks) 2975 2976 instdisk = {} 2977 2978 for (nname, nres) in result.items(): 2979 disks = node_disks[nname] 2980 2981 if nres.offline: 2982 # No data from this node 2983 data = len(disks) * [(False, "node offline")] 2984 else: 2985 msg = nres.fail_msg 2986 _ErrorIf(msg, constants.CV_ENODERPC, nname, 2987 "while getting disk information: %s", msg) 2988 if msg: 2989 # No data from this node 2990 data = len(disks) * [(False, msg)] 2991 else: 2992 data = [] 2993 for idx, i in enumerate(nres.payload): 2994 if isinstance(i, (tuple, list)) and len(i) == 2: 2995 data.append(i) 2996 else: 2997 logging.warning("Invalid result from node %s, entry %d: %s", 2998 nname, idx, i) 2999 data.append((False, "Invalid result from the remote node")) 3000 3001 for ((inst, _), status) in zip(disks, data): 3002 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 3003 3004 # Add empty entries for diskless instances. 3005 for inst in diskless_instances: 3006 assert inst not in instdisk 3007 instdisk[inst] = {} 3008 3009 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 3010 len(nnames) <= len(instanceinfo[inst].all_nodes) and 3011 compat.all(isinstance(s, (tuple, list)) and 3012 len(s) == 2 for s in statuses) 3013 for inst, nnames in instdisk.items() 3014 for nname, statuses in nnames.items()) 3015 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure" 3016 3017 return instdisk
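An illustrative, made-up example of the C{instdisk} structure returned above; the per-disk payloads are the objects returned by the block device status RPC, shown here as placeholder strings:

instdisk = {
  "inst1": {
    "node1": [(True, "<status disk/0>"), (True, "<status disk/1>")],
    "node2": [(False, "node offline"), (False, "node offline")],
  },
  "inst2": {},  # diskless instances get an empty mapping
}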
3018 3019 @staticmethod
3020 - def _SshNodeSelector(group_uuid, all_nodes):
3021 """Create endless iterators for all potential SSH check hosts. 3022 3023 """ 3024 nodes = [node for node in all_nodes 3025 if (node.group != group_uuid and 3026 not node.offline)] 3027 keyfunc = operator.attrgetter("group") 3028 3029 return map(itertools.cycle, 3030 [sorted(map(operator.attrgetter("name"), names)) 3031 for _, names in itertools.groupby(sorted(nodes, key=keyfunc), 3032 keyfunc)])
3033 3034 @classmethod
3035 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3036 """Choose which nodes should talk to which other nodes. 3037 3038 We will make nodes contact all nodes in their group, and one node from 3039 every other group. 3040 3041 @warning: This algorithm has a known issue if one node group is much 3042 smaller than others (e.g. just one node). In such a case all other 3043 nodes will talk to the single node. 3044 3045 """ 3046 online_nodes = sorted(node.name for node in group_nodes if not node.offline) 3047 sel = cls._SshNodeSelector(group_uuid, all_nodes) 3048 3049 return (online_nodes, 3050 dict((name, sorted([i.next() for i in sel])) 3051 for name in online_nodes))
3052
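A toy illustration of the selection above, using stand-in node objects (only the attributes the method reads are modelled): with two nodes in the verified group and two other groups, each node is told to contact one peer per foreign group, cycling through the candidates so the load is spread.

class FakeNode(object):
  def __init__(self, name, group, offline=False):
    self.name = name
    self.group = group
    self.offline = offline

group_nodes = [FakeNode("a1", "grp-A"), FakeNode("a2", "grp-A")]
all_nodes = group_nodes + [FakeNode("b1", "grp-B"), FakeNode("b2", "grp-B"),
                           FakeNode("c1", "grp-C")]
(online, per_node) = LUClusterVerifyGroup._SelectSshCheckNodes(group_nodes,
                                                               "grp-A", all_nodes)
assert online == ["a1", "a2"]
assert per_node == {"a1": ["b1", "c1"], "a2": ["b2", "c1"]}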
3053 - def BuildHooksEnv(self):
3054    """Build hooks env.
3055
3056    Cluster-Verify hooks are run only in the post phase; if they fail, their
3057    output is logged in the verify output and the verification fails.
3058
3059    """
3060    env = {
3061      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3062      }
3063
3064    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3065               for node in self.my_node_info.values())
3066
3067    return env
3068
3069 - def BuildHooksNodes(self):
3070 """Build hooks nodes. 3071 3072 """ 3073 return ([], self.my_node_names)
3074
3075 - def Exec(self, feedback_fn):
3076 """Verify integrity of the node group, performing various test on nodes. 3077 3078 """ 3079 # This method has too many local variables. pylint: disable=R0914 3080 feedback_fn("* Verifying group '%s'" % self.group_info.name) 3081 3082 if not self.my_node_names: 3083 # empty node group 3084 feedback_fn("* Empty node group, skipping verification") 3085 return True 3086 3087 self.bad = False 3088 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3089 verbose = self.op.verbose 3090 self._feedback_fn = feedback_fn 3091 3092 vg_name = self.cfg.GetVGName() 3093 drbd_helper = self.cfg.GetDRBDHelper() 3094 cluster = self.cfg.GetClusterInfo() 3095 groupinfo = self.cfg.GetAllNodeGroupsInfo() 3096 hypervisors = cluster.enabled_hypervisors 3097 node_data_list = [self.my_node_info[name] for name in self.my_node_names] 3098 3099 i_non_redundant = [] # Non redundant instances 3100 i_non_a_balanced = [] # Non auto-balanced instances 3101 i_offline = 0 # Count of offline instances 3102 n_offline = 0 # Count of offline nodes 3103 n_drained = 0 # Count of nodes being drained 3104 node_vol_should = {} 3105 3106 # FIXME: verify OS list 3107 3108 # File verification 3109 filemap = _ComputeAncillaryFiles(cluster, False) 3110 3111 # do local checksums 3112 master_node = self.master_node = self.cfg.GetMasterNode() 3113 master_ip = self.cfg.GetMasterIP() 3114 3115 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names)) 3116 3117 user_scripts = [] 3118 if self.cfg.GetUseExternalMipScript(): 3119 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT) 3120 3121 node_verify_param = { 3122 constants.NV_FILELIST: 3123 utils.UniqueSequence(filename 3124 for files in filemap 3125 for filename in files), 3126 constants.NV_NODELIST: 3127 self._SelectSshCheckNodes(node_data_list, self.group_uuid, 3128 self.all_node_info.values()), 3129 constants.NV_HYPERVISOR: hypervisors, 3130 constants.NV_HVPARAMS: 3131 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), 3132 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) 3133 for node in node_data_list 3134 if not node.offline], 3135 constants.NV_INSTANCELIST: hypervisors, 3136 constants.NV_VERSION: None, 3137 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 3138 constants.NV_NODESETUP: None, 3139 constants.NV_TIME: None, 3140 constants.NV_MASTERIP: (master_node, master_ip), 3141 constants.NV_OSLIST: None, 3142 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 3143 constants.NV_USERSCRIPTS: user_scripts, 3144 } 3145 3146 if vg_name is not None: 3147 node_verify_param[constants.NV_VGLIST] = None 3148 node_verify_param[constants.NV_LVLIST] = vg_name 3149 node_verify_param[constants.NV_PVLIST] = [vg_name] 3150 3151 if drbd_helper: 3152 node_verify_param[constants.NV_DRBDLIST] = None 3153 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 3154 3155 # bridge checks 3156 # FIXME: this needs to be changed per node-group, not cluster-wide 3157 bridges = set() 3158 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 3159 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3160 bridges.add(default_nicpp[constants.NIC_LINK]) 3161 for instance in self.my_inst_info.values(): 3162 for nic in instance.nics: 3163 full_nic = cluster.SimpleFillNIC(nic.nicparams) 3164 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3165 bridges.add(full_nic[constants.NIC_LINK]) 3166 3167 if bridges: 3168 node_verify_param[constants.NV_BRIDGES] = list(bridges) 3169 3170 # Build our expected cluster state 3171 
node_image = dict((node.name, self.NodeImage(offline=node.offline, 3172 name=node.name, 3173 vm_capable=node.vm_capable)) 3174 for node in node_data_list) 3175 3176 # Gather OOB paths 3177 oob_paths = [] 3178 for node in self.all_node_info.values(): 3179 path = _SupportsOob(self.cfg, node) 3180 if path and path not in oob_paths: 3181 oob_paths.append(path) 3182 3183 if oob_paths: 3184 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 3185 3186 for instance in self.my_inst_names: 3187 inst_config = self.my_inst_info[instance] 3188 if inst_config.admin_state == constants.ADMINST_OFFLINE: 3189 i_offline += 1 3190 3191 for nname in inst_config.all_nodes: 3192 if nname not in node_image: 3193 gnode = self.NodeImage(name=nname) 3194 gnode.ghost = (nname not in self.all_node_info) 3195 node_image[nname] = gnode 3196 3197 inst_config.MapLVsByNode(node_vol_should) 3198 3199 pnode = inst_config.primary_node 3200 node_image[pnode].pinst.append(instance) 3201 3202 for snode in inst_config.secondary_nodes: 3203 nimg = node_image[snode] 3204 nimg.sinst.append(instance) 3205 if pnode not in nimg.sbp: 3206 nimg.sbp[pnode] = [] 3207 nimg.sbp[pnode].append(instance) 3208 3209 # At this point, we have the in-memory data structures complete, 3210 # except for the runtime information, which we'll gather next 3211 3212 # Due to the way our RPC system works, exact response times cannot be 3213 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 3214 # time before and after executing the request, we can at least have a time 3215 # window. 3216 nvinfo_starttime = time.time() 3217 all_nvinfo = self.rpc.call_node_verify(self.my_node_names, 3218 node_verify_param, 3219 self.cfg.GetClusterName()) 3220 nvinfo_endtime = time.time() 3221 3222 if self.extra_lv_nodes and vg_name is not None: 3223 extra_lv_nvinfo = \ 3224 self.rpc.call_node_verify(self.extra_lv_nodes, 3225 {constants.NV_LVLIST: vg_name}, 3226 self.cfg.GetClusterName()) 3227 else: 3228 extra_lv_nvinfo = {} 3229 3230 all_drbd_map = self.cfg.ComputeDRBDMap() 3231 3232 feedback_fn("* Gathering disk information (%s nodes)" % 3233 len(self.my_node_names)) 3234 instdisk = self._CollectDiskInfo(self.my_node_names, node_image, 3235 self.my_inst_info) 3236 3237 feedback_fn("* Verifying configuration file consistency") 3238 3239 # If not all nodes are being checked, we need to make sure the master node 3240 # and a non-checked vm_capable node are in the list. 
3241 absent_nodes = set(self.all_node_info).difference(self.my_node_info) 3242 if absent_nodes: 3243 vf_nvinfo = all_nvinfo.copy() 3244 vf_node_info = list(self.my_node_info.values()) 3245 additional_nodes = [] 3246 if master_node not in self.my_node_info: 3247 additional_nodes.append(master_node) 3248 vf_node_info.append(self.all_node_info[master_node]) 3249 # Add the first vm_capable node we find which is not included, 3250 # excluding the master node (which we already have) 3251 for node in absent_nodes: 3252 nodeinfo = self.all_node_info[node] 3253 if (nodeinfo.vm_capable and not nodeinfo.offline and 3254 node != master_node): 3255 additional_nodes.append(node) 3256 vf_node_info.append(self.all_node_info[node]) 3257 break 3258 key = constants.NV_FILELIST 3259 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes, 3260 {key: node_verify_param[key]}, 3261 self.cfg.GetClusterName())) 3262 else: 3263 vf_nvinfo = all_nvinfo 3264 vf_node_info = self.my_node_info.values() 3265 3266 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap) 3267 3268 feedback_fn("* Verifying node status") 3269 3270 refos_img = None 3271 3272 for node_i in node_data_list: 3273 node = node_i.name 3274 nimg = node_image[node] 3275 3276 if node_i.offline: 3277 if verbose: 3278 feedback_fn("* Skipping offline node %s" % (node,)) 3279 n_offline += 1 3280 continue 3281 3282 if node == master_node: 3283 ntype = "master" 3284 elif node_i.master_candidate: 3285 ntype = "master candidate" 3286 elif node_i.drained: 3287 ntype = "drained" 3288 n_drained += 1 3289 else: 3290 ntype = "regular" 3291 if verbose: 3292 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 3293 3294 msg = all_nvinfo[node].fail_msg 3295 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s", 3296 msg) 3297 if msg: 3298 nimg.rpc_fail = True 3299 continue 3300 3301 nresult = all_nvinfo[node].payload 3302 3303 nimg.call_ok = self._VerifyNode(node_i, nresult) 3304 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 3305 self._VerifyNodeNetwork(node_i, nresult) 3306 self._VerifyNodeUserScripts(node_i, nresult) 3307 self._VerifyOob(node_i, nresult) 3308 3309 if nimg.vm_capable: 3310 self._VerifyNodeLVM(node_i, nresult, vg_name) 3311 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper, 3312 all_drbd_map) 3313 3314 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 3315 self._UpdateNodeInstances(node_i, nresult, nimg) 3316 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 3317 self._UpdateNodeOS(node_i, nresult, nimg) 3318 3319 if not nimg.os_fail: 3320 if refos_img is None: 3321 refos_img = nimg 3322 self._VerifyNodeOS(node_i, nimg, refos_img) 3323 self._VerifyNodeBridges(node_i, nresult, bridges) 3324 3325 # Check whether all running instancies are primary for the node. (This 3326 # can no longer be done from _VerifyInstance below, since some of the 3327 # wrong instances could be from other node groups.) 
3328 non_primary_inst = set(nimg.instances).difference(nimg.pinst) 3329 3330 for inst in non_primary_inst: 3331 test = inst in self.all_inst_info 3332 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst, 3333 "instance should not run on node %s", node_i.name) 3334 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name, 3335 "node is running unknown instance %s", inst) 3336 3337 for node, result in extra_lv_nvinfo.items(): 3338 self._UpdateNodeVolumes(self.all_node_info[node], result.payload, 3339 node_image[node], vg_name) 3340 3341 feedback_fn("* Verifying instance status") 3342 for instance in self.my_inst_names: 3343 if verbose: 3344 feedback_fn("* Verifying instance %s" % instance) 3345 inst_config = self.my_inst_info[instance] 3346 self._VerifyInstance(instance, inst_config, node_image, 3347 instdisk[instance]) 3348 inst_nodes_offline = [] 3349 3350 pnode = inst_config.primary_node 3351 pnode_img = node_image[pnode] 3352 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 3353 constants.CV_ENODERPC, pnode, "instance %s, connection to" 3354 " primary node failed", instance) 3355 3356 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and 3357 pnode_img.offline, 3358 constants.CV_EINSTANCEBADNODE, instance, 3359 "instance is marked as running and lives on offline node %s", 3360 inst_config.primary_node) 3361 3362 # If the instance is non-redundant we cannot survive losing its primary 3363 # node, so we are not N+1 compliant. 3364 if inst_config.disk_template not in constants.DTS_MIRRORED: 3365 i_non_redundant.append(instance) 3366 3367 _ErrorIf(len(inst_config.secondary_nodes) > 1, 3368 constants.CV_EINSTANCELAYOUT, 3369 instance, "instance has multiple secondary nodes: %s", 3370 utils.CommaJoin(inst_config.secondary_nodes), 3371 code=self.ETYPE_WARNING) 3372 3373 if inst_config.disk_template in constants.DTS_INT_MIRROR: 3374 pnode = inst_config.primary_node 3375 instance_nodes = utils.NiceSort(inst_config.all_nodes) 3376 instance_groups = {} 3377 3378 for node in instance_nodes: 3379 instance_groups.setdefault(self.all_node_info[node].group, 3380 []).append(node) 3381 3382 pretty_list = [ 3383 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) 3384 # Sort so that we always list the primary node first. 3385 for group, nodes in sorted(instance_groups.items(), 3386 key=lambda (_, nodes): pnode in nodes, 3387 reverse=True)] 3388 3389 self._ErrorIf(len(instance_groups) > 1, 3390 constants.CV_EINSTANCESPLITGROUPS, 3391 instance, "instance has primary and secondary nodes in" 3392 " different groups: %s", utils.CommaJoin(pretty_list), 3393 code=self.ETYPE_WARNING) 3394 3395 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 3396 i_non_a_balanced.append(instance) 3397 3398 for snode in inst_config.secondary_nodes: 3399 s_img = node_image[snode] 3400 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 3401 snode, "instance %s, connection to secondary node failed", 3402 instance) 3403 3404 if s_img.offline: 3405 inst_nodes_offline.append(snode) 3406 3407 # warn that the instance lives on offline nodes 3408 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance, 3409 "instance has offline secondary node(s) %s", 3410 utils.CommaJoin(inst_nodes_offline)) 3411 # ... 
or ghost/non-vm_capable nodes 3412 for node in inst_config.all_nodes: 3413 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE, 3414 instance, "instance lives on ghost node %s", node) 3415 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE, 3416 instance, "instance lives on non-vm_capable node %s", node) 3417 3418 feedback_fn("* Verifying orphan volumes") 3419 reserved = utils.FieldSet(*cluster.reserved_lvs) 3420 3421 # We will get spurious "unknown volume" warnings if any node of this group 3422 # is secondary for an instance whose primary is in another group. To avoid 3423 # them, we find these instances and add their volumes to node_vol_should. 3424 for inst in self.all_inst_info.values(): 3425 for secondary in inst.secondary_nodes: 3426 if (secondary in self.my_node_info 3427 and inst.name not in self.my_inst_info): 3428 inst.MapLVsByNode(node_vol_should) 3429 break 3430 3431 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 3432 3433 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 3434 feedback_fn("* Verifying N+1 Memory redundancy") 3435 self._VerifyNPlusOneMemory(node_image, self.my_inst_info) 3436 3437 feedback_fn("* Other Notes") 3438 if i_non_redundant: 3439 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 3440 % len(i_non_redundant)) 3441 3442 if i_non_a_balanced: 3443 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 3444 % len(i_non_a_balanced)) 3445 3446 if i_offline: 3447 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline) 3448 3449 if n_offline: 3450 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 3451 3452 if n_drained: 3453 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 3454 3455 return not self.bad
3456
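# A minimal standalone sketch (not Ganeti code) of the non-primary instance
# check in Exec above: instances found running on a node that is not their
# primary are split into "misplaced" (known to the configuration) and
# "orphans" (unknown). All names and data below are illustrative assumptions.
def classify_unexpected_instances(running, primaries, configured):
  unexpected = set(running) - set(primaries)
  misplaced = sorted(inst for inst in unexpected if inst in configured)
  orphans = sorted(inst for inst in unexpected if inst not in configured)
  return misplaced, orphans

# "inst3" is configured but should run elsewhere, "ghost" is unknown.
print(classify_unexpected_instances(running=["inst1", "inst3", "ghost"],
                                    primaries=["inst1", "inst2"],
                                    configured={"inst1", "inst2", "inst3"}))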
3457 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3458 """Analyze the post-hooks' result 3459 3460 This method analyses the hook result, handles it, and sends some 3461 nicely-formatted feedback back to the user. 3462 3463 @param phase: one of L{constants.HOOKS_PHASE_POST} or 3464 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 3465 @param hooks_results: the results of the multi-node hooks rpc call 3466 @param feedback_fn: function used send feedback back to the caller 3467 @param lu_result: previous Exec result 3468 @return: the new Exec result, based on the previous result 3469 and hook results 3470 3471 """ 3472 # We only really run POST phase hooks, only for non-empty groups, 3473 # and are only interested in their results 3474 if not self.my_node_names: 3475 # empty node group 3476 pass 3477 elif phase == constants.HOOKS_PHASE_POST: 3478 # Used to change hooks' output to proper indentation 3479 feedback_fn("* Hooks Results") 3480 assert hooks_results, "invalid result from hooks" 3481 3482 for node_name in hooks_results: 3483 res = hooks_results[node_name] 3484 msg = res.fail_msg 3485 test = msg and not res.offline 3486 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3487 "Communication failure in hooks execution: %s", msg) 3488 if res.offline or msg: 3489 # No need to investigate payload if node is offline or gave 3490 # an error. 3491 continue 3492 for script, hkr, output in res.payload: 3493 test = hkr == constants.HKR_FAIL 3494 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name, 3495 "Script %s failed, output:", script) 3496 if test: 3497 output = self._HOOKS_INDENT_RE.sub(" ", output) 3498 feedback_fn("%s" % output) 3499 lu_result = False 3500 3501 return lu_result
3502
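# An illustrative standalone sketch (not Ganeti code) of the post-hook
# analysis in HooksCallBack above: per node, failed hook scripts are
# collected and their multi-line output is re-indented for readability.
# The "FAIL" marker and the sample data are assumptions for this example.
import re

_INDENT_RE = re.compile("^", re.M)

def summarize_hook_results(hooks_results):
  problems = []
  for node_name, scripts in sorted(hooks_results.items()):
    for script, status, output in scripts:
      if status == "FAIL":
        indented = _INDENT_RE.sub("      ", output.rstrip())
        problems.append("%s: script %s failed, output:\n%s" %
                        (node_name, script, indented))
  return problems

results = {"node1": [("10-check", "SUCCESS", ""),
                     ("20-custom", "FAIL", "disk full\nretry later")]}
print("\n".join(summarize_hook_results(results)))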
3503 3504 -class LUClusterVerifyDisks(NoHooksLU):
3505 """Verifies the cluster disks status. 3506 3507 """ 3508 REQ_BGL = False 3509
3510 - def ExpandNames(self):
3511 self.share_locks = _ShareAll() 3512 self.needed_locks = { 3513 locking.LEVEL_NODEGROUP: locking.ALL_SET, 3514 }
3515
3516 - def Exec(self, feedback_fn):
3517 group_names = self.owned_locks(locking.LEVEL_NODEGROUP) 3518 3519 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group 3520 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)] 3521 for group in group_names])
3522
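# A hedged sketch (not Ganeti code) of the fan-out pattern in
# LUClusterVerifyDisks.Exec above: cluster-wide disk verification is split
# into one single-opcode job per node group and only the job descriptions
# are returned for later submission. The dict-based "opcode" below is a
# stand-in for opcodes.OpGroupVerifyDisks.
def build_per_group_jobs(group_names):
  # Each job is a list of opcodes; here every job holds exactly one opcode.
  return [[{"OP_ID": "OP_GROUP_VERIFY_DISKS", "group_name": group}]
          for group in sorted(group_names)]

print(build_per_group_jobs(["default", "rack2"]))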
3523 3524 -class LUGroupVerifyDisks(NoHooksLU):
3525 """Verifies the status of all disks in a node group. 3526 3527 """ 3528 REQ_BGL = False 3529
3530 - def ExpandNames(self):
3531 # Raises errors.OpPrereqError on its own if group can't be found 3532 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 3533 3534 self.share_locks = _ShareAll() 3535 self.needed_locks = { 3536 locking.LEVEL_INSTANCE: [], 3537 locking.LEVEL_NODEGROUP: [], 3538 locking.LEVEL_NODE: [], 3539 }
3540
3541 - def DeclareLocks(self, level):
3542 if level == locking.LEVEL_INSTANCE: 3543 assert not self.needed_locks[locking.LEVEL_INSTANCE] 3544 3545 # Lock instances optimistically, needs verification once node and group 3546 # locks have been acquired 3547 self.needed_locks[locking.LEVEL_INSTANCE] = \ 3548 self.cfg.GetNodeGroupInstances(self.group_uuid) 3549 3550 elif level == locking.LEVEL_NODEGROUP: 3551 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 3552 3553 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 3554 set([self.group_uuid] + 3555 # Lock all groups used by instances optimistically; this requires 3556 # going via the node before it's locked, requiring verification 3557 # later on 3558 [group_uuid 3559 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE) 3560 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)]) 3561 3562 elif level == locking.LEVEL_NODE: 3563 # This will only lock the nodes in the group to be verified which contain 3564 # actual instances 3565 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 3566 self._LockInstancesNodes() 3567 3568 # Lock all nodes in group to be verified 3569 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 3570 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members 3571 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3572
3573 - def CheckPrereq(self):
3574 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 3575 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 3576 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 3577 3578 assert self.group_uuid in owned_groups 3579 3580 # Check if locked instances are still correct 3581 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 3582 3583 # Get instance information 3584 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances)) 3585 3586 # Check if node groups for locked instances are still correct 3587 _CheckInstancesNodeGroups(self.cfg, self.instances, 3588 owned_groups, owned_nodes, self.group_uuid)
3589
3590 - def Exec(self, feedback_fn):
3591 """Verify integrity of cluster disks. 3592 3593 @rtype: tuple of three items 3594 @return: a tuple of (dict of node-to-node_error, list of instances 3595 which need activate-disks, dict of instance: (node, volume) for 3596 missing volumes 3597 3598 """ 3599 res_nodes = {} 3600 res_instances = set() 3601 res_missing = {} 3602 3603 nv_dict = _MapInstanceDisksToNodes([inst 3604 for inst in self.instances.values() 3605 if inst.admin_state == constants.ADMINST_UP]) 3606 3607 if nv_dict: 3608 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) & 3609 set(self.cfg.GetVmCapableNodeList())) 3610 3611 node_lvs = self.rpc.call_lv_list(nodes, []) 3612 3613 for (node, node_res) in node_lvs.items(): 3614 if node_res.offline: 3615 continue 3616 3617 msg = node_res.fail_msg 3618 if msg: 3619 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 3620 res_nodes[node] = msg 3621 continue 3622 3623 for lv_name, (_, _, lv_online) in node_res.payload.items(): 3624 inst = nv_dict.pop((node, lv_name), None) 3625 if not (lv_online or inst is None): 3626 res_instances.add(inst) 3627 3628 # any leftover items in nv_dict are missing LVs, let's arrange the data 3629 # better 3630 for key, inst in nv_dict.iteritems(): 3631 res_missing.setdefault(inst, []).append(list(key)) 3632 3633 return (res_nodes, list(res_instances), res_missing)
3634
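# A simplified standalone sketch (not Ganeti code) of the bookkeeping in
# LUGroupVerifyDisks.Exec above: expected (node, lv_name) pairs map to their
# instance, every LV actually reported by a node is popped from that map,
# inactive LVs flag the owning instance, and whatever is left over is a
# missing volume. The sample data is invented.
def check_group_lvs(expected, reported):
  need_activation = set()
  for (node, lv_name), is_online in reported.items():
    inst = expected.pop((node, lv_name), None)
    if inst is not None and not is_online:
      need_activation.add(inst)
  missing = {}
  for (node, lv_name), inst in expected.items():
    missing.setdefault(inst, []).append([node, lv_name])
  return sorted(need_activation), missing

expected = {("node1", "xenvg/disk0"): "inst1",
            ("node2", "xenvg/disk1"): "inst2"}
reported = {("node1", "xenvg/disk0"): False}
print(check_group_lvs(expected, reported))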
3635 3636 -class LUClusterRepairDiskSizes(NoHooksLU):
3637 """Verifies the cluster disks sizes. 3638 3639 """ 3640 REQ_BGL = False 3641
3642 - def ExpandNames(self):
3643 if self.op.instances: 3644 self.wanted_names = _GetWantedInstances(self, self.op.instances) 3645 self.needed_locks = { 3646 locking.LEVEL_NODE_RES: [], 3647 locking.LEVEL_INSTANCE: self.wanted_names, 3648 } 3649 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 3650 else: 3651 self.wanted_names = None 3652 self.needed_locks = { 3653 locking.LEVEL_NODE_RES: locking.ALL_SET, 3654 locking.LEVEL_INSTANCE: locking.ALL_SET, 3655 } 3656 self.share_locks = { 3657 locking.LEVEL_NODE_RES: 1, 3658 locking.LEVEL_INSTANCE: 0, 3659 }
3660
3661 - def DeclareLocks(self, level):
3662 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None: 3663 self._LockInstancesNodes(primary_only=True, level=level)
3664
3665 - def CheckPrereq(self):
3666 """Check prerequisites. 3667 3668 This only checks the optional instance list against the existing names. 3669 3670 """ 3671 if self.wanted_names is None: 3672 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE) 3673 3674 self.wanted_instances = \ 3675 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3676
3677 - def _EnsureChildSizes(self, disk):
3678 """Ensure children of the disk have the needed disk size. 3679 3680 This is valid mainly for DRBD8 and fixes an issue where the 3681 children have smaller disk size. 3682 3683 @param disk: an L{ganeti.objects.Disk} object 3684 3685 """ 3686 if disk.dev_type == constants.LD_DRBD8: 3687 assert disk.children, "Empty children for DRBD8?" 3688 fchild = disk.children[0] 3689 mismatch = fchild.size < disk.size 3690 if mismatch: 3691 self.LogInfo("Child disk has size %d, parent %d, fixing", 3692 fchild.size, disk.size) 3693 fchild.size = disk.size 3694 3695 # and we recurse on this child only, not on the metadev 3696 return self._EnsureChildSizes(fchild) or mismatch 3697 else: 3698 return False
3699
3700 - def Exec(self, feedback_fn):
3701 """Verify the size of cluster disks. 3702 3703 """ 3704 # TODO: check child disks too 3705 # TODO: check differences in size between primary/secondary nodes 3706 per_node_disks = {} 3707 for instance in self.wanted_instances: 3708 pnode = instance.primary_node 3709 if pnode not in per_node_disks: 3710 per_node_disks[pnode] = [] 3711 for idx, disk in enumerate(instance.disks): 3712 per_node_disks[pnode].append((instance, idx, disk)) 3713 3714 assert not (frozenset(per_node_disks.keys()) - 3715 self.owned_locks(locking.LEVEL_NODE_RES)), \ 3716 "Not owning correct locks" 3717 assert not self.owned_locks(locking.LEVEL_NODE) 3718 3719 changed = [] 3720 for node, dskl in per_node_disks.items(): 3721 newl = [v[2].Copy() for v in dskl] 3722 for dsk in newl: 3723 self.cfg.SetDiskID(dsk, node) 3724 result = self.rpc.call_blockdev_getsize(node, newl) 3725 if result.fail_msg: 3726 self.LogWarning("Failure in blockdev_getsize call to node" 3727 " %s, ignoring", node) 3728 continue 3729 if len(result.payload) != len(dskl): 3730 logging.warning("Invalid result from node %s: len(dksl)=%d," 3731 " result.payload=%s", node, len(dskl), result.payload) 3732 self.LogWarning("Invalid result from node %s, ignoring node results", 3733 node) 3734 continue 3735 for ((instance, idx, disk), size) in zip(dskl, result.payload): 3736 if size is None: 3737 self.LogWarning("Disk %d of instance %s did not return size" 3738 " information, ignoring", idx, instance.name) 3739 continue 3740 if not isinstance(size, (int, long)): 3741 self.LogWarning("Disk %d of instance %s did not return valid" 3742 " size information, ignoring", idx, instance.name) 3743 continue 3744 size = size >> 20 3745 if size != disk.size: 3746 self.LogInfo("Disk %d of instance %s has mismatched size," 3747 " correcting: recorded %d, actual %d", idx, 3748 instance.name, disk.size, size) 3749 disk.size = size 3750 self.cfg.Update(instance, feedback_fn) 3751 changed.append((instance.name, idx, size)) 3752 if self._EnsureChildSizes(disk): 3753 self.cfg.Update(instance, feedback_fn) 3754 changed.append((instance.name, idx, disk.size)) 3755 return changed
3756
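# A small illustrative sketch (not Ganeti code) of the size reconciliation in
# LUClusterRepairDiskSizes.Exec above: sizes reported in bytes are converted
# to mebibytes with a 20-bit shift and compared against the recorded size;
# only mismatches are reported. The sample values are assumptions.
def find_size_mismatches(recorded_mib, reported_bytes):
  """Yield (disk, recorded MiB, actual MiB) for every mismatching disk."""
  for disk, size_bytes in sorted(reported_bytes.items()):
    actual_mib = size_bytes >> 20  # same as // (1024 * 1024)
    if actual_mib != recorded_mib.get(disk):
      yield (disk, recorded_mib.get(disk), actual_mib)

recorded = {"disk/0": 10240, "disk/1": 2048}
reported = {"disk/0": 10240 * 1024 * 1024, "disk/1": 2176 * 1024 * 1024}
print(list(find_size_mismatches(recorded, reported)))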
3757 3758 -class LUClusterRename(LogicalUnit):
3759 """Rename the cluster. 3760 3761 """ 3762 HPATH = "cluster-rename" 3763 HTYPE = constants.HTYPE_CLUSTER 3764
3765 - def BuildHooksEnv(self):
3766 """Build hooks env. 3767 3768 """ 3769 return { 3770 "OP_TARGET": self.cfg.GetClusterName(), 3771 "NEW_NAME": self.op.name, 3772 }
3773
3774 - def BuildHooksNodes(self):
3775 """Build hooks nodes. 3776 3777 """ 3778 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3779
3780 - def CheckPrereq(self):
3781 """Verify that the passed name is a valid one. 3782 3783 """ 3784 hostname = netutils.GetHostname(name=self.op.name, 3785 family=self.cfg.GetPrimaryIPFamily()) 3786 3787 new_name = hostname.name 3788 self.ip = new_ip = hostname.ip 3789 old_name = self.cfg.GetClusterName() 3790 old_ip = self.cfg.GetMasterIP() 3791 if new_name == old_name and new_ip == old_ip: 3792 raise errors.OpPrereqError("Neither the name nor the IP address of the" 3793 " cluster has changed", 3794 errors.ECODE_INVAL) 3795 if new_ip != old_ip: 3796 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 3797 raise errors.OpPrereqError("The given cluster IP address (%s) is" 3798 " reachable on the network" % 3799 new_ip, errors.ECODE_NOTUNIQUE) 3800 3801 self.op.name = new_name
3802
3803 - def Exec(self, feedback_fn):
3804 """Rename the cluster. 3805 3806 """ 3807 clustername = self.op.name 3808 new_ip = self.ip 3809 3810 # shutdown the master IP 3811 master_params = self.cfg.GetMasterNetworkParameters() 3812 ems = self.cfg.GetUseExternalMipScript() 3813 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 3814 master_params, ems) 3815 result.Raise("Could not disable the master role") 3816 3817 try: 3818 cluster = self.cfg.GetClusterInfo() 3819 cluster.cluster_name = clustername 3820 cluster.master_ip = new_ip 3821 self.cfg.Update(cluster, feedback_fn) 3822 3823 # update the known hosts file 3824 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE) 3825 node_list = self.cfg.GetOnlineNodeList() 3826 try: 3827 node_list.remove(master_params.name) 3828 except ValueError: 3829 pass 3830 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE) 3831 finally: 3832 master_params.ip = new_ip 3833 result = self.rpc.call_node_activate_master_ip(master_params.name, 3834 master_params, ems) 3835 msg = result.fail_msg 3836 if msg: 3837 self.LogWarning("Could not re-enable the master role on" 3838 " the master, please restart manually: %s", msg) 3839 3840 return clustername
3841
3842 3843 -def _ValidateNetmask(cfg, netmask):
3844 """Checks if a netmask is valid. 3845 3846 @type cfg: L{config.ConfigWriter} 3847 @param cfg: The cluster configuration 3848 @type netmask: int 3849 @param netmask: the netmask to be verified 3850 @raise errors.OpPrereqError: if the validation fails 3851 3852 """ 3853 ip_family = cfg.GetPrimaryIPFamily() 3854 try: 3855 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family) 3856 except errors.ProgrammerError: 3857 raise errors.OpPrereqError("Invalid primary ip family: %s." % 3858 ip_family) 3859 if not ipcls.ValidateNetmask(netmask): 3860 raise errors.OpPrereqError("CIDR netmask (%s) not valid" % 3861 (netmask))
3862
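# A rough standalone approximation (not the Ganeti implementation) of the
# check in _ValidateNetmask above: the value is treated as a CIDR prefix
# length and validated against the maximum allowed by the address family.
# The family-to-length mapping is an assumption based on
# socket.AF_INET/AF_INET6.
import socket

_MAX_PREFIX = {socket.AF_INET: 32, socket.AF_INET6: 128}

def validate_netmask(family, netmask):
  max_len = _MAX_PREFIX.get(family)
  if max_len is None:
    raise ValueError("Unknown address family: %s" % family)
  if not isinstance(netmask, int) or not 0 < netmask <= max_len:
    raise ValueError("CIDR netmask (%s) not valid for family %s" %
                     (netmask, family))

validate_netmask(socket.AF_INET, 24)   # accepted
validate_netmask(socket.AF_INET6, 64)  # accepted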
3863 3864 -class LUClusterSetParams(LogicalUnit):
3865 """Change the parameters of the cluster. 3866 3867 """ 3868 HPATH = "cluster-modify" 3869 HTYPE = constants.HTYPE_CLUSTER 3870 REQ_BGL = False 3871
3872 - def CheckArguments(self):
3873 """Check parameters 3874 3875 """ 3876 if self.op.uid_pool: 3877 uidpool.CheckUidPool(self.op.uid_pool) 3878 3879 if self.op.add_uids: 3880 uidpool.CheckUidPool(self.op.add_uids) 3881 3882 if self.op.remove_uids: 3883 uidpool.CheckUidPool(self.op.remove_uids) 3884 3885 if self.op.master_netmask is not None: 3886 _ValidateNetmask(self.cfg, self.op.master_netmask) 3887 3888 if self.op.diskparams: 3889 for dt_params in self.op.diskparams.values(): 3890 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 3891 try: 3892 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS) 3893 except errors.OpPrereqError, err: 3894 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 3895 errors.ECODE_INVAL)
3896
3897 - def ExpandNames(self):
3898 # FIXME: in the future maybe other cluster params won't require checking on 3899 # all nodes to be modified. 3900 self.needed_locks = { 3901 locking.LEVEL_NODE: locking.ALL_SET, 3902 locking.LEVEL_INSTANCE: locking.ALL_SET, 3903 locking.LEVEL_NODEGROUP: locking.ALL_SET, 3904 } 3905 self.share_locks = { 3906 locking.LEVEL_NODE: 1, 3907 locking.LEVEL_INSTANCE: 1, 3908 locking.LEVEL_NODEGROUP: 1, 3909 }
3910
3911 - def BuildHooksEnv(self):
3912 """Build hooks env. 3913 3914 """ 3915 return { 3916 "OP_TARGET": self.cfg.GetClusterName(), 3917 "NEW_VG_NAME": self.op.vg_name, 3918 }
3919
3920 - def BuildHooksNodes(self):
3921 """Build hooks nodes. 3922 3923 """ 3924 mn = self.cfg.GetMasterNode() 3925 return ([mn], [mn])
3926
3927 - def CheckPrereq(self):
3928 """Check prerequisites. 3929 3930 This checks whether the given params don't conflict and 3931 if the given volume group is valid. 3932 3933 """ 3934 if self.op.vg_name is not None and not self.op.vg_name: 3935 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 3936 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 3937 " instances exist", errors.ECODE_INVAL) 3938 3939 if self.op.drbd_helper is not None and not self.op.drbd_helper: 3940 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 3941 raise errors.OpPrereqError("Cannot disable drbd helper while" 3942 " drbd-based instances exist", 3943 errors.ECODE_INVAL) 3944 3945 node_list = self.owned_locks(locking.LEVEL_NODE) 3946 3947 # if vg_name not None, checks given volume group on all nodes 3948 if self.op.vg_name: 3949 vglist = self.rpc.call_vg_list(node_list) 3950 for node in node_list: 3951 msg = vglist[node].fail_msg 3952 if msg: 3953 # ignoring down node 3954 self.LogWarning("Error while gathering data on node %s" 3955 " (ignoring node): %s", node, msg) 3956 continue 3957 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 3958 self.op.vg_name, 3959 constants.MIN_VG_SIZE) 3960 if vgstatus: 3961 raise errors.OpPrereqError("Error on node '%s': %s" % 3962 (node, vgstatus), errors.ECODE_ENVIRON) 3963 3964 if self.op.drbd_helper: 3965 # checks given drbd helper on all nodes 3966 helpers = self.rpc.call_drbd_helper(node_list) 3967 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list): 3968 if ninfo.offline: 3969 self.LogInfo("Not checking drbd helper on offline node %s", node) 3970 continue 3971 msg = helpers[node].fail_msg 3972 if msg: 3973 raise errors.OpPrereqError("Error checking drbd helper on node" 3974 " '%s': %s" % (node, msg), 3975 errors.ECODE_ENVIRON) 3976 node_helper = helpers[node].payload 3977 if node_helper != self.op.drbd_helper: 3978 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 3979 (node, node_helper), errors.ECODE_ENVIRON) 3980 3981 self.cluster = cluster = self.cfg.GetClusterInfo() 3982 # validate params changes 3983 if self.op.beparams: 3984 objects.UpgradeBeParams(self.op.beparams) 3985 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 3986 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 3987 3988 if self.op.ndparams: 3989 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 3990 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams) 3991 3992 # TODO: we need a more general way to handle resetting 3993 # cluster-level parameters to default values 3994 if self.new_ndparams["oob_program"] == "": 3995 self.new_ndparams["oob_program"] = \ 3996 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM] 3997 3998 if self.op.hv_state: 3999 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 4000 self.cluster.hv_state_static) 4001 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values)) 4002 for hv, values in new_hv_state.items()) 4003 4004 if self.op.disk_state: 4005 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, 4006 self.cluster.disk_state_static) 4007 self.new_disk_state = \ 4008 dict((storage, dict((name, cluster.SimpleFillDiskState(values)) 4009 for name, values in svalues.items())) 4010 for storage, svalues in new_disk_state.items()) 4011 4012 if self.op.ipolicy: 4013 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy, 4014 group_policy=False) 4015 4016 all_instances = self.cfg.GetAllInstancesInfo().values() 4017 violations = set() 4018 for group in 
self.cfg.GetAllNodeGroupsInfo().values(): 4019 instances = frozenset([inst for inst in all_instances 4020 if compat.any(node in group.members 4021 for node in inst.all_nodes)]) 4022 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) 4023 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster, 4024 group), 4025 new_ipolicy, instances) 4026 if new: 4027 violations.update(new) 4028 4029 if violations: 4030 self.LogWarning("After the ipolicy change the following instances" 4031 " violate them: %s", 4032 utils.CommaJoin(utils.NiceSort(violations))) 4033 4034 if self.op.nicparams: 4035 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 4036 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 4037 objects.NIC.CheckParameterSyntax(self.new_nicparams) 4038 nic_errors = [] 4039 4040 # check all instances for consistency 4041 for instance in self.cfg.GetAllInstancesInfo().values(): 4042 for nic_idx, nic in enumerate(instance.nics): 4043 params_copy = copy.deepcopy(nic.nicparams) 4044 params_filled = objects.FillDict(self.new_nicparams, params_copy) 4045 4046 # check parameter syntax 4047 try: 4048 objects.NIC.CheckParameterSyntax(params_filled) 4049 except errors.ConfigurationError, err: 4050 nic_errors.append("Instance %s, nic/%d: %s" % 4051 (instance.name, nic_idx, err)) 4052 4053 # if we're moving instances to routed, check that they have an ip 4054 target_mode = params_filled[constants.NIC_MODE] 4055 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 4056 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" 4057 " address" % (instance.name, nic_idx)) 4058 if nic_errors: 4059 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 4060 "\n".join(nic_errors)) 4061 4062 # hypervisor list/parameters 4063 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 4064 if self.op.hvparams: 4065 for hv_name, hv_dict in self.op.hvparams.items(): 4066 if hv_name not in self.new_hvparams: 4067 self.new_hvparams[hv_name] = hv_dict 4068 else: 4069 self.new_hvparams[hv_name].update(hv_dict) 4070 4071 # disk template parameters 4072 self.new_diskparams = objects.FillDict(cluster.diskparams, {}) 4073 if self.op.diskparams: 4074 for dt_name, dt_params in self.op.diskparams.items(): 4075 if dt_name not in self.op.diskparams: 4076 self.new_diskparams[dt_name] = dt_params 4077 else: 4078 self.new_diskparams[dt_name].update(dt_params) 4079 4080 # os hypervisor parameters 4081 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 4082 if self.op.os_hvp: 4083 for os_name, hvs in self.op.os_hvp.items(): 4084 if os_name not in self.new_os_hvp: 4085 self.new_os_hvp[os_name] = hvs 4086 else: 4087 for hv_name, hv_dict in hvs.items(): 4088 if hv_dict is None: 4089 # Delete if it exists 4090 self.new_os_hvp[os_name].pop(hv_name, None) 4091 elif hv_name not in self.new_os_hvp[os_name]: 4092 self.new_os_hvp[os_name][hv_name] = hv_dict 4093 else: 4094 self.new_os_hvp[os_name][hv_name].update(hv_dict) 4095 4096 # os parameters 4097 self.new_osp = objects.FillDict(cluster.osparams, {}) 4098 if self.op.osparams: 4099 for os_name, osp in self.op.osparams.items(): 4100 if os_name not in self.new_osp: 4101 self.new_osp[os_name] = {} 4102 4103 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 4104 use_none=True) 4105 4106 if not self.new_osp[os_name]: 4107 # we removed all parameters 4108 del self.new_osp[os_name] 4109 else: 4110 # check the parameter validity (remote check) 4111 _CheckOSParams(self, False, 
[self.cfg.GetMasterNode()], 4112 os_name, self.new_osp[os_name]) 4113 4114 # changes to the hypervisor list 4115 if self.op.enabled_hypervisors is not None: 4116 self.hv_list = self.op.enabled_hypervisors 4117 for hv in self.hv_list: 4118 # if the hypervisor doesn't already exist in the cluster 4119 # hvparams, we initialize it to empty, and then (in both 4120 # cases) we make sure to fill the defaults, as we might not 4121 # have a complete defaults list if the hypervisor wasn't 4122 # enabled before 4123 if hv not in new_hvp: 4124 new_hvp[hv] = {} 4125 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 4126 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 4127 else: 4128 self.hv_list = cluster.enabled_hypervisors 4129 4130 if self.op.hvparams or self.op.enabled_hypervisors is not None: 4131 # either the enabled list has changed, or the parameters have, validate 4132 for hv_name, hv_params in self.new_hvparams.items(): 4133 if ((self.op.hvparams and hv_name in self.op.hvparams) or 4134 (self.op.enabled_hypervisors and 4135 hv_name in self.op.enabled_hypervisors)): 4136 # either this is a new hypervisor, or its parameters have changed 4137 hv_class = hypervisor.GetHypervisorClass(hv_name) 4138 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 4139 hv_class.CheckParameterSyntax(hv_params) 4140 _CheckHVParams(self, node_list, hv_name, hv_params) 4141 4142 if self.op.os_hvp: 4143 # no need to check any newly-enabled hypervisors, since the 4144 # defaults have already been checked in the above code-block 4145 for os_name, os_hvp in self.new_os_hvp.items(): 4146 for hv_name, hv_params in os_hvp.items(): 4147 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 4148 # we need to fill in the new os_hvp on top of the actual hv_p 4149 cluster_defaults = self.new_hvparams.get(hv_name, {}) 4150 new_osp = objects.FillDict(cluster_defaults, hv_params) 4151 hv_class = hypervisor.GetHypervisorClass(hv_name) 4152 hv_class.CheckParameterSyntax(new_osp) 4153 _CheckHVParams(self, node_list, hv_name, new_osp) 4154 4155 if self.op.default_iallocator: 4156 alloc_script = utils.FindFile(self.op.default_iallocator, 4157 constants.IALLOCATOR_SEARCH_PATH, 4158 os.path.isfile) 4159 if alloc_script is None: 4160 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 4161 " specified" % self.op.default_iallocator, 4162 errors.ECODE_INVAL)
4163
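# An illustrative sketch (not Ganeti code) of the defaults-plus-overrides
# pattern that CheckPrereq above applies to hvparams, os_hvp, osparams and
# diskparams: a copy of the current cluster-level dict is taken and the
# requested changes are layered on top, so keys that are not being changed
# keep their old values. fill_dict() is a stand-in for objects.FillDict; the
# hypervisor name and paths are sample data.
import copy

def fill_dict(defaults, overrides):
  """Return a new dict with overrides applied on top of defaults."""
  filled = copy.deepcopy(defaults)
  filled.update(overrides)
  return filled

cluster_hvparams = {"xen-pvm": {"kernel_path": "/boot/vmlinuz",
                                "root_path": "/dev/xvda1"}}
requested = {"xen-pvm": {"root_path": "/dev/xvda2"}}
new_hvparams = dict((hv, fill_dict(cluster_hvparams.get(hv, {}), params))
                    for hv, params in requested.items())
print(new_hvparams)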
4164 - def Exec(self, feedback_fn):
4165 """Change the parameters of the cluster. 4166 4167 """ 4168 if self.op.vg_name is not None: 4169 new_volume = self.op.vg_name 4170 if not new_volume: 4171 new_volume = None 4172 if new_volume != self.cfg.GetVGName(): 4173 self.cfg.SetVGName(new_volume) 4174 else: 4175 feedback_fn("Cluster LVM configuration already in desired" 4176 " state, not changing") 4177 if self.op.drbd_helper is not None: 4178 new_helper = self.op.drbd_helper 4179 if not new_helper: 4180 new_helper = None 4181 if new_helper != self.cfg.GetDRBDHelper(): 4182 self.cfg.SetDRBDHelper(new_helper) 4183 else: 4184 feedback_fn("Cluster DRBD helper already in desired state," 4185 " not changing") 4186 if self.op.hvparams: 4187 self.cluster.hvparams = self.new_hvparams 4188 if self.op.os_hvp: 4189 self.cluster.os_hvp = self.new_os_hvp 4190 if self.op.enabled_hypervisors is not None: 4191 self.cluster.hvparams = self.new_hvparams 4192 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 4193 if self.op.beparams: 4194 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 4195 if self.op.nicparams: 4196 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 4197 if self.op.ipolicy: 4198 self.cluster.ipolicy = self.new_ipolicy 4199 if self.op.osparams: 4200 self.cluster.osparams = self.new_osp 4201 if self.op.ndparams: 4202 self.cluster.ndparams = self.new_ndparams 4203 if self.op.diskparams: 4204 self.cluster.diskparams = self.new_diskparams 4205 if self.op.hv_state: 4206 self.cluster.hv_state_static = self.new_hv_state 4207 if self.op.disk_state: 4208 self.cluster.disk_state_static = self.new_disk_state 4209 4210 if self.op.candidate_pool_size is not None: 4211 self.cluster.candidate_pool_size = self.op.candidate_pool_size 4212 # we need to update the pool size here, otherwise the save will fail 4213 _AdjustCandidatePool(self, []) 4214 4215 if self.op.maintain_node_health is not None: 4216 if self.op.maintain_node_health and not constants.ENABLE_CONFD: 4217 feedback_fn("Note: CONFD was disabled at build time, node health" 4218 " maintenance is not useful (still enabling it)") 4219 self.cluster.maintain_node_health = self.op.maintain_node_health 4220 4221 if self.op.prealloc_wipe_disks is not None: 4222 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 4223 4224 if self.op.add_uids is not None: 4225 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 4226 4227 if self.op.remove_uids is not None: 4228 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 4229 4230 if self.op.uid_pool is not None: 4231 self.cluster.uid_pool = self.op.uid_pool 4232 4233 if self.op.default_iallocator is not None: 4234 self.cluster.default_iallocator = self.op.default_iallocator 4235 4236 if self.op.reserved_lvs is not None: 4237 self.cluster.reserved_lvs = self.op.reserved_lvs 4238 4239 if self.op.use_external_mip_script is not None: 4240 self.cluster.use_external_mip_script = self.op.use_external_mip_script 4241 4242 def helper_os(aname, mods, desc): 4243 desc += " OS list" 4244 lst = getattr(self.cluster, aname) 4245 for key, val in mods: 4246 if key == constants.DDM_ADD: 4247 if val in lst: 4248 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 4249 else: 4250 lst.append(val) 4251 elif key == constants.DDM_REMOVE: 4252 if val in lst: 4253 lst.remove(val) 4254 else: 4255 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 4256 else: 4257 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4258 4259 if self.op.hidden_os: 4260 helper_os("hidden_os", self.op.hidden_os, "hidden") 4261 4262 if self.op.blacklisted_os: 4263 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 4264 4265 if self.op.master_netdev: 4266 master_params = self.cfg.GetMasterNetworkParameters() 4267 ems = self.cfg.GetUseExternalMipScript() 4268 feedback_fn("Shutting down master ip on the current netdev (%s)" % 4269 self.cluster.master_netdev) 4270 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 4271 master_params, ems) 4272 result.Raise("Could not disable the master ip") 4273 feedback_fn("Changing master_netdev from %s to %s" % 4274 (master_params.netdev, self.op.master_netdev)) 4275 self.cluster.master_netdev = self.op.master_netdev 4276 4277 if self.op.master_netmask: 4278 master_params = self.cfg.GetMasterNetworkParameters() 4279 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) 4280 result = self.rpc.call_node_change_master_netmask(master_params.name, 4281 master_params.netmask, 4282 self.op.master_netmask, 4283 master_params.ip, 4284 master_params.netdev) 4285 if result.fail_msg: 4286 msg = "Could not change the master IP netmask: %s" % result.fail_msg 4287 feedback_fn(msg) 4288 4289 self.cluster.master_netmask = self.op.master_netmask 4290 4291 self.cfg.Update(self.cluster, feedback_fn) 4292 4293 if self.op.master_netdev: 4294 master_params = self.cfg.GetMasterNetworkParameters() 4295 feedback_fn("Starting the master ip on the new master netdev (%s)" % 4296 self.op.master_netdev) 4297 ems = self.cfg.GetUseExternalMipScript() 4298 result = self.rpc.call_node_activate_master_ip(master_params.name, 4299 master_params, ems) 4300 if result.fail_msg: 4301 self.LogWarning("Could not re-enable the master ip on" 4302 " the master, please restart manually: %s", 4303 result.fail_msg)
4304
4305 4306 -def _UploadHelper(lu, nodes, fname):
4307 """Helper for uploading a file and showing warnings. 4308 4309 """ 4310 if os.path.exists(fname): 4311 result = lu.rpc.call_upload_file(nodes, fname) 4312 for to_node, to_result in result.items(): 4313 msg = to_result.fail_msg 4314 if msg: 4315 msg = ("Copy of file %s to node %s failed: %s" % 4316 (fname, to_node, msg)) 4317 lu.proc.LogWarning(msg)
4318
4319 4320 -def _ComputeAncillaryFiles(cluster, redist):
4321 """Compute files external to Ganeti which need to be consistent. 4322 4323 @type redist: boolean 4324 @param redist: Whether to include files which need to be redistributed 4325 4326 """ 4327 # Compute files for all nodes 4328 files_all = set([ 4329 constants.SSH_KNOWN_HOSTS_FILE, 4330 constants.CONFD_HMAC_KEY, 4331 constants.CLUSTER_DOMAIN_SECRET_FILE, 4332 constants.SPICE_CERT_FILE, 4333 constants.SPICE_CACERT_FILE, 4334 constants.RAPI_USERS_FILE, 4335 ]) 4336 4337 if not redist: 4338 files_all.update(constants.ALL_CERT_FILES) 4339 files_all.update(ssconf.SimpleStore().GetFileList()) 4340 else: 4341 # we need to ship at least the RAPI certificate 4342 files_all.add(constants.RAPI_CERT_FILE) 4343 4344 if cluster.modify_etc_hosts: 4345 files_all.add(constants.ETC_HOSTS) 4346 4347 if cluster.use_external_mip_script: 4348 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT) 4349 4350 # Files which are optional, these must: 4351 # - be present in one other category as well 4352 # - either exist or not exist on all nodes of that category (mc, vm all) 4353 files_opt = set([ 4354 constants.RAPI_USERS_FILE, 4355 ]) 4356 4357 # Files which should only be on master candidates 4358 files_mc = set() 4359 4360 if not redist: 4361 files_mc.add(constants.CLUSTER_CONF_FILE) 4362 4363 # Files which should only be on VM-capable nodes 4364 files_vm = set(filename 4365 for hv_name in cluster.enabled_hypervisors 4366 for filename in 4367 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0]) 4368 4369 files_opt |= set(filename 4370 for hv_name in cluster.enabled_hypervisors 4371 for filename in 4372 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1]) 4373 4374 # Filenames in each category must be unique 4375 all_files_set = files_all | files_mc | files_vm 4376 assert (len(all_files_set) == 4377 sum(map(len, [files_all, files_mc, files_vm]))), \ 4378 "Found file listed in more than one file list" 4379 4380 # Optional files must be present in one other category 4381 assert all_files_set.issuperset(files_opt), \ 4382 "Optional file not in a different required list" 4383 4384 return (files_all, files_opt, files_mc, files_vm)
4385
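# A self-contained sketch (not Ganeti code) of the consistency checks at the
# end of _ComputeAncillaryFiles above: the per-category file sets must not
# overlap, and every "optional" file must also appear in one of the real
# categories. The file names are invented examples.
def check_file_categories(files_all, files_mc, files_vm, files_opt):
  all_files_set = files_all | files_mc | files_vm
  assert len(all_files_set) == len(files_all) + len(files_mc) + len(files_vm), \
    "Found file listed in more than one file list"
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"
  return all_files_set

print(sorted(check_file_categories(
  files_all=set(["known_hosts", "rapi_users"]),
  files_mc=set(["config.data"]),
  files_vm=set(["hv.conf"]),
  files_opt=set(["rapi_users"]))))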
4386 4387 -def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4388 """Distribute additional files which are part of the cluster configuration. 4389 4390 ConfigWriter takes care of distributing the config and ssconf files, but 4391 there are more files which should be distributed to all nodes. This function 4392 makes sure those are copied. 4393 4394 @param lu: calling logical unit 4395 @param additional_nodes: list of nodes not in the config to distribute to 4396 @type additional_vm: boolean 4397 @param additional_vm: whether the additional nodes are vm-capable or not 4398 4399 """ 4400 # Gather target nodes 4401 cluster = lu.cfg.GetClusterInfo() 4402 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 4403 4404 online_nodes = lu.cfg.GetOnlineNodeList() 4405 online_set = frozenset(online_nodes) 4406 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList())) 4407 4408 if additional_nodes is not None: 4409 online_nodes.extend(additional_nodes) 4410 if additional_vm: 4411 vm_nodes.extend(additional_nodes) 4412 4413 # Never distribute to master node 4414 for nodelist in [online_nodes, vm_nodes]: 4415 if master_info.name in nodelist: 4416 nodelist.remove(master_info.name) 4417 4418 # Gather file lists 4419 (files_all, _, files_mc, files_vm) = \ 4420 _ComputeAncillaryFiles(cluster, True) 4421 4422 # Never re-distribute configuration file from here 4423 assert not (constants.CLUSTER_CONF_FILE in files_all or 4424 constants.CLUSTER_CONF_FILE in files_vm) 4425 assert not files_mc, "Master candidates not handled in this function" 4426 4427 filemap = [ 4428 (online_nodes, files_all), 4429 (vm_nodes, files_vm), 4430 ] 4431 4432 # Upload the files 4433 for (node_list, files) in filemap: 4434 for fname in files: 4435 _UploadHelper(lu, node_list, fname)
4436
4437 4438 -class LUClusterRedistConf(NoHooksLU):
4439 """Force the redistribution of cluster configuration. 4440 4441 This is a very simple LU. 4442 4443 """ 4444 REQ_BGL = False 4445
4446 - def ExpandNames(self):
4447 self.needed_locks = { 4448 locking.LEVEL_NODE: locking.ALL_SET, 4449 } 4450 self.share_locks[locking.LEVEL_NODE] = 1
4451
4452 - def Exec(self, feedback_fn):
4453 """Redistribute the configuration. 4454 4455 """ 4456 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 4457 _RedistributeAncillaryFiles(self)
4458
4459 4460 -class LUClusterActivateMasterIp(NoHooksLU):
4461 """Activate the master IP on the master node. 4462 4463 """
4464 - def Exec(self, feedback_fn):
4465 """Activate the master IP. 4466 4467 """ 4468 master_params = self.cfg.GetMasterNetworkParameters() 4469 ems = self.cfg.GetUseExternalMipScript() 4470 result = self.rpc.call_node_activate_master_ip(master_params.name, 4471 master_params, ems) 4472 result.Raise("Could not activate the master IP")
4473
4474 4475 -class LUClusterDeactivateMasterIp(NoHooksLU):
4476 """Deactivate the master IP on the master node. 4477 4478 """
4479 - def Exec(self, feedback_fn):
4480 """Deactivate the master IP. 4481 4482 """ 4483 master_params = self.cfg.GetMasterNetworkParameters() 4484 ems = self.cfg.GetUseExternalMipScript() 4485 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 4486 master_params, ems) 4487 result.Raise("Could not deactivate the master IP")
4488
4489 4490 -def _WaitForSync(lu, instance, disks=None, oneshot=False):
4491 """Sleep and poll for an instance's disk to sync. 4492 4493 """ 4494 if not instance.disks or disks is not None and not disks: 4495 return True 4496 4497 disks = _ExpandCheckDisks(instance, disks) 4498 4499 if not oneshot: 4500 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name) 4501 4502 node = instance.primary_node 4503 4504 for dev in disks: 4505 lu.cfg.SetDiskID(dev, node) 4506 4507 # TODO: Convert to utils.Retry 4508 4509 retries = 0 4510 degr_retries = 10 # in seconds, as we sleep 1 second each time 4511 while True: 4512 max_time = 0 4513 done = True 4514 cumul_degraded = False 4515 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance)) 4516 msg = rstats.fail_msg 4517 if msg: 4518 lu.LogWarning("Can't get any data from node %s: %s", node, msg) 4519 retries += 1 4520 if retries >= 10: 4521 raise errors.RemoteError("Can't contact node %s for mirror data," 4522 " aborting." % node) 4523 time.sleep(6) 4524 continue 4525 rstats = rstats.payload 4526 retries = 0 4527 for i, mstat in enumerate(rstats): 4528 if mstat is None: 4529 lu.LogWarning("Can't compute data for node %s/%s", 4530 node, disks[i].iv_name) 4531 continue 4532 4533 cumul_degraded = (cumul_degraded or 4534 (mstat.is_degraded and mstat.sync_percent is None)) 4535 if mstat.sync_percent is not None: 4536 done = False 4537 if mstat.estimated_time is not None: 4538 rem_time = ("%s remaining (estimated)" % 4539 utils.FormatSeconds(mstat.estimated_time)) 4540 max_time = mstat.estimated_time 4541 else: 4542 rem_time = "no time estimate" 4543 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" % 4544 (disks[i].iv_name, mstat.sync_percent, rem_time)) 4545 4546 # if we're done but degraded, let's do a few small retries, to 4547 # make sure we see a stable and not transient situation; therefore 4548 # we force restart of the loop 4549 if (done or oneshot) and cumul_degraded and degr_retries > 0: 4550 logging.info("Degraded disks found, %d retries left", degr_retries) 4551 degr_retries -= 1 4552 time.sleep(1) 4553 continue 4554 4555 if done or oneshot: 4556 break 4557 4558 time.sleep(min(60, max_time)) 4559 4560 if done: 4561 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name) 4562 return not cumul_degraded
4563
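# A condensed standalone sketch (not Ganeti code) of the polling strategy in
# _WaitForSync above: repeated status polls, a bounded number of retries on
# poll failure, a short grace period when the mirror looks finished but is
# still flagged as degraded, and a sleep bounded by the estimated remaining
# time. get_status() is an assumed callback that returns
# (sync_percent, estimated_time, degraded) or None on failure.
import time

def wait_for_sync(get_status, poll_failures=10, degraded_grace=10):
  while True:
    status = get_status()
    if status is None:  # transient failure talking to the node
      poll_failures -= 1
      if poll_failures <= 0:
        raise RuntimeError("cannot obtain mirror status, aborting")
      time.sleep(6)
      continue
    sync_percent, est_time, degraded = status
    done = sync_percent is None  # no percentage left => sync has finished
    if done and degraded and degraded_grace > 0:
      degraded_grace -= 1  # wait out what may be a transient degradation
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(min(60, est_time or 1))

print(wait_for_sync(lambda: (None, None, False)))  # already in sync => True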
4564 4565 -def _BlockdevFind(lu, node, dev, instance):
4566 """Wrapper around call_blockdev_find to annotate diskparams. 4567 4568 @param lu: A reference to the lu object 4569 @param node: The node to call out 4570 @param dev: The device to find 4571 @param instance: The instance object the device belongs to 4572 @returns The result of the rpc call 4573 4574 """ 4575 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg) 4576 return lu.rpc.call_blockdev_find(node, disk)
4577
4578 4579 -def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4580 """Wrapper around L{_CheckDiskConsistencyInner}. 4581 4582 """ 4583 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg) 4584 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary, 4585 ldisk=ldisk)
4586
4587 4588 -def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary, 4589 ldisk=False):
4590 """Check that mirrors are not degraded. 4591 4592 @attention: The device has to be annotated already. 4593 4594 The ldisk parameter, if True, will change the test from the 4595 is_degraded attribute (which represents overall non-ok status for 4596 the device(s)) to the ldisk (representing the local storage status). 4597 4598 """ 4599 lu.cfg.SetDiskID(dev, node) 4600 4601 result = True 4602 4603 if on_primary or dev.AssembleOnSecondary(): 4604 rstats = lu.rpc.call_blockdev_find(node, dev) 4605 msg = rstats.fail_msg 4606 if msg: 4607 lu.LogWarning("Can't find disk on node %s: %s", node, msg) 4608 result = False 4609 elif not rstats.payload: 4610 lu.LogWarning("Can't find disk on node %s", node) 4611 result = False 4612 else: 4613 if ldisk: 4614 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY 4615 else: 4616 result = result and not rstats.payload.is_degraded 4617 4618 if dev.children: 4619 for child in dev.children: 4620 result = result and _CheckDiskConsistencyInner(lu, instance, child, node, 4621 on_primary) 4622 4623 return result
4624
4625 4626 -class LUOobCommand(NoHooksLU):
4627 """Logical unit for OOB handling. 4628 4629 """ 4630 REQ_BGL = False 4631 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE) 4632
4633 - def ExpandNames(self):
4634 """Gather locks we need. 4635 4636 """ 4637 if self.op.node_names: 4638 self.op.node_names = _GetWantedNodes(self, self.op.node_names) 4639 lock_names = self.op.node_names 4640 else: 4641 lock_names = locking.ALL_SET 4642 4643 self.needed_locks = { 4644 locking.LEVEL_NODE: lock_names, 4645 }
4646
4647 - def CheckPrereq(self):
4648 """Check prerequisites. 4649 4650 This checks: 4651 - the node exists in the configuration 4652 - OOB is supported 4653 4654 Any errors are signaled by raising errors.OpPrereqError. 4655 4656 """ 4657 self.nodes = [] 4658 self.master_node = self.cfg.GetMasterNode() 4659 4660 assert self.op.power_delay >= 0.0 4661 4662 if self.op.node_names: 4663 if (self.op.command in self._SKIP_MASTER and 4664 self.master_node in self.op.node_names): 4665 master_node_obj = self.cfg.GetNodeInfo(self.master_node) 4666 master_oob_handler = _SupportsOob(self.cfg, master_node_obj) 4667 4668 if master_oob_handler: 4669 additional_text = ("run '%s %s %s' if you want to operate on the" 4670 " master regardless") % (master_oob_handler, 4671 self.op.command, 4672 self.master_node) 4673 else: 4674 additional_text = "it does not support out-of-band operations" 4675 4676 raise errors.OpPrereqError(("Operating on the master node %s is not" 4677 " allowed for %s; %s") % 4678 (self.master_node, self.op.command, 4679 additional_text), errors.ECODE_INVAL) 4680 else: 4681 self.op.node_names = self.cfg.GetNodeList() 4682 if self.op.command in self._SKIP_MASTER: 4683 self.op.node_names.remove(self.master_node) 4684 4685 if self.op.command in self._SKIP_MASTER: 4686 assert self.master_node not in self.op.node_names 4687 4688 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names): 4689 if node is None: 4690 raise errors.OpPrereqError("Node %s not found" % node_name, 4691 errors.ECODE_NOENT) 4692 else: 4693 self.nodes.append(node) 4694 4695 if (not self.op.ignore_status and 4696 (self.op.command == constants.OOB_POWER_OFF and not node.offline)): 4697 raise errors.OpPrereqError(("Cannot power off node %s because it is" 4698 " not marked offline") % node_name, 4699 errors.ECODE_STATE)
4700
4701 - def Exec(self, feedback_fn):
4702 """Execute OOB and return result if we expect any. 4703 4704 """ 4705 master_node = self.master_node 4706 ret = [] 4707 4708 for idx, node in enumerate(utils.NiceSort(self.nodes, 4709 key=lambda node: node.name)): 4710 node_entry = [(constants.RS_NORMAL, node.name)] 4711 ret.append(node_entry) 4712 4713 oob_program = _SupportsOob(self.cfg, node) 4714 4715 if not oob_program: 4716 node_entry.append((constants.RS_UNAVAIL, None)) 4717 continue 4718 4719 logging.info("Executing out-of-band command '%s' using '%s' on %s", 4720 self.op.command, oob_program, node.name) 4721 result = self.rpc.call_run_oob(master_node, oob_program, 4722 self.op.command, node.name, 4723 self.op.timeout) 4724 4725 if result.fail_msg: 4726 self.LogWarning("Out-of-band RPC failed on node '%s': %s", 4727 node.name, result.fail_msg) 4728 node_entry.append((constants.RS_NODATA, None)) 4729 else: 4730 try: 4731 self._CheckPayload(result) 4732 except errors.OpExecError, err: 4733 self.LogWarning("Payload returned by node '%s' is not valid: %s", 4734 node.name, err) 4735 node_entry.append((constants.RS_NODATA, None)) 4736 else: 4737 if self.op.command == constants.OOB_HEALTH: 4738 # For health we should log important events 4739 for item, status in result.payload: 4740 if status in [constants.OOB_STATUS_WARNING, 4741 constants.OOB_STATUS_CRITICAL]: 4742 self.LogWarning("Item '%s' on node '%s' has status '%s'", 4743 item, node.name, status) 4744 4745 if self.op.command == constants.OOB_POWER_ON: 4746 node.powered = True 4747 elif self.op.command == constants.OOB_POWER_OFF: 4748 node.powered = False 4749 elif self.op.command == constants.OOB_POWER_STATUS: 4750 powered = result.payload[constants.OOB_POWER_STATUS_POWERED] 4751 if powered != node.powered: 4752 logging.warning(("Recorded power state (%s) of node '%s' does not" 4753 " match actual power state (%s)"), node.powered, 4754 node.name, powered) 4755 4756 # For configuration changing commands we should update the node 4757 if self.op.command in (constants.OOB_POWER_ON, 4758 constants.OOB_POWER_OFF): 4759 self.cfg.Update(node, feedback_fn) 4760 4761 node_entry.append((constants.RS_NORMAL, result.payload)) 4762 4763 if (self.op.command == constants.OOB_POWER_ON and 4764 idx < len(self.nodes) - 1): 4765 time.sleep(self.op.power_delay) 4766 4767 return ret
4768
4769 - def _CheckPayload(self, result):
4770 """Checks if the payload is valid. 4771 4772 @param result: RPC result 4773 @raises errors.OpExecError: If payload is not valid 4774 4775 """ 4776 errs = [] 4777 if self.op.command == constants.OOB_HEALTH: 4778 if not isinstance(result.payload, list): 4779 errs.append("command 'health' is expected to return a list but got %s" % 4780 type(result.payload)) 4781 else: 4782 for item, status in result.payload: 4783 if status not in constants.OOB_STATUSES: 4784 errs.append("health item '%s' has invalid status '%s'" % 4785 (item, status)) 4786 4787 if self.op.command == constants.OOB_POWER_STATUS: 4788 if not isinstance(result.payload, dict): 4789 errs.append("power-status is expected to return a dict but got %s" % 4790 type(result.payload)) 4791 4792 if self.op.command in [ 4793 constants.OOB_POWER_ON, 4794 constants.OOB_POWER_OFF, 4795 constants.OOB_POWER_CYCLE, 4796 ]: 4797 if result.payload is not None: 4798 errs.append("%s is expected to not return payload but got '%s'" % 4799 (self.op.command, result.payload)) 4800 4801 if errs: 4802 raise errors.OpExecError("Check of out-of-band payload failed due to %s" % 4803 utils.CommaJoin(errs))
4804
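# An illustrative standalone validator (not Ganeti code) mirroring
# _CheckPayload above: "health" must return a list of (item, status) pairs
# with known statuses, "power-status" must return a dict, and power
# on/off/cycle must return no payload at all. The command names and status
# strings are assumptions for this example.
_OOB_STATUSES = frozenset(["OK", "warning", "critical", "unknown"])

def check_oob_payload(command, payload):
  errs = []
  if command == "health":
    if not isinstance(payload, list):
      errs.append("health is expected to return a list, got %s" %
                  type(payload))
    else:
      errs.extend("item %r has invalid status %r" % (item, status)
                  for item, status in payload
                  if status not in _OOB_STATUSES)
  elif command == "power-status":
    if not isinstance(payload, dict):
      errs.append("power-status is expected to return a dict, got %s" %
                  type(payload))
  elif command in ("power-on", "power-off", "power-cycle"):
    if payload is not None:
      errs.append("%s is expected to return no payload, got %r" %
                  (command, payload))
  return errs

print(check_oob_payload("health", [("PSU1", "OK"), ("FAN2", "broken")]))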
4805 4806 -class _OsQuery(_QueryBase):
4807 FIELDS = query.OS_FIELDS 4808
4809 - def ExpandNames(self, lu):
4810 # Lock all nodes in shared mode 4811 # Temporary removal of locks, should be reverted later 4812 # TODO: reintroduce locks when they are lighter-weight 4813 lu.needed_locks = {} 4814 #self.share_locks[locking.LEVEL_NODE] = 1 4815 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 4816 4817 # The following variables interact with _QueryBase._GetNames 4818 if self.names: 4819 self.wanted = self.names 4820 else: 4821 self.wanted = locking.ALL_SET 4822 4823 self.do_locking = self.use_locking
4824
4825 - def DeclareLocks(self, lu, level):
4826 pass
4827 4828 @staticmethod
4829 - def _DiagnoseByOS(rlist):
4830 """Remaps a per-node return list into an a per-os per-node dictionary 4831 4832 @param rlist: a map with node names as keys and OS objects as values 4833 4834 @rtype: dict 4835 @return: a dictionary with osnames as keys and as value another 4836 map, with nodes as keys and tuples of (path, status, diagnose, 4837 variants, parameters, api_versions) as values, eg:: 4838 4839 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []), 4840 (/srv/..., False, "invalid api")], 4841 "node2": [(/srv/..., True, "", [], [])]} 4842 } 4843 4844 """ 4845 all_os = {} 4846 # we build here the list of nodes that didn't fail the RPC (at RPC 4847 # level), so that nodes with a non-responding node daemon don't 4848 # make all OSes invalid 4849 good_nodes = [node_name for node_name in rlist 4850 if not rlist[node_name].fail_msg] 4851 for node_name, nr in rlist.items(): 4852 if nr.fail_msg or not nr.payload: 4853 continue 4854 for (name, path, status, diagnose, variants, 4855 params, api_versions) in nr.payload: 4856 if name not in all_os: 4857 # build a list of nodes for this os containing empty lists 4858 # for each node in node_list 4859 all_os[name] = {} 4860 for nname in good_nodes: 4861 all_os[name][nname] = [] 4862 # convert params from [name, help] to (name, help) 4863 params = [tuple(v) for v in params] 4864 all_os[name][node_name].append((path, status, diagnose, 4865 variants, params, api_versions)) 4866 return all_os
4867
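# A simplified sketch (not Ganeti code) of the remapping done by
# _DiagnoseByOS above: the per-node list of discovered OS definitions is
# turned inside out into {os_name: {node: [entries]}}, pre-seeding every
# responsive node with an empty list so that missing entries stay visible.
# The input data and the (name, path, valid) layout are invented.
def remap_by_os(per_node):
  all_os = {}
  good_nodes = [node for node, oses in per_node.items() if oses is not None]
  for node, oses in per_node.items():
    if not oses:
      continue
    for name, path, valid in oses:
      per_os = all_os.setdefault(name, dict((n, []) for n in good_nodes))
      per_os[node].append((path, valid))
  return all_os

per_node = {"node1": [("debian-image", "/srv/os/debian", True)],
            "node2": [("debian-image", "/srv/os/debian", False)],
            "node3": None}  # node daemon did not answer
print(remap_by_os(per_node))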
4868 - def _GetQueryData(self, lu):
4869 """Computes the list of nodes and their attributes. 4870 4871 """ 4872 # Locking is not used 4873 assert not (compat.any(lu.glm.is_owned(level) 4874 for level in locking.LEVELS 4875 if level != locking.LEVEL_CLUSTER) or 4876 self.do_locking or self.use_locking) 4877 4878 valid_nodes = [node.name 4879 for node in lu.cfg.GetAllNodesInfo().values() 4880 if not node.offline and node.vm_capable] 4881 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes)) 4882 cluster = lu.cfg.GetClusterInfo() 4883 4884 data = {} 4885 4886 for (os_name, os_data) in pol.items(): 4887 info = query.OsInfo(name=os_name, valid=True, node_status=os_data, 4888 hidden=(os_name in cluster.hidden_os), 4889 blacklisted=(os_name in cluster.blacklisted_os)) 4890 4891 variants = set() 4892 parameters = set() 4893 api_versions = set() 4894 4895 for idx, osl in enumerate(os_data.values()): 4896 info.valid = bool(info.valid and osl and osl[0][1]) 4897 if not info.valid: 4898 break 4899 4900 (node_variants, node_params, node_api) = osl[0][3:6] 4901 if idx == 0: 4902 # First entry 4903 variants.update(node_variants) 4904 parameters.update(node_params) 4905 api_versions.update(node_api) 4906 else: 4907 # Filter out inconsistent values 4908 variants.intersection_update(node_variants) 4909 parameters.intersection_update(node_params) 4910 api_versions.intersection_update(node_api) 4911 4912 info.variants = list(variants) 4913 info.parameters = list(parameters) 4914 info.api_versions = list(api_versions) 4915 4916 data[os_name] = info 4917 4918 # Prepare data in requested order 4919 return [data[name] for name in self._GetNames(lu, pol.keys(), None) 4920 if name in data]
4921
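# A short sketch (not Ganeti code) of the aggregation in _GetQueryData above:
# the first node seeds the variant/parameter/API sets and every further node
# narrows them by intersection, so only values all nodes agree on survive.
# The sample data is invented.
def consistent_values(per_node_values):
  agreed = None
  for values in per_node_values:
    if agreed is None:
      agreed = set(values)
    else:
      agreed &= set(values)
  return sorted(agreed or [])

print(consistent_values([["default", "minimal"], ["default"]]))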
4922 4923 -class LUOsDiagnose(NoHooksLU):
4924 """Logical unit for OS diagnose/query. 4925 4926 """ 4927 REQ_BGL = False 4928 4929 @staticmethod
4930 - def _BuildFilter(fields, names):
4931 """Builds a filter for querying OSes. 4932 4933 """ 4934 name_filter = qlang.MakeSimpleFilter("name", names) 4935 4936 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the 4937 # respective field is not requested 4938 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]] 4939 for fname in ["hidden", "blacklisted"] 4940 if fname not in fields] 4941 if "valid" not in fields: 4942 status_filter.append([qlang.OP_TRUE, "valid"]) 4943 4944 if status_filter: 4945 status_filter.insert(0, qlang.OP_AND) 4946 else: 4947 status_filter = None 4948 4949 if name_filter and status_filter: 4950 return [qlang.OP_AND, name_filter, status_filter] 4951 elif name_filter: 4952 return name_filter 4953 else: 4954 return status_filter
4955
4956 - def CheckArguments(self):
4957 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names), 4958 self.op.output_fields, False)
4959
4960 - def ExpandNames(self):
4961 self.oq.ExpandNames(self)
4962
4963 - def Exec(self, feedback_fn):
4964 return self.oq.OldStyleQuery(self)
4965
4966 4967 -class LUNodeRemove(LogicalUnit):
4968 """Logical unit for removing a node. 4969 4970 """ 4971 HPATH = "node-remove" 4972 HTYPE = constants.HTYPE_NODE 4973
4974 - def BuildHooksEnv(self):
4975 """Build hooks env. 4976 4977 """ 4978 return { 4979 "OP_TARGET": self.op.node_name, 4980 "NODE_NAME": self.op.node_name, 4981 }
4982
4983 - def BuildHooksNodes(self):
4984 """Build hooks nodes. 4985 4986 This doesn't run on the target node in the pre phase as a failed 4987 node would then be impossible to remove. 4988 4989 """ 4990 all_nodes = self.cfg.GetNodeList() 4991 try: 4992 all_nodes.remove(self.op.node_name) 4993 except ValueError: 4994 pass 4995 return (all_nodes, all_nodes)
4996
4997 - def CheckPrereq(self):
4998 """Check prerequisites. 4999 5000 This checks: 5001 - the node exists in the configuration 5002 - it does not have primary or secondary instances 5003 - it's not the master 5004 5005 Any errors are signaled by raising errors.OpPrereqError. 5006 5007 """ 5008 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5009 node = self.cfg.GetNodeInfo(self.op.node_name) 5010 assert node is not None 5011 5012 masternode = self.cfg.GetMasterNode() 5013 if node.name == masternode: 5014 raise errors.OpPrereqError("Node is the master node, failover to another" 5015 " node is required", errors.ECODE_INVAL) 5016 5017 for instance_name, instance in self.cfg.GetAllInstancesInfo().items(): 5018 if node.name in instance.all_nodes: 5019 raise errors.OpPrereqError("Instance %s is still running on the node," 5020 " please remove first" % instance_name, 5021 errors.ECODE_INVAL) 5022 self.op.node_name = node.name 5023 self.node = node
5024
5025 - def Exec(self, feedback_fn):
5026 """Removes the node from the cluster. 5027 5028 """ 5029 node = self.node 5030 logging.info("Stopping the node daemon and removing configs from node %s", 5031 node.name) 5032 5033 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup 5034 5035 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ 5036 "Not owning BGL" 5037 5038 # Promote nodes to master candidate as needed 5039 _AdjustCandidatePool(self, exceptions=[node.name]) 5040 self.context.RemoveNode(node.name) 5041 5042 # Run post hooks on the node before it's removed 5043 _RunPostHook(self, node.name) 5044 5045 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup) 5046 msg = result.fail_msg 5047 if msg: 5048 self.LogWarning("Errors encountered on the remote node while leaving" 5049 " the cluster: %s", msg) 5050 5051 # Remove node from our /etc/hosts 5052 if self.cfg.GetClusterInfo().modify_etc_hosts: 5053 master_node = self.cfg.GetMasterNode() 5054 result = self.rpc.call_etc_hosts_modify(master_node, 5055 constants.ETC_HOSTS_REMOVE, 5056 node.name, None) 5057 result.Raise("Can't update hosts file with new host data") 5058 _RedistributeAncillaryFiles(self)
5059
5060 5061 -class _NodeQuery(_QueryBase):
5062 FIELDS = query.NODE_FIELDS 5063
5064 - def ExpandNames(self, lu):
5065 lu.needed_locks = {} 5066 lu.share_locks = _ShareAll() 5067 5068 if self.names: 5069 self.wanted = _GetWantedNodes(lu, self.names) 5070 else: 5071 self.wanted = locking.ALL_SET 5072 5073 self.do_locking = (self.use_locking and 5074 query.NQ_LIVE in self.requested_data) 5075 5076 if self.do_locking: 5077 # If any non-static field is requested we need to lock the nodes 5078 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5079
5080 - def DeclareLocks(self, lu, level):
5081 pass
5082
5083 - def _GetQueryData(self, lu):
5084 """Computes the list of nodes and their attributes. 5085 5086 """ 5087 all_info = lu.cfg.GetAllNodesInfo() 5088 5089 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE) 5090 5091 # Gather data as requested 5092 if query.NQ_LIVE in self.requested_data: 5093 # filter out non-vm_capable nodes 5094 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable] 5095 5096 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()], 5097 [lu.cfg.GetHypervisorType()]) 5098 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload)) 5099 for (name, nresult) in node_data.items() 5100 if not nresult.fail_msg and nresult.payload) 5101 else: 5102 live_data = None 5103 5104 if query.NQ_INST in self.requested_data: 5105 node_to_primary = dict([(name, set()) for name in nodenames]) 5106 node_to_secondary = dict([(name, set()) for name in nodenames]) 5107 5108 inst_data = lu.cfg.GetAllInstancesInfo() 5109 5110 for inst in inst_data.values(): 5111 if inst.primary_node in node_to_primary: 5112 node_to_primary[inst.primary_node].add(inst.name) 5113 for secnode in inst.secondary_nodes: 5114 if secnode in node_to_secondary: 5115 node_to_secondary[secnode].add(inst.name) 5116 else: 5117 node_to_primary = None 5118 node_to_secondary = None 5119 5120 if query.NQ_OOB in self.requested_data: 5121 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node))) 5122 for name, node in all_info.iteritems()) 5123 else: 5124 oob_support = None 5125 5126 if query.NQ_GROUP in self.requested_data: 5127 groups = lu.cfg.GetAllNodeGroupsInfo() 5128 else: 5129 groups = {} 5130 5131 return query.NodeQueryData([all_info[name] for name in nodenames], 5132 live_data, lu.cfg.GetMasterNode(), 5133 node_to_primary, node_to_secondary, groups, 5134 oob_support, lu.cfg.GetClusterInfo())
5135
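For the NQ_INST branch above, a small self-contained sketch of the reverse index from nodes to instance names, using hypothetical instances with only the attributes the code touches:

import collections
# Stand-in for objects.Instance; names and topology are invented.
Inst = collections.namedtuple("Inst", ["name", "primary_node",
                                       "secondary_nodes"])

instances = [Inst("web1", "node1", ["node2"]),
             Inst("db1", "node2", [])]
nodenames = ["node1", "node2"]

node_to_primary = dict((name, set()) for name in nodenames)
node_to_secondary = dict((name, set()) for name in nodenames)
for inst in instances:
  if inst.primary_node in node_to_primary:
    node_to_primary[inst.primary_node].add(inst.name)
  for secnode in inst.secondary_nodes:
    if secnode in node_to_secondary:
      node_to_secondary[secnode].add(inst.name)

# node_to_primary   == {"node1": set(["web1"]), "node2": set(["db1"])}
# node_to_secondary == {"node1": set(), "node2": set(["web1"])}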
5136 5137 -class LUNodeQuery(NoHooksLU):
5138 """Logical unit for querying nodes. 5139 5140 """ 5141 # pylint: disable=W0142 5142 REQ_BGL = False 5143
5144 - def CheckArguments(self):
5145 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names), 5146 self.op.output_fields, self.op.use_locking)
5147
5148 - def ExpandNames(self):
5149 self.nq.ExpandNames(self)
5150
5151 - def DeclareLocks(self, level):
5152 self.nq.DeclareLocks(self, level)
5153
5154 - def Exec(self, feedback_fn):
5155 return self.nq.OldStyleQuery(self)
5156
5157 5158 -class LUNodeQueryvols(NoHooksLU):
5159 """Logical unit for getting volumes on node(s). 5160 5161 """ 5162 REQ_BGL = False 5163 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") 5164 _FIELDS_STATIC = utils.FieldSet("node") 5165
5166 - def CheckArguments(self):
5167 _CheckOutputFields(static=self._FIELDS_STATIC, 5168 dynamic=self._FIELDS_DYNAMIC, 5169 selected=self.op.output_fields)
5170
5171 - def ExpandNames(self):
5172 self.share_locks = _ShareAll() 5173 self.needed_locks = {} 5174 5175 if not self.op.nodes: 5176 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 5177 else: 5178 self.needed_locks[locking.LEVEL_NODE] = \ 5179 _GetWantedNodes(self, self.op.nodes)
5180
5181 - def Exec(self, feedback_fn):
5182 """Computes the list of nodes and their attributes. 5183 5184 """ 5185 nodenames = self.owned_locks(locking.LEVEL_NODE) 5186 volumes = self.rpc.call_node_volumes(nodenames) 5187 5188 ilist = self.cfg.GetAllInstancesInfo() 5189 vol2inst = _MapInstanceDisksToNodes(ilist.values()) 5190 5191 output = [] 5192 for node in nodenames: 5193 nresult = volumes[node] 5194 if nresult.offline: 5195 continue 5196 msg = nresult.fail_msg 5197 if msg: 5198 self.LogWarning("Can't compute volume data on node %s: %s", node, msg) 5199 continue 5200 5201 node_vols = sorted(nresult.payload, 5202 key=operator.itemgetter("dev")) 5203 5204 for vol in node_vols: 5205 node_output = [] 5206 for field in self.op.output_fields: 5207 if field == "node": 5208 val = node 5209 elif field == "phys": 5210 val = vol["dev"] 5211 elif field == "vg": 5212 val = vol["vg"] 5213 elif field == "name": 5214 val = vol["name"] 5215 elif field == "size": 5216 val = int(float(vol["size"])) 5217 elif field == "instance": 5218 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-") 5219 else: 5220 raise errors.ParameterError(field) 5221 node_output.append(str(val)) 5222 5223 output.append(node_output) 5224 5225 return output
5226
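The "instance" output field above is resolved through the mapping built by _MapInstanceDisksToNodes (defined elsewhere in this module); a sketch of the lookup, with invented names:

# The keys are (node name, "<vg>/<lv name>") pairs, matching the lookup in
# Exec above; the concrete node, VG and volume names are made up.
vol2inst = {
  ("node1", "xenvg/0a1b2c3d-disk0"): "web1",
  }
node = "node1"
vol = {"vg": "xenvg", "name": "0a1b2c3d-disk0"}
owner = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
# owner == "web1"; volumes not backing any instance disk show up as "-".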
5227 5228 -class LUNodeQueryStorage(NoHooksLU):
5229 """Logical unit for getting information on storage units on node(s). 5230 5231 """ 5232 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) 5233 REQ_BGL = False 5234
5235 - def CheckArguments(self):
5236 _CheckOutputFields(static=self._FIELDS_STATIC, 5237 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS), 5238 selected=self.op.output_fields)
5239
5240 - def ExpandNames(self):
5241 self.share_locks = _ShareAll() 5242 self.needed_locks = {} 5243 5244 if self.op.nodes: 5245 self.needed_locks[locking.LEVEL_NODE] = \ 5246 _GetWantedNodes(self, self.op.nodes) 5247 else: 5248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5249
5250 - def Exec(self, feedback_fn):
5251 """Computes the list of nodes and their attributes. 5252 5253 """ 5254 self.nodes = self.owned_locks(locking.LEVEL_NODE) 5255 5256 # Always get name to sort by 5257 if constants.SF_NAME in self.op.output_fields: 5258 fields = self.op.output_fields[:] 5259 else: 5260 fields = [constants.SF_NAME] + self.op.output_fields 5261 5262 # Never ask for node or type as it's only known to the LU 5263 for extra in [constants.SF_NODE, constants.SF_TYPE]: 5264 while extra in fields: 5265 fields.remove(extra) 5266 5267 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)]) 5268 name_idx = field_idx[constants.SF_NAME] 5269 5270 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 5271 data = self.rpc.call_storage_list(self.nodes, 5272 self.op.storage_type, st_args, 5273 self.op.name, fields) 5274 5275 result = [] 5276 5277 for node in utils.NiceSort(self.nodes): 5278 nresult = data[node] 5279 if nresult.offline: 5280 continue 5281 5282 msg = nresult.fail_msg 5283 if msg: 5284 self.LogWarning("Can't get storage data from node %s: %s", node, msg) 5285 continue 5286 5287 rows = dict([(row[name_idx], row) for row in nresult.payload]) 5288 5289 for name in utils.NiceSort(rows.keys()): 5290 row = rows[name] 5291 5292 out = [] 5293 5294 for field in self.op.output_fields: 5295 if field == constants.SF_NODE: 5296 val = node 5297 elif field == constants.SF_TYPE: 5298 val = self.op.storage_type 5299 elif field in field_idx: 5300 val = row[field_idx[field]] 5301 else: 5302 raise errors.ParameterError(field) 5303 5304 out.append(val) 5305 5306 result.append(out) 5307 5308 return result
5309
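A short sketch of the bookkeeping in Exec above: the name column is always fetched so the rows can be sorted by it, and field_idx maps field names back to row positions (the field names and rows below are invented):

output_fields = ["size", "used"]           # hypothetical request
fields = ["name"] + output_fields          # SF_NAME is prepended
field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
# field_idx == {"name": 0, "size": 1, "used": 2}

rows = {"lv-a": ["lv-a", 1024, True], "lv-b": ["lv-b", 512, False]}
for name in sorted(rows):                  # stands in for utils.NiceSort
  row = rows[name]
  print([row[field_idx[f]] for f in output_fields])
# prints [1024, True] then [512, False]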
5310 5311 -class _InstanceQuery(_QueryBase):
5312 FIELDS = query.INSTANCE_FIELDS 5313
5314 - def ExpandNames(self, lu):
5315 lu.needed_locks = {} 5316 lu.share_locks = _ShareAll() 5317 5318 if self.names: 5319 self.wanted = _GetWantedInstances(lu, self.names) 5320 else: 5321 self.wanted = locking.ALL_SET 5322 5323 self.do_locking = (self.use_locking and 5324 query.IQ_LIVE in self.requested_data) 5325 if self.do_locking: 5326 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted 5327 lu.needed_locks[locking.LEVEL_NODEGROUP] = [] 5328 lu.needed_locks[locking.LEVEL_NODE] = [] 5329 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 5330 5331 self.do_grouplocks = (self.do_locking and 5332 query.IQ_NODES in self.requested_data)
5333
5334 - def DeclareLocks(self, lu, level):
5335 if self.do_locking: 5336 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks: 5337 assert not lu.needed_locks[locking.LEVEL_NODEGROUP] 5338 5339 # Lock all groups used by instances optimistically; this requires going 5340 # via the node before it's locked, requiring verification later on 5341 lu.needed_locks[locking.LEVEL_NODEGROUP] = \ 5342 set(group_uuid 5343 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE) 5344 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name)) 5345 elif level == locking.LEVEL_NODE: 5346 lu._LockInstancesNodes() # pylint: disable=W0212
5347 5348 @staticmethod
5349 - def _CheckGroupLocks(lu):
5350 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE)) 5351 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP)) 5352 5353 # Check if node groups for locked instances are still correct 5354 for instance_name in owned_instances: 5355 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5356
5357 - def _GetQueryData(self, lu):
5358 """Computes the list of instances and their attributes. 5359 5360 """ 5361 if self.do_grouplocks: 5362 self._CheckGroupLocks(lu) 5363 5364 cluster = lu.cfg.GetClusterInfo() 5365 all_info = lu.cfg.GetAllInstancesInfo() 5366 5367 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE) 5368 5369 instance_list = [all_info[name] for name in instance_names] 5370 nodes = frozenset(itertools.chain(*(inst.all_nodes 5371 for inst in instance_list))) 5372 hv_list = list(set([inst.hypervisor for inst in instance_list])) 5373 bad_nodes = [] 5374 offline_nodes = [] 5375 wrongnode_inst = set() 5376 5377 # Gather data as requested 5378 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]): 5379 live_data = {} 5380 node_data = lu.rpc.call_all_instances_info(nodes, hv_list) 5381 for name in nodes: 5382 result = node_data[name] 5383 if result.offline: 5384 # offline nodes will be in both lists 5385 assert result.fail_msg 5386 offline_nodes.append(name) 5387 if result.fail_msg: 5388 bad_nodes.append(name) 5389 elif result.payload: 5390 for inst in result.payload: 5391 if inst in all_info: 5392 if all_info[inst].primary_node == name: 5393 live_data.update(result.payload) 5394 else: 5395 wrongnode_inst.add(inst) 5396 else: 5397 # orphan instance; we don't list it here as we don't 5398 # handle this case yet in the output of instance listing 5399 logging.warning("Orphan instance '%s' found on node %s", 5400 inst, name) 5401 # else no instance is alive 5402 else: 5403 live_data = {} 5404 5405 if query.IQ_DISKUSAGE in self.requested_data: 5406 disk_usage = dict((inst.name, 5407 _ComputeDiskSize(inst.disk_template, 5408 [{constants.IDISK_SIZE: disk.size} 5409 for disk in inst.disks])) 5410 for inst in instance_list) 5411 else: 5412 disk_usage = None 5413 5414 if query.IQ_CONSOLE in self.requested_data: 5415 consinfo = {} 5416 for inst in instance_list: 5417 if inst.name in live_data: 5418 # Instance is running 5419 consinfo[inst.name] = _GetInstanceConsole(cluster, inst) 5420 else: 5421 consinfo[inst.name] = None 5422 assert set(consinfo.keys()) == set(instance_names) 5423 else: 5424 consinfo = None 5425 5426 if query.IQ_NODES in self.requested_data: 5427 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"), 5428 instance_list))) 5429 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names)) 5430 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid)) 5431 for uuid in set(map(operator.attrgetter("group"), 5432 nodes.values()))) 5433 else: 5434 nodes = None 5435 groups = None 5436 5437 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(), 5438 disk_usage, offline_nodes, bad_nodes, 5439 live_data, wrongnode_inst, consinfo, 5440 nodes, groups)
5441
5442 5443 -class LUQuery(NoHooksLU):
5444 """Query for resources/items of a certain kind. 5445 5446 """ 5447 # pylint: disable=W0142 5448 REQ_BGL = False 5449
5450 - def CheckArguments(self):
5451 qcls = _GetQueryImplementation(self.op.what) 5452 5453 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5454
5455 - def ExpandNames(self):
5456 self.impl.ExpandNames(self)
5457
5458 - def DeclareLocks(self, level):
5459 self.impl.DeclareLocks(self, level)
5460
5461 - def Exec(self, feedback_fn):
5462 return self.impl.NewStyleQuery(self)
5463
5464 5465 -class LUQueryFields(NoHooksLU):
5466 """Query for resources/items of a certain kind. 5467 5468 """ 5469 # pylint: disable=W0142 5470 REQ_BGL = False 5471
5472 - def CheckArguments(self):
5473 self.qcls = _GetQueryImplementation(self.op.what)
5474
5475 - def ExpandNames(self):
5476 self.needed_locks = {}
5477
5478 - def Exec(self, feedback_fn):
5479 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5480
5481 5482 -class LUNodeModifyStorage(NoHooksLU):
5483 """Logical unit for modifying a storage volume on a node. 5484 5485 """ 5486 REQ_BGL = False 5487
5488 - def CheckArguments(self):
5489 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5490 5491 storage_type = self.op.storage_type 5492 5493 try: 5494 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type] 5495 except KeyError: 5496 raise errors.OpPrereqError("Storage units of type '%s' can not be" 5497 " modified" % storage_type, 5498 errors.ECODE_INVAL) 5499 5500 diff = set(self.op.changes.keys()) - modifiable 5501 if diff: 5502 raise errors.OpPrereqError("The following fields can not be modified for" 5503 " storage units of type '%s': %r" % 5504 (storage_type, list(diff)), 5505 errors.ECODE_INVAL)
5506
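A tiny illustration of the set arithmetic in CheckArguments above, with made-up field names standing in for constants.MODIFIABLE_STORAGE_FIELDS:

modifiable = frozenset(["allocatable"])         # hypothetical
changes = {"allocatable": False, "size": 1024}  # hypothetical request
diff = set(changes.keys()) - modifiable
# diff == set(["size"]), so the LU would reject the request with
# OpPrereqError instead of passing it on to call_storage_modify.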
5507 - def ExpandNames(self):
5508 self.needed_locks = { 5509 locking.LEVEL_NODE: self.op.node_name, 5510 }
5511
5512 - def Exec(self, feedback_fn):
5513 """Computes the list of nodes and their attributes. 5514 5515 """ 5516 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 5517 result = self.rpc.call_storage_modify(self.op.node_name, 5518 self.op.storage_type, st_args, 5519 self.op.name, self.op.changes) 5520 result.Raise("Failed to modify storage unit '%s' on %s" % 5521 (self.op.name, self.op.node_name))
5522
5523 5524 -class LUNodeAdd(LogicalUnit):
5525 """Logical unit for adding node to the cluster. 5526 5527 """ 5528 HPATH = "node-add" 5529 HTYPE = constants.HTYPE_NODE 5530 _NFLAGS = ["master_capable", "vm_capable"] 5531
5532 - def CheckArguments(self):
5533 self.primary_ip_family = self.cfg.GetPrimaryIPFamily() 5534 # validate/normalize the node name 5535 self.hostname = netutils.GetHostname(name=self.op.node_name, 5536 family=self.primary_ip_family) 5537 self.op.node_name = self.hostname.name 5538 5539 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode(): 5540 raise errors.OpPrereqError("Cannot readd the master node", 5541 errors.ECODE_STATE) 5542 5543 if self.op.readd and self.op.group: 5544 raise errors.OpPrereqError("Cannot pass a node group when a node is" 5545 " being readded", errors.ECODE_INVAL)
5546
5547 - def BuildHooksEnv(self):
5548 """Build hooks env. 5549 5550 This will run on all nodes before, and on all nodes + the new node after. 5551 5552 """ 5553 return { 5554 "OP_TARGET": self.op.node_name, 5555 "NODE_NAME": self.op.node_name, 5556 "NODE_PIP": self.op.primary_ip, 5557 "NODE_SIP": self.op.secondary_ip, 5558 "MASTER_CAPABLE": str(self.op.master_capable), 5559 "VM_CAPABLE": str(self.op.vm_capable), 5560 }
5561
5562 - def BuildHooksNodes(self):
5563 """Build hooks nodes. 5564 5565 """ 5566 # Exclude added node 5567 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name])) 5568 post_nodes = pre_nodes + [self.op.node_name, ] 5569 5570 return (pre_nodes, post_nodes)
5571
5572 - def CheckPrereq(self):
5573 """Check prerequisites. 5574 5575 This checks: 5576 - the new node is not already in the config 5577 - it is resolvable 5578 - its parameters (single/dual homed) matches the cluster 5579 5580 Any errors are signaled by raising errors.OpPrereqError. 5581 5582 """ 5583 cfg = self.cfg 5584 hostname = self.hostname 5585 node = hostname.name 5586 primary_ip = self.op.primary_ip = hostname.ip 5587 if self.op.secondary_ip is None: 5588 if self.primary_ip_family == netutils.IP6Address.family: 5589 raise errors.OpPrereqError("When using a IPv6 primary address, a valid" 5590 " IPv4 address must be given as secondary", 5591 errors.ECODE_INVAL) 5592 self.op.secondary_ip = primary_ip 5593 5594 secondary_ip = self.op.secondary_ip 5595 if not netutils.IP4Address.IsValid(secondary_ip): 5596 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 5597 " address" % secondary_ip, errors.ECODE_INVAL) 5598 5599 node_list = cfg.GetNodeList() 5600 if not self.op.readd and node in node_list: 5601 raise errors.OpPrereqError("Node %s is already in the configuration" % 5602 node, errors.ECODE_EXISTS) 5603 elif self.op.readd and node not in node_list: 5604 raise errors.OpPrereqError("Node %s is not in the configuration" % node, 5605 errors.ECODE_NOENT) 5606 5607 self.changed_primary_ip = False 5608 5609 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list): 5610 if self.op.readd and node == existing_node_name: 5611 if existing_node.secondary_ip != secondary_ip: 5612 raise errors.OpPrereqError("Readded node doesn't have the same IP" 5613 " address configuration as before", 5614 errors.ECODE_INVAL) 5615 if existing_node.primary_ip != primary_ip: 5616 self.changed_primary_ip = True 5617 5618 continue 5619 5620 if (existing_node.primary_ip == primary_ip or 5621 existing_node.secondary_ip == primary_ip or 5622 existing_node.primary_ip == secondary_ip or 5623 existing_node.secondary_ip == secondary_ip): 5624 raise errors.OpPrereqError("New node ip address(es) conflict with" 5625 " existing node %s" % existing_node.name, 5626 errors.ECODE_NOTUNIQUE) 5627 5628 # After this 'if' block, None is no longer a valid value for the 5629 # _capable op attributes 5630 if self.op.readd: 5631 old_node = self.cfg.GetNodeInfo(node) 5632 assert old_node is not None, "Can't retrieve locked node %s" % node 5633 for attr in self._NFLAGS: 5634 if getattr(self.op, attr) is None: 5635 setattr(self.op, attr, getattr(old_node, attr)) 5636 else: 5637 for attr in self._NFLAGS: 5638 if getattr(self.op, attr) is None: 5639 setattr(self.op, attr, True) 5640 5641 if self.op.readd and not self.op.vm_capable: 5642 pri, sec = cfg.GetNodeInstances(node) 5643 if pri or sec: 5644 raise errors.OpPrereqError("Node %s being re-added with vm_capable" 5645 " flag set to false, but it already holds" 5646 " instances" % node, 5647 errors.ECODE_STATE) 5648 5649 # check that the type of the node (single versus dual homed) is the 5650 # same as for the master 5651 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode()) 5652 master_singlehomed = myself.secondary_ip == myself.primary_ip 5653 newbie_singlehomed = secondary_ip == primary_ip 5654 if master_singlehomed != newbie_singlehomed: 5655 if master_singlehomed: 5656 raise errors.OpPrereqError("The master has no secondary ip but the" 5657 " new node has one", 5658 errors.ECODE_INVAL) 5659 else: 5660 raise errors.OpPrereqError("The master has a secondary ip but the" 5661 " new node doesn't have one", 5662 errors.ECODE_INVAL) 5663 5664 # checks reachability 5665 if not 
netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): 5666 raise errors.OpPrereqError("Node not reachable by ping", 5667 errors.ECODE_ENVIRON) 5668 5669 if not newbie_singlehomed: 5670 # check reachability from my secondary ip to newbie's secondary ip 5671 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, 5672 source=myself.secondary_ip): 5673 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 5674 " based ping to node daemon port", 5675 errors.ECODE_ENVIRON) 5676 5677 if self.op.readd: 5678 exceptions = [node] 5679 else: 5680 exceptions = [] 5681 5682 if self.op.master_capable: 5683 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions) 5684 else: 5685 self.master_candidate = False 5686 5687 if self.op.readd: 5688 self.new_node = old_node 5689 else: 5690 node_group = cfg.LookupNodeGroup(self.op.group) 5691 self.new_node = objects.Node(name=node, 5692 primary_ip=primary_ip, 5693 secondary_ip=secondary_ip, 5694 master_candidate=self.master_candidate, 5695 offline=False, drained=False, 5696 group=node_group) 5697 5698 if self.op.ndparams: 5699 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 5700 5701 if self.op.hv_state: 5702 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None) 5703 5704 if self.op.disk_state: 5705 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None) 5706 5707 # TODO: If we need to have multiple DnsOnlyRunner we probably should make 5708 # it a property on the base class. 5709 result = rpc.DnsOnlyRunner().call_version([node])[node] 5710 result.Raise("Can't get version information from node %s" % node) 5711 if constants.PROTOCOL_VERSION == result.payload: 5712 logging.info("Communication to node %s fine, sw version %s match", 5713 node, result.payload) 5714 else: 5715 raise errors.OpPrereqError("Version mismatch master version %s," 5716 " node version %s" % 5717 (constants.PROTOCOL_VERSION, result.payload), 5718 errors.ECODE_ENVIRON)
5719
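One of the checks in CheckPrereq above compares the homing of the new node with the master's; a sketch using documentation addresses (all values invented), where a node counts as "single homed" when its secondary IP equals its primary IP:

master = {"primary_ip": "192.0.2.10", "secondary_ip": "192.0.2.10"}
newbie = {"primary_ip": "192.0.2.20", "secondary_ip": "198.51.100.20"}

master_singlehomed = master["secondary_ip"] == master["primary_ip"]
newbie_singlehomed = newbie["secondary_ip"] == newbie["primary_ip"]
if master_singlehomed != newbie_singlehomed:
  # Here the master has no separate secondary IP but the new node does,
  # so the LU raises OpPrereqError rather than adding a mixed setup.
  print("refusing: master is single homed, new node is not")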
5720 - def Exec(self, feedback_fn):
5721 """Adds the new node to the cluster. 5722 5723 """ 5724 new_node = self.new_node 5725 node = new_node.name 5726 5727 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \ 5728 "Not owning BGL" 5729 5730 # We adding a new node so we assume it's powered 5731 new_node.powered = True 5732 5733 # for re-adds, reset the offline/drained/master-candidate flags; 5734 # we need to reset here, otherwise offline would prevent RPC calls 5735 # later in the procedure; this also means that if the re-add 5736 # fails, we are left with a non-offlined, broken node 5737 if self.op.readd: 5738 new_node.drained = new_node.offline = False # pylint: disable=W0201 5739 self.LogInfo("Readding a node, the offline/drained flags were reset") 5740 # if we demote the node, we do cleanup later in the procedure 5741 new_node.master_candidate = self.master_candidate 5742 if self.changed_primary_ip: 5743 new_node.primary_ip = self.op.primary_ip 5744 5745 # copy the master/vm_capable flags 5746 for attr in self._NFLAGS: 5747 setattr(new_node, attr, getattr(self.op, attr)) 5748 5749 # notify the user about any possible mc promotion 5750 if new_node.master_candidate: 5751 self.LogInfo("Node will be a master candidate") 5752 5753 if self.op.ndparams: 5754 new_node.ndparams = self.op.ndparams 5755 else: 5756 new_node.ndparams = {} 5757 5758 if self.op.hv_state: 5759 new_node.hv_state_static = self.new_hv_state 5760 5761 if self.op.disk_state: 5762 new_node.disk_state_static = self.new_disk_state 5763 5764 # Add node to our /etc/hosts, and add key to known_hosts 5765 if self.cfg.GetClusterInfo().modify_etc_hosts: 5766 master_node = self.cfg.GetMasterNode() 5767 result = self.rpc.call_etc_hosts_modify(master_node, 5768 constants.ETC_HOSTS_ADD, 5769 self.hostname.name, 5770 self.hostname.ip) 5771 result.Raise("Can't update hosts file with new host data") 5772 5773 if new_node.secondary_ip != new_node.primary_ip: 5774 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip, 5775 False) 5776 5777 node_verify_list = [self.cfg.GetMasterNode()] 5778 node_verify_param = { 5779 constants.NV_NODELIST: ([node], {}), 5780 # TODO: do a node-net-test as well? 5781 } 5782 5783 result = self.rpc.call_node_verify(node_verify_list, node_verify_param, 5784 self.cfg.GetClusterName()) 5785 for verifier in node_verify_list: 5786 result[verifier].Raise("Cannot communicate with node %s" % verifier) 5787 nl_payload = result[verifier].payload[constants.NV_NODELIST] 5788 if nl_payload: 5789 for failed in nl_payload: 5790 feedback_fn("ssh/hostname verification failed" 5791 " (checking from %s): %s" % 5792 (verifier, nl_payload[failed])) 5793 raise errors.OpExecError("ssh/hostname verification failed") 5794 5795 if self.op.readd: 5796 _RedistributeAncillaryFiles(self) 5797 self.context.ReaddNode(new_node) 5798 # make sure we redistribute the config 5799 self.cfg.Update(new_node, feedback_fn) 5800 # and make sure the new node will not have old files around 5801 if not new_node.master_candidate: 5802 result = self.rpc.call_node_demote_from_mc(new_node.name) 5803 msg = result.fail_msg 5804 if msg: 5805 self.LogWarning("Node failed to demote itself from master" 5806 " candidate status: %s" % msg) 5807 else: 5808 _RedistributeAncillaryFiles(self, additional_nodes=[node], 5809 additional_vm=self.op.vm_capable) 5810 self.context.AddNode(new_node, self.proc.GetECId())
5811
5812 5813 -class LUNodeSetParams(LogicalUnit):
5814 """Modifies the parameters of a node. 5815 5816 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) 5817 to the node role (as _ROLE_*) 5818 @cvar _R2F: a dictionary from node role to tuples of flags 5819 @cvar _FLAGS: a list of attribute names corresponding to the flags 5820 5821 """ 5822 HPATH = "node-modify" 5823 HTYPE = constants.HTYPE_NODE 5824 REQ_BGL = False 5825 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) 5826 _F2R = { 5827 (True, False, False): _ROLE_CANDIDATE, 5828 (False, True, False): _ROLE_DRAINED, 5829 (False, False, True): _ROLE_OFFLINE, 5830 (False, False, False): _ROLE_REGULAR, 5831 } 5832 _R2F = dict((v, k) for k, v in _F2R.items()) 5833 _FLAGS = ["master_candidate", "drained", "offline"] 5834
5835 - def CheckArguments(self):
5836 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 5837 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, 5838 self.op.master_capable, self.op.vm_capable, 5839 self.op.secondary_ip, self.op.ndparams, self.op.hv_state, 5840 self.op.disk_state] 5841 if all_mods.count(None) == len(all_mods): 5842 raise errors.OpPrereqError("Please pass at least one modification", 5843 errors.ECODE_INVAL) 5844 if all_mods.count(True) > 1: 5845 raise errors.OpPrereqError("Can't set the node into more than one" 5846 " state at the same time", 5847 errors.ECODE_INVAL) 5848 5849 # Boolean value that tells us whether we might be demoting from MC 5850 self.might_demote = (self.op.master_candidate == False or 5851 self.op.offline == True or 5852 self.op.drained == True or 5853 self.op.master_capable == False) 5854 5855 if self.op.secondary_ip: 5856 if not netutils.IP4Address.IsValid(self.op.secondary_ip): 5857 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" 5858 " address" % self.op.secondary_ip, 5859 errors.ECODE_INVAL) 5860 5861 self.lock_all = self.op.auto_promote and self.might_demote 5862 self.lock_instances = self.op.secondary_ip is not None
5863
5864 - def _InstanceFilter(self, instance):
5865 """Filter for getting affected instances. 5866 5867 """ 5868 return (instance.disk_template in constants.DTS_INT_MIRROR and 5869 self.op.node_name in instance.all_nodes)
5870
5871 - def ExpandNames(self):
5872 if self.lock_all: 5873 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET} 5874 else: 5875 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name} 5876 5877 # Since modifying a node can have severe effects on currently running 5878 # operations the resource lock is at least acquired in shared mode 5879 self.needed_locks[locking.LEVEL_NODE_RES] = \ 5880 self.needed_locks[locking.LEVEL_NODE] 5881 5882 # Get node resource and instance locks in shared mode; they are not used 5883 # for anything but read-only access 5884 self.share_locks[locking.LEVEL_NODE_RES] = 1 5885 self.share_locks[locking.LEVEL_INSTANCE] = 1 5886 5887 if self.lock_instances: 5888 self.needed_locks[locking.LEVEL_INSTANCE] = \ 5889 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5890
5891 - def BuildHooksEnv(self):
5892 """Build hooks env. 5893 5894 This runs on the master node. 5895 5896 """ 5897 return { 5898 "OP_TARGET": self.op.node_name, 5899 "MASTER_CANDIDATE": str(self.op.master_candidate), 5900 "OFFLINE": str(self.op.offline), 5901 "DRAINED": str(self.op.drained), 5902 "MASTER_CAPABLE": str(self.op.master_capable), 5903 "VM_CAPABLE": str(self.op.vm_capable), 5904 }
5905
5906 - def BuildHooksNodes(self):
5907 """Build hooks nodes. 5908 5909 """ 5910 nl = [self.cfg.GetMasterNode(), self.op.node_name] 5911 return (nl, nl)
5912
5913 - def CheckPrereq(self):
5914 """Check prerequisites. 5915 5916 This only checks the instance list against the existing names. 5917 5918 """ 5919 node = self.node = self.cfg.GetNodeInfo(self.op.node_name) 5920 5921 if self.lock_instances: 5922 affected_instances = \ 5923 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter) 5924 5925 # Verify instance locks 5926 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE) 5927 wanted_instances = frozenset(affected_instances.keys()) 5928 if wanted_instances - owned_instances: 5929 raise errors.OpPrereqError("Instances affected by changing node %s's" 5930 " secondary IP address have changed since" 5931 " locks were acquired, wanted '%s', have" 5932 " '%s'; retry the operation" % 5933 (self.op.node_name, 5934 utils.CommaJoin(wanted_instances), 5935 utils.CommaJoin(owned_instances)), 5936 errors.ECODE_STATE) 5937 else: 5938 affected_instances = None 5939 5940 if (self.op.master_candidate is not None or 5941 self.op.drained is not None or 5942 self.op.offline is not None): 5943 # we can't change the master's node flags 5944 if self.op.node_name == self.cfg.GetMasterNode(): 5945 raise errors.OpPrereqError("The master role can be changed" 5946 " only via master-failover", 5947 errors.ECODE_INVAL) 5948 5949 if self.op.master_candidate and not node.master_capable: 5950 raise errors.OpPrereqError("Node %s is not master capable, cannot make" 5951 " it a master candidate" % node.name, 5952 errors.ECODE_STATE) 5953 5954 if self.op.vm_capable == False: 5955 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name) 5956 if ipri or isec: 5957 raise errors.OpPrereqError("Node %s hosts instances, cannot unset" 5958 " the vm_capable flag" % node.name, 5959 errors.ECODE_STATE) 5960 5961 if node.master_candidate and self.might_demote and not self.lock_all: 5962 assert not self.op.auto_promote, "auto_promote set but lock_all not" 5963 # check if after removing the current node, we're missing master 5964 # candidates 5965 (mc_remaining, mc_should, _) = \ 5966 self.cfg.GetMasterCandidateStats(exceptions=[node.name]) 5967 if mc_remaining < mc_should: 5968 raise errors.OpPrereqError("Not enough master candidates, please" 5969 " pass auto promote option to allow" 5970 " promotion (--auto-promote or RAPI" 5971 " auto_promote=True)", errors.ECODE_STATE) 5972 5973 self.old_flags = old_flags = (node.master_candidate, 5974 node.drained, node.offline) 5975 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) 5976 self.old_role = old_role = self._F2R[old_flags] 5977 5978 # Check for ineffective changes 5979 for attr in self._FLAGS: 5980 if (getattr(self.op, attr) == False and getattr(node, attr) == False): 5981 self.LogInfo("Ignoring request to unset flag %s, already unset", attr) 5982 setattr(self.op, attr, None) 5983 5984 # Past this point, any flag change to False means a transition 5985 # away from the respective state, as only real changes are kept 5986 5987 # TODO: We might query the real power state if it supports OOB 5988 if _SupportsOob(self.cfg, node): 5989 if self.op.offline is False and not (node.powered or 5990 self.op.powered == True): 5991 raise errors.OpPrereqError(("Node %s needs to be turned on before its" 5992 " offline status can be reset") % 5993 self.op.node_name) 5994 elif self.op.powered is not None: 5995 raise errors.OpPrereqError(("Unable to change powered state for node %s" 5996 " as it does not support out-of-band" 5997 " handling") % self.op.node_name) 5998 5999 # If we're being deofflined/drained, we'll MC ourself if needed 6000 if 
(self.op.drained == False or self.op.offline == False or 6001 (self.op.master_capable and not node.master_capable)): 6002 if _DecideSelfPromotion(self): 6003 self.op.master_candidate = True 6004 self.LogInfo("Auto-promoting node to master candidate") 6005 6006 # If we're no longer master capable, we'll demote ourselves from MC 6007 if self.op.master_capable == False and node.master_candidate: 6008 self.LogInfo("Demoting from master candidate") 6009 self.op.master_candidate = False 6010 6011 # Compute new role 6012 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 6013 if self.op.master_candidate: 6014 new_role = self._ROLE_CANDIDATE 6015 elif self.op.drained: 6016 new_role = self._ROLE_DRAINED 6017 elif self.op.offline: 6018 new_role = self._ROLE_OFFLINE 6019 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: 6020 # False is still in new flags, which means we're un-setting (the 6021 # only) True flag 6022 new_role = self._ROLE_REGULAR 6023 else: # no new flags, nothing, keep old role 6024 new_role = old_role 6025 6026 self.new_role = new_role 6027 6028 if old_role == self._ROLE_OFFLINE and new_role != old_role: 6029 # Trying to transition out of offline status 6030 result = self.rpc.call_version([node.name])[node.name] 6031 if result.fail_msg: 6032 raise errors.OpPrereqError("Node %s is being de-offlined but fails" 6033 " to report its version: %s" % 6034 (node.name, result.fail_msg), 6035 errors.ECODE_STATE) 6036 else: 6037 self.LogWarning("Transitioning node from offline to online state" 6038 " without using re-add. Please make sure the node" 6039 " is healthy!") 6040 6041 if self.op.secondary_ip: 6042 # Ok even without locking, because this can't be changed by any LU 6043 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode()) 6044 master_singlehomed = master.secondary_ip == master.primary_ip 6045 if master_singlehomed and self.op.secondary_ip: 6046 raise errors.OpPrereqError("Cannot change the secondary ip on a single" 6047 " homed cluster", errors.ECODE_INVAL) 6048 6049 assert not (frozenset(affected_instances) - 6050 self.owned_locks(locking.LEVEL_INSTANCE)) 6051 6052 if node.offline: 6053 if affected_instances: 6054 raise errors.OpPrereqError("Cannot change secondary IP address:" 6055 " offline node has instances (%s)" 6056 " configured to use it" % 6057 utils.CommaJoin(affected_instances.keys())) 6058 else: 6059 # On online nodes, check that no instances are running, and that 6060 # the node has the new ip and we can reach it. 
6061 for instance in affected_instances.values(): 6062 _CheckInstanceState(self, instance, INSTANCE_DOWN, 6063 msg="cannot change secondary ip") 6064 6065 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True) 6066 if master.name != node.name: 6067 # check reachability from master secondary ip to new secondary ip 6068 if not netutils.TcpPing(self.op.secondary_ip, 6069 constants.DEFAULT_NODED_PORT, 6070 source=master.secondary_ip): 6071 raise errors.OpPrereqError("Node secondary ip not reachable by TCP" 6072 " based ping to node daemon port", 6073 errors.ECODE_ENVIRON) 6074 6075 if self.op.ndparams: 6076 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams) 6077 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) 6078 self.new_ndparams = new_ndparams 6079 6080 if self.op.hv_state: 6081 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 6082 self.node.hv_state_static) 6083 6084 if self.op.disk_state: 6085 self.new_disk_state = \ 6086 _MergeAndVerifyDiskState(self.op.disk_state, 6087 self.node.disk_state_static)
6088
6089 - def Exec(self, feedback_fn):
6090 """Modifies a node. 6091 6092 """ 6093 node = self.node 6094 old_role = self.old_role 6095 new_role = self.new_role 6096 6097 result = [] 6098 6099 if self.op.ndparams: 6100 node.ndparams = self.new_ndparams 6101 6102 if self.op.powered is not None: 6103 node.powered = self.op.powered 6104 6105 if self.op.hv_state: 6106 node.hv_state_static = self.new_hv_state 6107 6108 if self.op.disk_state: 6109 node.disk_state_static = self.new_disk_state 6110 6111 for attr in ["master_capable", "vm_capable"]: 6112 val = getattr(self.op, attr) 6113 if val is not None: 6114 setattr(node, attr, val) 6115 result.append((attr, str(val))) 6116 6117 if new_role != old_role: 6118 # Tell the node to demote itself, if no longer MC and not offline 6119 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE: 6120 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg 6121 if msg: 6122 self.LogWarning("Node failed to demote itself: %s", msg) 6123 6124 new_flags = self._R2F[new_role] 6125 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): 6126 if of != nf: 6127 result.append((desc, str(nf))) 6128 (node.master_candidate, node.drained, node.offline) = new_flags 6129 6130 # we locked all nodes, we adjust the CP before updating this node 6131 if self.lock_all: 6132 _AdjustCandidatePool(self, [node.name]) 6133 6134 if self.op.secondary_ip: 6135 node.secondary_ip = self.op.secondary_ip 6136 result.append(("secondary_ip", self.op.secondary_ip)) 6137 6138 # this will trigger configuration file update, if needed 6139 self.cfg.Update(node, feedback_fn) 6140 6141 # this will trigger job queue propagation or cleanup if the mc 6142 # flag changed 6143 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1: 6144 self.context.ReaddNode(node) 6145 6146 return result
6147
6148 6149 -class LUNodePowercycle(NoHooksLU):
6150 """Powercycles a node. 6151 6152 """ 6153 REQ_BGL = False 6154
6155 - def CheckArguments(self):
6156 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 6157 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force: 6158 raise errors.OpPrereqError("The node is the master and the force" 6159 " parameter was not set", 6160 errors.ECODE_INVAL)
6161
6162 - def ExpandNames(self):
6163 """Locking for PowercycleNode. 6164 6165 This is a last-resort option and shouldn't block on other 6166 jobs. Therefore, we grab no locks. 6167 6168 """ 6169 self.needed_locks = {}
6170
6171 - def Exec(self, feedback_fn):
6172 """Reboots a node. 6173 6174 """ 6175 result = self.rpc.call_node_powercycle(self.op.node_name, 6176 self.cfg.GetHypervisorType()) 6177 result.Raise("Failed to schedule the reboot") 6178 return result.payload
6179
6180 6181 -class LUClusterQuery(NoHooksLU):
6182 """Query cluster configuration. 6183 6184 """ 6185 REQ_BGL = False 6186
6187 - def ExpandNames(self):
6188 self.needed_locks = {}
6189
6190 - def Exec(self, feedback_fn):
6191 """Return cluster config. 6192 6193 """ 6194 cluster = self.cfg.GetClusterInfo() 6195 os_hvp = {} 6196 6197 # Filter just for enabled hypervisors 6198 for os_name, hv_dict in cluster.os_hvp.items(): 6199 os_hvp[os_name] = {} 6200 for hv_name, hv_params in hv_dict.items(): 6201 if hv_name in cluster.enabled_hypervisors: 6202 os_hvp[os_name][hv_name] = hv_params 6203 6204 # Convert ip_family to ip_version 6205 primary_ip_version = constants.IP4_VERSION 6206 if cluster.primary_ip_family == netutils.IP6Address.family: 6207 primary_ip_version = constants.IP6_VERSION 6208 6209 result = { 6210 "software_version": constants.RELEASE_VERSION, 6211 "protocol_version": constants.PROTOCOL_VERSION, 6212 "config_version": constants.CONFIG_VERSION, 6213 "os_api_version": max(constants.OS_API_VERSIONS), 6214 "export_version": constants.EXPORT_VERSION, 6215 "architecture": runtime.GetArchInfo(), 6216 "name": cluster.cluster_name, 6217 "master": cluster.master_node, 6218 "default_hypervisor": cluster.primary_hypervisor, 6219 "enabled_hypervisors": cluster.enabled_hypervisors, 6220 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) 6221 for hypervisor_name in cluster.enabled_hypervisors]), 6222 "os_hvp": os_hvp, 6223 "beparams": cluster.beparams, 6224 "osparams": cluster.osparams, 6225 "ipolicy": cluster.ipolicy, 6226 "nicparams": cluster.nicparams, 6227 "ndparams": cluster.ndparams, 6228 "diskparams": cluster.diskparams, 6229 "candidate_pool_size": cluster.candidate_pool_size, 6230 "master_netdev": cluster.master_netdev, 6231 "master_netmask": cluster.master_netmask, 6232 "use_external_mip_script": cluster.use_external_mip_script, 6233 "volume_group_name": cluster.volume_group_name, 6234 "drbd_usermode_helper": cluster.drbd_usermode_helper, 6235 "file_storage_dir": cluster.file_storage_dir, 6236 "shared_file_storage_dir": cluster.shared_file_storage_dir, 6237 "maintain_node_health": cluster.maintain_node_health, 6238 "ctime": cluster.ctime, 6239 "mtime": cluster.mtime, 6240 "uuid": cluster.uuid, 6241 "tags": list(cluster.GetTags()), 6242 "uid_pool": cluster.uid_pool, 6243 "default_iallocator": cluster.default_iallocator, 6244 "reserved_lvs": cluster.reserved_lvs, 6245 "primary_ip_version": primary_ip_version, 6246 "prealloc_wipe_disks": cluster.prealloc_wipe_disks, 6247 "hidden_os": cluster.hidden_os, 6248 "blacklisted_os": cluster.blacklisted_os, 6249 } 6250 6251 return result
6252
6253 6254 -class LUClusterConfigQuery(NoHooksLU):
6255 """Return configuration values. 6256 6257 """ 6258 REQ_BGL = False 6259
6260 - def CheckArguments(self):
6261 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6262
6263 - def ExpandNames(self):
6264 self.cq.ExpandNames(self)
6265
6266 - def DeclareLocks(self, level):
6267 self.cq.DeclareLocks(self, level)
6268
6269 - def Exec(self, feedback_fn):
6270 result = self.cq.OldStyleQuery(self) 6271 6272 assert len(result) == 1 6273 6274 return result[0]
6275
6276 6277 -class _ClusterQuery(_QueryBase):
6278 FIELDS = query.CLUSTER_FIELDS 6279 6280 #: Do not sort (there is only one item) 6281 SORT_FIELD = None 6282
6283 - def ExpandNames(self, lu):
6284 lu.needed_locks = {} 6285 6286 # The following variables interact with _QueryBase._GetNames 6287 self.wanted = locking.ALL_SET 6288 self.do_locking = self.use_locking 6289 6290 if self.do_locking: 6291 raise errors.OpPrereqError("Can not use locking for cluster queries", 6292 errors.ECODE_INVAL)
6293
6294 - def DeclareLocks(self, lu, level):
6295 pass
6296
6297 - def _GetQueryData(self, lu):
6298 """Computes the list of nodes and their attributes. 6299 6300 """ 6301 # Locking is not used 6302 assert not (compat.any(lu.glm.is_owned(level) 6303 for level in locking.LEVELS 6304 if level != locking.LEVEL_CLUSTER) or 6305 self.do_locking or self.use_locking) 6306 6307 if query.CQ_CONFIG in self.requested_data: 6308 cluster = lu.cfg.GetClusterInfo() 6309 else: 6310 cluster = NotImplemented 6311 6312 if query.CQ_QUEUE_DRAINED in self.requested_data: 6313 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE) 6314 else: 6315 drain_flag = NotImplemented 6316 6317 if query.CQ_WATCHER_PAUSE in self.requested_data: 6318 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE) 6319 else: 6320 watcher_pause = NotImplemented 6321 6322 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6323
6324 6325 -class LUInstanceActivateDisks(NoHooksLU):
6326 """Bring up an instance's disks. 6327 6328 """ 6329 REQ_BGL = False 6330
6331 - def ExpandNames(self):
6332 self._ExpandAndLockInstance() 6333 self.needed_locks[locking.LEVEL_NODE] = [] 6334 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6335
6336 - def DeclareLocks(self, level):
6337 if level == locking.LEVEL_NODE: 6338 self._LockInstancesNodes()
6339
6340 - def CheckPrereq(self):
6341 """Check prerequisites. 6342 6343 This checks that the instance is in the cluster. 6344 6345 """ 6346 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6347 assert self.instance is not None, \ 6348 "Cannot retrieve locked instance %s" % self.op.instance_name 6349 _CheckNodeOnline(self, self.instance.primary_node)
6350
6351 - def Exec(self, feedback_fn):
6352 """Activate the disks. 6353 6354 """ 6355 disks_ok, disks_info = \ 6356 _AssembleInstanceDisks(self, self.instance, 6357 ignore_size=self.op.ignore_size) 6358 if not disks_ok: 6359 raise errors.OpExecError("Cannot activate block devices") 6360 6361 return disks_info
6362
6363 6364 -def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False, 6365 ignore_size=False):
6366 """Prepare the block devices for an instance. 6367 6368 This sets up the block devices on all nodes. 6369 6370 @type lu: L{LogicalUnit} 6371 @param lu: the logical unit on whose behalf we execute 6372 @type instance: L{objects.Instance} 6373 @param instance: the instance for whose disks we assemble 6374 @type disks: list of L{objects.Disk} or None 6375 @param disks: which disks to assemble (or all, if None) 6376 @type ignore_secondaries: boolean 6377 @param ignore_secondaries: if true, errors on secondary nodes 6378 won't result in an error return from the function 6379 @type ignore_size: boolean 6380 @param ignore_size: if true, the current known size of the disk 6381 will not be used during the disk activation, useful for cases 6382 when the size is wrong 6383 @return: False if the operation failed, otherwise a list of 6384 (host, instance_visible_name, node_visible_name) 6385 with the mapping from node devices to instance devices 6386 6387 """ 6388 device_info = [] 6389 disks_ok = True 6390 iname = instance.name 6391 disks = _ExpandCheckDisks(instance, disks) 6392 6393 # With the two passes mechanism we try to reduce the window of 6394 # opportunity for the race condition of switching DRBD to primary 6395 # before handshaking occured, but we do not eliminate it 6396 6397 # The proper fix would be to wait (with some limits) until the 6398 # connection has been made and drbd transitions from WFConnection 6399 # into any other network-connected state (Connected, SyncTarget, 6400 # SyncSource, etc.) 6401 6402 # 1st pass, assemble on all nodes in secondary mode 6403 for idx, inst_disk in enumerate(disks): 6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 6405 if ignore_size: 6406 node_disk = node_disk.Copy() 6407 node_disk.UnsetSize() 6408 lu.cfg.SetDiskID(node_disk, node) 6409 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, 6410 False, idx) 6411 msg = result.fail_msg 6412 if msg: 6413 is_offline_secondary = (node in instance.secondary_nodes and 6414 result.offline) 6415 lu.proc.LogWarning("Could not prepare block device %s on node %s" 6416 " (is_primary=False, pass=1): %s", 6417 inst_disk.iv_name, node, msg) 6418 if not (ignore_secondaries or is_offline_secondary): 6419 disks_ok = False 6420 6421 # FIXME: race condition on drbd migration to primary 6422 6423 # 2nd pass, do only the primary node 6424 for idx, inst_disk in enumerate(disks): 6425 dev_path = None 6426 6427 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node): 6428 if node != instance.primary_node: 6429 continue 6430 if ignore_size: 6431 node_disk = node_disk.Copy() 6432 node_disk.UnsetSize() 6433 lu.cfg.SetDiskID(node_disk, node) 6434 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname, 6435 True, idx) 6436 msg = result.fail_msg 6437 if msg: 6438 lu.proc.LogWarning("Could not prepare block device %s on node %s" 6439 " (is_primary=True, pass=2): %s", 6440 inst_disk.iv_name, node, msg) 6441 disks_ok = False 6442 else: 6443 dev_path = result.payload 6444 6445 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path)) 6446 6447 # leave the disks configured for the primary node 6448 # this is a workaround that would be fixed better by 6449 # improving the logical/physical id handling 6450 for disk in disks: 6451 lu.cfg.SetDiskID(disk, instance.primary_node) 6452 6453 return disks_ok, device_info
6454
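For orientation, roughly what _AssembleInstanceDisks hands back for a healthy two-disk DRBD instance; the node name, disk names and device paths are invented:

disks_ok, device_info = True, [
  ("node1.example.com", "disk/0", "/dev/drbd0"),
  ("node1.example.com", "disk/1", "/dev/drbd1"),
  ]
# Each tuple is (primary node, instance-visible disk name, device path on
# that node). A failure in the second (primary) pass leaves dev_path as
# None and flips disks_ok to False, which LUInstanceActivateDisks turns
# into an OpExecError.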
6455 6456 -def _StartInstanceDisks(lu, instance, force):
6457 """Start the disks of an instance. 6458 6459 """ 6460 disks_ok, _ = _AssembleInstanceDisks(lu, instance, 6461 ignore_secondaries=force) 6462 if not disks_ok: 6463 _ShutdownInstanceDisks(lu, instance) 6464 if force is not None and not force: 6465 lu.proc.LogWarning("", hint="If the message above refers to a" 6466 " secondary node," 6467 " you can retry the operation using '--force'.") 6468 raise errors.OpExecError("Disk consistency error")
6469
6470 6471 -class LUInstanceDeactivateDisks(NoHooksLU):
6472 """Shutdown an instance's disks. 6473 6474 """ 6475 REQ_BGL = False 6476
6477 - def ExpandNames(self):
6478 self._ExpandAndLockInstance() 6479 self.needed_locks[locking.LEVEL_NODE] = [] 6480 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6481
6482 - def DeclareLocks(self, level):
6483 if level == locking.LEVEL_NODE: 6484 self._LockInstancesNodes()
6485
6486 - def CheckPrereq(self):
6487 """Check prerequisites. 6488 6489 This checks that the instance is in the cluster. 6490 6491 """ 6492 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6493 assert self.instance is not None, \ 6494 "Cannot retrieve locked instance %s" % self.op.instance_name
6495
6496 - def Exec(self, feedback_fn):
6497 """Deactivate the disks 6498 6499 """ 6500 instance = self.instance 6501 if self.op.force: 6502 _ShutdownInstanceDisks(self, instance) 6503 else: 6504 _SafeShutdownInstanceDisks(self, instance)
6505
6506 6507 -def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6508 """Shutdown block devices of an instance. 6509 6510 This function checks if an instance is running, before calling 6511 _ShutdownInstanceDisks. 6512 6513 """ 6514 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks") 6515 _ShutdownInstanceDisks(lu, instance, disks=disks)
6516
6517 6518 -def _ExpandCheckDisks(instance, disks):
6519 """Return the instance disks selected by the disks list 6520 6521 @type disks: list of L{objects.Disk} or None 6522 @param disks: selected disks 6523 @rtype: list of L{objects.Disk} 6524 @return: selected instance disks to act on 6525 6526 """ 6527 if disks is None: 6528 return instance.disks 6529 else: 6530 if not set(disks).issubset(instance.disks): 6531 raise errors.ProgrammerError("Can only act on disks belonging to the" 6532 " target instance") 6533 return disks
6534
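A minimal sketch of _ExpandCheckDisks' contract, assuming the helper is in scope and using plain strings in place of objects.Disk instances:

class FakeInstance(object):
  disks = ["disk-a", "disk-b"]   # stand-ins for objects.Disk objects

inst = FakeInstance()
assert _ExpandCheckDisks(inst, None) == ["disk-a", "disk-b"]  # all disks
assert _ExpandCheckDisks(inst, ["disk-b"]) == ["disk-b"]      # a subset
# Passing a disk that does not belong to the instance, e.g.
# _ExpandCheckDisks(inst, ["disk-c"]), raises errors.ProgrammerError.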
6535 6536 -def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6537 """Shutdown block devices of an instance. 6538 6539 This does the shutdown on all nodes of the instance. 6540 6541 If the ignore_primary is false, errors on the primary node are 6542 ignored. 6543 6544 """ 6545 all_result = True 6546 disks = _ExpandCheckDisks(instance, disks) 6547 6548 for disk in disks: 6549 for node, top_disk in disk.ComputeNodeTree(instance.primary_node): 6550 lu.cfg.SetDiskID(top_disk, node) 6551 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance)) 6552 msg = result.fail_msg 6553 if msg: 6554 lu.LogWarning("Could not shutdown block device %s on node %s: %s", 6555 disk.iv_name, node, msg) 6556 if ((node == instance.primary_node and not ignore_primary) or 6557 (node != instance.primary_node and not result.offline)): 6558 all_result = False 6559 return all_result
6560
6561 6562 -def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6563 """Checks if a node has enough free memory. 6564 6565 This function check if a given node has the needed amount of free 6566 memory. In case the node has less memory or we cannot get the 6567 information from the node, this function raise an OpPrereqError 6568 exception. 6569 6570 @type lu: C{LogicalUnit} 6571 @param lu: a logical unit from which we get configuration data 6572 @type node: C{str} 6573 @param node: the node to check 6574 @type reason: C{str} 6575 @param reason: string to use in the error message 6576 @type requested: C{int} 6577 @param requested: the amount of memory in MiB to check for 6578 @type hypervisor_name: C{str} 6579 @param hypervisor_name: the hypervisor to ask for memory stats 6580 @rtype: integer 6581 @return: node current free memory 6582 @raise errors.OpPrereqError: if the node doesn't have enough memory, or 6583 we cannot check the node 6584 6585 """ 6586 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name]) 6587 nodeinfo[node].Raise("Can't get data from node %s" % node, 6588 prereq=True, ecode=errors.ECODE_ENVIRON) 6589 (_, _, (hv_info, )) = nodeinfo[node].payload 6590 6591 free_mem = hv_info.get("memory_free", None) 6592 if not isinstance(free_mem, int): 6593 raise errors.OpPrereqError("Can't compute free memory on node %s, result" 6594 " was '%s'" % (node, free_mem), 6595 errors.ECODE_ENVIRON) 6596 if requested > free_mem: 6597 raise errors.OpPrereqError("Not enough memory on node %s for %s:" 6598 " needed %s MiB, available %s MiB" % 6599 (node, reason, requested, free_mem), 6600 errors.ECODE_NORES) 6601 return free_mem
6602
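Typical use of this helper, as seen in LUInstanceStartup.CheckPrereq further down in this section: make sure the primary node can accommodate the instance's minimum memory before starting it.

_CheckNodeFreeMemory(self, instance.primary_node,
                     "starting instance %s" % instance.name,
                     bep[constants.BE_MINMEM], instance.hypervisor)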
6603 6604 -def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6605 """Checks if nodes have enough free disk space in the all VGs. 6606 6607 This function check if all given nodes have the needed amount of 6608 free disk. In case any node has less disk or we cannot get the 6609 information from the node, this function raise an OpPrereqError 6610 exception. 6611 6612 @type lu: C{LogicalUnit} 6613 @param lu: a logical unit from which we get configuration data 6614 @type nodenames: C{list} 6615 @param nodenames: the list of node names to check 6616 @type req_sizes: C{dict} 6617 @param req_sizes: the hash of vg and corresponding amount of disk in 6618 MiB to check for 6619 @raise errors.OpPrereqError: if the node doesn't have enough disk, 6620 or we cannot check the node 6621 6622 """ 6623 for vg, req_size in req_sizes.items(): 6624 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6625
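The req_sizes argument is simply a per-VG requirement map; a sketch with invented volume group names and sizes:

req_sizes = {"xenvg": 10240, "fastvg": 2048}   # MiB required per VG
# _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes) then delegates one
# _CheckNodesFreeDiskOnVG check per volume group.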
6626 6627 -def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6628 """Checks if nodes have enough free disk space in the specified VG. 6629 6630 This function check if all given nodes have the needed amount of 6631 free disk. In case any node has less disk or we cannot get the 6632 information from the node, this function raise an OpPrereqError 6633 exception. 6634 6635 @type lu: C{LogicalUnit} 6636 @param lu: a logical unit from which we get configuration data 6637 @type nodenames: C{list} 6638 @param nodenames: the list of node names to check 6639 @type vg: C{str} 6640 @param vg: the volume group to check 6641 @type requested: C{int} 6642 @param requested: the amount of disk in MiB to check for 6643 @raise errors.OpPrereqError: if the node doesn't have enough disk, 6644 or we cannot check the node 6645 6646 """ 6647 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None) 6648 for node in nodenames: 6649 info = nodeinfo[node] 6650 info.Raise("Cannot get current information from node %s" % node, 6651 prereq=True, ecode=errors.ECODE_ENVIRON) 6652 (_, (vg_info, ), _) = info.payload 6653 vg_free = vg_info.get("vg_free", None) 6654 if not isinstance(vg_free, int): 6655 raise errors.OpPrereqError("Can't compute free disk space on node" 6656 " %s for vg %s, result was '%s'" % 6657 (node, vg, vg_free), errors.ECODE_ENVIRON) 6658 if requested > vg_free: 6659 raise errors.OpPrereqError("Not enough disk space on target node %s" 6660 " vg %s: required %d MiB, available %d MiB" % 6661 (node, vg, requested, vg_free), 6662 errors.ECODE_NORES)
6663
6664 6665 -def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6666 """Checks if nodes have enough physical CPUs 6667 6668 This function checks if all given nodes have the needed number of 6669 physical CPUs. In case any node has less CPUs or we cannot get the 6670 information from the node, this function raises an OpPrereqError 6671 exception. 6672 6673 @type lu: C{LogicalUnit} 6674 @param lu: a logical unit from which we get configuration data 6675 @type nodenames: C{list} 6676 @param nodenames: the list of node names to check 6677 @type requested: C{int} 6678 @param requested: the minimum acceptable number of physical CPUs 6679 @raise errors.OpPrereqError: if the node doesn't have enough CPUs, 6680 or we cannot check the node 6681 6682 """ 6683 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name]) 6684 for node in nodenames: 6685 info = nodeinfo[node] 6686 info.Raise("Cannot get current information from node %s" % node, 6687 prereq=True, ecode=errors.ECODE_ENVIRON) 6688 (_, _, (hv_info, )) = info.payload 6689 num_cpus = hv_info.get("cpu_total", None) 6690 if not isinstance(num_cpus, int): 6691 raise errors.OpPrereqError("Can't compute the number of physical CPUs" 6692 " on node %s, result was '%s'" % 6693 (node, num_cpus), errors.ECODE_ENVIRON) 6694 if requested > num_cpus: 6695 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are " 6696 "required" % (node, num_cpus, requested), 6697 errors.ECODE_NORES)
6698
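# Illustrative sketch, not part of the original module: the helper above is
# meant to be called from within a logical unit with a node list, a minimum
# CPU count and a hypervisor name, e.g. (hypothetical values):
#
#   _CheckNodesPhysicalCPUs(self, list(instance.all_nodes), 4,
#                           instance.hypervisor)
#
# which raises errors.OpPrereqError unless every listed node reports at
# least 4 physical CPUs for that hypervisor.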
6699 6700 -class LUInstanceStartup(LogicalUnit):
6701 """Starts an instance. 6702 6703 """ 6704 HPATH = "instance-start" 6705 HTYPE = constants.HTYPE_INSTANCE 6706 REQ_BGL = False 6707
6708 - def CheckArguments(self):
6709 # extra beparams 6710 if self.op.beparams: 6711 # fill the beparams dict 6712 objects.UpgradeBeParams(self.op.beparams) 6713 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6714
6715 - def ExpandNames(self):
6716 self._ExpandAndLockInstance() 6717 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6718
6719 - def DeclareLocks(self, level):
6720 if level == locking.LEVEL_NODE_RES: 6721 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6722
6723 - def BuildHooksEnv(self):
6724 """Build hooks env. 6725 6726 This runs on master, primary and secondary nodes of the instance. 6727 6728 """ 6729 env = { 6730 "FORCE": self.op.force, 6731 } 6732 6733 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 6734 6735 return env
6736
6737 - def BuildHooksNodes(self):
6738 """Build hooks nodes. 6739 6740 """ 6741 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 6742 return (nl, nl)
6743
6744 - def CheckPrereq(self):
6745 """Check prerequisites. 6746 6747 This checks that the instance is in the cluster. 6748 6749 """ 6750 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6751 assert self.instance is not None, \ 6752 "Cannot retrieve locked instance %s" % self.op.instance_name 6753 6754 # extra hvparams 6755 if self.op.hvparams: 6756 # check hypervisor parameter syntax (locally) 6757 cluster = self.cfg.GetClusterInfo() 6758 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 6759 filled_hvp = cluster.FillHV(instance) 6760 filled_hvp.update(self.op.hvparams) 6761 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor) 6762 hv_type.CheckParameterSyntax(filled_hvp) 6763 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) 6764 6765 _CheckInstanceState(self, instance, INSTANCE_ONLINE) 6766 6767 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline 6768 6769 if self.primary_offline and self.op.ignore_offline_nodes: 6770 self.proc.LogWarning("Ignoring offline primary node") 6771 6772 if self.op.hvparams or self.op.beparams: 6773 self.proc.LogWarning("Overridden parameters are ignored") 6774 else: 6775 _CheckNodeOnline(self, instance.primary_node) 6776 6777 bep = self.cfg.GetClusterInfo().FillBE(instance) 6778 bep.update(self.op.beparams) 6779 6780 # check bridges existence 6781 _CheckInstanceBridgesExist(self, instance) 6782 6783 remote_info = self.rpc.call_instance_info(instance.primary_node, 6784 instance.name, 6785 instance.hypervisor) 6786 remote_info.Raise("Error checking node %s" % instance.primary_node, 6787 prereq=True, ecode=errors.ECODE_ENVIRON) 6788 if not remote_info.payload: # not running already 6789 _CheckNodeFreeMemory(self, instance.primary_node, 6790 "starting instance %s" % instance.name, 6791 bep[constants.BE_MINMEM], instance.hypervisor)
6792
6793 - def Exec(self, feedback_fn):
6794 """Start the instance. 6795 6796 """ 6797 instance = self.instance 6798 force = self.op.force 6799 6800 if not self.op.no_remember: 6801 self.cfg.MarkInstanceUp(instance.name) 6802 6803 if self.primary_offline: 6804 assert self.op.ignore_offline_nodes 6805 self.proc.LogInfo("Primary node offline, marked instance as started") 6806 else: 6807 node_current = instance.primary_node 6808 6809 _StartInstanceDisks(self, instance, force) 6810 6811 result = \ 6812 self.rpc.call_instance_start(node_current, 6813 (instance, self.op.hvparams, 6814 self.op.beparams), 6815 self.op.startup_paused) 6816 msg = result.fail_msg 6817 if msg: 6818 _ShutdownInstanceDisks(self, instance) 6819 raise errors.OpExecError("Could not start instance: %s" % msg)
6820
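# Illustrative sketch, not part of the original module: LUInstanceStartup is
# normally reached through an OpInstanceStartup opcode submitted to the
# master daemon. The instance name below is a hypothetical example and the
# full set of optional fields is defined in ganeti.opcodes.
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False)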
6821 6822 -class LUInstanceReboot(LogicalUnit):
6823 """Reboot an instance. 6824 6825 """ 6826 HPATH = "instance-reboot" 6827 HTYPE = constants.HTYPE_INSTANCE 6828 REQ_BGL = False 6829
6830 - def ExpandNames(self):
6831 self._ExpandAndLockInstance()
6832
6833 - def BuildHooksEnv(self):
6834 """Build hooks env. 6835 6836 This runs on master, primary and secondary nodes of the instance. 6837 6838 """ 6839 env = { 6840 "IGNORE_SECONDARIES": self.op.ignore_secondaries, 6841 "REBOOT_TYPE": self.op.reboot_type, 6842 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 6843 } 6844 6845 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 6846 6847 return env
6848
6849 - def BuildHooksNodes(self):
6850 """Build hooks nodes. 6851 6852 """ 6853 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 6854 return (nl, nl)
6855
6856 - def CheckPrereq(self):
6857 """Check prerequisites. 6858 6859 This checks that the instance is in the cluster. 6860 6861 """ 6862 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6863 assert self.instance is not None, \ 6864 "Cannot retrieve locked instance %s" % self.op.instance_name 6865 _CheckInstanceState(self, instance, INSTANCE_ONLINE) 6866 _CheckNodeOnline(self, instance.primary_node) 6867 6868 # check bridges existence 6869 _CheckInstanceBridgesExist(self, instance)
6870
6871 - def Exec(self, feedback_fn):
6872 """Reboot the instance. 6873 6874 """ 6875 instance = self.instance 6876 ignore_secondaries = self.op.ignore_secondaries 6877 reboot_type = self.op.reboot_type 6878 6879 remote_info = self.rpc.call_instance_info(instance.primary_node, 6880 instance.name, 6881 instance.hypervisor) 6882 remote_info.Raise("Error checking node %s" % instance.primary_node) 6883 instance_running = bool(remote_info.payload) 6884 6885 node_current = instance.primary_node 6886 6887 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT, 6888 constants.INSTANCE_REBOOT_HARD]: 6889 for disk in instance.disks: 6890 self.cfg.SetDiskID(disk, node_current) 6891 result = self.rpc.call_instance_reboot(node_current, instance, 6892 reboot_type, 6893 self.op.shutdown_timeout) 6894 result.Raise("Could not reboot instance") 6895 else: 6896 if instance_running: 6897 result = self.rpc.call_instance_shutdown(node_current, instance, 6898 self.op.shutdown_timeout) 6899 result.Raise("Could not shutdown instance for full reboot") 6900 _ShutdownInstanceDisks(self, instance) 6901 else: 6902 self.LogInfo("Instance %s was already stopped, starting now", 6903 instance.name) 6904 _StartInstanceDisks(self, instance, ignore_secondaries) 6905 result = self.rpc.call_instance_start(node_current, 6906 (instance, None, None), False) 6907 msg = result.fail_msg 6908 if msg: 6909 _ShutdownInstanceDisks(self, instance) 6910 raise errors.OpExecError("Could not start instance for" 6911 " full reboot: %s" % msg) 6912 6913 self.cfg.MarkInstanceUp(instance.name)
6914
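# Illustrative sketch, not part of the original module: the Exec method above
# handles soft/hard reboots through the hypervisor (call_instance_reboot) and
# falls back to shutdown, disk deactivation and restart for a full reboot. A
# hypothetical opcode requesting a hard reboot could look like:
#
#   op = opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_HARD)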
6915 6916 -class LUInstanceShutdown(LogicalUnit):
6917 """Shutdown an instance. 6918 6919 """ 6920 HPATH = "instance-stop" 6921 HTYPE = constants.HTYPE_INSTANCE 6922 REQ_BGL = False 6923
6924 - def ExpandNames(self):
6925 self._ExpandAndLockInstance()
6926
6927 - def BuildHooksEnv(self):
6928 """Build hooks env. 6929 6930 This runs on master, primary and secondary nodes of the instance. 6931 6932 """ 6933 env = _BuildInstanceHookEnvByObject(self, self.instance) 6934 env["TIMEOUT"] = self.op.timeout 6935 return env
6936
6937 - def BuildHooksNodes(self):
6938 """Build hooks nodes. 6939 6940 """ 6941 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 6942 return (nl, nl)
6943
6944 - def CheckPrereq(self):
6945 """Check prerequisites. 6946 6947 This checks that the instance is in the cluster. 6948 6949 """ 6950 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 6951 assert self.instance is not None, \ 6952 "Cannot retrieve locked instance %s" % self.op.instance_name 6953 6954 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE) 6955 6956 self.primary_offline = \ 6957 self.cfg.GetNodeInfo(self.instance.primary_node).offline 6958 6959 if self.primary_offline and self.op.ignore_offline_nodes: 6960 self.proc.LogWarning("Ignoring offline primary node") 6961 else: 6962 _CheckNodeOnline(self, self.instance.primary_node)
6963
6964 - def Exec(self, feedback_fn):
6965 """Shutdown the instance. 6966 6967 """ 6968 instance = self.instance 6969 node_current = instance.primary_node 6970 timeout = self.op.timeout 6971 6972 if not self.op.no_remember: 6973 self.cfg.MarkInstanceDown(instance.name) 6974 6975 if self.primary_offline: 6976 assert self.op.ignore_offline_nodes 6977 self.proc.LogInfo("Primary node offline, marked instance as stopped") 6978 else: 6979 result = self.rpc.call_instance_shutdown(node_current, instance, timeout) 6980 msg = result.fail_msg 6981 if msg: 6982 self.proc.LogWarning("Could not shutdown instance: %s" % msg) 6983 6984 _ShutdownInstanceDisks(self, instance)
6985
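# Illustrative sketch, not part of the original module: a hypothetical
# shutdown opcode with an explicit timeout in seconds (the value is an
# arbitrary example). Note that the LU above still shuts down the instance's
# disks even if the hypervisor-level shutdown reports a failure.
#
#   op = opcodes.OpInstanceShutdown(instance_name="inst1.example.com",
#                                   timeout=300)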
6986 6987 -class LUInstanceReinstall(LogicalUnit):
6988 """Reinstall an instance. 6989 6990 """ 6991 HPATH = "instance-reinstall" 6992 HTYPE = constants.HTYPE_INSTANCE 6993 REQ_BGL = False 6994
6995 - def ExpandNames(self):
6996 self._ExpandAndLockInstance()
6997
6998 - def BuildHooksEnv(self):
6999 """Build hooks env. 7000 7001 This runs on master, primary and secondary nodes of the instance. 7002 7003 """ 7004 return _BuildInstanceHookEnvByObject(self, self.instance)
7005
7006 - def BuildHooksNodes(self):
7007 """Build hooks nodes. 7008 7009 """ 7010 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7011 return (nl, nl)
7012
7013 - def CheckPrereq(self):
7014 """Check prerequisites. 7015 7016 This checks that the instance is in the cluster and is not running. 7017 7018 """ 7019 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7020 assert instance is not None, \ 7021 "Cannot retrieve locked instance %s" % self.op.instance_name 7022 _CheckNodeOnline(self, instance.primary_node, "Instance primary node" 7023 " offline, cannot reinstall") 7024 7025 if instance.disk_template == constants.DT_DISKLESS: 7026 raise errors.OpPrereqError("Instance '%s' has no disks" % 7027 self.op.instance_name, 7028 errors.ECODE_INVAL) 7029 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall") 7030 7031 if self.op.os_type is not None: 7032 # OS verification 7033 pnode = _ExpandNodeName(self.cfg, instance.primary_node) 7034 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) 7035 instance_os = self.op.os_type 7036 else: 7037 instance_os = instance.os 7038 7039 nodelist = list(instance.all_nodes) 7040 7041 if self.op.osparams: 7042 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 7043 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 7044 self.os_inst = i_osdict # the new dict (without defaults) 7045 else: 7046 self.os_inst = None 7047 7048 self.instance = instance
7049
7050 - def Exec(self, feedback_fn):
7051 """Reinstall the instance. 7052 7053 """ 7054 inst = self.instance 7055 7056 if self.op.os_type is not None: 7057 feedback_fn("Changing OS to '%s'..." % self.op.os_type) 7058 inst.os = self.op.os_type 7059 # Write to configuration 7060 self.cfg.Update(inst, feedback_fn) 7061 7062 _StartInstanceDisks(self, inst, None) 7063 try: 7064 feedback_fn("Running the instance OS create scripts...") 7065 # FIXME: pass debug option from opcode to backend 7066 result = self.rpc.call_instance_os_add(inst.primary_node, 7067 (inst, self.os_inst), True, 7068 self.op.debug_level) 7069 result.Raise("Could not install OS for instance %s on node %s" % 7070 (inst.name, inst.primary_node)) 7071 finally: 7072 _ShutdownInstanceDisks(self, inst)
7073
7074 7075 -class LUInstanceRecreateDisks(LogicalUnit):
7076 """Recreate an instance's missing disks. 7077 7078 """ 7079 HPATH = "instance-recreate-disks" 7080 HTYPE = constants.HTYPE_INSTANCE 7081 REQ_BGL = False 7082 7083 _MODIFYABLE = frozenset([ 7084 constants.IDISK_SIZE, 7085 constants.IDISK_MODE, 7086 ]) 7087 7088 # New or changed disk parameters may have different semantics 7089 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([ 7090 constants.IDISK_ADOPT, 7091 7092 # TODO: Implement support changing VG while recreating 7093 constants.IDISK_VG, 7094 constants.IDISK_METAVG, 7095 ])) 7096
7097 - def CheckArguments(self):
7098 if self.op.disks and ht.TPositiveInt(self.op.disks[0]): 7099 # Normalize and convert deprecated list of disk indices 7100 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))] 7101 7102 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks)) 7103 if duplicates: 7104 raise errors.OpPrereqError("Some disks have been specified more than" 7105 " once: %s" % utils.CommaJoin(duplicates), 7106 errors.ECODE_INVAL) 7107 7108 for (idx, params) in self.op.disks: 7109 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES) 7110 unsupported = frozenset(params.keys()) - self._MODIFYABLE 7111 if unsupported: 7112 raise errors.OpPrereqError("Parameters for disk %s try to change" 7113 " unmodifiable parameter(s): %s" % 7114 (idx, utils.CommaJoin(unsupported)), 7115 errors.ECODE_INVAL)
7116
7117 - def ExpandNames(self):
7118 self._ExpandAndLockInstance() 7119 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 7120 if self.op.nodes: 7121 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes] 7122 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes) 7123 else: 7124 self.needed_locks[locking.LEVEL_NODE] = [] 7125 self.needed_locks[locking.LEVEL_NODE_RES] = []
7126
7127 - def DeclareLocks(self, level):
7128 if level == locking.LEVEL_NODE: 7129 # if we replace the nodes, we only need to lock the old primary, 7130 # otherwise we need to lock all nodes for disk re-creation 7131 primary_only = bool(self.op.nodes) 7132 self._LockInstancesNodes(primary_only=primary_only) 7133 elif level == locking.LEVEL_NODE_RES: 7134 # Copy node locks 7135 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7136 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7137
7138 - def BuildHooksEnv(self):
7139 """Build hooks env. 7140 7141 This runs on master, primary and secondary nodes of the instance. 7142 7143 """ 7144 return _BuildInstanceHookEnvByObject(self, self.instance)
7145
7146 - def BuildHooksNodes(self):
7147 """Build hooks nodes. 7148 7149 """ 7150 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7151 return (nl, nl)
7152
7153 - def CheckPrereq(self):
7154 """Check prerequisites. 7155 7156 This checks that the instance is in the cluster and is not running. 7157 7158 """ 7159 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7160 assert instance is not None, \ 7161 "Cannot retrieve locked instance %s" % self.op.instance_name 7162 if self.op.nodes: 7163 if len(self.op.nodes) != len(instance.all_nodes): 7164 raise errors.OpPrereqError("Instance %s currently has %d nodes, but" 7165 " %d replacement nodes were specified" % 7166 (instance.name, len(instance.all_nodes), 7167 len(self.op.nodes)), 7168 errors.ECODE_INVAL) 7169 assert instance.disk_template != constants.DT_DRBD8 or \ 7170 len(self.op.nodes) == 2 7171 assert instance.disk_template != constants.DT_PLAIN or \ 7172 len(self.op.nodes) == 1 7173 primary_node = self.op.nodes[0] 7174 else: 7175 primary_node = instance.primary_node 7176 _CheckNodeOnline(self, primary_node) 7177 7178 if instance.disk_template == constants.DT_DISKLESS: 7179 raise errors.OpPrereqError("Instance '%s' has no disks" % 7180 self.op.instance_name, errors.ECODE_INVAL) 7181 7182 # if we replace nodes *and* the old primary is offline, we don't 7183 # check 7184 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE) 7185 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES) 7186 old_pnode = self.cfg.GetNodeInfo(instance.primary_node) 7187 if not (self.op.nodes and old_pnode.offline): 7188 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING, 7189 msg="cannot recreate disks") 7190 7191 if self.op.disks: 7192 self.disks = dict(self.op.disks) 7193 else: 7194 self.disks = dict((idx, {}) for idx in range(len(instance.disks))) 7195 7196 maxidx = max(self.disks.keys()) 7197 if maxidx >= len(instance.disks): 7198 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx, 7199 errors.ECODE_INVAL) 7200 7201 if (self.op.nodes and 7202 sorted(self.disks.keys()) != range(len(instance.disks))): 7203 raise errors.OpPrereqError("Can't recreate disks partially and" 7204 " change the nodes at the same time", 7205 errors.ECODE_INVAL) 7206 7207 self.instance = instance
7208
7209 - def Exec(self, feedback_fn):
7210 """Recreate the disks. 7211 7212 """ 7213 instance = self.instance 7214 7215 assert (self.owned_locks(locking.LEVEL_NODE) == 7216 self.owned_locks(locking.LEVEL_NODE_RES)) 7217 7218 to_skip = [] 7219 mods = [] # keeps track of needed changes 7220 7221 for idx, disk in enumerate(instance.disks): 7222 try: 7223 changes = self.disks[idx] 7224 except KeyError: 7225 # Disk should not be recreated 7226 to_skip.append(idx) 7227 continue 7228 7229 # update secondaries for disks, if needed 7230 if self.op.nodes and disk.dev_type == constants.LD_DRBD8: 7231 # need to update the nodes and minors 7232 assert len(self.op.nodes) == 2 7233 assert len(disk.logical_id) == 6 # otherwise disk internals 7234 # have changed 7235 (_, _, old_port, _, _, old_secret) = disk.logical_id 7236 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name) 7237 new_id = (self.op.nodes[0], self.op.nodes[1], old_port, 7238 new_minors[0], new_minors[1], old_secret) 7239 assert len(disk.logical_id) == len(new_id) 7240 else: 7241 new_id = None 7242 7243 mods.append((idx, new_id, changes)) 7244 7245 # now that we have passed all asserts above, we can apply the mods 7246 # in a single run (to avoid partial changes) 7247 for idx, new_id, changes in mods: 7248 disk = instance.disks[idx] 7249 if new_id is not None: 7250 assert disk.dev_type == constants.LD_DRBD8 7251 disk.logical_id = new_id 7252 if changes: 7253 disk.Update(size=changes.get(constants.IDISK_SIZE, None), 7254 mode=changes.get(constants.IDISK_MODE, None)) 7255 7256 # change primary node, if needed 7257 if self.op.nodes: 7258 instance.primary_node = self.op.nodes[0] 7259 self.LogWarning("Changing the instance's nodes, you will have to" 7260 " remove any disks left on the older nodes manually") 7261 7262 if self.op.nodes: 7263 self.cfg.Update(instance, feedback_fn) 7264 7265 _CreateDisks(self, instance, to_skip=to_skip)
7266
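# Illustrative sketch, not part of the original module: as normalized in
# CheckArguments above, the disks argument is a list of (index, params) pairs
# where params may only touch the modifiable keys (size and mode). A
# hypothetical request recreating only disk 0 with a size of 2048 MiB could
# look like:
#
#   op = opcodes.OpInstanceRecreateDisks(
#     instance_name="inst1.example.com",
#     disks=[(0, {constants.IDISK_SIZE: 2048})])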
7267 7268 -class LUInstanceRename(LogicalUnit):
7269 """Rename an instance. 7270 7271 """ 7272 HPATH = "instance-rename" 7273 HTYPE = constants.HTYPE_INSTANCE 7274
7275 - def CheckArguments(self):
7276 """Check arguments. 7277 7278 """ 7279 if self.op.ip_check and not self.op.name_check: 7280 # TODO: make the ip check more flexible and not depend on the name check 7281 raise errors.OpPrereqError("IP address check requires a name check", 7282 errors.ECODE_INVAL)
7283
7284 - def BuildHooksEnv(self):
7285 """Build hooks env. 7286 7287 This runs on master, primary and secondary nodes of the instance. 7288 7289 """ 7290 env = _BuildInstanceHookEnvByObject(self, self.instance) 7291 env["INSTANCE_NEW_NAME"] = self.op.new_name 7292 return env
7293
7294 - def BuildHooksNodes(self):
7295 """Build hooks nodes. 7296 7297 """ 7298 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 7299 return (nl, nl)
7300
7301 - def CheckPrereq(self):
7302 """Check prerequisites. 7303 7304 This checks that the instance is in the cluster and is not running. 7305 7306 """ 7307 self.op.instance_name = _ExpandInstanceName(self.cfg, 7308 self.op.instance_name) 7309 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7310 assert instance is not None 7311 _CheckNodeOnline(self, instance.primary_node) 7312 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING, 7313 msg="cannot rename") 7314 self.instance = instance 7315 7316 new_name = self.op.new_name 7317 if self.op.name_check: 7318 hostname = _CheckHostnameSane(self, new_name) 7319 new_name = self.op.new_name = hostname.name 7320 if (self.op.ip_check and 7321 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)): 7322 raise errors.OpPrereqError("IP %s of instance %s already in use" % 7323 (hostname.ip, new_name), 7324 errors.ECODE_NOTUNIQUE) 7325 7326 instance_list = self.cfg.GetInstanceList() 7327 if new_name in instance_list and new_name != instance.name: 7328 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 7329 new_name, errors.ECODE_EXISTS)
7330
7331 - def Exec(self, feedback_fn):
7332 """Rename the instance. 7333 7334 """ 7335 inst = self.instance 7336 old_name = inst.name 7337 7338 rename_file_storage = False 7339 if (inst.disk_template in constants.DTS_FILEBASED and 7340 self.op.new_name != inst.name): 7341 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 7342 rename_file_storage = True 7343 7344 self.cfg.RenameInstance(inst.name, self.op.new_name) 7345 # Change the instance lock. This is definitely safe while we hold the BGL. 7346 # Otherwise the new lock would have to be added in acquired mode. 7347 assert self.REQ_BGL 7348 self.glm.remove(locking.LEVEL_INSTANCE, old_name) 7349 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name) 7350 7351 # re-read the instance from the configuration after rename 7352 inst = self.cfg.GetInstanceInfo(self.op.new_name) 7353 7354 if rename_file_storage: 7355 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1]) 7356 result = self.rpc.call_file_storage_dir_rename(inst.primary_node, 7357 old_file_storage_dir, 7358 new_file_storage_dir) 7359 result.Raise("Could not rename on node %s directory '%s' to '%s'" 7360 " (but the instance has been renamed in Ganeti)" % 7361 (inst.primary_node, old_file_storage_dir, 7362 new_file_storage_dir)) 7363 7364 _StartInstanceDisks(self, inst, None) 7365 try: 7366 result = self.rpc.call_instance_run_rename(inst.primary_node, inst, 7367 old_name, self.op.debug_level) 7368 msg = result.fail_msg 7369 if msg: 7370 msg = ("Could not run OS rename script for instance %s on node %s" 7371 " (but the instance has been renamed in Ganeti): %s" % 7372 (inst.name, inst.primary_node, msg)) 7373 self.proc.LogWarning(msg) 7374 finally: 7375 _ShutdownInstanceDisks(self, inst) 7376 7377 return inst.name
7378
7379 7380 -class LUInstanceRemove(LogicalUnit):
7381 """Remove an instance. 7382 7383 """ 7384 HPATH = "instance-remove" 7385 HTYPE = constants.HTYPE_INSTANCE 7386 REQ_BGL = False 7387
7388 - def ExpandNames(self):
7389 self._ExpandAndLockInstance() 7390 self.needed_locks[locking.LEVEL_NODE] = [] 7391 self.needed_locks[locking.LEVEL_NODE_RES] = [] 7392 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7393
7394 - def DeclareLocks(self, level):
7395 if level == locking.LEVEL_NODE: 7396 self._LockInstancesNodes() 7397 elif level == locking.LEVEL_NODE_RES: 7398 # Copy node locks 7399 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7400 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7401
7402 - def BuildHooksEnv(self):
7403 """Build hooks env. 7404 7405 This runs on master, primary and secondary nodes of the instance. 7406 7407 """ 7408 env = _BuildInstanceHookEnvByObject(self, self.instance) 7409 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout 7410 return env
7411
7412 - def BuildHooksNodes(self):
7413 """Build hooks nodes. 7414 7415 """ 7416 nl = [self.cfg.GetMasterNode()] 7417 nl_post = list(self.instance.all_nodes) + nl 7418 return (nl, nl_post)
7419
7420 - def CheckPrereq(self):
7421 """Check prerequisites. 7422 7423 This checks that the instance is in the cluster. 7424 7425 """ 7426 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7427 assert self.instance is not None, \ 7428 "Cannot retrieve locked instance %s" % self.op.instance_name
7429
7430 - def Exec(self, feedback_fn):
7431 """Remove the instance. 7432 7433 """ 7434 instance = self.instance 7435 logging.info("Shutting down instance %s on node %s", 7436 instance.name, instance.primary_node) 7437 7438 result = self.rpc.call_instance_shutdown(instance.primary_node, instance, 7439 self.op.shutdown_timeout) 7440 msg = result.fail_msg 7441 if msg: 7442 if self.op.ignore_failures: 7443 feedback_fn("Warning: can't shutdown instance: %s" % msg) 7444 else: 7445 raise errors.OpExecError("Could not shutdown instance %s on" 7446 " node %s: %s" % 7447 (instance.name, instance.primary_node, msg)) 7448 7449 assert (self.owned_locks(locking.LEVEL_NODE) == 7450 self.owned_locks(locking.LEVEL_NODE_RES)) 7451 assert not (set(instance.all_nodes) - 7452 self.owned_locks(locking.LEVEL_NODE)), \ 7453 "Not owning correct locks" 7454 7455 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7456
7457 7458 -def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7459 """Utility function to remove an instance. 7460 7461 """ 7462 logging.info("Removing block devices for instance %s", instance.name) 7463 7464 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures): 7465 if not ignore_failures: 7466 raise errors.OpExecError("Can't remove instance's disks") 7467 feedback_fn("Warning: can't remove instance's disks") 7468 7469 logging.info("Removing instance %s out of cluster config", instance.name) 7470 7471 lu.cfg.RemoveInstance(instance.name) 7472 7473 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \ 7474 "Instance lock removal conflict" 7475 7476 # Remove lock for the instance 7477 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7478
7479 7480 -class LUInstanceQuery(NoHooksLU):
7481 """Logical unit for querying instances. 7482 7483 """ 7484 # pylint: disable=W0142 7485 REQ_BGL = False 7486
7487 - def CheckArguments(self):
7488 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names), 7489 self.op.output_fields, self.op.use_locking)
7490
7491 - def ExpandNames(self):
7492 self.iq.ExpandNames(self)
7493
7494 - def DeclareLocks(self, level):
7495 self.iq.DeclareLocks(self, level)
7496
7497 - def Exec(self, feedback_fn):
7498 return self.iq.OldStyleQuery(self)
7499
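# Illustrative sketch, not part of the original module: the name filter built
# in CheckArguments above comes from qlang.MakeSimpleFilter, e.g.
# (hypothetical names):
#
#   flt = qlang.MakeSimpleFilter("name", ["inst1.example.com",
#                                         "inst2.example.com"])
#
# With an empty name list the query is expected to return all instances.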
7500 7501 -class LUInstanceFailover(LogicalUnit):
7502 """Failover an instance. 7503 7504 """ 7505 HPATH = "instance-failover" 7506 HTYPE = constants.HTYPE_INSTANCE 7507 REQ_BGL = False 7508
7509 - def CheckArguments(self):
7510 """Check the arguments. 7511 7512 """ 7513 self.iallocator = getattr(self.op, "iallocator", None) 7514 self.target_node = getattr(self.op, "target_node", None)
7515
7516 - def ExpandNames(self):
7517 self._ExpandAndLockInstance() 7518 7519 if self.op.target_node is not None: 7520 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 7521 7522 self.needed_locks[locking.LEVEL_NODE] = [] 7523 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 7524 7525 self.needed_locks[locking.LEVEL_NODE_RES] = [] 7526 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 7527 7528 ignore_consistency = self.op.ignore_consistency 7529 shutdown_timeout = self.op.shutdown_timeout 7530 self._migrater = TLMigrateInstance(self, self.op.instance_name, 7531 cleanup=False, 7532 failover=True, 7533 ignore_consistency=ignore_consistency, 7534 shutdown_timeout=shutdown_timeout, 7535 ignore_ipolicy=self.op.ignore_ipolicy) 7536 self.tasklets = [self._migrater]
7537
7538 - def DeclareLocks(self, level):
7539 if level == locking.LEVEL_NODE: 7540 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name) 7541 if instance.disk_template in constants.DTS_EXT_MIRROR: 7542 if self.op.target_node is None: 7543 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7544 else: 7545 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node, 7546 self.op.target_node] 7547 del self.recalculate_locks[locking.LEVEL_NODE] 7548 else: 7549 self._LockInstancesNodes() 7550 elif level == locking.LEVEL_NODE_RES: 7551 # Copy node locks 7552 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7553 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7554
7555 - def BuildHooksEnv(self):
7556 """Build hooks env. 7557 7558 This runs on master, primary and secondary nodes of the instance. 7559 7560 """ 7561 instance = self._migrater.instance 7562 source_node = instance.primary_node 7563 target_node = self.op.target_node 7564 env = { 7565 "IGNORE_CONSISTENCY": self.op.ignore_consistency, 7566 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 7567 "OLD_PRIMARY": source_node, 7568 "NEW_PRIMARY": target_node, 7569 } 7570 7571 if instance.disk_template in constants.DTS_INT_MIRROR: 7572 env["OLD_SECONDARY"] = instance.secondary_nodes[0] 7573 env["NEW_SECONDARY"] = source_node 7574 else: 7575 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = "" 7576 7577 env.update(_BuildInstanceHookEnvByObject(self, instance)) 7578 7579 return env
7580
7581 - def BuildHooksNodes(self):
7582 """Build hooks nodes. 7583 7584 """ 7585 instance = self._migrater.instance 7586 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 7587 return (nl, nl + [instance.primary_node])
7588
7589 7590 -class LUInstanceMigrate(LogicalUnit):
7591 """Migrate an instance. 7592 7593 This is migration without shutting down, compared to the failover, 7594 which is done with shutdown. 7595 7596 """ 7597 HPATH = "instance-migrate" 7598 HTYPE = constants.HTYPE_INSTANCE 7599 REQ_BGL = False 7600
7601 - def ExpandNames(self):
7602 self._ExpandAndLockInstance() 7603 7604 if self.op.target_node is not None: 7605 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 7606 7607 self.needed_locks[locking.LEVEL_NODE] = [] 7608 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 7609 7610 self.needed_locks[locking.LEVEL_NODE_RES] = [] 7611 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE 7612 7613 self._migrater = \ 7614 TLMigrateInstance(self, self.op.instance_name, 7615 cleanup=self.op.cleanup, 7616 failover=False, 7617 fallback=self.op.allow_failover, 7618 allow_runtime_changes=self.op.allow_runtime_changes, 7619 ignore_ipolicy=self.op.ignore_ipolicy) 7620 self.tasklets = [self._migrater]
7621
7622 - def DeclareLocks(self, level):
7623 if level == locking.LEVEL_NODE: 7624 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name) 7625 if instance.disk_template in constants.DTS_EXT_MIRROR: 7626 if self.op.target_node is None: 7627 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 7628 else: 7629 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node, 7630 self.op.target_node] 7631 del self.recalculate_locks[locking.LEVEL_NODE] 7632 else: 7633 self._LockInstancesNodes() 7634 elif level == locking.LEVEL_NODE_RES: 7635 # Copy node locks 7636 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7637 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7638
7639 - def BuildHooksEnv(self):
7640 """Build hooks env. 7641 7642 This runs on master, primary and secondary nodes of the instance. 7643 7644 """ 7645 instance = self._migrater.instance 7646 source_node = instance.primary_node 7647 target_node = self.op.target_node 7648 env = _BuildInstanceHookEnvByObject(self, instance) 7649 env.update({ 7650 "MIGRATE_LIVE": self._migrater.live, 7651 "MIGRATE_CLEANUP": self.op.cleanup, 7652 "OLD_PRIMARY": source_node, 7653 "NEW_PRIMARY": target_node, 7654 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, 7655 }) 7656 7657 if instance.disk_template in constants.DTS_INT_MIRROR: 7658 env["OLD_SECONDARY"] = target_node 7659 env["NEW_SECONDARY"] = source_node 7660 else: 7661 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None 7662 7663 return env
7664
7665 - def BuildHooksNodes(self):
7666 """Build hooks nodes. 7667 7668 """ 7669 instance = self._migrater.instance 7670 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes) 7671 return (nl, nl + [instance.primary_node])
7672
7673 7674 -class LUInstanceMove(LogicalUnit):
7675 """Move an instance by data-copying. 7676 7677 """ 7678 HPATH = "instance-move" 7679 HTYPE = constants.HTYPE_INSTANCE 7680 REQ_BGL = False 7681
7682 - def ExpandNames(self):
7683 self._ExpandAndLockInstance() 7684 target_node = _ExpandNodeName(self.cfg, self.op.target_node) 7685 self.op.target_node = target_node 7686 self.needed_locks[locking.LEVEL_NODE] = [target_node] 7687 self.needed_locks[locking.LEVEL_NODE_RES] = [] 7688 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7689
7690 - def DeclareLocks(self, level):
7691 if level == locking.LEVEL_NODE: 7692 self._LockInstancesNodes(primary_only=True) 7693 elif level == locking.LEVEL_NODE_RES: 7694 # Copy node locks 7695 self.needed_locks[locking.LEVEL_NODE_RES] = \ 7696 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7697
7698 - def BuildHooksEnv(self):
7699 """Build hooks env. 7700 7701 This runs on master, primary and secondary nodes of the instance. 7702 7703 """ 7704 env = { 7705 "TARGET_NODE": self.op.target_node, 7706 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 7707 } 7708 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 7709 return env
7710
7711 - def BuildHooksNodes(self):
7712 """Build hooks nodes. 7713 7714 """ 7715 nl = [ 7716 self.cfg.GetMasterNode(), 7717 self.instance.primary_node, 7718 self.op.target_node, 7719 ] 7720 return (nl, nl)
7721
7722 - def CheckPrereq(self):
7723 """Check prerequisites. 7724 7725 This checks that the instance is in the cluster. 7726 7727 """ 7728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name) 7729 assert self.instance is not None, \ 7730 "Cannot retrieve locked instance %s" % self.op.instance_name 7731 7732 node = self.cfg.GetNodeInfo(self.op.target_node) 7733 assert node is not None, \ 7734 "Cannot retrieve locked node %s" % self.op.target_node 7735 7736 self.target_node = target_node = node.name 7737 7738 if target_node == instance.primary_node: 7739 raise errors.OpPrereqError("Instance %s is already on the node %s" % 7740 (instance.name, target_node), 7741 errors.ECODE_STATE) 7742 7743 bep = self.cfg.GetClusterInfo().FillBE(instance) 7744 7745 for idx, dsk in enumerate(instance.disks): 7746 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE): 7747 raise errors.OpPrereqError("Instance disk %d has a complex layout," 7748 " cannot copy" % idx, errors.ECODE_STATE) 7749 7750 _CheckNodeOnline(self, target_node) 7751 _CheckNodeNotDrained(self, target_node) 7752 _CheckNodeVmCapable(self, target_node) 7753 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), 7754 self.cfg.GetNodeGroup(node.group)) 7755 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, 7756 ignore=self.op.ignore_ipolicy) 7757 7758 if instance.admin_state == constants.ADMINST_UP: 7759 # check memory requirements on the secondary node 7760 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" % 7761 instance.name, bep[constants.BE_MAXMEM], 7762 instance.hypervisor) 7763 else: 7764 self.LogInfo("Not checking memory on the secondary node as" 7765 " instance will not be started") 7766 7767 # check bridge existance 7768 _CheckInstanceBridgesExist(self, instance, node=target_node)
7769
7770 - def Exec(self, feedback_fn):
7771 """Move an instance. 7772 7773 The move is done by shutting it down on its present node, copying 7774 the data over (slow) and starting it on the new node. 7775 7776 """ 7777 instance = self.instance 7778 7779 source_node = instance.primary_node 7780 target_node = self.target_node 7781 7782 self.LogInfo("Shutting down instance %s on source node %s", 7783 instance.name, source_node) 7784 7785 assert (self.owned_locks(locking.LEVEL_NODE) == 7786 self.owned_locks(locking.LEVEL_NODE_RES)) 7787 7788 result = self.rpc.call_instance_shutdown(source_node, instance, 7789 self.op.shutdown_timeout) 7790 msg = result.fail_msg 7791 if msg: 7792 if self.op.ignore_consistency: 7793 self.proc.LogWarning("Could not shutdown instance %s on node %s." 7794 " Proceeding anyway. Please make sure node" 7795 " %s is down. Error details: %s", 7796 instance.name, source_node, source_node, msg) 7797 else: 7798 raise errors.OpExecError("Could not shutdown instance %s on" 7799 " node %s: %s" % 7800 (instance.name, source_node, msg)) 7801 7802 # create the target disks 7803 try: 7804 _CreateDisks(self, instance, target_node=target_node) 7805 except errors.OpExecError: 7806 self.LogWarning("Device creation failed, reverting...") 7807 try: 7808 _RemoveDisks(self, instance, target_node=target_node) 7809 finally: 7810 self.cfg.ReleaseDRBDMinors(instance.name) 7811 raise 7812 7813 cluster_name = self.cfg.GetClusterInfo().cluster_name 7814 7815 errs = [] 7816 # activate, get path, copy the data over 7817 for idx, disk in enumerate(instance.disks): 7818 self.LogInfo("Copying data for disk %d", idx) 7819 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance), 7820 instance.name, True, idx) 7821 if result.fail_msg: 7822 self.LogWarning("Can't assemble newly created disk %d: %s", 7823 idx, result.fail_msg) 7824 errs.append(result.fail_msg) 7825 break 7826 dev_path = result.payload 7827 result = self.rpc.call_blockdev_export(source_node, (disk, instance), 7828 target_node, dev_path, 7829 cluster_name) 7830 if result.fail_msg: 7831 self.LogWarning("Can't copy data over for disk %d: %s", 7832 idx, result.fail_msg) 7833 errs.append(result.fail_msg) 7834 break 7835 7836 if errs: 7837 self.LogWarning("Some disks failed to copy, aborting") 7838 try: 7839 _RemoveDisks(self, instance, target_node=target_node) 7840 finally: 7841 self.cfg.ReleaseDRBDMinors(instance.name) 7842 raise errors.OpExecError("Errors during disk copy: %s" % 7843 (",".join(errs),)) 7844 7845 instance.primary_node = target_node 7846 self.cfg.Update(instance, feedback_fn) 7847 7848 self.LogInfo("Removing the disks on the original node") 7849 _RemoveDisks(self, instance, target_node=source_node) 7850 7851 # Only start the instance if it's marked as up 7852 if instance.admin_state == constants.ADMINST_UP: 7853 self.LogInfo("Starting instance %s on node %s", 7854 instance.name, target_node) 7855 7856 disks_ok, _ = _AssembleInstanceDisks(self, instance, 7857 ignore_secondaries=True) 7858 if not disks_ok: 7859 _ShutdownInstanceDisks(self, instance) 7860 raise errors.OpExecError("Can't activate the instance's disks") 7861 7862 result = self.rpc.call_instance_start(target_node, 7863 (instance, None, None), False) 7864 msg = result.fail_msg 7865 if msg: 7866 _ShutdownInstanceDisks(self, instance) 7867 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 7868 (instance.name, target_node, msg))
7869
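# Illustrative sketch, not part of the original module: the data-copying move
# implemented above is driven by an OpInstanceMove opcode naming an explicit
# target node (hypothetical names below); only plain LVM- and file-based
# disks can be moved this way, as checked in CheckPrereq.
#
#   op = opcodes.OpInstanceMove(instance_name="inst1.example.com",
#                               target_node="node2.example.com")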
7870 7871 -class LUNodeMigrate(LogicalUnit):
7872 """Migrate all instances from a node. 7873 7874 """ 7875 HPATH = "node-migrate" 7876 HTYPE = constants.HTYPE_NODE 7877 REQ_BGL = False 7878
7879 - def CheckArguments(self):
7880 pass
7881
7882 - def ExpandNames(self):
7883 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 7884 7885 self.share_locks = _ShareAll() 7886 self.needed_locks = { 7887 locking.LEVEL_NODE: [self.op.node_name], 7888 }
7889
7890 - def BuildHooksEnv(self):
7891 """Build hooks env. 7892 7893 This runs on the master, the primary and all the secondaries. 7894 7895 """ 7896 return { 7897 "NODE_NAME": self.op.node_name, 7898 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, 7899 }
7900
7901 - def BuildHooksNodes(self):
7902 """Build hooks nodes. 7903 7904 """ 7905 nl = [self.cfg.GetMasterNode()] 7906 return (nl, nl)
7907
7908 - def CheckPrereq(self):
7909 pass
7910
7911 - def Exec(self, feedback_fn):
7912 # Prepare jobs for migration instances 7913 allow_runtime_changes = self.op.allow_runtime_changes 7914 jobs = [ 7915 [opcodes.OpInstanceMigrate(instance_name=inst.name, 7916 mode=self.op.mode, 7917 live=self.op.live, 7918 iallocator=self.op.iallocator, 7919 target_node=self.op.target_node, 7920 allow_runtime_changes=allow_runtime_changes, 7921 ignore_ipolicy=self.op.ignore_ipolicy)] 7922 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name) 7923 ] 7924 7925 # TODO: Run iallocator in this opcode and pass correct placement options to 7926 # OpInstanceMigrate. Since other jobs can modify the cluster between 7927 # running the iallocator and the actual migration, a good consistency model 7928 # will have to be found. 7929 7930 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == 7931 frozenset([self.op.node_name])) 7932 7933 return ResultWithJobs(jobs)
7934
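# Illustrative sketch, not part of the original module: the value returned by
# Exec above wraps one single-opcode job per primary instance of the node,
# roughly of the shape (hypothetical instance names):
#
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]
#
# so the master daemon submits one migration job per instance.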
7935 7936 -class TLMigrateInstance(Tasklet):
7937 """Tasklet class for instance migration. 7938 7939 @type live: boolean 7940 @ivar live: whether the migration will be done live or non-live; 7941 this variable is initalized only after CheckPrereq has run 7942 @type cleanup: boolean 7943 @ivar cleanup: Wheater we cleanup from a failed migration 7944 @type iallocator: string 7945 @ivar iallocator: The iallocator used to determine target_node 7946 @type target_node: string 7947 @ivar target_node: If given, the target_node to reallocate the instance to 7948 @type failover: boolean 7949 @ivar failover: Whether operation results in failover or migration 7950 @type fallback: boolean 7951 @ivar fallback: Whether fallback to failover is allowed if migration not 7952 possible 7953 @type ignore_consistency: boolean 7954 @ivar ignore_consistency: Wheter we should ignore consistency between source 7955 and target node 7956 @type shutdown_timeout: int 7957 @ivar shutdown_timeout: In case of failover timeout of the shutdown 7958 @type ignore_ipolicy: bool 7959 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating 7960 7961 """ 7962 7963 # Constants 7964 _MIGRATION_POLL_INTERVAL = 1 # seconds 7965 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds 7966
7967 - def __init__(self, lu, instance_name, cleanup=False, 7968 failover=False, fallback=False, 7969 ignore_consistency=False, 7970 allow_runtime_changes=True, 7971 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT, 7972 ignore_ipolicy=False):
7973 """Initializes this class. 7974 7975 """ 7976 Tasklet.__init__(self, lu) 7977 7978 # Parameters 7979 self.instance_name = instance_name 7980 self.cleanup = cleanup 7981 self.live = False # will be overridden later 7982 self.failover = failover 7983 self.fallback = fallback 7984 self.ignore_consistency = ignore_consistency 7985 self.shutdown_timeout = shutdown_timeout 7986 self.ignore_ipolicy = ignore_ipolicy 7987 self.allow_runtime_changes = allow_runtime_changes
7988
7989 - def CheckPrereq(self):
7990 """Check prerequisites. 7991 7992 This checks that the instance is in the cluster. 7993 7994 """ 7995 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name) 7996 instance = self.cfg.GetInstanceInfo(instance_name) 7997 assert instance is not None 7998 self.instance = instance 7999 cluster = self.cfg.GetClusterInfo() 8000 8001 if (not self.cleanup and 8002 not instance.admin_state == constants.ADMINST_UP and 8003 not self.failover and self.fallback): 8004 self.lu.LogInfo("Instance is marked down or offline, fallback allowed," 8005 " switching to failover") 8006 self.failover = True 8007 8008 if instance.disk_template not in constants.DTS_MIRRORED: 8009 if self.failover: 8010 text = "failovers" 8011 else: 8012 text = "migrations" 8013 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow" 8014 " %s" % (instance.disk_template, text), 8015 errors.ECODE_STATE) 8016 8017 if instance.disk_template in constants.DTS_EXT_MIRROR: 8018 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node") 8019 8020 if self.lu.op.iallocator: 8021 self._RunAllocator() 8022 else: 8023 # We set set self.target_node as it is required by 8024 # BuildHooksEnv 8025 self.target_node = self.lu.op.target_node 8026 8027 # Check that the target node is correct in terms of instance policy 8028 nodeinfo = self.cfg.GetNodeInfo(self.target_node) 8029 group_info = self.cfg.GetNodeGroup(nodeinfo.group) 8030 ipolicy = _CalculateGroupIPolicy(cluster, group_info) 8031 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, 8032 ignore=self.ignore_ipolicy) 8033 8034 # self.target_node is already populated, either directly or by the 8035 # iallocator run 8036 target_node = self.target_node 8037 if self.target_node == instance.primary_node: 8038 raise errors.OpPrereqError("Cannot migrate instance %s" 8039 " to its primary (%s)" % 8040 (instance.name, instance.primary_node)) 8041 8042 if len(self.lu.tasklets) == 1: 8043 # It is safe to release locks only when we're the only tasklet 8044 # in the LU 8045 _ReleaseLocks(self.lu, locking.LEVEL_NODE, 8046 keep=[instance.primary_node, self.target_node]) 8047 8048 else: 8049 secondary_nodes = instance.secondary_nodes 8050 if not secondary_nodes: 8051 raise errors.ConfigurationError("No secondary node but using" 8052 " %s disk template" % 8053 instance.disk_template) 8054 target_node = secondary_nodes[0] 8055 if self.lu.op.iallocator or (self.lu.op.target_node and 8056 self.lu.op.target_node != target_node): 8057 if self.failover: 8058 text = "failed over" 8059 else: 8060 text = "migrated" 8061 raise errors.OpPrereqError("Instances with disk template %s cannot" 8062 " be %s to arbitrary nodes" 8063 " (neither an iallocator nor a target" 8064 " node can be passed)" % 8065 (instance.disk_template, text), 8066 errors.ECODE_INVAL) 8067 nodeinfo = self.cfg.GetNodeInfo(target_node) 8068 group_info = self.cfg.GetNodeGroup(nodeinfo.group) 8069 ipolicy = _CalculateGroupIPolicy(cluster, group_info) 8070 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, 8071 ignore=self.ignore_ipolicy) 8072 8073 i_be = cluster.FillBE(instance) 8074 8075 # check memory requirements on the secondary node 8076 if (not self.cleanup and 8077 (not self.failover or instance.admin_state == constants.ADMINST_UP)): 8078 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node, 8079 "migrating instance %s" % 8080 instance.name, 8081 i_be[constants.BE_MINMEM], 8082 instance.hypervisor) 8083 else: 8084 self.lu.LogInfo("Not checking memory on the secondary node as" 8085 " 
instance will not be started") 8086 8087 # check if failover must be forced instead of migration 8088 if (not self.cleanup and not self.failover and 8089 i_be[constants.BE_ALWAYS_FAILOVER]): 8090 self.lu.LogInfo("Instance configured to always failover; fallback" 8091 " to failover") 8092 self.failover = True 8093 8094 # check bridge existance 8095 _CheckInstanceBridgesExist(self.lu, instance, node=target_node) 8096 8097 if not self.cleanup: 8098 _CheckNodeNotDrained(self.lu, target_node) 8099 if not self.failover: 8100 result = self.rpc.call_instance_migratable(instance.primary_node, 8101 instance) 8102 if result.fail_msg and self.fallback: 8103 self.lu.LogInfo("Can't migrate, instance offline, fallback to" 8104 " failover") 8105 self.failover = True 8106 else: 8107 result.Raise("Can't migrate, please use failover", 8108 prereq=True, ecode=errors.ECODE_STATE) 8109 8110 assert not (self.failover and self.cleanup) 8111 8112 if not self.failover: 8113 if self.lu.op.live is not None and self.lu.op.mode is not None: 8114 raise errors.OpPrereqError("Only one of the 'live' and 'mode'" 8115 " parameters are accepted", 8116 errors.ECODE_INVAL) 8117 if self.lu.op.live is not None: 8118 if self.lu.op.live: 8119 self.lu.op.mode = constants.HT_MIGRATION_LIVE 8120 else: 8121 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE 8122 # reset the 'live' parameter to None so that repeated 8123 # invocations of CheckPrereq do not raise an exception 8124 self.lu.op.live = None 8125 elif self.lu.op.mode is None: 8126 # read the default value from the hypervisor 8127 i_hv = cluster.FillHV(self.instance, skip_globals=False) 8128 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] 8129 8130 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE 8131 else: 8132 # Failover is never live 8133 self.live = False 8134 8135 if not (self.failover or self.cleanup): 8136 remote_info = self.rpc.call_instance_info(instance.primary_node, 8137 instance.name, 8138 instance.hypervisor) 8139 remote_info.Raise("Error checking instance on node %s" % 8140 instance.primary_node) 8141 instance_running = bool(remote_info.payload) 8142 if instance_running: 8143 self.current_mem = int(remote_info.payload["memory"])
8144
8145 - def _RunAllocator(self):
8146 """Run the allocator based on input opcode. 8147 8148 """ 8149 # FIXME: add a self.ignore_ipolicy option 8150 ial = IAllocator(self.cfg, self.rpc, 8151 mode=constants.IALLOCATOR_MODE_RELOC, 8152 name=self.instance_name, 8153 relocate_from=[self.instance.primary_node], 8154 ) 8155 8156 ial.Run(self.lu.op.iallocator) 8157 8158 if not ial.success: 8159 raise errors.OpPrereqError("Can't compute nodes using" 8160 " iallocator '%s': %s" % 8161 (self.lu.op.iallocator, ial.info), 8162 errors.ECODE_NORES) 8163 if len(ial.result) != ial.required_nodes: 8164 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 8165 " of nodes (%s), required %s" % 8166 (self.lu.op.iallocator, len(ial.result), 8167 ial.required_nodes), errors.ECODE_FAULT) 8168 self.target_node = ial.result[0] 8169 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 8170 self.instance_name, self.lu.op.iallocator, 8171 utils.CommaJoin(ial.result))
8172
8173 - def _WaitUntilSync(self):
8174 """Poll with custom rpc for disk sync. 8175 8176 This uses our own step-based rpc call. 8177 8178 """ 8179 self.feedback_fn("* wait until resync is done") 8180 all_done = False 8181 while not all_done: 8182 all_done = True 8183 result = self.rpc.call_drbd_wait_sync(self.all_nodes, 8184 self.nodes_ip, 8185 (self.instance.disks, 8186 self.instance)) 8187 min_percent = 100 8188 for node, nres in result.items(): 8189 nres.Raise("Cannot resync disks on node %s" % node) 8190 node_done, node_percent = nres.payload 8191 all_done = all_done and node_done 8192 if node_percent is not None: 8193 min_percent = min(min_percent, node_percent) 8194 if not all_done: 8195 if min_percent < 100: 8196 self.feedback_fn(" - progress: %.1f%%" % min_percent) 8197 time.sleep(2)
8198
8199 - def _EnsureSecondary(self, node):
8200 """Demote a node to secondary. 8201 8202 """ 8203 self.feedback_fn("* switching node %s to secondary mode" % node) 8204 8205 for dev in self.instance.disks: 8206 self.cfg.SetDiskID(dev, node) 8207 8208 result = self.rpc.call_blockdev_close(node, self.instance.name, 8209 self.instance.disks) 8210 result.Raise("Cannot change disk to secondary on node %s" % node)
8211
8212 - def _GoStandalone(self):
8213 """Disconnect from the network. 8214 8215 """ 8216 self.feedback_fn("* changing into standalone mode") 8217 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, 8218 self.instance.disks) 8219 for node, nres in result.items(): 8220 nres.Raise("Cannot disconnect disks node %s" % node)
8221
8222 - def _GoReconnect(self, multimaster):
8223 """Reconnect to the network. 8224 8225 """ 8226 if multimaster: 8227 msg = "dual-master" 8228 else: 8229 msg = "single-master" 8230 self.feedback_fn("* changing disks into %s mode" % msg) 8231 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, 8232 (self.instance.disks, self.instance), 8233 self.instance.name, multimaster) 8234 for node, nres in result.items(): 8235 nres.Raise("Cannot change disks config on node %s" % node)
8236
8237 - def _ExecCleanup(self):
8238 """Try to cleanup after a failed migration. 8239 8240 The cleanup is done by: 8241 - check that the instance is running only on one node 8242 (and update the config if needed) 8243 - change disks on its secondary node to secondary 8244 - wait until disks are fully synchronized 8245 - disconnect from the network 8246 - change disks into single-master mode 8247 - wait again until disks are fully synchronized 8248 8249 """ 8250 instance = self.instance 8251 target_node = self.target_node 8252 source_node = self.source_node 8253 8254 # check running on only one node 8255 self.feedback_fn("* checking where the instance actually runs" 8256 " (if this hangs, the hypervisor might be in" 8257 " a bad state)") 8258 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor]) 8259 for node, result in ins_l.items(): 8260 result.Raise("Can't contact node %s" % node) 8261 8262 runningon_source = instance.name in ins_l[source_node].payload 8263 runningon_target = instance.name in ins_l[target_node].payload 8264 8265 if runningon_source and runningon_target: 8266 raise errors.OpExecError("Instance seems to be running on two nodes," 8267 " or the hypervisor is confused; you will have" 8268 " to ensure manually that it runs only on one" 8269 " and restart this operation") 8270 8271 if not (runningon_source or runningon_target): 8272 raise errors.OpExecError("Instance does not seem to be running at all;" 8273 " in this case it's safer to repair by" 8274 " running 'gnt-instance stop' to ensure disk" 8275 " shutdown, and then restarting it") 8276 8277 if runningon_target: 8278 # the migration has actually succeeded, we need to update the config 8279 self.feedback_fn("* instance running on secondary node (%s)," 8280 " updating config" % target_node) 8281 instance.primary_node = target_node 8282 self.cfg.Update(instance, self.feedback_fn) 8283 demoted_node = source_node 8284 else: 8285 self.feedback_fn("* instance confirmed to be running on its" 8286 " primary node (%s)" % source_node) 8287 demoted_node = target_node 8288 8289 if instance.disk_template in constants.DTS_INT_MIRROR: 8290 self._EnsureSecondary(demoted_node) 8291 try: 8292 self._WaitUntilSync() 8293 except errors.OpExecError: 8294 # we ignore here errors, since if the device is standalone, it 8295 # won't be able to sync 8296 pass 8297 self._GoStandalone() 8298 self._GoReconnect(False) 8299 self._WaitUntilSync() 8300 8301 self.feedback_fn("* done")
8302
8303 - def _RevertDiskStatus(self):
8304 """Try to revert the disk status after a failed migration. 8305 8306 """ 8307 target_node = self.target_node 8308 if self.instance.disk_template in constants.DTS_EXT_MIRROR: 8309 return 8310 8311 try: 8312 self._EnsureSecondary(target_node) 8313 self._GoStandalone() 8314 self._GoReconnect(False) 8315 self._WaitUntilSync() 8316 except errors.OpExecError, err: 8317 self.lu.LogWarning("Migration failed and I can't reconnect the drives," 8318 " please try to recover the instance manually;" 8319 " error '%s'" % str(err))
8320
8321 - def _AbortMigration(self):
8322 """Call the hypervisor code to abort a started migration. 8323 8324 """ 8325 instance = self.instance 8326 target_node = self.target_node 8327 source_node = self.source_node 8328 migration_info = self.migration_info 8329 8330 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node, 8331 instance, 8332 migration_info, 8333 False) 8334 abort_msg = abort_result.fail_msg 8335 if abort_msg: 8336 logging.error("Aborting migration failed on target node %s: %s", 8337 target_node, abort_msg) 8338 # Don't raise an exception here, as we stil have to try to revert the 8339 # disk status, even if this step failed. 8340 8341 abort_result = self.rpc.call_instance_finalize_migration_src(source_node, 8342 instance, False, self.live) 8343 abort_msg = abort_result.fail_msg 8344 if abort_msg: 8345 logging.error("Aborting migration failed on source node %s: %s", 8346 source_node, abort_msg)
8347
8348 - def _ExecMigration(self):
8349 """Migrate an instance. 8350 8351 The migrate is done by: 8352 - change the disks into dual-master mode 8353 - wait until disks are fully synchronized again 8354 - migrate the instance 8355 - change disks on the new secondary node (the old primary) to secondary 8356 - wait until disks are fully synchronized 8357 - change disks into single-master mode 8358 8359 """ 8360 instance = self.instance 8361 target_node = self.target_node 8362 source_node = self.source_node 8363 8364 # Check for hypervisor version mismatch and warn the user. 8365 nodeinfo = self.rpc.call_node_info([source_node, target_node], 8366 None, [self.instance.hypervisor]) 8367 for ninfo in nodeinfo.values(): 8368 ninfo.Raise("Unable to retrieve node information from node '%s'" % 8369 ninfo.node) 8370 (_, _, (src_info, )) = nodeinfo[source_node].payload 8371 (_, _, (dst_info, )) = nodeinfo[target_node].payload 8372 8373 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and 8374 (constants.HV_NODEINFO_KEY_VERSION in dst_info)): 8375 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION] 8376 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION] 8377 if src_version != dst_version: 8378 self.feedback_fn("* warning: hypervisor version mismatch between" 8379 " source (%s) and target (%s) node" % 8380 (src_version, dst_version)) 8381 8382 self.feedback_fn("* checking disk consistency between source and target") 8383 for (idx, dev) in enumerate(instance.disks): 8384 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False): 8385 raise errors.OpExecError("Disk %s is degraded or not fully" 8386 " synchronized on target node," 8387 " aborting migration" % idx) 8388 8389 if self.current_mem > self.tgt_free_mem: 8390 if not self.allow_runtime_changes: 8391 raise errors.OpExecError("Memory ballooning not allowed and not enough" 8392 " free memory to fit instance %s on target" 8393 " node %s (have %dMB, need %dMB)" % 8394 (instance.name, target_node, 8395 self.tgt_free_mem, self.current_mem)) 8396 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem) 8397 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node, 8398 instance, 8399 self.tgt_free_mem) 8400 rpcres.Raise("Cannot modify instance runtime memory") 8401 8402 # First get the migration information from the remote node 8403 result = self.rpc.call_migration_info(source_node, instance) 8404 msg = result.fail_msg 8405 if msg: 8406 log_err = ("Failed fetching source migration information from %s: %s" % 8407 (source_node, msg)) 8408 logging.error(log_err) 8409 raise errors.OpExecError(log_err) 8410 8411 self.migration_info = migration_info = result.payload 8412 8413 if self.instance.disk_template not in constants.DTS_EXT_MIRROR: 8414 # Then switch the disks to master/master mode 8415 self._EnsureSecondary(target_node) 8416 self._GoStandalone() 8417 self._GoReconnect(True) 8418 self._WaitUntilSync() 8419 8420 self.feedback_fn("* preparing %s to accept the instance" % target_node) 8421 result = self.rpc.call_accept_instance(target_node, 8422 instance, 8423 migration_info, 8424 self.nodes_ip[target_node]) 8425 8426 msg = result.fail_msg 8427 if msg: 8428 logging.error("Instance pre-migration failed, trying to revert" 8429 " disk status: %s", msg) 8430 self.feedback_fn("Pre-migration failed, aborting") 8431 self._AbortMigration() 8432 self._RevertDiskStatus() 8433 raise errors.OpExecError("Could not pre-migrate instance %s: %s" % 8434 (instance.name, msg)) 8435 8436 self.feedback_fn("* migrating instance to %s" % target_node) 8437 
result = self.rpc.call_instance_migrate(source_node, instance, 8438 self.nodes_ip[target_node], 8439 self.live) 8440 msg = result.fail_msg 8441 if msg: 8442 logging.error("Instance migration failed, trying to revert" 8443 " disk status: %s", msg) 8444 self.feedback_fn("Migration failed, aborting") 8445 self._AbortMigration() 8446 self._RevertDiskStatus() 8447 raise errors.OpExecError("Could not migrate instance %s: %s" % 8448 (instance.name, msg)) 8449 8450 self.feedback_fn("* starting memory transfer") 8451 last_feedback = time.time() 8452 while True: 8453 result = self.rpc.call_instance_get_migration_status(source_node, 8454 instance) 8455 msg = result.fail_msg 8456 ms = result.payload # MigrationStatus instance 8457 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES): 8458 logging.error("Instance migration failed, trying to revert" 8459 " disk status: %s", msg) 8460 self.feedback_fn("Migration failed, aborting") 8461 self._AbortMigration() 8462 self._RevertDiskStatus() 8463 if not msg: 8464 msg = "hypervisor returned failure" 8465 raise errors.OpExecError("Could not migrate instance %s: %s" % 8466 (instance.name, msg)) 8467 8468 if result.payload.status != constants.HV_MIGRATION_ACTIVE: 8469 self.feedback_fn("* memory transfer complete") 8470 break 8471 8472 if (utils.TimeoutExpired(last_feedback, 8473 self._MIGRATION_FEEDBACK_INTERVAL) and 8474 ms.transferred_ram is not None): 8475 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram) 8476 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress) 8477 last_feedback = time.time() 8478 8479 time.sleep(self._MIGRATION_POLL_INTERVAL) 8480 8481 result = self.rpc.call_instance_finalize_migration_src(source_node, 8482 instance, 8483 True, 8484 self.live) 8485 msg = result.fail_msg 8486 if msg: 8487 logging.error("Instance migration succeeded, but finalization failed" 8488 " on the source node: %s", msg) 8489 raise errors.OpExecError("Could not finalize instance migration: %s" % 8490 msg) 8491 8492 instance.primary_node = target_node 8493 8494 # distribute new instance config to the other nodes 8495 self.cfg.Update(instance, self.feedback_fn) 8496 8497 result = self.rpc.call_instance_finalize_migration_dst(target_node, 8498 instance, 8499 migration_info, 8500 True) 8501 msg = result.fail_msg 8502 if msg: 8503 logging.error("Instance migration succeeded, but finalization failed" 8504 " on the target node: %s", msg) 8505 raise errors.OpExecError("Could not finalize instance migration: %s" % 8506 msg) 8507 8508 if self.instance.disk_template not in constants.DTS_EXT_MIRROR: 8509 self._EnsureSecondary(source_node) 8510 self._WaitUntilSync() 8511 self._GoStandalone() 8512 self._GoReconnect(False) 8513 self._WaitUntilSync() 8514 8515 # If the instance's disk template is `rbd' and there was a successful 8516 # migration, unmap the device from the source node. 8517 if self.instance.disk_template == constants.DT_RBD: 8518 disks = _ExpandCheckDisks(instance, instance.disks) 8519 self.feedback_fn("* unmapping instance's disks from %s" % source_node) 8520 for disk in disks: 8521 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance)) 8522 msg = result.fail_msg 8523 if msg: 8524 logging.error("Migration was successful, but couldn't unmap the" 8525 " block device %s on source node %s: %s", 8526 disk.iv_name, source_node, msg) 8527 logging.error("You need to unmap the device %s manually on %s", 8528 disk.iv_name, source_node) 8529 8530 self.feedback_fn("* done")
8531
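# Illustrative sketch, not part of cmdlib: the progress/feedback throttling
# used by the memory-transfer polling loop in _ExecMigration above, with
# plain values standing in for the MigrationStatus payload and for
# utils.TimeoutExpired().  The interval below is an assumed example value.
import time

_EXAMPLE_FEEDBACK_INTERVAL = 10  # seconds (assumed stand-in for _MIGRATION_FEEDBACK_INTERVAL)

def _MaybeReportProgress(transferred_ram, total_ram, last_feedback):
  """Return (message or None, new last_feedback timestamp)."""
  now = time.time()
  if transferred_ram is None or now - last_feedback < _EXAMPLE_FEEDBACK_INTERVAL:
    return None, last_feedback
  mem_progress = 100 * float(transferred_ram) / float(total_ram)
  return "* memory transfer progress: %.2f %%" % mem_progress, now

# e.g. transferred_ram=1536, total_ram=4096 -> "* memory transfer progress: 37.50 %"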
8532 - def _ExecFailover(self):
8533 """Failover an instance. 8534 8535 The failover is done by shutting it down on its present node and 8536 starting it on the secondary. 8537 8538 """ 8539 instance = self.instance 8540 primary_node = self.cfg.GetNodeInfo(instance.primary_node) 8541 8542 source_node = instance.primary_node 8543 target_node = self.target_node 8544 8545 if instance.admin_state == constants.ADMINST_UP: 8546 self.feedback_fn("* checking disk consistency between source and target") 8547 for (idx, dev) in enumerate(instance.disks): 8548 # for drbd, these are drbd over lvm 8549 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, 8550 False): 8551 if primary_node.offline: 8552 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on" 8553 " target node %s" % 8554 (primary_node.name, idx, target_node)) 8555 elif not self.ignore_consistency: 8556 raise errors.OpExecError("Disk %s is degraded on target node," 8557 " aborting failover" % idx) 8558 else: 8559 self.feedback_fn("* not checking disk consistency as instance is not" 8560 " running") 8561 8562 self.feedback_fn("* shutting down instance on source node") 8563 logging.info("Shutting down instance %s on node %s", 8564 instance.name, source_node) 8565 8566 result = self.rpc.call_instance_shutdown(source_node, instance, 8567 self.shutdown_timeout) 8568 msg = result.fail_msg 8569 if msg: 8570 if self.ignore_consistency or primary_node.offline: 8571 self.lu.LogWarning("Could not shutdown instance %s on node %s," 8572 " proceeding anyway; please make sure node" 8573 " %s is down; error details: %s", 8574 instance.name, source_node, source_node, msg) 8575 else: 8576 raise errors.OpExecError("Could not shutdown instance %s on" 8577 " node %s: %s" % 8578 (instance.name, source_node, msg)) 8579 8580 self.feedback_fn("* deactivating the instance's disks on source node") 8581 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True): 8582 raise errors.OpExecError("Can't shut down the instance's disks") 8583 8584 instance.primary_node = target_node 8585 # distribute new instance config to the other nodes 8586 self.cfg.Update(instance, self.feedback_fn) 8587 8588 # Only start the instance if it's marked as up 8589 if instance.admin_state == constants.ADMINST_UP: 8590 self.feedback_fn("* activating the instance's disks on target node %s" % 8591 target_node) 8592 logging.info("Starting instance %s on node %s", 8593 instance.name, target_node) 8594 8595 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance, 8596 ignore_secondaries=True) 8597 if not disks_ok: 8598 _ShutdownInstanceDisks(self.lu, instance) 8599 raise errors.OpExecError("Can't activate the instance's disks") 8600 8601 self.feedback_fn("* starting the instance on the target node %s" % 8602 target_node) 8603 result = self.rpc.call_instance_start(target_node, (instance, None, None), 8604 False) 8605 msg = result.fail_msg 8606 if msg: 8607 _ShutdownInstanceDisks(self.lu, instance) 8608 raise errors.OpExecError("Could not start instance %s on node %s: %s" % 8609 (instance.name, target_node, msg))
8610
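# Illustrative sketch, not part of cmdlib: the decision _ExecFailover above
# takes when the shutdown RPC on the source node fails.
def _ShutdownFailureAction(ignore_consistency, primary_offline):
  """Return "warn" to proceed with a warning, "abort" to raise OpExecError."""
  if ignore_consistency or primary_offline:
    return "warn"  # proceed, but the admin must make sure the node is down
  return "abort"

assert _ShutdownFailureAction(False, True) == "warn"
assert _ShutdownFailureAction(False, False) == "abort"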
8611 - def Exec(self, feedback_fn):
8612 """Perform the migration. 8613 8614 """ 8615 self.feedback_fn = feedback_fn 8616 self.source_node = self.instance.primary_node 8617 8618 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing 8619 if self.instance.disk_template in constants.DTS_INT_MIRROR: 8620 self.target_node = self.instance.secondary_nodes[0] 8621 # Otherwise self.target_node has been populated either 8622 # directly, or through an iallocator. 8623 8624 self.all_nodes = [self.source_node, self.target_node] 8625 self.nodes_ip = dict((name, node.secondary_ip) for (name, node) 8626 in self.cfg.GetMultiNodeInfo(self.all_nodes)) 8627 8628 if self.failover: 8629 feedback_fn("Failover instance %s" % self.instance.name) 8630 self._ExecFailover() 8631 else: 8632 feedback_fn("Migrating instance %s" % self.instance.name) 8633 8634 if self.cleanup: 8635 return self._ExecCleanup() 8636 else: 8637 return self._ExecMigration()
8638
8639 8640 -def _CreateBlockDev(lu, node, instance, device, force_create, info, 8641 force_open):
8642 """Wrapper around L{_CreateBlockDevInner}. 8643 8644 This method annotates the root device first. 8645 8646 """ 8647 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg) 8648 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info, 8649 force_open)
8650
8651 8652 -def _CreateBlockDevInner(lu, node, instance, device, force_create, 8653 info, force_open):
8654 """Create a tree of block devices on a given node. 8655 8656 If this device type has to be created on secondaries, create it and 8657 all its children. 8658 8659 If not, just recurse to children keeping the same 'force' value. 8660 8661 @attention: The device has to be annotated already. 8662 8663 @param lu: the lu on whose behalf we execute 8664 @param node: the node on which to create the device 8665 @type instance: L{objects.Instance} 8666 @param instance: the instance which owns the device 8667 @type device: L{objects.Disk} 8668 @param device: the device to create 8669 @type force_create: boolean 8670 @param force_create: whether to force creation of this device; this 8671 will be changed to True whenever we find a device which has 8672 the CreateOnSecondary() attribute 8673 @param info: the extra 'metadata' we should attach to the device 8674 (this will be represented as an LVM tag) 8675 @type force_open: boolean 8676 @param force_open: this parameter will be passed to the 8677 L{backend.BlockdevCreate} function where it specifies 8678 whether we run on primary or not, and it affects both 8679 the child assembly and the device's own Open() execution 8680 8681 """ 8682 if device.CreateOnSecondary(): 8683 force_create = True 8684 8685 if device.children: 8686 for child in device.children: 8687 _CreateBlockDevInner(lu, node, instance, child, force_create, 8688 info, force_open) 8689 8690 if not force_create: 8691 return 8692 8693 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8694
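# Illustrative sketch, not part of cmdlib: the traversal order and
# force_create propagation implemented by _CreateBlockDevInner above,
# using a tiny stand-in class instead of objects.Disk.
class _FakeDisk(object):
  def __init__(self, name, create_on_secondary, children=None):
    self.name = name
    self.children = children or []
    self._cos = create_on_secondary
  def CreateOnSecondary(self):
    return self._cos

def _Walk(device, force_create, created):
  if device.CreateOnSecondary():
    force_create = True
  for child in device.children:
    _Walk(child, force_create, created)
  if force_create:
    created.append(device.name)

# A DRBD8-like parent (assumed to report CreateOnSecondary) over two LVs:
# children are created before the parent, and they inherit force_create.
tree = _FakeDisk("drbd", True, [_FakeDisk("data", False), _FakeDisk("meta", False)])
order = []
_Walk(tree, False, order)
assert order == ["data", "meta", "drbd"]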
8695 8696 -def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8697 """Create a single block device on a given node. 8698 8699 This will not recurse over children of the device, so they must be 8700 created in advance. 8701 8702 @param lu: the lu on whose behalf we execute 8703 @param node: the node on which to create the device 8704 @type instance: L{objects.Instance} 8705 @param instance: the instance which owns the device 8706 @type device: L{objects.Disk} 8707 @param device: the device to create 8708 @param info: the extra 'metadata' we should attach to the device 8709 (this will be represented as an LVM tag) 8710 @type force_open: boolean 8711 @param force_open: this parameter will be passed to the 8712 L{backend.BlockdevCreate} function where it specifies 8713 whether we run on primary or not, and it affects both 8714 the child assembly and the device's own Open() execution 8715 8716 """ 8717 lu.cfg.SetDiskID(device, node) 8718 result = lu.rpc.call_blockdev_create(node, device, device.size, 8719 instance.name, force_open, info) 8720 result.Raise("Can't create block device %s on" 8721 " node %s for instance %s" % (device, node, instance.name)) 8722 if device.physical_id is None: 8723 device.physical_id = result.payload
8724
8725 8726 -def _GenerateUniqueNames(lu, exts):
8727 """Generate suitable LV names. 8728 8729 This will generate unique logical volume names for the given instance, one for each requested extension. 8730 8731 """ 8732 results = [] 8733 for val in exts: 8734 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId()) 8735 results.append("%s%s" % (new_id, val)) 8736 return results
8737
8738 8739 -def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names, 8740 iv_name, p_minor, s_minor):
8741 """Generate a drbd8 device complete with its children. 8742 8743 """ 8744 assert len(vgnames) == len(names) == 2 8745 port = lu.cfg.AllocatePort() 8746 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId()) 8747 8748 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size, 8749 logical_id=(vgnames[0], names[0]), 8750 params={}) 8751 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE, 8752 logical_id=(vgnames[1], names[1]), 8753 params={}) 8754 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size, 8755 logical_id=(primary, secondary, port, 8756 p_minor, s_minor, 8757 shared_secret), 8758 children=[dev_data, dev_meta], 8759 iv_name=iv_name, params={}) 8760 return drbd_dev
8761 8762 8763 _DISK_TEMPLATE_NAME_PREFIX = { 8764 constants.DT_PLAIN: "", 8765 constants.DT_RBD: ".rbd", 8766 } 8767 8768 8769 _DISK_TEMPLATE_DEVICE_TYPE = { 8770 constants.DT_PLAIN: constants.LD_LV, 8771 constants.DT_FILE: constants.LD_FILE, 8772 constants.DT_SHARED_FILE: constants.LD_FILE, 8773 constants.DT_BLOCK: constants.LD_BLOCKDEV, 8774 constants.DT_RBD: constants.LD_RBD, 8775 }
8776 8777 8778 -def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node, 8779 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index, 8780 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage, 8781 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8782 """Generate the entire disk layout for a given template type. 8783 8784 """ 8785 #TODO: compute space requirements 8786 8787 vgname = lu.cfg.GetVGName() 8788 disk_count = len(disk_info) 8789 disks = [] 8790 8791 if template_name == constants.DT_DISKLESS: 8792 pass 8793 elif template_name == constants.DT_DRBD8: 8794 if len(secondary_nodes) != 1: 8795 raise errors.ProgrammerError("Wrong template configuration") 8796 remote_node = secondary_nodes[0] 8797 minors = lu.cfg.AllocateDRBDMinor( 8798 [primary_node, remote_node] * len(disk_info), instance_name) 8799 8800 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name, 8801 full_disk_params) 8802 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG] 8803 8804 names = [] 8805 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i) 8806 for i in range(disk_count)]): 8807 names.append(lv_prefix + "_data") 8808 names.append(lv_prefix + "_meta") 8809 for idx, disk in enumerate(disk_info): 8810 disk_index = idx + base_index 8811 data_vg = disk.get(constants.IDISK_VG, vgname) 8812 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg) 8813 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node, 8814 disk[constants.IDISK_SIZE], 8815 [data_vg, meta_vg], 8816 names[idx * 2:idx * 2 + 2], 8817 "disk/%d" % disk_index, 8818 minors[idx * 2], minors[idx * 2 + 1]) 8819 disk_dev.mode = disk[constants.IDISK_MODE] 8820 disks.append(disk_dev) 8821 else: 8822 if secondary_nodes: 8823 raise errors.ProgrammerError("Wrong template configuration") 8824 8825 if template_name == constants.DT_FILE: 8826 _req_file_storage() 8827 elif template_name == constants.DT_SHARED_FILE: 8828 _req_shr_file_storage() 8829 8830 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None) 8831 if name_prefix is None: 8832 names = None 8833 else: 8834 names = _GenerateUniqueNames(lu, ["%s.disk%s" % 8835 (name_prefix, base_index + i) 8836 for i in range(disk_count)]) 8837 8838 if template_name == constants.DT_PLAIN: 8839 def logical_id_fn(idx, _, disk): 8840 vg = disk.get(constants.IDISK_VG, vgname) 8841 return (vg, names[idx])
8842 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE): 8843 logical_id_fn = \ 8844 lambda _, disk_index, disk: (file_driver, 8845 "%s/disk%d" % (file_storage_dir, 8846 disk_index)) 8847 elif template_name == constants.DT_BLOCK: 8848 logical_id_fn = \ 8849 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL, 8850 disk[constants.IDISK_ADOPT]) 8851 elif template_name == constants.DT_RBD: 8852 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx]) 8853 else: 8854 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name) 8855 8856 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name] 8857 8858 for idx, disk in enumerate(disk_info): 8859 disk_index = idx + base_index 8860 size = disk[constants.IDISK_SIZE] 8861 feedback_fn("* disk %s, size %s" % 8862 (disk_index, utils.FormatUnit(size, "h"))) 8863 disks.append(objects.Disk(dev_type=dev_type, size=size, 8864 logical_id=logical_id_fn(idx, disk_index, disk), 8865 iv_name="disk/%d" % disk_index, 8866 mode=disk[constants.IDISK_MODE], 8867 params={})) 8868 8869 return disks 8870
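# Illustrative sketch, not part of cmdlib: how the non-DRBD branch of
# _GenerateDiskTemplate above combines the per-template name prefix with
# generated unique IDs and builds the logical IDs for a plain (LVM)
# template.  The unique IDs and VG name below are hypothetical.
name_prefix = ""                        # _DISK_TEMPLATE_NAME_PREFIX[DT_PLAIN]
base_index = 0
unique_ids = ["3d2c1b0a", "9f8e7d6c"]   # stand-ins for cfg.GenerateUniqueID()
names = ["%s%s.disk%d" % (uid, name_prefix, base_index + i)
         for i, uid in enumerate(unique_ids)]
vgname = "xenvg"                        # assumed cluster volume group
logical_ids = [(vgname, name) for name in names]
assert logical_ids == [("xenvg", "3d2c1b0a.disk0"), ("xenvg", "9f8e7d6c.disk1")]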
8871 8872 -def _GetInstanceInfoText(instance):
8873 """Compute the text that should be added to the disk's metadata. 8874 8875 """ 8876 return "originstname+%s" % instance.name
8877
8878 8879 -def _CalcEta(time_taken, written, total_size):
8880 """Calculates the ETA based on size written and total size. 8881 8882 @param time_taken: The time taken so far 8883 @param written: amount written so far 8884 @param total_size: The total size of data to be written 8885 @return: The remaining time in seconds 8886 8887 """ 8888 avg_time = time_taken / float(written) 8889 return (total_size - written) * avg_time
8890
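# Illustrative sketch, not part of cmdlib: _CalcEta above with example
# numbers.  After 30 seconds, 1024 MB of a 4096 MB disk have been written:
#   avg_time  = 30 / 1024.0                    (seconds per MB)
#   remaining = (4096 - 1024) * avg_time = 90  (seconds)
def _CalcEtaExample():
  time_taken, written, total_size = 30.0, 1024, 4096
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time

assert _CalcEtaExample() == 90.0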
8891 8892 -def _WipeDisks(lu, instance):
8893 """Wipes instance disks. 8894 8895 @type lu: L{LogicalUnit} 8896 @param lu: the logical unit on whose behalf we execute 8897 @type instance: L{objects.Instance} 8898 @param instance: the instance whose disks we should create 8899 @return: the success of the wipe 8900 8901 """ 8902 node = instance.primary_node 8903 8904 for device in instance.disks: 8905 lu.cfg.SetDiskID(device, node) 8906 8907 logging.info("Pause sync of instance %s disks", instance.name) 8908 result = lu.rpc.call_blockdev_pause_resume_sync(node, 8909 (instance.disks, instance), 8910 True) 8911 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node) 8912 8913 for idx, success in enumerate(result.payload): 8914 if not success: 8915 logging.warn("pause-sync of instance %s for disks %d failed", 8916 instance.name, idx) 8917 8918 try: 8919 for idx, device in enumerate(instance.disks): 8920 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but 8921 # MAX_WIPE_CHUNK at max 8922 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 * 8923 constants.MIN_WIPE_CHUNK_PERCENT) 8924 # we _must_ make this an int, otherwise rounding errors will 8925 # occur 8926 wipe_chunk_size = int(wipe_chunk_size) 8927 8928 lu.LogInfo("* Wiping disk %d", idx) 8929 logging.info("Wiping disk %d for instance %s, node %s using" 8930 " chunk size %s", idx, instance.name, node, wipe_chunk_size) 8931 8932 offset = 0 8933 size = device.size 8934 last_output = 0 8935 start_time = time.time() 8936 8937 while offset < size: 8938 wipe_size = min(wipe_chunk_size, size - offset) 8939 logging.debug("Wiping disk %d, offset %s, chunk %s", 8940 idx, offset, wipe_size) 8941 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset, 8942 wipe_size) 8943 result.Raise("Could not wipe disk %d at offset %d for size %d" % 8944 (idx, offset, wipe_size)) 8945 now = time.time() 8946 offset += wipe_size 8947 if now - last_output >= 60: 8948 eta = _CalcEta(now - start_time, offset, size) 8949 lu.LogInfo(" - done: %.1f%% ETA: %s" % 8950 (offset / float(size) * 100, utils.FormatSeconds(eta))) 8951 last_output = now 8952 finally: 8953 logging.info("Resume sync of instance %s disks", instance.name) 8954 8955 result = lu.rpc.call_blockdev_pause_resume_sync(node, 8956 (instance.disks, instance), 8957 False) 8958 8959 if result.fail_msg: 8960 lu.LogWarning("RPC call to %s for resuming disk syncing failed," 8961 " please have a look at the status and troubleshoot" 8962 " the issue: %s", node, result.fail_msg) 8963 else: 8964 for idx, success in enumerate(result.payload): 8965 if not success: 8966 lu.LogWarning("Resume sync of disk %d failed, please have a" 8967 " look at the status and troubleshoot the issue", idx) 8968 logging.warn("resume-sync of instance %s for disks %d failed", 8969 instance.name, idx)
8970
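# Illustrative sketch, not part of cmdlib: the chunk size computation used by
# _WipeDisks above.  The two constants normally come from ganeti.constants;
# the values here are assumed for the example only.
EXAMPLE_MAX_WIPE_CHUNK = 1024        # MB (assumed)
EXAMPLE_MIN_WIPE_CHUNK_PERCENT = 10  # percent (assumed)

def _WipeChunkSize(disk_size_mb):
  # a percentage of the disk, capped at the maximum chunk size, and
  # truncated to an int so the offset arithmetic stays exact
  return int(min(EXAMPLE_MAX_WIPE_CHUNK,
                 disk_size_mb / 100.0 * EXAMPLE_MIN_WIPE_CHUNK_PERCENT))

assert _WipeChunkSize(2048) == 204     # 10% of a small disk
assert _WipeChunkSize(20480) == 1024   # capped for a large disk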
8971 8972 -def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8973 """Create all disks for an instance. 8974 8975 This abstracts away some work from AddInstance. 8976 8977 @type lu: L{LogicalUnit} 8978 @param lu: the logical unit on whose behalf we execute 8979 @type instance: L{objects.Instance} 8980 @param instance: the instance whose disks we should create 8981 @type to_skip: list 8982 @param to_skip: list of indices to skip 8983 @type target_node: string 8984 @param target_node: if passed, overrides the target node for creation 8985 @rtype: boolean 8986 @return: the success of the creation 8987 8988 """ 8989 info = _GetInstanceInfoText(instance) 8990 if target_node is None: 8991 pnode = instance.primary_node 8992 all_nodes = instance.all_nodes 8993 else: 8994 pnode = target_node 8995 all_nodes = [pnode] 8996 8997 if instance.disk_template in constants.DTS_FILEBASED: 8998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 8999 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir) 9000 9001 result.Raise("Failed to create directory '%s' on" 9002 " node %s" % (file_storage_dir, pnode)) 9003 9004 # Note: this needs to be kept in sync with adding of disks in 9005 # LUInstanceSetParams 9006 for idx, device in enumerate(instance.disks): 9007 if to_skip and idx in to_skip: 9008 continue 9009 logging.info("Creating disk %s for instance '%s'", idx, instance.name) 9010 #HARDCODE 9011 for node in all_nodes: 9012 f_create = node == pnode 9013 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9014
9015 9016 -def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9017 """Remove all disks for an instance. 9018 9019 This abstracts away some work from `AddInstance()` and 9020 `RemoveInstance()`. Note that in case some of the devices couldn't 9021 be removed, the removal will continue with the other ones (compare 9022 with `_CreateDisks()`). 9023 9024 @type lu: L{LogicalUnit} 9025 @param lu: the logical unit on whose behalf we execute 9026 @type instance: L{objects.Instance} 9027 @param instance: the instance whose disks we should remove 9028 @type target_node: string 9029 @param target_node: used to override the node on which to remove the disks 9030 @rtype: boolean 9031 @return: the success of the removal 9032 9033 """ 9034 logging.info("Removing block devices for instance %s", instance.name) 9035 9036 all_result = True 9037 ports_to_release = set() 9038 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg) 9039 for (idx, device) in enumerate(anno_disks): 9040 if target_node: 9041 edata = [(target_node, device)] 9042 else: 9043 edata = device.ComputeNodeTree(instance.primary_node) 9044 for node, disk in edata: 9045 lu.cfg.SetDiskID(disk, node) 9046 result = lu.rpc.call_blockdev_remove(node, disk) 9047 if result.fail_msg: 9048 lu.LogWarning("Could not remove disk %s on node %s," 9049 " continuing anyway: %s", idx, node, result.fail_msg) 9050 if not (result.offline and node != instance.primary_node): 9051 all_result = False 9052 9053 # if this is a DRBD disk, return its port to the pool 9054 if device.dev_type in constants.LDS_DRBD: 9055 ports_to_release.add(device.logical_id[2]) 9056 9057 if all_result or ignore_failures: 9058 for port in ports_to_release: 9059 lu.cfg.AddTcpUdpPort(port) 9060 9061 if instance.disk_template in constants.DTS_FILEBASED: 9062 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1]) 9063 if target_node: 9064 tgt = target_node 9065 else: 9066 tgt = instance.primary_node 9067 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir) 9068 if result.fail_msg: 9069 lu.LogWarning("Could not remove directory '%s' on node %s: %s", 9070 file_storage_dir, instance.primary_node, result.fail_msg) 9071 all_result = False 9072 9073 return all_result
9074
9075 9076 -def _ComputeDiskSizePerVG(disk_template, disks):
9077 """Compute disk size requirements in the volume group 9078 9079 """ 9080 def _compute(disks, payload): 9081 """Universal algorithm. 9082 9083 """ 9084 vgs = {} 9085 for disk in disks: 9086 vgs[disk[constants.IDISK_VG]] = \ 9087 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload 9088 9089 return vgs
9090 9091 # Required free disk space as a function of disk and swap space 9092 req_size_dict = { 9093 constants.DT_DISKLESS: {}, 9094 constants.DT_PLAIN: _compute(disks, 0), 9095 # 128 MB are added for drbd metadata for each disk 9096 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE), 9097 constants.DT_FILE: {}, 9098 constants.DT_SHARED_FILE: {}, 9099 } 9100 9101 if disk_template not in req_size_dict: 9102 raise errors.ProgrammerError("Disk template '%s' size requirement" 9103 " is unknown" % disk_template) 9104 9105 return req_size_dict[disk_template] 9106
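# Illustrative sketch, not part of cmdlib: what _ComputeDiskSizePerVG above
# returns for two DRBD8 disks placed in hypothetical volume groups, keyed by
# plain strings instead of the constants.IDISK_* names.
EXAMPLE_DRBD_META_SIZE = 128  # MB, same role as DRBD_META_SIZE above
disks = [
  {"vg": "xenvg", "size": 1024},
  {"vg": "xenvg", "size": 512},
  {"vg": "fastvg", "size": 256},
]
per_vg = {}
for disk in disks:
  per_vg[disk["vg"]] = (per_vg.get(disk["vg"], 0) +
                        disk["size"] + EXAMPLE_DRBD_META_SIZE)
assert per_vg == {"xenvg": 1792, "fastvg": 384}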
9107 9108 -def _ComputeDiskSize(disk_template, disks):
9109 """Compute disk size requirements according to disk template 9110 9111 """ 9112 # Required free disk space as a function of disk and swap space 9113 req_size_dict = { 9114 constants.DT_DISKLESS: None, 9115 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks), 9116 # 128 MB are added for drbd metadata for each disk 9117 constants.DT_DRBD8: 9118 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks), 9119 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks), 9120 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks), 9121 constants.DT_BLOCK: 0, 9122 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks), 9123 } 9124 9125 if disk_template not in req_size_dict: 9126 raise errors.ProgrammerError("Disk template '%s' size requirement" 9127 " is unknown" % disk_template) 9128 9129 return req_size_dict[disk_template]
9130
9131 9132 -def _FilterVmNodes(lu, nodenames):
9133 """Filters out non-vm_capable nodes from a list. 9134 9135 @type lu: L{LogicalUnit} 9136 @param lu: the logical unit for which we check 9137 @type nodenames: list 9138 @param nodenames: the list of nodes on which we should check 9139 @rtype: list 9140 @return: the list of vm-capable nodes 9141 9142 """ 9143 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList()) 9144 return [name for name in nodenames if name not in non_vm_nodes]
9145
9146 9147 -def _CheckHVParams(lu, nodenames, hvname, hvparams):
9148 """Hypervisor parameter validation. 9149 9150 This function abstracts the hypervisor parameter validation to be 9151 used in both instance create and instance modify. 9152 9153 @type lu: L{LogicalUnit} 9154 @param lu: the logical unit for which we check 9155 @type nodenames: list 9156 @param nodenames: the list of nodes on which we should check 9157 @type hvname: string 9158 @param hvname: the name of the hypervisor we should use 9159 @type hvparams: dict 9160 @param hvparams: the parameters which we need to check 9161 @raise errors.OpPrereqError: if the parameters are not valid 9162 9163 """ 9164 nodenames = _FilterVmNodes(lu, nodenames) 9165 9166 cluster = lu.cfg.GetClusterInfo() 9167 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams) 9168 9169 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull) 9170 for node in nodenames: 9171 info = hvinfo[node] 9172 if info.offline: 9173 continue 9174 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9175
9176 9177 -def _CheckOSParams(lu, required, nodenames, osname, osparams):
9178 """OS parameters validation. 9179 9180 @type lu: L{LogicalUnit} 9181 @param lu: the logical unit for which we check 9182 @type required: boolean 9183 @param required: whether the validation should fail if the OS is not 9184 found 9185 @type nodenames: list 9186 @param nodenames: the list of nodes on which we should check 9187 @type osname: string 9188 @param osname: the name of the OS we should use 9189 @type osparams: dict 9190 @param osparams: the parameters which we need to check 9191 @raise errors.OpPrereqError: if the parameters are not valid 9192 9193 """ 9194 nodenames = _FilterVmNodes(lu, nodenames) 9195 result = lu.rpc.call_os_validate(nodenames, required, osname, 9196 [constants.OS_VALIDATE_PARAMETERS], 9197 osparams) 9198 for node, nres in result.items(): 9199 # we don't check for offline cases since this should be run only 9200 # against the master node and/or an instance's nodes 9201 nres.Raise("OS Parameters validation failed on node %s" % node) 9202 if not nres.payload: 9203 lu.LogInfo("OS %s not found on node %s, validation skipped", 9204 osname, node)
9205
9206 9207 -class LUInstanceCreate(LogicalUnit):
9208 """Create an instance. 9209 9210 """ 9211 HPATH = "instance-add" 9212 HTYPE = constants.HTYPE_INSTANCE 9213 REQ_BGL = False 9214
9215 - def CheckArguments(self):
9216 """Check arguments. 9217 9218 """ 9219 # do not require name_check to ease forward/backward compatibility 9220 # for tools 9221 if self.op.no_install and self.op.start: 9222 self.LogInfo("No-installation mode selected, disabling startup") 9223 self.op.start = False 9224 # validate/normalize the instance name 9225 self.op.instance_name = \ 9226 netutils.Hostname.GetNormalizedName(self.op.instance_name) 9227 9228 if self.op.ip_check and not self.op.name_check: 9229 # TODO: make the ip check more flexible and not depend on the name check 9230 raise errors.OpPrereqError("Cannot do IP address check without a name" 9231 " check", errors.ECODE_INVAL) 9232 9233 # check nics' parameter names 9234 for nic in self.op.nics: 9235 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES) 9236 9237 # check disks. parameter names and consistent adopt/no-adopt strategy 9238 has_adopt = has_no_adopt = False 9239 for disk in self.op.disks: 9240 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES) 9241 if constants.IDISK_ADOPT in disk: 9242 has_adopt = True 9243 else: 9244 has_no_adopt = True 9245 if has_adopt and has_no_adopt: 9246 raise errors.OpPrereqError("Either all disks are adopted or none is", 9247 errors.ECODE_INVAL) 9248 if has_adopt: 9249 if self.op.disk_template not in constants.DTS_MAY_ADOPT: 9250 raise errors.OpPrereqError("Disk adoption is not supported for the" 9251 " '%s' disk template" % 9252 self.op.disk_template, 9253 errors.ECODE_INVAL) 9254 if self.op.iallocator is not None: 9255 raise errors.OpPrereqError("Disk adoption not allowed with an" 9256 " iallocator script", errors.ECODE_INVAL) 9257 if self.op.mode == constants.INSTANCE_IMPORT: 9258 raise errors.OpPrereqError("Disk adoption not allowed for" 9259 " instance import", errors.ECODE_INVAL) 9260 else: 9261 if self.op.disk_template in constants.DTS_MUST_ADOPT: 9262 raise errors.OpPrereqError("Disk template %s requires disk adoption," 9263 " but no 'adopt' parameter given" % 9264 self.op.disk_template, 9265 errors.ECODE_INVAL) 9266 9267 self.adopt_disks = has_adopt 9268 9269 # instance name verification 9270 if self.op.name_check: 9271 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name) 9272 self.op.instance_name = self.hostname1.name 9273 # used in CheckPrereq for ip ping check 9274 self.check_ip = self.hostname1.ip 9275 else: 9276 self.check_ip = None 9277 9278 # file storage checks 9279 if (self.op.file_driver and 9280 not self.op.file_driver in constants.FILE_DRIVER): 9281 raise errors.OpPrereqError("Invalid file driver name '%s'" % 9282 self.op.file_driver, errors.ECODE_INVAL) 9283 9284 if self.op.disk_template == constants.DT_FILE: 9285 opcodes.RequireFileStorage() 9286 elif self.op.disk_template == constants.DT_SHARED_FILE: 9287 opcodes.RequireSharedFileStorage() 9288 9289 ### Node/iallocator related checks 9290 _CheckIAllocatorOrNode(self, "iallocator", "pnode") 9291 9292 if self.op.pnode is not None: 9293 if self.op.disk_template in constants.DTS_INT_MIRROR: 9294 if self.op.snode is None: 9295 raise errors.OpPrereqError("The networked disk templates need" 9296 " a mirror node", errors.ECODE_INVAL) 9297 elif self.op.snode: 9298 self.LogWarning("Secondary node will be ignored on non-mirrored disk" 9299 " template") 9300 self.op.snode = None 9301 9302 self._cds = _GetClusterDomainSecret() 9303 9304 if self.op.mode == constants.INSTANCE_IMPORT: 9305 # On import force_variant must be True, because if we forced it at 9306 # initial install, our only chance when importing it back is that it 9307 # works again! 
9308 self.op.force_variant = True 9309 9310 if self.op.no_install: 9311 self.LogInfo("No-installation mode has no effect during import") 9312 9313 elif self.op.mode == constants.INSTANCE_CREATE: 9314 if self.op.os_type is None: 9315 raise errors.OpPrereqError("No guest OS specified", 9316 errors.ECODE_INVAL) 9317 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: 9318 raise errors.OpPrereqError("Guest OS '%s' is not allowed for" 9319 " installation" % self.op.os_type, 9320 errors.ECODE_STATE) 9321 if self.op.disk_template is None: 9322 raise errors.OpPrereqError("No disk template specified", 9323 errors.ECODE_INVAL) 9324 9325 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 9326 # Check handshake to ensure both clusters have the same domain secret 9327 src_handshake = self.op.source_handshake 9328 if not src_handshake: 9329 raise errors.OpPrereqError("Missing source handshake", 9330 errors.ECODE_INVAL) 9331 9332 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, 9333 src_handshake) 9334 if errmsg: 9335 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg, 9336 errors.ECODE_INVAL) 9337 9338 # Load and check source CA 9339 self.source_x509_ca_pem = self.op.source_x509_ca 9340 if not self.source_x509_ca_pem: 9341 raise errors.OpPrereqError("Missing source X509 CA", 9342 errors.ECODE_INVAL) 9343 9344 try: 9345 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, 9346 self._cds) 9347 except OpenSSL.crypto.Error, err: 9348 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" % 9349 (err, ), errors.ECODE_INVAL) 9350 9351 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 9352 if errcode is not None: 9353 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ), 9354 errors.ECODE_INVAL) 9355 9356 self.source_x509_ca = cert 9357 9358 src_instance_name = self.op.source_instance_name 9359 if not src_instance_name: 9360 raise errors.OpPrereqError("Missing source instance name", 9361 errors.ECODE_INVAL) 9362 9363 self.source_instance_name = \ 9364 netutils.GetHostname(name=src_instance_name).name 9365 9366 else: 9367 raise errors.OpPrereqError("Invalid instance creation mode %r" % 9368 self.op.mode, errors.ECODE_INVAL)
9369
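# Illustrative sketch, not part of cmdlib: the all-or-nothing disk adoption
# check performed by CheckArguments above, with plain dicts standing in for
# the opcode's disk specifications.
disks = [
  {"size": 1024, "adopt": "existing-lv-1"},
  {"size": 2048},                      # no "adopt" key -> mixed specification
]
has_adopt = any("adopt" in d for d in disks)
has_no_adopt = any("adopt" not in d for d in disks)
mixed = has_adopt and has_no_adopt     # True here -> the LU raises OpPrereqError
assert mixed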
9370 - def ExpandNames(self):
9371 """ExpandNames for CreateInstance. 9372 9373 Figure out the right locks for instance creation. 9374 9375 """ 9376 self.needed_locks = {} 9377 9378 instance_name = self.op.instance_name 9379 # this is just a preventive check, but someone might still add this 9380 # instance in the meantime, and creation will fail at lock-add time 9381 if instance_name in self.cfg.GetInstanceList(): 9382 raise errors.OpPrereqError("Instance '%s' is already in the cluster" % 9383 instance_name, errors.ECODE_EXISTS) 9384 9385 self.add_locks[locking.LEVEL_INSTANCE] = instance_name 9386 9387 if self.op.iallocator: 9388 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by 9389 # specifying a group on instance creation and then selecting nodes from 9390 # that group 9391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 9392 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET 9393 else: 9394 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode) 9395 nodelist = [self.op.pnode] 9396 if self.op.snode is not None: 9397 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode) 9398 nodelist.append(self.op.snode) 9399 self.needed_locks[locking.LEVEL_NODE] = nodelist 9400 # Lock resources of instance's primary and secondary nodes (copy to 9401 # prevent accidential modification) 9402 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist) 9403 9404 # in case of import lock the source node too 9405 if self.op.mode == constants.INSTANCE_IMPORT: 9406 src_node = self.op.src_node 9407 src_path = self.op.src_path 9408 9409 if src_path is None: 9410 self.op.src_path = src_path = self.op.instance_name 9411 9412 if src_node is None: 9413 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET 9414 self.op.src_node = None 9415 if os.path.isabs(src_path): 9416 raise errors.OpPrereqError("Importing an instance from a path" 9417 " requires a source node option", 9418 errors.ECODE_INVAL) 9419 else: 9420 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node) 9421 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET: 9422 self.needed_locks[locking.LEVEL_NODE].append(src_node) 9423 if not os.path.isabs(src_path): 9424 self.op.src_path = src_path = \ 9425 utils.PathJoin(constants.EXPORT_DIR, src_path)
9426
9427 - def _RunAllocator(self):
9428 """Run the allocator based on input opcode. 9429 9430 """ 9431 nics = [n.ToDict() for n in self.nics] 9432 ial = IAllocator(self.cfg, self.rpc, 9433 mode=constants.IALLOCATOR_MODE_ALLOC, 9434 name=self.op.instance_name, 9435 disk_template=self.op.disk_template, 9436 tags=self.op.tags, 9437 os=self.op.os_type, 9438 vcpus=self.be_full[constants.BE_VCPUS], 9439 memory=self.be_full[constants.BE_MAXMEM], 9440 spindle_use=self.be_full[constants.BE_SPINDLE_USE], 9441 disks=self.disks, 9442 nics=nics, 9443 hypervisor=self.op.hypervisor, 9444 ) 9445 9446 ial.Run(self.op.iallocator) 9447 9448 if not ial.success: 9449 raise errors.OpPrereqError("Can't compute nodes using" 9450 " iallocator '%s': %s" % 9451 (self.op.iallocator, ial.info), 9452 errors.ECODE_NORES) 9453 if len(ial.result) != ial.required_nodes: 9454 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 9455 " of nodes (%s), required %s" % 9456 (self.op.iallocator, len(ial.result), 9457 ial.required_nodes), errors.ECODE_FAULT) 9458 self.op.pnode = ial.result[0] 9459 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s", 9460 self.op.instance_name, self.op.iallocator, 9461 utils.CommaJoin(ial.result)) 9462 if ial.required_nodes == 2: 9463 self.op.snode = ial.result[1]
9464
9465 - def BuildHooksEnv(self):
9466 """Build hooks env. 9467 9468 This runs on master, primary and secondary nodes of the instance. 9469 9470 """ 9471 env = { 9472 "ADD_MODE": self.op.mode, 9473 } 9474 if self.op.mode == constants.INSTANCE_IMPORT: 9475 env["SRC_NODE"] = self.op.src_node 9476 env["SRC_PATH"] = self.op.src_path 9477 env["SRC_IMAGES"] = self.src_images 9478 9479 env.update(_BuildInstanceHookEnv( 9480 name=self.op.instance_name, 9481 primary_node=self.op.pnode, 9482 secondary_nodes=self.secondaries, 9483 status=self.op.start, 9484 os_type=self.op.os_type, 9485 minmem=self.be_full[constants.BE_MINMEM], 9486 maxmem=self.be_full[constants.BE_MAXMEM], 9487 vcpus=self.be_full[constants.BE_VCPUS], 9488 nics=_NICListToTuple(self, self.nics), 9489 disk_template=self.op.disk_template, 9490 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE]) 9491 for d in self.disks], 9492 bep=self.be_full, 9493 hvp=self.hv_full, 9494 hypervisor_name=self.op.hypervisor, 9495 tags=self.op.tags, 9496 )) 9497 9498 return env
9499
9500 - def BuildHooksNodes(self):
9501 """Build hooks nodes. 9502 9503 """ 9504 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries 9505 return nl, nl
9506
9507 - def _ReadExportInfo(self):
9508 """Reads the export information from disk. 9509 9510 It will override the opcode source node and path with the actual 9511 information, if these two were not specified before. 9512 9513 @return: the export information 9514 9515 """ 9516 assert self.op.mode == constants.INSTANCE_IMPORT 9517 9518 src_node = self.op.src_node 9519 src_path = self.op.src_path 9520 9521 if src_node is None: 9522 locked_nodes = self.owned_locks(locking.LEVEL_NODE) 9523 exp_list = self.rpc.call_export_list(locked_nodes) 9524 found = False 9525 for node in exp_list: 9526 if exp_list[node].fail_msg: 9527 continue 9528 if src_path in exp_list[node].payload: 9529 found = True 9530 self.op.src_node = src_node = node 9531 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR, 9532 src_path) 9533 break 9534 if not found: 9535 raise errors.OpPrereqError("No export found for relative path %s" % 9536 src_path, errors.ECODE_INVAL) 9537 9538 _CheckNodeOnline(self, src_node) 9539 result = self.rpc.call_export_info(src_node, src_path) 9540 result.Raise("No export or invalid export found in dir %s" % src_path) 9541 9542 export_info = objects.SerializableConfigParser.Loads(str(result.payload)) 9543 if not export_info.has_section(constants.INISECT_EXP): 9544 raise errors.ProgrammerError("Corrupted export config", 9545 errors.ECODE_ENVIRON) 9546 9547 ei_version = export_info.get(constants.INISECT_EXP, "version") 9548 if (int(ei_version) != constants.EXPORT_VERSION): 9549 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" % 9550 (ei_version, constants.EXPORT_VERSION), 9551 errors.ECODE_ENVIRON) 9552 return export_info
9553
9554 - def _ReadExportParams(self, einfo):
9555 """Use export parameters as defaults. 9556 9557 In case the opcode doesn't specify (as in override) some instance 9558 parameters, then try to use them from the export information, if 9559 that declares them. 9560 9561 """ 9562 self.op.os_type = einfo.get(constants.INISECT_EXP, "os") 9563 9564 if self.op.disk_template is None: 9565 if einfo.has_option(constants.INISECT_INS, "disk_template"): 9566 self.op.disk_template = einfo.get(constants.INISECT_INS, 9567 "disk_template") 9568 if self.op.disk_template not in constants.DISK_TEMPLATES: 9569 raise errors.OpPrereqError("Disk template specified in configuration" 9570 " file is not one of the allowed values:" 9571 " %s" % " ".join(constants.DISK_TEMPLATES)) 9572 else: 9573 raise errors.OpPrereqError("No disk template specified and the export" 9574 " is missing the disk_template information", 9575 errors.ECODE_INVAL) 9576 9577 if not self.op.disks: 9578 disks = [] 9579 # TODO: import the disk iv_name too 9580 for idx in range(constants.MAX_DISKS): 9581 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx): 9582 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx) 9583 disks.append({constants.IDISK_SIZE: disk_sz}) 9584 self.op.disks = disks 9585 if not disks and self.op.disk_template != constants.DT_DISKLESS: 9586 raise errors.OpPrereqError("No disk info specified and the export" 9587 " is missing the disk information", 9588 errors.ECODE_INVAL) 9589 9590 if not self.op.nics: 9591 nics = [] 9592 for idx in range(constants.MAX_NICS): 9593 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx): 9594 ndict = {} 9595 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]: 9596 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name)) 9597 ndict[name] = v 9598 nics.append(ndict) 9599 else: 9600 break 9601 self.op.nics = nics 9602 9603 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"): 9604 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split() 9605 9606 if (self.op.hypervisor is None and 9607 einfo.has_option(constants.INISECT_INS, "hypervisor")): 9608 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor") 9609 9610 if einfo.has_section(constants.INISECT_HYP): 9611 # use the export parameters but do not override the ones 9612 # specified by the user 9613 for name, value in einfo.items(constants.INISECT_HYP): 9614 if name not in self.op.hvparams: 9615 self.op.hvparams[name] = value 9616 9617 if einfo.has_section(constants.INISECT_BEP): 9618 # use the parameters, without overriding 9619 for name, value in einfo.items(constants.INISECT_BEP): 9620 if name not in self.op.beparams: 9621 self.op.beparams[name] = value 9622 # Compatibility for the old "memory" be param 9623 if name == constants.BE_MEMORY: 9624 if constants.BE_MAXMEM not in self.op.beparams: 9625 self.op.beparams[constants.BE_MAXMEM] = value 9626 if constants.BE_MINMEM not in self.op.beparams: 9627 self.op.beparams[constants.BE_MINMEM] = value 9628 else: 9629 # try to read the parameters old style, from the main section 9630 for name in constants.BES_PARAMETERS: 9631 if (name not in self.op.beparams and 9632 einfo.has_option(constants.INISECT_INS, name)): 9633 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name) 9634 9635 if einfo.has_section(constants.INISECT_OSP): 9636 # use the parameters, without overriding 9637 for name, value in einfo.items(constants.INISECT_OSP): 9638 if name not in self.op.osparams: 9639 self.op.osparams[name] = value
9640
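# Illustrative sketch, not part of cmdlib: the "fill but never override"
# pattern _ReadExportParams above applies to hypervisor/backend/OS parameters
# taken from the export, shown with plain dicts instead of the export INI.
export_beparams = {"maxmem": "512", "vcpus": "2"}   # hypothetical export data
op_beparams = {"maxmem": "1024"}                    # already set on the opcode

for name, value in export_beparams.items():
  if name not in op_beparams:
    op_beparams[name] = value

assert op_beparams == {"maxmem": "1024", "vcpus": "2"}  # user value wins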
9641 - def _RevertToDefaults(self, cluster):
9642 """Revert the instance parameters to the default values. 9643 9644 """ 9645 # hvparams 9646 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {}) 9647 for name in self.op.hvparams.keys(): 9648 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]: 9649 del self.op.hvparams[name] 9650 # beparams 9651 be_defs = cluster.SimpleFillBE({}) 9652 for name in self.op.beparams.keys(): 9653 if name in be_defs and be_defs[name] == self.op.beparams[name]: 9654 del self.op.beparams[name] 9655 # nic params 9656 nic_defs = cluster.SimpleFillNIC({}) 9657 for nic in self.op.nics: 9658 for name in constants.NICS_PARAMETERS: 9659 if name in nic and name in nic_defs and nic[name] == nic_defs[name]: 9660 del nic[name] 9661 # osparams 9662 os_defs = cluster.SimpleFillOS(self.op.os_type, {}) 9663 for name in self.op.osparams.keys(): 9664 if name in os_defs and os_defs[name] == self.op.osparams[name]: 9665 del self.op.osparams[name]
9666
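# Illustrative sketch, not part of cmdlib: the pruning done by
# _RevertToDefaults above, which keeps only values that differ from the
# cluster defaults.  The parameter names and values are hypothetical.
cluster_defaults = {"kernel_path": "/boot/vmlinuz", "acpi": True}
op_hvparams = {"kernel_path": "/boot/vmlinuz", "acpi": False}

for name in list(op_hvparams.keys()):
  if name in cluster_defaults and cluster_defaults[name] == op_hvparams[name]:
    del op_hvparams[name]

assert op_hvparams == {"acpi": False}  # only the real override is kept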
9667 - def _CalculateFileStorageDir(self):
9668 """Calculate final instance file storage dir. 9669 9670 """ 9671 # file storage dir calculation/check 9672 self.instance_file_storage_dir = None 9673 if self.op.disk_template in constants.DTS_FILEBASED: 9674 # build the full file storage dir path 9675 joinargs = [] 9676 9677 if self.op.disk_template == constants.DT_SHARED_FILE: 9678 get_fsd_fn = self.cfg.GetSharedFileStorageDir 9679 else: 9680 get_fsd_fn = self.cfg.GetFileStorageDir 9681 9682 cfg_storagedir = get_fsd_fn() 9683 if not cfg_storagedir: 9684 raise errors.OpPrereqError("Cluster file storage dir not defined") 9685 joinargs.append(cfg_storagedir) 9686 9687 if self.op.file_storage_dir is not None: 9688 joinargs.append(self.op.file_storage_dir) 9689 9690 joinargs.append(self.op.instance_name) 9691 9692 # pylint: disable=W0142 9693 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9694
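# Illustrative sketch, not part of cmdlib: the directory assembled by
# _CalculateFileStorageDir above for a file-based instance, with
# os.path.join standing in for utils.PathJoin and assumed cluster settings.
import os.path

cfg_storagedir = "/srv/ganeti/file-storage"  # assumed cluster file storage dir
op_file_storage_dir = "web"                  # optional per-opcode subdirectory
instance_name = "instance1.example.com"

joinargs = [cfg_storagedir]
if op_file_storage_dir is not None:
  joinargs.append(op_file_storage_dir)
joinargs.append(instance_name)
instance_file_storage_dir = os.path.join(*joinargs)
assert instance_file_storage_dir == "/srv/ganeti/file-storage/web/instance1.example.com"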
9695 - def CheckPrereq(self): # pylint: disable=R0914
9696 """Check prerequisites. 9697 9698 """ 9699 self._CalculateFileStorageDir() 9700 9701 if self.op.mode == constants.INSTANCE_IMPORT: 9702 export_info = self._ReadExportInfo() 9703 self._ReadExportParams(export_info) 9704 self._old_instance_name = export_info.get(constants.INISECT_INS, "name") 9705 else: 9706 self._old_instance_name = None 9707 9708 if (not self.cfg.GetVGName() and 9709 self.op.disk_template not in constants.DTS_NOT_LVM): 9710 raise errors.OpPrereqError("Cluster does not support lvm-based" 9711 " instances", errors.ECODE_STATE) 9712 9713 if (self.op.hypervisor is None or 9714 self.op.hypervisor == constants.VALUE_AUTO): 9715 self.op.hypervisor = self.cfg.GetHypervisorType() 9716 9717 cluster = self.cfg.GetClusterInfo() 9718 enabled_hvs = cluster.enabled_hypervisors 9719 if self.op.hypervisor not in enabled_hvs: 9720 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the" 9721 " cluster (%s)" % (self.op.hypervisor, 9722 ",".join(enabled_hvs)), 9723 errors.ECODE_STATE) 9724 9725 # Check tag validity 9726 for tag in self.op.tags: 9727 objects.TaggableObject.ValidateTag(tag) 9728 9729 # check hypervisor parameter syntax (locally) 9730 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES) 9731 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, 9732 self.op.hvparams) 9733 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor) 9734 hv_type.CheckParameterSyntax(filled_hvp) 9735 self.hv_full = filled_hvp 9736 # check that we don't specify global parameters on an instance 9737 _CheckGlobalHvParams(self.op.hvparams) 9738 9739 # fill and remember the beparams dict 9740 default_beparams = cluster.beparams[constants.PP_DEFAULT] 9741 for param, value in self.op.beparams.iteritems(): 9742 if value == constants.VALUE_AUTO: 9743 self.op.beparams[param] = default_beparams[param] 9744 objects.UpgradeBeParams(self.op.beparams) 9745 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 9746 self.be_full = cluster.SimpleFillBE(self.op.beparams) 9747 9748 # build os parameters 9749 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams) 9750 9751 # now that hvp/bep are in final format, let's reset to defaults, 9752 # if told to do so 9753 if self.op.identify_defaults: 9754 self._RevertToDefaults(cluster) 9755 9756 # NIC buildup 9757 self.nics = [] 9758 for idx, nic in enumerate(self.op.nics): 9759 nic_mode_req = nic.get(constants.INIC_MODE, None) 9760 nic_mode = nic_mode_req 9761 if nic_mode is None or nic_mode == constants.VALUE_AUTO: 9762 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE] 9763 9764 # in routed mode, for the first nic, the default ip is 'auto' 9765 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0: 9766 default_ip_mode = constants.VALUE_AUTO 9767 else: 9768 default_ip_mode = constants.VALUE_NONE 9769 9770 # ip validity checks 9771 ip = nic.get(constants.INIC_IP, default_ip_mode) 9772 if ip is None or ip.lower() == constants.VALUE_NONE: 9773 nic_ip = None 9774 elif ip.lower() == constants.VALUE_AUTO: 9775 if not self.op.name_check: 9776 raise errors.OpPrereqError("IP address set to auto but name checks" 9777 " have been skipped", 9778 errors.ECODE_INVAL) 9779 nic_ip = self.hostname1.ip 9780 else: 9781 if not netutils.IPAddress.IsValid(ip): 9782 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 9783 errors.ECODE_INVAL) 9784 nic_ip = ip 9785 9786 # TODO: check the ip address for uniqueness 9787 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip: 9788 raise 
errors.OpPrereqError("Routed nic mode requires an ip address", 9789 errors.ECODE_INVAL) 9790 9791 # MAC address verification 9792 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO) 9793 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9794 mac = utils.NormalizeAndValidateMac(mac) 9795 9796 try: 9797 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 9798 except errors.ReservationError: 9799 raise errors.OpPrereqError("MAC address %s already in use" 9800 " in cluster" % mac, 9801 errors.ECODE_NOTUNIQUE) 9802 9803 # Build nic parameters 9804 link = nic.get(constants.INIC_LINK, None) 9805 if link == constants.VALUE_AUTO: 9806 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK] 9807 nicparams = {} 9808 if nic_mode_req: 9809 nicparams[constants.NIC_MODE] = nic_mode 9810 if link: 9811 nicparams[constants.NIC_LINK] = link 9812 9813 check_params = cluster.SimpleFillNIC(nicparams) 9814 objects.NIC.CheckParameterSyntax(check_params) 9815 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams)) 9816 9817 # disk checks/pre-build 9818 default_vg = self.cfg.GetVGName() 9819 self.disks = [] 9820 for disk in self.op.disks: 9821 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR) 9822 if mode not in constants.DISK_ACCESS_SET: 9823 raise errors.OpPrereqError("Invalid disk access mode '%s'" % 9824 mode, errors.ECODE_INVAL) 9825 size = disk.get(constants.IDISK_SIZE, None) 9826 if size is None: 9827 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL) 9828 try: 9829 size = int(size) 9830 except (TypeError, ValueError): 9831 raise errors.OpPrereqError("Invalid disk size '%s'" % size, 9832 errors.ECODE_INVAL) 9833 9834 data_vg = disk.get(constants.IDISK_VG, default_vg) 9835 new_disk = { 9836 constants.IDISK_SIZE: size, 9837 constants.IDISK_MODE: mode, 9838 constants.IDISK_VG: data_vg, 9839 } 9840 if constants.IDISK_METAVG in disk: 9841 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG] 9842 if constants.IDISK_ADOPT in disk: 9843 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT] 9844 self.disks.append(new_disk) 9845 9846 if self.op.mode == constants.INSTANCE_IMPORT: 9847 disk_images = [] 9848 for idx in range(len(self.disks)): 9849 option = "disk%d_dump" % idx 9850 if export_info.has_option(constants.INISECT_INS, option): 9851 # FIXME: are the old os-es, disk sizes, etc. useful? 9852 export_name = export_info.get(constants.INISECT_INS, option) 9853 image = utils.PathJoin(self.op.src_path, export_name) 9854 disk_images.append(image) 9855 else: 9856 disk_images.append(False) 9857 9858 self.src_images = disk_images 9859 9860 if self.op.instance_name == self._old_instance_name: 9861 for idx, nic in enumerate(self.nics): 9862 if nic.mac == constants.VALUE_AUTO: 9863 nic_mac_ini = "nic%d_mac" % idx 9864 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini) 9865 9866 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT 9867 9868 # ip ping checks (we use the same ip that was resolved in ExpandNames) 9869 if self.op.ip_check: 9870 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): 9871 raise errors.OpPrereqError("IP %s of instance %s already in use" % 9872 (self.check_ip, self.op.instance_name), 9873 errors.ECODE_NOTUNIQUE) 9874 9875 #### mac address generation 9876 # By generating here the mac address both the allocator and the hooks get 9877 # the real final mac address rather than the 'auto' or 'generate' value. 
9878 # There is a race condition between the generation and the instance object 9879 # creation, which means that we know the mac is valid now, but we're not 9880 # sure it will be when we actually add the instance. If things go bad 9881 # adding the instance will abort because of a duplicate mac, and the 9882 # creation job will fail. 9883 for nic in self.nics: 9884 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 9885 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId()) 9886 9887 #### allocator run 9888 9889 if self.op.iallocator is not None: 9890 self._RunAllocator() 9891 9892 # Release all unneeded node locks 9893 _ReleaseLocks(self, locking.LEVEL_NODE, 9894 keep=filter(None, [self.op.pnode, self.op.snode, 9895 self.op.src_node])) 9896 _ReleaseLocks(self, locking.LEVEL_NODE_RES, 9897 keep=filter(None, [self.op.pnode, self.op.snode, 9898 self.op.src_node])) 9899 9900 #### node related checks 9901 9902 # check primary node 9903 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode) 9904 assert self.pnode is not None, \ 9905 "Cannot retrieve locked node %s" % self.op.pnode 9906 if pnode.offline: 9907 raise errors.OpPrereqError("Cannot use offline primary node '%s'" % 9908 pnode.name, errors.ECODE_STATE) 9909 if pnode.drained: 9910 raise errors.OpPrereqError("Cannot use drained primary node '%s'" % 9911 pnode.name, errors.ECODE_STATE) 9912 if not pnode.vm_capable: 9913 raise errors.OpPrereqError("Cannot use non-vm_capable primary node" 9914 " '%s'" % pnode.name, errors.ECODE_STATE) 9915 9916 self.secondaries = [] 9917 9918 # mirror node verification 9919 if self.op.disk_template in constants.DTS_INT_MIRROR: 9920 if self.op.snode == pnode.name: 9921 raise errors.OpPrereqError("The secondary node cannot be the" 9922 " primary node", errors.ECODE_INVAL) 9923 _CheckNodeOnline(self, self.op.snode) 9924 _CheckNodeNotDrained(self, self.op.snode) 9925 _CheckNodeVmCapable(self, self.op.snode) 9926 self.secondaries.append(self.op.snode) 9927 9928 snode = self.cfg.GetNodeInfo(self.op.snode) 9929 if pnode.group != snode.group: 9930 self.LogWarning("The primary and secondary nodes are in two" 9931 " different node groups; the disk parameters" 9932 " from the first disk's node group will be" 9933 " used") 9934 9935 nodenames = [pnode.name] + self.secondaries 9936 9937 if not self.adopt_disks: 9938 if self.op.disk_template == constants.DT_RBD: 9939 # _CheckRADOSFreeSpace() is just a placeholder. 9940 # Any function that checks prerequisites can be placed here. 9941 # Check if there is enough space on the RADOS cluster. 
9942 _CheckRADOSFreeSpace() 9943 else: 9944 # Check lv size requirements, if not adopting 9945 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks) 9946 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes) 9947 9948 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data 9949 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG], 9950 disk[constants.IDISK_ADOPT]) 9951 for disk in self.disks]) 9952 if len(all_lvs) != len(self.disks): 9953 raise errors.OpPrereqError("Duplicate volume names given for adoption", 9954 errors.ECODE_INVAL) 9955 for lv_name in all_lvs: 9956 try: 9957 # FIXME: lv_name here is "vg/lv" need to ensure that other calls 9958 # to ReserveLV uses the same syntax 9959 self.cfg.ReserveLV(lv_name, self.proc.GetECId()) 9960 except errors.ReservationError: 9961 raise errors.OpPrereqError("LV named %s used by another instance" % 9962 lv_name, errors.ECODE_NOTUNIQUE) 9963 9964 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name] 9965 vg_names.Raise("Cannot get VG information from node %s" % pnode.name) 9966 9967 node_lvs = self.rpc.call_lv_list([pnode.name], 9968 vg_names.payload.keys())[pnode.name] 9969 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name) 9970 node_lvs = node_lvs.payload 9971 9972 delta = all_lvs.difference(node_lvs.keys()) 9973 if delta: 9974 raise errors.OpPrereqError("Missing logical volume(s): %s" % 9975 utils.CommaJoin(delta), 9976 errors.ECODE_INVAL) 9977 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]] 9978 if online_lvs: 9979 raise errors.OpPrereqError("Online logical volumes found, cannot" 9980 " adopt: %s" % utils.CommaJoin(online_lvs), 9981 errors.ECODE_STATE) 9982 # update the size of disk based on what is found 9983 for dsk in self.disks: 9984 dsk[constants.IDISK_SIZE] = \ 9985 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG], 9986 dsk[constants.IDISK_ADOPT])][0])) 9987 9988 elif self.op.disk_template == constants.DT_BLOCK: 9989 # Normalize and de-duplicate device paths 9990 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT]) 9991 for disk in self.disks]) 9992 if len(all_disks) != len(self.disks): 9993 raise errors.OpPrereqError("Duplicate disk names given for adoption", 9994 errors.ECODE_INVAL) 9995 baddisks = [d for d in all_disks 9996 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)] 9997 if baddisks: 9998 raise errors.OpPrereqError("Device node(s) %s lie outside %s and" 9999 " cannot be adopted" % 10000 (", ".join(baddisks), 10001 constants.ADOPTABLE_BLOCKDEV_ROOT), 10002 errors.ECODE_INVAL) 10003 10004 node_disks = self.rpc.call_bdev_sizes([pnode.name], 10005 list(all_disks))[pnode.name] 10006 node_disks.Raise("Cannot get block device information from node %s" % 10007 pnode.name) 10008 node_disks = node_disks.payload 10009 delta = all_disks.difference(node_disks.keys()) 10010 if delta: 10011 raise errors.OpPrereqError("Missing block device(s): %s" % 10012 utils.CommaJoin(delta), 10013 errors.ECODE_INVAL) 10014 for dsk in self.disks: 10015 dsk[constants.IDISK_SIZE] = \ 10016 int(float(node_disks[dsk[constants.IDISK_ADOPT]])) 10017 10018 # Verify instance specs 10019 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None) 10020 ispec = { 10021 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None), 10022 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None), 10023 constants.ISPEC_DISK_COUNT: len(self.disks), 10024 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] 10025 for disk in self.disks], 10026 
constants.ISPEC_NIC_COUNT: len(self.nics), 10027 constants.ISPEC_SPINDLE_USE: spindle_use, 10028 } 10029 10030 group_info = self.cfg.GetNodeGroup(pnode.group) 10031 ipolicy = _CalculateGroupIPolicy(cluster, group_info) 10032 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec) 10033 if not self.op.ignore_ipolicy and res: 10034 raise errors.OpPrereqError(("Instance allocation to group %s violates" 10035 " policy: %s") % (pnode.group, 10036 utils.CommaJoin(res)), 10037 errors.ECODE_INVAL) 10038 10039 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams) 10040 10041 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant) 10042 # check OS parameters (remotely) 10043 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full) 10044 10045 _CheckNicsBridgesExist(self, self.nics, self.pnode.name) 10046 10047 # memory check on primary node 10048 #TODO(dynmem): use MINMEM for checking 10049 if self.op.start: 10050 _CheckNodeFreeMemory(self, self.pnode.name, 10051 "creating instance %s" % self.op.instance_name, 10052 self.be_full[constants.BE_MAXMEM], 10053 self.op.hypervisor) 10054 10055 self.dry_run_result = list(nodenames)
10056
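# Illustrative only: a sketch of the instance spec dictionary built above as
# it is handed to the policy check (the numeric values are made-up examples,
# not defaults):
#
#   ispec = {
#     constants.ISPEC_MEM_SIZE: 1024,      # MiB, from BE_MAXMEM
#     constants.ISPEC_CPU_COUNT: 2,        # from BE_VCPUS
#     constants.ISPEC_DISK_COUNT: 1,
#     constants.ISPEC_DISK_SIZE: [10240],  # MiB per disk
#     constants.ISPEC_NIC_COUNT: 1,
#     constants.ISPEC_SPINDLE_USE: 1,      # from BE_SPINDLE_USE
#   }
#
# _ComputeIPolicyInstanceSpecViolation() returns a list of violation messages;
# an empty list means the spec fits the node group's instance policy.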
10057 - def Exec(self, feedback_fn):
10058 """Create and add the instance to the cluster. 10059 10060 """ 10061 instance = self.op.instance_name 10062 pnode_name = self.pnode.name 10063 10064 assert not (self.owned_locks(locking.LEVEL_NODE_RES) - 10065 self.owned_locks(locking.LEVEL_NODE)), \ 10066 "Node locks differ from node resource locks" 10067 10068 ht_kind = self.op.hypervisor 10069 if ht_kind in constants.HTS_REQ_PORT: 10070 network_port = self.cfg.AllocatePort() 10071 else: 10072 network_port = None 10073 10074 # This is ugly, but we have a chicken-and-egg problem here. 10075 # We can only take the group disk parameters, as the instance 10076 # has no disks yet (we are generating them right here). 10077 node = self.cfg.GetNodeInfo(pnode_name) 10078 nodegroup = self.cfg.GetNodeGroup(node.group) 10079 disks = _GenerateDiskTemplate(self, 10080 self.op.disk_template, 10081 instance, pnode_name, 10082 self.secondaries, 10083 self.disks, 10084 self.instance_file_storage_dir, 10085 self.op.file_driver, 10086 0, 10087 feedback_fn, 10088 self.cfg.GetGroupDiskParams(nodegroup)) 10089 10090 iobj = objects.Instance(name=instance, os=self.op.os_type, 10091 primary_node=pnode_name, 10092 nics=self.nics, disks=disks, 10093 disk_template=self.op.disk_template, 10094 admin_state=constants.ADMINST_DOWN, 10095 network_port=network_port, 10096 beparams=self.op.beparams, 10097 hvparams=self.op.hvparams, 10098 hypervisor=self.op.hypervisor, 10099 osparams=self.op.osparams, 10100 ) 10101 10102 if self.op.tags: 10103 for tag in self.op.tags: 10104 iobj.AddTag(tag) 10105 10106 if self.adopt_disks: 10107 if self.op.disk_template == constants.DT_PLAIN: 10108 # rename LVs to the newly-generated names; we need to construct 10109 # 'fake' LV disks with the old data, plus the new unique_id 10110 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks] 10111 rename_to = [] 10112 for t_dsk, a_dsk in zip(tmp_disks, self.disks): 10113 rename_to.append(t_dsk.logical_id) 10114 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT]) 10115 self.cfg.SetDiskID(t_dsk, pnode_name) 10116 result = self.rpc.call_blockdev_rename(pnode_name, 10117 zip(tmp_disks, rename_to)) 10118 result.Raise("Failed to rename adopted LVs") 10119 else: 10120 feedback_fn("* creating instance disks...") 10121 try: 10122 _CreateDisks(self, iobj) 10123 except errors.OpExecError: 10124 self.LogWarning("Device creation failed, reverting...") 10125 try: 10126 _RemoveDisks(self, iobj) 10127 finally: 10128 self.cfg.ReleaseDRBDMinors(instance) 10129 raise 10130 10131 feedback_fn("adding instance %s to cluster config" % instance) 10132 10133 self.cfg.AddInstance(iobj, self.proc.GetECId()) 10134 10135 # Declare that we don't want to remove the instance lock anymore, as we've 10136 # added the instance to the config 10137 del self.remove_locks[locking.LEVEL_INSTANCE] 10138 10139 if self.op.mode == constants.INSTANCE_IMPORT: 10140 # Release unused nodes 10141 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node]) 10142 else: 10143 # Release all nodes 10144 _ReleaseLocks(self, locking.LEVEL_NODE) 10145 10146 disk_abort = False 10147 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks: 10148 feedback_fn("* wiping instance disks...") 10149 try: 10150 _WipeDisks(self, iobj) 10151 except errors.OpExecError, err: 10152 logging.exception("Wiping disks failed") 10153 self.LogWarning("Wiping instance disks failed (%s)", err) 10154 disk_abort = True 10155 10156 if disk_abort: 10157 # Something is already wrong with the disks, don't do anything else 10158 pass 
10159 elif self.op.wait_for_sync: 10160 disk_abort = not _WaitForSync(self, iobj) 10161 elif iobj.disk_template in constants.DTS_INT_MIRROR: 10162 # make sure the disks are not degraded (still sync-ing is ok) 10163 feedback_fn("* checking mirrors status") 10164 disk_abort = not _WaitForSync(self, iobj, oneshot=True) 10165 else: 10166 disk_abort = False 10167 10168 if disk_abort: 10169 _RemoveDisks(self, iobj) 10170 self.cfg.RemoveInstance(iobj.name) 10171 # Make sure the instance lock gets removed 10172 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name 10173 raise errors.OpExecError("There are some degraded disks for" 10174 " this instance") 10175 10176 # Release all node resource locks 10177 _ReleaseLocks(self, locking.LEVEL_NODE_RES) 10178 10179 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks: 10180 # we need to set the disks ID to the primary node, since the 10181 # preceding code might or might have not done it, depending on 10182 # disk template and other options 10183 for disk in iobj.disks: 10184 self.cfg.SetDiskID(disk, pnode_name) 10185 if self.op.mode == constants.INSTANCE_CREATE: 10186 if not self.op.no_install: 10187 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and 10188 not self.op.wait_for_sync) 10189 if pause_sync: 10190 feedback_fn("* pausing disk sync to install instance OS") 10191 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name, 10192 (iobj.disks, 10193 iobj), True) 10194 for idx, success in enumerate(result.payload): 10195 if not success: 10196 logging.warn("pause-sync of instance %s for disk %d failed", 10197 instance, idx) 10198 10199 feedback_fn("* running the instance OS create scripts...") 10200 # FIXME: pass debug option from opcode to backend 10201 os_add_result = \ 10202 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False, 10203 self.op.debug_level) 10204 if pause_sync: 10205 feedback_fn("* resuming disk sync") 10206 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name, 10207 (iobj.disks, 10208 iobj), False) 10209 for idx, success in enumerate(result.payload): 10210 if not success: 10211 logging.warn("resume-sync of instance %s for disk %d failed", 10212 instance, idx) 10213 10214 os_add_result.Raise("Could not add os for instance %s" 10215 " on node %s" % (instance, pnode_name)) 10216 10217 else: 10218 if self.op.mode == constants.INSTANCE_IMPORT: 10219 feedback_fn("* running the instance OS import scripts...") 10220 10221 transfers = [] 10222 10223 for idx, image in enumerate(self.src_images): 10224 if not image: 10225 continue 10226 10227 # FIXME: pass debug option from opcode to backend 10228 dt = masterd.instance.DiskTransfer("disk/%s" % idx, 10229 constants.IEIO_FILE, (image, ), 10230 constants.IEIO_SCRIPT, 10231 (iobj.disks[idx], idx), 10232 None) 10233 transfers.append(dt) 10234 10235 import_result = \ 10236 masterd.instance.TransferInstanceData(self, feedback_fn, 10237 self.op.src_node, pnode_name, 10238 self.pnode.secondary_ip, 10239 iobj, transfers) 10240 if not compat.all(import_result): 10241 self.LogWarning("Some disks for instance %s on node %s were not" 10242 " imported successfully" % (instance, pnode_name)) 10243 10244 rename_from = self._old_instance_name 10245 10246 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: 10247 feedback_fn("* preparing remote import...") 10248 # The source cluster will stop the instance before attempting to make 10249 # a connection. 
In some cases stopping an instance can take a long 10250 # time, hence the shutdown timeout is added to the connection 10251 # timeout. 10252 connect_timeout = (constants.RIE_CONNECT_TIMEOUT + 10253 self.op.source_shutdown_timeout) 10254 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 10255 10256 assert iobj.primary_node == self.pnode.name 10257 disk_results = \ 10258 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode, 10259 self.source_x509_ca, 10260 self._cds, timeouts) 10261 if not compat.all(disk_results): 10262 # TODO: Should the instance still be started, even if some disks 10263 # failed to import (valid for local imports, too)? 10264 self.LogWarning("Some disks for instance %s on node %s were not" 10265 " imported successfully" % (instance, pnode_name)) 10266 10267 rename_from = self.source_instance_name 10268 10269 else: 10270 # also checked in the prereq part 10271 raise errors.ProgrammerError("Unknown OS initialization mode '%s'" 10272 % self.op.mode) 10273 10274 # Run rename script on newly imported instance 10275 assert iobj.name == instance 10276 feedback_fn("Running rename script for %s" % instance) 10277 result = self.rpc.call_instance_run_rename(pnode_name, iobj, 10278 rename_from, 10279 self.op.debug_level) 10280 if result.fail_msg: 10281 self.LogWarning("Failed to run rename script for %s on node" 10282 " %s: %s" % (instance, pnode_name, result.fail_msg)) 10283 10284 assert not self.owned_locks(locking.LEVEL_NODE_RES) 10285 10286 if self.op.start: 10287 iobj.admin_state = constants.ADMINST_UP 10288 self.cfg.Update(iobj, feedback_fn) 10289 logging.info("Starting instance %s on node %s", instance, pnode_name) 10290 feedback_fn("* starting instance...") 10291 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None), 10292 False) 10293 result.Raise("Could not start instance") 10294 10295 return list(iobj.all_nodes)
10296
10297 10298 -def _CheckRADOSFreeSpace():
10299 """Compute disk size requirements inside the RADOS cluster. 10300 10301 """ 10302 # For the RADOS cluster we assume there is always enough space. 10303 pass
10304
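# Illustrative only: should a real check ever be needed, it would follow the
# usual prerequisite pattern sketched below; the free-space helper (and any
# RPC behind it, e.g. one wrapping "ceph df") is hypothetical and not part of
# Ganeti:
#
#   def _CheckRADOSFreeSpace(lu, node_name, req_size_mb):
#     free_mb = _GetRADOSFreeSpaceMB(lu, node_name)   # hypothetical helper
#     if free_mb < req_size_mb:
#       raise errors.OpPrereqError("Not enough free space in the RADOS"
#                                  " cluster: %d MiB free, %d MiB required" %
#                                  (free_mb, req_size_mb), errors.ECODE_NORES)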
10305 10306 -class LUInstanceConsole(NoHooksLU):
10307 """Connect to an instance's console. 10308 10309 This is somewhat special in that it returns the command line that 10310 you need to run on the master node in order to connect to the 10311 console. 10312 10313 """ 10314 REQ_BGL = False 10315
10316 - def ExpandNames(self):
10317 self.share_locks = _ShareAll() 10318 self._ExpandAndLockInstance()
10319
10320 - def CheckPrereq(self):
10321 """Check prerequisites. 10322 10323 This checks that the instance is in the cluster. 10324 10325 """ 10326 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 10327 assert self.instance is not None, \ 10328 "Cannot retrieve locked instance %s" % self.op.instance_name 10329 _CheckNodeOnline(self, self.instance.primary_node)
10330
10331 - def Exec(self, feedback_fn):
10332 """Connect to the console of an instance 10333 10334 """ 10335 instance = self.instance 10336 node = instance.primary_node 10337 10338 node_insts = self.rpc.call_instance_list([node], 10339 [instance.hypervisor])[node] 10340 node_insts.Raise("Can't get node information from %s" % node) 10341 10342 if instance.name not in node_insts.payload: 10343 if instance.admin_state == constants.ADMINST_UP: 10344 state = constants.INSTST_ERRORDOWN 10345 elif instance.admin_state == constants.ADMINST_DOWN: 10346 state = constants.INSTST_ADMINDOWN 10347 else: 10348 state = constants.INSTST_ADMINOFFLINE 10349 raise errors.OpExecError("Instance %s is not running (state %s)" % 10350 (instance.name, state)) 10351 10352 logging.debug("Connecting to console of %s on %s", instance.name, node) 10353 10354 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10355
10356 10357 -def _GetInstanceConsole(cluster, instance):
10358 """Returns console information for an instance. 10359 10360 @type cluster: L{objects.Cluster} 10361 @type instance: L{objects.Instance} 10362 @rtype: dict 10363 10364 """ 10365 hyper = hypervisor.GetHypervisorClass(instance.hypervisor) 10366 # beparams and hvparams are passed separately, to avoid editing the 10367 # instance and then saving the defaults in the instance itself. 10368 hvparams = cluster.FillHV(instance) 10369 beparams = cluster.FillBE(instance) 10370 console = hyper.GetInstanceConsole(instance, hvparams, beparams) 10371 10372 assert console.instance == instance.name 10373 assert console.Validate() 10374 10375 return console.ToDict()
10376
10377 10378 -class LUInstanceReplaceDisks(LogicalUnit):
10379 """Replace the disks of an instance. 10380 10381 """ 10382 HPATH = "mirrors-replace" 10383 HTYPE = constants.HTYPE_INSTANCE 10384 REQ_BGL = False 10385
10386 - def CheckArguments(self):
10387 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node, 10388 self.op.iallocator)
10389
10390 - def ExpandNames(self):
10391 self._ExpandAndLockInstance() 10392 10393 assert locking.LEVEL_NODE not in self.needed_locks 10394 assert locking.LEVEL_NODE_RES not in self.needed_locks 10395 assert locking.LEVEL_NODEGROUP not in self.needed_locks 10396 10397 assert self.op.iallocator is None or self.op.remote_node is None, \ 10398 "Conflicting options" 10399 10400 if self.op.remote_node is not None: 10401 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 10402 10403 # Warning: do not remove the locking of the new secondary here 10404 # unless DRBD8.AddChildren is changed to work in parallel; 10405 # currently it doesn't since parallel invocations of 10406 # FindUnusedMinor will conflict 10407 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node] 10408 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 10409 else: 10410 self.needed_locks[locking.LEVEL_NODE] = [] 10411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 10412 10413 if self.op.iallocator is not None: 10414 # iallocator will select a new node in the same group 10415 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 10416 10417 self.needed_locks[locking.LEVEL_NODE_RES] = [] 10418 10419 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, 10420 self.op.iallocator, self.op.remote_node, 10421 self.op.disks, False, self.op.early_release, 10422 self.op.ignore_ipolicy) 10423 10424 self.tasklets = [self.replacer]
10425
10426 - def DeclareLocks(self, level):
10427 if level == locking.LEVEL_NODEGROUP: 10428 assert self.op.remote_node is None 10429 assert self.op.iallocator is not None 10430 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 10431 10432 self.share_locks[locking.LEVEL_NODEGROUP] = 1 10433 # Lock all groups used by instance optimistically; this requires going 10434 # via the node before it's locked, requiring verification later on 10435 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 10436 self.cfg.GetInstanceNodeGroups(self.op.instance_name) 10437 10438 elif level == locking.LEVEL_NODE: 10439 if self.op.iallocator is not None: 10440 assert self.op.remote_node is None 10441 assert not self.needed_locks[locking.LEVEL_NODE] 10442 10443 # Lock member nodes of all locked groups 10444 self.needed_locks[locking.LEVEL_NODE] = [node_name 10445 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 10446 for node_name in self.cfg.GetNodeGroup(group_uuid).members] 10447 else: 10448 self._LockInstancesNodes() 10449 elif level == locking.LEVEL_NODE_RES: 10450 # Reuse node locks 10451 self.needed_locks[locking.LEVEL_NODE_RES] = \ 10452 self.needed_locks[locking.LEVEL_NODE]
10453
10454 - def BuildHooksEnv(self):
10455 """Build hooks env. 10456 10457 This runs on the master, the primary and all the secondaries. 10458 10459 """ 10460 instance = self.replacer.instance 10461 env = { 10462 "MODE": self.op.mode, 10463 "NEW_SECONDARY": self.op.remote_node, 10464 "OLD_SECONDARY": instance.secondary_nodes[0], 10465 } 10466 env.update(_BuildInstanceHookEnvByObject(self, instance)) 10467 return env
10468
10469 - def BuildHooksNodes(self):
10470 """Build hooks nodes. 10471 10472 """ 10473 instance = self.replacer.instance 10474 nl = [ 10475 self.cfg.GetMasterNode(), 10476 instance.primary_node, 10477 ] 10478 if self.op.remote_node is not None: 10479 nl.append(self.op.remote_node) 10480 return nl, nl
10481
10482 - def CheckPrereq(self):
10483 """Check prerequisites. 10484 10485 """ 10486 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or 10487 self.op.iallocator is None) 10488 10489 # Verify if node group locks are still correct 10490 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) 10491 if owned_groups: 10492 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups) 10493 10494 return LogicalUnit.CheckPrereq(self)
10495
10496 10497 -class TLReplaceDisks(Tasklet):
10498 """Replaces disks for an instance. 10499 10500 Note: Locking is not within the scope of this class. 10501 10502 """
10503 - def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, 10504 disks, delay_iallocator, early_release, ignore_ipolicy):
10505 """Initializes this class. 10506 10507 """ 10508 Tasklet.__init__(self, lu) 10509 10510 # Parameters 10511 self.instance_name = instance_name 10512 self.mode = mode 10513 self.iallocator_name = iallocator_name 10514 self.remote_node = remote_node 10515 self.disks = disks 10516 self.delay_iallocator = delay_iallocator 10517 self.early_release = early_release 10518 self.ignore_ipolicy = ignore_ipolicy 10519 10520 # Runtime data 10521 self.instance = None 10522 self.new_node = None 10523 self.target_node = None 10524 self.other_node = None 10525 self.remote_node_info = None 10526 self.node_secondary_ip = None
10527 10528 @staticmethod
10529 - def CheckArguments(mode, remote_node, iallocator):
10530 """Helper function for users of this class. 10531 10532 """ 10533 # check for valid parameter combination 10534 if mode == constants.REPLACE_DISK_CHG: 10535 if remote_node is None and iallocator is None: 10536 raise errors.OpPrereqError("When changing the secondary either an" 10537 " iallocator script must be used or the" 10538 " new node given", errors.ECODE_INVAL) 10539 10540 if remote_node is not None and iallocator is not None: 10541 raise errors.OpPrereqError("Give either the iallocator or the new" 10542 " secondary, not both", errors.ECODE_INVAL) 10543 10544 elif remote_node is not None or iallocator is not None: 10545 # Not replacing the secondary 10546 raise errors.OpPrereqError("The iallocator and new node options can" 10547 " only be used when changing the" 10548 " secondary node", errors.ECODE_INVAL)
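# For quick reference, how the rules above play out; the mode constants are
# the real ones used below, the node and allocator names are examples only:
#
#   CheckArguments(constants.REPLACE_DISK_CHG, "node3.example.com", None)  # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")               # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, None)       # OpPrereqError
#   CheckArguments(constants.REPLACE_DISK_PRI, "node3.example.com", None)
#                                                                # OpPrereqError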
10549 10550 @staticmethod
10551 - def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10552 """Compute a new secondary node using an IAllocator. 10553 10554 """ 10555 ial = IAllocator(lu.cfg, lu.rpc, 10556 mode=constants.IALLOCATOR_MODE_RELOC, 10557 name=instance_name, 10558 relocate_from=list(relocate_from)) 10559 10560 ial.Run(iallocator_name) 10561 10562 if not ial.success: 10563 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':" 10564 " %s" % (iallocator_name, ial.info), 10565 errors.ECODE_NORES) 10566 10567 if len(ial.result) != ial.required_nodes: 10568 raise errors.OpPrereqError("iallocator '%s' returned invalid number" 10569 " of nodes (%s), required %s" % 10570 (iallocator_name, 10571 len(ial.result), ial.required_nodes), 10572 errors.ECODE_FAULT) 10573 10574 remote_node_name = ial.result[0] 10575 10576 lu.LogInfo("Selected new secondary for instance '%s': %s", 10577 instance_name, remote_node_name) 10578 10579 return remote_node_name
10580
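# An illustrative call (allocator and node names are examples only): with the
# instance's current secondary passed as relocate_from, this returns the name
# of the node picked by the allocator, or raises OpPrereqError if the run
# fails or returns the wrong number of nodes:
#
#   new_sec = TLReplaceDisks._RunAllocator(lu, "hail", "inst1.example.com",
#                                          ["node2.example.com"])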
10581 - def _FindFaultyDisks(self, node_name):
10582 """Wrapper for L{_FindFaultyInstanceDisks}. 10583 10584 """ 10585 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance, 10586 node_name, True)
10587
10588 - def _CheckDisksActivated(self, instance):
10589 """Checks if the instance disks are activated. 10590 10591 @param instance: The instance to check disks 10592 @return: True if they are activated, False otherwise 10593 10594 """ 10595 nodes = instance.all_nodes 10596 10597 for idx, dev in enumerate(instance.disks): 10598 for node in nodes: 10599 self.lu.LogInfo("Checking disk/%d on %s", idx, node) 10600 self.cfg.SetDiskID(dev, node) 10601 10602 result = _BlockdevFind(self, node, dev, instance) 10603 10604 if result.offline: 10605 continue 10606 elif result.fail_msg or not result.payload: 10607 return False 10608 10609 return True
10610
10611 - def CheckPrereq(self):
10612 """Check prerequisites. 10613 10614 This checks that the instance is in the cluster. 10615 10616 """ 10617 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name) 10618 assert instance is not None, \ 10619 "Cannot retrieve locked instance %s" % self.instance_name 10620 10621 if instance.disk_template != constants.DT_DRBD8: 10622 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based" 10623 " instances", errors.ECODE_INVAL) 10624 10625 if len(instance.secondary_nodes) != 1: 10626 raise errors.OpPrereqError("The instance has a strange layout," 10627 " expected one secondary but found %d" % 10628 len(instance.secondary_nodes), 10629 errors.ECODE_FAULT) 10630 10631 if not self.delay_iallocator: 10632 self._CheckPrereq2()
10633
10634 - def _CheckPrereq2(self):
10635 """Check prerequisites, second part. 10636 10637 This function should always be part of CheckPrereq. It was separated and is 10638 now called from Exec because during node evacuation iallocator was only 10639 called with an unmodified cluster model, not taking planned changes into 10640 account. 10641 10642 """ 10643 instance = self.instance 10644 secondary_node = instance.secondary_nodes[0] 10645 10646 if self.iallocator_name is None: 10647 remote_node = self.remote_node 10648 else: 10649 remote_node = self._RunAllocator(self.lu, self.iallocator_name, 10650 instance.name, instance.secondary_nodes) 10651 10652 if remote_node is None: 10653 self.remote_node_info = None 10654 else: 10655 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \ 10656 "Remote node '%s' is not locked" % remote_node 10657 10658 self.remote_node_info = self.cfg.GetNodeInfo(remote_node) 10659 assert self.remote_node_info is not None, \ 10660 "Cannot retrieve locked node %s" % remote_node 10661 10662 if remote_node == self.instance.primary_node: 10663 raise errors.OpPrereqError("The specified node is the primary node of" 10664 " the instance", errors.ECODE_INVAL) 10665 10666 if remote_node == secondary_node: 10667 raise errors.OpPrereqError("The specified node is already the" 10668 " secondary node of the instance", 10669 errors.ECODE_INVAL) 10670 10671 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO, 10672 constants.REPLACE_DISK_CHG): 10673 raise errors.OpPrereqError("Cannot specify disks to be replaced", 10674 errors.ECODE_INVAL) 10675 10676 if self.mode == constants.REPLACE_DISK_AUTO: 10677 if not self._CheckDisksActivated(instance): 10678 raise errors.OpPrereqError("Please run activate-disks on instance %s" 10679 " first" % self.instance_name, 10680 errors.ECODE_STATE) 10681 faulty_primary = self._FindFaultyDisks(instance.primary_node) 10682 faulty_secondary = self._FindFaultyDisks(secondary_node) 10683 10684 if faulty_primary and faulty_secondary: 10685 raise errors.OpPrereqError("Instance %s has faulty disks on more than" 10686 " one node and can not be repaired" 10687 " automatically" % self.instance_name, 10688 errors.ECODE_STATE) 10689 10690 if faulty_primary: 10691 self.disks = faulty_primary 10692 self.target_node = instance.primary_node 10693 self.other_node = secondary_node 10694 check_nodes = [self.target_node, self.other_node] 10695 elif faulty_secondary: 10696 self.disks = faulty_secondary 10697 self.target_node = secondary_node 10698 self.other_node = instance.primary_node 10699 check_nodes = [self.target_node, self.other_node] 10700 else: 10701 self.disks = [] 10702 check_nodes = [] 10703 10704 else: 10705 # Non-automatic modes 10706 if self.mode == constants.REPLACE_DISK_PRI: 10707 self.target_node = instance.primary_node 10708 self.other_node = secondary_node 10709 check_nodes = [self.target_node, self.other_node] 10710 10711 elif self.mode == constants.REPLACE_DISK_SEC: 10712 self.target_node = secondary_node 10713 self.other_node = instance.primary_node 10714 check_nodes = [self.target_node, self.other_node] 10715 10716 elif self.mode == constants.REPLACE_DISK_CHG: 10717 self.new_node = remote_node 10718 self.other_node = instance.primary_node 10719 self.target_node = secondary_node 10720 check_nodes = [self.new_node, self.other_node] 10721 10722 _CheckNodeNotDrained(self.lu, remote_node) 10723 _CheckNodeVmCapable(self.lu, remote_node) 10724 10725 old_node_info = self.cfg.GetNodeInfo(secondary_node) 10726 assert old_node_info is not None 10727 if old_node_info.offline 
and not self.early_release: 10728 # doesn't make sense to delay the release 10729 self.early_release = True 10730 self.lu.LogInfo("Old secondary %s is offline, automatically enabling" 10731 " early-release mode", secondary_node) 10732 10733 else: 10734 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" % 10735 self.mode) 10736 10737 # If not specified all disks should be replaced 10738 if not self.disks: 10739 self.disks = range(len(self.instance.disks)) 10740 10741 # TODO: This is ugly, but right now we can't distinguish between internal 10742 # submitted opcode and external one. We should fix that. 10743 if self.remote_node_info: 10744 # We change the node, lets verify it still meets instance policy 10745 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group) 10746 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), 10747 new_group_info) 10748 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info, 10749 ignore=self.ignore_ipolicy) 10750 10751 for node in check_nodes: 10752 _CheckNodeOnline(self.lu, node) 10753 10754 touched_nodes = frozenset(node_name for node_name in [self.new_node, 10755 self.other_node, 10756 self.target_node] 10757 if node_name is not None) 10758 10759 # Release unneeded node and node resource locks 10760 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes) 10761 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes) 10762 10763 # Release any owned node group 10764 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP): 10765 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP) 10766 10767 # Check whether disks are valid 10768 for disk_idx in self.disks: 10769 instance.FindDisk(disk_idx) 10770 10771 # Get secondary node IP addresses 10772 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node) 10773 in self.cfg.GetMultiNodeInfo(touched_nodes))
10774
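# Summarising the assignments made in _CheckPrereq2 above, the node roles per
# replacement mode are:
#
#   REPLACE_DISK_PRI:  target = primary,   other = secondary
#   REPLACE_DISK_SEC:  target = secondary, other = primary
#   REPLACE_DISK_CHG:  target = secondary, other = primary, new = chosen node
#   REPLACE_DISK_AUTO: target/other follow whichever side reports faulty disks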
10775 - def Exec(self, feedback_fn):
10776 """Execute disk replacement. 10777 10778 This dispatches the disk replacement to the appropriate handler. 10779 10780 """ 10781 if self.delay_iallocator: 10782 self._CheckPrereq2() 10783 10784 if __debug__: 10785 # Verify owned locks before starting operation 10786 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE) 10787 assert set(owned_nodes) == set(self.node_secondary_ip), \ 10788 ("Incorrect node locks, owning %s, expected %s" % 10789 (owned_nodes, self.node_secondary_ip.keys())) 10790 assert (self.lu.owned_locks(locking.LEVEL_NODE) == 10791 self.lu.owned_locks(locking.LEVEL_NODE_RES)) 10792 10793 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE) 10794 assert list(owned_instances) == [self.instance_name], \ 10795 "Instance '%s' not locked" % self.instance_name 10796 10797 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \ 10798 "Should not own any node group lock at this point" 10799 10800 if not self.disks: 10801 feedback_fn("No disks need replacement for instance '%s'" % 10802 self.instance.name) 10803 return 10804 10805 feedback_fn("Replacing disk(s) %s for instance '%s'" % 10806 (utils.CommaJoin(self.disks), self.instance.name)) 10807 feedback_fn("Current primary node: %s" % self.instance.primary_node) 10808 feedback_fn("Current secondary node: %s" % 10809 utils.CommaJoin(self.instance.secondary_nodes)) 10810 10811 activate_disks = (self.instance.admin_state != constants.ADMINST_UP) 10812 10813 # Activate the instance disks if we're replacing them on a down instance 10814 if activate_disks: 10815 _StartInstanceDisks(self.lu, self.instance, True) 10816 10817 try: 10818 # Should we replace the secondary node? 10819 if self.new_node is not None: 10820 fn = self._ExecDrbd8Secondary 10821 else: 10822 fn = self._ExecDrbd8DiskOnly 10823 10824 result = fn(feedback_fn) 10825 finally: 10826 # Deactivate the instance disks if we're replacing them on a 10827 # down instance 10828 if activate_disks: 10829 _SafeShutdownInstanceDisks(self.lu, self.instance) 10830 10831 assert not self.lu.owned_locks(locking.LEVEL_NODE) 10832 10833 if __debug__: 10834 # Verify owned locks 10835 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES) 10836 nodes = frozenset(self.node_secondary_ip) 10837 assert ((self.early_release and not owned_nodes) or 10838 (not self.early_release and not (set(owned_nodes) - nodes))), \ 10839 ("Not owning the correct locks, early_release=%s, owned=%r," 10840 " nodes=%r" % (self.early_release, owned_nodes, nodes)) 10841 10842 return result
10843
10844 - def _CheckVolumeGroup(self, nodes):
10845 self.lu.LogInfo("Checking volume groups") 10846 10847 vgname = self.cfg.GetVGName() 10848 10849 # Make sure volume group exists on all involved nodes 10850 results = self.rpc.call_vg_list(nodes) 10851 if not results: 10852 raise errors.OpExecError("Can't list volume groups on the nodes") 10853 10854 for node in nodes: 10855 res = results[node] 10856 res.Raise("Error checking node %s" % node) 10857 if vgname not in res.payload: 10858 raise errors.OpExecError("Volume group '%s' not found on node %s" % 10859 (vgname, node))
10860
10861 - def _CheckDisksExistence(self, nodes):
10862 # Check disk existence 10863 for idx, dev in enumerate(self.instance.disks): 10864 if idx not in self.disks: 10865 continue 10866 10867 for node in nodes: 10868 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node)) 10869 self.cfg.SetDiskID(dev, node) 10870 10871 result = _BlockdevFind(self, node, dev, self.instance) 10872 10873 msg = result.fail_msg 10874 if msg or not result.payload: 10875 if not msg: 10876 msg = "disk not found" 10877 raise errors.OpExecError("Can't find disk/%d on node %s: %s" % 10878 (idx, node, msg))
10879
10880 - def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10881 for idx, dev in enumerate(self.instance.disks): 10882 if idx not in self.disks: 10883 continue 10884 10885 self.lu.LogInfo("Checking disk/%d consistency on node %s" % 10886 (idx, node_name)) 10887 10888 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name, 10889 on_primary, ldisk=ldisk): 10890 raise errors.OpExecError("Node %s has degraded storage, unsafe to" 10891 " replace disks for instance %s" % 10892 (node_name, self.instance.name))
10893
10894 - def _CreateNewStorage(self, node_name):
10895 """Create new storage on the primary or secondary node. 10896 10897 This is only used for same-node replaces, not for changing the 10898 secondary node, hence we don't want to modify the existing disk. 10899 10900 """ 10901 iv_names = {} 10902 10903 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) 10904 for idx, dev in enumerate(disks): 10905 if idx not in self.disks: 10906 continue 10907 10908 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx)) 10909 10910 self.cfg.SetDiskID(dev, node_name) 10911 10912 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]] 10913 names = _GenerateUniqueNames(self.lu, lv_names) 10914 10915 (data_disk, meta_disk) = dev.children 10916 vg_data = data_disk.logical_id[0] 10917 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size, 10918 logical_id=(vg_data, names[0]), 10919 params=data_disk.params) 10920 vg_meta = meta_disk.logical_id[0] 10921 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE, 10922 logical_id=(vg_meta, names[1]), 10923 params=meta_disk.params) 10924 10925 new_lvs = [lv_data, lv_meta] 10926 old_lvs = [child.Copy() for child in dev.children] 10927 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs) 10928 10929 # we pass force_create=True to force the LVM creation 10930 for new_lv in new_lvs: 10931 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True, 10932 _GetInstanceInfoText(self.instance), False) 10933 10934 return iv_names
10935
10936 - def _CheckDevices(self, node_name, iv_names):
10937 for name, (dev, _, _) in iv_names.iteritems(): 10938 self.cfg.SetDiskID(dev, node_name) 10939 10940 result = _BlockdevFind(self, node_name, dev, self.instance) 10941 10942 msg = result.fail_msg 10943 if msg or not result.payload: 10944 if not msg: 10945 msg = "disk not found" 10946 raise errors.OpExecError("Can't find DRBD device %s: %s" % 10947 (name, msg)) 10948 10949 if result.payload.is_degraded: 10950 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10951
10952 - def _RemoveOldStorage(self, node_name, iv_names):
10953 for name, (_, old_lvs, _) in iv_names.iteritems(): 10954 self.lu.LogInfo("Remove logical volumes for %s" % name) 10955 10956 for lv in old_lvs: 10957 self.cfg.SetDiskID(lv, node_name) 10958 10959 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg 10960 if msg: 10961 self.lu.LogWarning("Can't remove old LV: %s" % msg, 10962 hint="remove unused LVs manually")
10963
10964 - def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10965 """Replace a disk on the primary or secondary for DRBD 8. 10966 10967 The algorithm for replace is quite complicated: 10968 10969 1. for each disk to be replaced: 10970 10971 1. create new LVs on the target node with unique names 10972 1. detach old LVs from the drbd device 10973 1. rename old LVs to name_replaced.<time_t> 10974 1. rename new LVs to old LVs 10975 1. attach the new LVs (with the old names now) to the drbd device 10976 10977 1. wait for sync across all devices 10978 10979 1. for each modified disk: 10980 10981 1. remove old LVs (which have the name name_replaces.<time_t>) 10982 10983 Failures are not very well handled. 10984 10985 """ 10986 steps_total = 6 10987 10988 # Step: check device activation 10989 self.lu.LogStep(1, steps_total, "Check device existence") 10990 self._CheckDisksExistence([self.other_node, self.target_node]) 10991 self._CheckVolumeGroup([self.target_node, self.other_node]) 10992 10993 # Step: check other node consistency 10994 self.lu.LogStep(2, steps_total, "Check peer consistency") 10995 self._CheckDisksConsistency(self.other_node, 10996 self.other_node == self.instance.primary_node, 10997 False) 10998 10999 # Step: create new storage 11000 self.lu.LogStep(3, steps_total, "Allocate new storage") 11001 iv_names = self._CreateNewStorage(self.target_node) 11002 11003 # Step: for each lv, detach+rename*2+attach 11004 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 11005 for dev, old_lvs, new_lvs in iv_names.itervalues(): 11006 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name) 11007 11008 result = self.rpc.call_blockdev_removechildren(self.target_node, dev, 11009 old_lvs) 11010 result.Raise("Can't detach drbd from local storage on node" 11011 " %s for device %s" % (self.target_node, dev.iv_name)) 11012 #dev.children = [] 11013 #cfg.Update(instance) 11014 11015 # ok, we created the new LVs, so now we know we have the needed 11016 # storage; as such, we proceed on the target node to rename 11017 # old_lv to _old, and new_lv to old_lv; note that we rename LVs 11018 # using the assumption that logical_id == physical_id (which in 11019 # turn is the unique_id on that node) 11020 11021 # FIXME(iustin): use a better name for the replaced LVs 11022 temp_suffix = int(time.time()) 11023 ren_fn = lambda d, suff: (d.physical_id[0], 11024 d.physical_id[1] + "_replaced-%s" % suff) 11025 11026 # Build the rename list based on what LVs exist on the node 11027 rename_old_to_new = [] 11028 for to_ren in old_lvs: 11029 result = self.rpc.call_blockdev_find(self.target_node, to_ren) 11030 if not result.fail_msg and result.payload: 11031 # device exists 11032 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix))) 11033 11034 self.lu.LogInfo("Renaming the old LVs on the target node") 11035 result = self.rpc.call_blockdev_rename(self.target_node, 11036 rename_old_to_new) 11037 result.Raise("Can't rename old LVs on node %s" % self.target_node) 11038 11039 # Now we rename the new LVs to the old LVs 11040 self.lu.LogInfo("Renaming the new LVs on the target node") 11041 rename_new_to_old = [(new, old.physical_id) 11042 for old, new in zip(old_lvs, new_lvs)] 11043 result = self.rpc.call_blockdev_rename(self.target_node, 11044 rename_new_to_old) 11045 result.Raise("Can't rename new LVs on node %s" % self.target_node) 11046 11047 # Intermediate steps of in memory modifications 11048 for old, new in zip(old_lvs, new_lvs): 11049 new.logical_id = old.logical_id 11050 self.cfg.SetDiskID(new, self.target_node) 11051 11052 # We need to modify 
old_lvs so that removal later removes the 11053 # right LVs, not the newly added ones; note that old_lvs is a 11054 # copy here 11055 for disk in old_lvs: 11056 disk.logical_id = ren_fn(disk, temp_suffix) 11057 self.cfg.SetDiskID(disk, self.target_node) 11058 11059 # Now that the new lvs have the old name, we can add them to the device 11060 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node) 11061 result = self.rpc.call_blockdev_addchildren(self.target_node, 11062 (dev, self.instance), new_lvs) 11063 msg = result.fail_msg 11064 if msg: 11065 for new_lv in new_lvs: 11066 msg2 = self.rpc.call_blockdev_remove(self.target_node, 11067 new_lv).fail_msg 11068 if msg2: 11069 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2, 11070 hint=("manually clean up the unused logical" 11071 " volumes")) 11072 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg) 11073 11074 cstep = itertools.count(5) 11075 11076 if self.early_release: 11077 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 11078 self._RemoveOldStorage(self.target_node, iv_names) 11079 # TODO: Check if releasing locks early still makes sense 11080 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) 11081 else: 11082 # Release all resource locks except those used by the instance 11083 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, 11084 keep=self.node_secondary_ip.keys()) 11085 11086 # Release all node locks while waiting for sync 11087 _ReleaseLocks(self.lu, locking.LEVEL_NODE) 11088 11089 # TODO: Can the instance lock be downgraded here? Take the optional disk 11090 # shutdown in the caller into consideration. 11091 11092 # Wait for sync 11093 # This can fail as the old devices are degraded and _WaitForSync 11094 # does a combined result over all disks, so we don't check its return value 11095 self.lu.LogStep(cstep.next(), steps_total, "Sync devices") 11096 _WaitForSync(self.lu, self.instance) 11097 11098 # Check all devices manually 11099 self._CheckDevices(self.instance.primary_node, iv_names) 11100 11101 # Step: remove old storage 11102 if not self.early_release: 11103 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 11104 self._RemoveOldStorage(self.target_node, iv_names)
11105
11106 - def _ExecDrbd8Secondary(self, feedback_fn):
11107 """Replace the secondary node for DRBD 8. 11108 11109 The algorithm for replace is quite complicated: 11110 - for all disks of the instance: 11111 - create new LVs on the new node with same names 11112 - shutdown the drbd device on the old secondary 11113 - disconnect the drbd network on the primary 11114 - create the drbd device on the new secondary 11115 - network attach the drbd on the primary, using an artifice: 11116 the drbd code for Attach() will connect to the network if it 11117 finds a device which is connected to the good local disks but 11118 not network enabled 11119 - wait for sync across all devices 11120 - remove all disks from the old secondary 11121 11122 Failures are not very well handled. 11123 11124 """ 11125 steps_total = 6 11126 11127 pnode = self.instance.primary_node 11128 11129 # Step: check device activation 11130 self.lu.LogStep(1, steps_total, "Check device existence") 11131 self._CheckDisksExistence([self.instance.primary_node]) 11132 self._CheckVolumeGroup([self.instance.primary_node]) 11133 11134 # Step: check other node consistency 11135 self.lu.LogStep(2, steps_total, "Check peer consistency") 11136 self._CheckDisksConsistency(self.instance.primary_node, True, True) 11137 11138 # Step: create new storage 11139 self.lu.LogStep(3, steps_total, "Allocate new storage") 11140 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg) 11141 for idx, dev in enumerate(disks): 11142 self.lu.LogInfo("Adding new local storage on %s for disk/%d" % 11143 (self.new_node, idx)) 11144 # we pass force_create=True to force LVM creation 11145 for new_lv in dev.children: 11146 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv, 11147 True, _GetInstanceInfoText(self.instance), False) 11148 11149 # Step 4: drbd minors and drbd setup changes 11150 # after this, we must manually remove the drbd minors on both the 11151 # error and the success paths 11152 self.lu.LogStep(4, steps_total, "Changing drbd configuration") 11153 minors = self.cfg.AllocateDRBDMinor([self.new_node 11154 for dev in self.instance.disks], 11155 self.instance.name) 11156 logging.debug("Allocated minors %r", minors) 11157 11158 iv_names = {} 11159 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)): 11160 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" % 11161 (self.new_node, idx)) 11162 # create new devices on new_node; note that we create two IDs: 11163 # one without port, so the drbd will be activated without 11164 # networking information on the new node at this stage, and one 11165 # with network, for the later activation in step 4 11166 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id 11167 if self.instance.primary_node == o_node1: 11168 p_minor = o_minor1 11169 else: 11170 assert self.instance.primary_node == o_node2, "Three-node instance?" 
11171 p_minor = o_minor2 11172 11173 new_alone_id = (self.instance.primary_node, self.new_node, None, 11174 p_minor, new_minor, o_secret) 11175 new_net_id = (self.instance.primary_node, self.new_node, o_port, 11176 p_minor, new_minor, o_secret) 11177 11178 iv_names[idx] = (dev, dev.children, new_net_id) 11179 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, 11180 new_net_id) 11181 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8, 11182 logical_id=new_alone_id, 11183 children=dev.children, 11184 size=dev.size, 11185 params={}) 11186 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd], 11187 self.cfg) 11188 try: 11189 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, 11190 anno_new_drbd, 11191 _GetInstanceInfoText(self.instance), False) 11192 except errors.GenericError: 11193 self.cfg.ReleaseDRBDMinors(self.instance.name) 11194 raise 11195 11196 # We have new devices, shutdown the drbd on the old secondary 11197 for idx, dev in enumerate(self.instance.disks): 11198 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx) 11199 self.cfg.SetDiskID(dev, self.target_node) 11200 msg = self.rpc.call_blockdev_shutdown(self.target_node, 11201 (dev, self.instance)).fail_msg 11202 if msg: 11203 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old" 11204 " node: %s" % (idx, msg), 11205 hint=("Please cleanup this device manually as" 11206 " soon as possible")) 11207 11208 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)") 11209 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip, 11210 self.instance.disks)[pnode] 11211 11212 msg = result.fail_msg 11213 if msg: 11214 # detaches didn't succeed (unlikely) 11215 self.cfg.ReleaseDRBDMinors(self.instance.name) 11216 raise errors.OpExecError("Can't detach the disks from the network on" 11217 " old node: %s" % (msg,)) 11218 11219 # if we managed to detach at least one, we update all the disks of 11220 # the instance to point to the new secondary 11221 self.lu.LogInfo("Updating instance configuration") 11222 for dev, _, new_logical_id in iv_names.itervalues(): 11223 dev.logical_id = new_logical_id 11224 self.cfg.SetDiskID(dev, self.instance.primary_node) 11225 11226 self.cfg.Update(self.instance, feedback_fn) 11227 11228 # Release all node locks (the configuration has been updated) 11229 _ReleaseLocks(self.lu, locking.LEVEL_NODE) 11230 11231 # and now perform the drbd attach 11232 self.lu.LogInfo("Attaching primary drbds to new secondary" 11233 " (standalone => connected)") 11234 result = self.rpc.call_drbd_attach_net([self.instance.primary_node, 11235 self.new_node], 11236 self.node_secondary_ip, 11237 (self.instance.disks, self.instance), 11238 self.instance.name, 11239 False) 11240 for to_node, to_result in result.items(): 11241 msg = to_result.fail_msg 11242 if msg: 11243 self.lu.LogWarning("Can't attach drbd disks on node %s: %s", 11244 to_node, msg, 11245 hint=("please do a gnt-instance info to see the" 11246 " status of disks")) 11247 11248 cstep = itertools.count(5) 11249 11250 if self.early_release: 11251 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 11252 self._RemoveOldStorage(self.target_node, iv_names) 11253 # TODO: Check if releasing locks early still makes sense 11254 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES) 11255 else: 11256 # Release all resource locks except those used by the instance 11257 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, 11258 keep=self.node_secondary_ip.keys()) 11259 11260 # TODO: Can 
the instance lock be downgraded here? Take the optional disk 11261 # shutdown in the caller into consideration. 11262 11263 # Wait for sync 11264 # This can fail as the old devices are degraded and _WaitForSync 11265 # does a combined result over all disks, so we don't check its return value 11266 self.lu.LogStep(cstep.next(), steps_total, "Sync devices") 11267 _WaitForSync(self.lu, self.instance) 11268 11269 # Check all devices manually 11270 self._CheckDevices(self.instance.primary_node, iv_names) 11271 11272 # Step: remove old storage 11273 if not self.early_release: 11274 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage") 11275 self._RemoveOldStorage(self.target_node, iv_names)
11276
11277 11278 -class LURepairNodeStorage(NoHooksLU):
11279 """Repairs the volume group on a node. 11280 11281 """ 11282 REQ_BGL = False 11283
11284 - def CheckArguments(self):
11285 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 11286 11287 storage_type = self.op.storage_type 11288 11289 if (constants.SO_FIX_CONSISTENCY not in 11290 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])): 11291 raise errors.OpPrereqError("Storage units of type '%s' can not be" 11292 " repaired" % storage_type, 11293 errors.ECODE_INVAL)
11294
11295 - def ExpandNames(self):
11296 self.needed_locks = { 11297 locking.LEVEL_NODE: [self.op.node_name], 11298 }
11299
11300 - def _CheckFaultyDisks(self, instance, node_name):
11301 """Ensure faulty disks abort the opcode or at least warn.""" 11302 try: 11303 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, 11304 node_name, True): 11305 raise errors.OpPrereqError("Instance '%s' has faulty disks on" 11306 " node '%s'" % (instance.name, node_name), 11307 errors.ECODE_STATE) 11308 except errors.OpPrereqError, err: 11309 if self.op.ignore_consistency: 11310 self.proc.LogWarning(str(err.args[0])) 11311 else: 11312 raise
11313
11314 - def CheckPrereq(self):
11315 """Check prerequisites. 11316 11317 """ 11318 # Check whether any instance on this node has faulty disks 11319 for inst in _GetNodeInstances(self.cfg, self.op.node_name): 11320 if inst.admin_state != constants.ADMINST_UP: 11321 continue 11322 check_nodes = set(inst.all_nodes) 11323 check_nodes.discard(self.op.node_name) 11324 for inst_node_name in check_nodes: 11325 self._CheckFaultyDisks(inst, inst_node_name)
11326
11327 - def Exec(self, feedback_fn):
11328 feedback_fn("Repairing storage unit '%s' on %s ..." % 11329 (self.op.name, self.op.node_name)) 11330 11331 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type) 11332 result = self.rpc.call_storage_execute(self.op.node_name, 11333 self.op.storage_type, st_args, 11334 self.op.name, 11335 constants.SO_FIX_CONSISTENCY) 11336 result.Raise("Failed to repair storage unit '%s' on %s" % 11337 (self.op.name, self.op.node_name))
11338
11339 11340 -class LUNodeEvacuate(NoHooksLU):
11341 """Evacuates instances off a list of nodes. 11342 11343 """ 11344 REQ_BGL = False 11345 11346 _MODE2IALLOCATOR = { 11347 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI, 11348 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC, 11349 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL, 11350 } 11351 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES 11352 assert (frozenset(_MODE2IALLOCATOR.values()) == 11353 constants.IALLOCATOR_NEVAC_MODES) 11354
11355 - def CheckArguments(self):
11356 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11357
11358 - def ExpandNames(self):
11359 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) 11360 11361 if self.op.remote_node is not None: 11362 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 11363 assert self.op.remote_node 11364 11365 if self.op.remote_node == self.op.node_name: 11366 raise errors.OpPrereqError("Can not use evacuated node as a new" 11367 " secondary node", errors.ECODE_INVAL) 11368 11369 if self.op.mode != constants.NODE_EVAC_SEC: 11370 raise errors.OpPrereqError("Without the use of an iallocator only" 11371 " secondary instances can be evacuated", 11372 errors.ECODE_INVAL) 11373 11374 # Declare locks 11375 self.share_locks = _ShareAll() 11376 self.needed_locks = { 11377 locking.LEVEL_INSTANCE: [], 11378 locking.LEVEL_NODEGROUP: [], 11379 locking.LEVEL_NODE: [], 11380 } 11381 11382 # Determine nodes (via group) optimistically, needs verification once locks 11383 # have been acquired 11384 self.lock_nodes = self._DetermineNodes()
11385
11386 - def _DetermineNodes(self):
11387 """Gets the list of nodes to operate on. 11388 11389 """ 11390 if self.op.remote_node is None: 11391 # Iallocator will choose any node(s) in the same group 11392 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name]) 11393 else: 11394 group_nodes = frozenset([self.op.remote_node]) 11395 11396 # Determine nodes to be locked 11397 return set([self.op.node_name]) | group_nodes
11398
11399 - def _DetermineInstances(self):
11400 """Builds list of instances to operate on. 11401 11402 """ 11403 assert self.op.mode in constants.NODE_EVAC_MODES 11404 11405 if self.op.mode == constants.NODE_EVAC_PRI: 11406 # Primary instances only 11407 inst_fn = _GetNodePrimaryInstances 11408 assert self.op.remote_node is None, \ 11409 "Evacuating primary instances requires iallocator" 11410 elif self.op.mode == constants.NODE_EVAC_SEC: 11411 # Secondary instances only 11412 inst_fn = _GetNodeSecondaryInstances 11413 else: 11414 # All instances 11415 assert self.op.mode == constants.NODE_EVAC_ALL 11416 inst_fn = _GetNodeInstances 11417 # TODO: In 2.6, change the iallocator interface to take an evacuation mode 11418 # per instance 11419 raise errors.OpPrereqError("Due to an issue with the iallocator" 11420 " interface it is not possible to evacuate" 11421 " all instances at once; specify explicitly" 11422 " whether to evacuate primary or secondary" 11423 " instances", 11424 errors.ECODE_INVAL) 11425 11426 return inst_fn(self.cfg, self.op.node_name)
11427
11428 - def DeclareLocks(self, level):
11429 if level == locking.LEVEL_INSTANCE: 11430 # Lock instances optimistically, needs verification once node and group 11431 # locks have been acquired 11432 self.needed_locks[locking.LEVEL_INSTANCE] = \ 11433 set(i.name for i in self._DetermineInstances()) 11434 11435 elif level == locking.LEVEL_NODEGROUP: 11436 # Lock node groups for all potential target nodes optimistically, needs 11437 # verification once nodes have been acquired 11438 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 11439 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes) 11440 11441 elif level == locking.LEVEL_NODE: 11442 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11443
11444 - def CheckPrereq(self):
11445 # Verify locks 11446 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE) 11447 owned_nodes = self.owned_locks(locking.LEVEL_NODE) 11448 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP) 11449 11450 need_nodes = self._DetermineNodes() 11451 11452 if not owned_nodes.issuperset(need_nodes): 11453 raise errors.OpPrereqError("Nodes in same group as '%s' changed since" 11454 " locks were acquired, current nodes" 11455 " are '%s', used to be '%s'; retry the" 11456 " operation" % 11457 (self.op.node_name, 11458 utils.CommaJoin(need_nodes), 11459 utils.CommaJoin(owned_nodes)), 11460 errors.ECODE_STATE) 11461 11462 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes) 11463 if owned_groups != wanted_groups: 11464 raise errors.OpExecError("Node groups changed since locks were acquired," 11465 " current groups are '%s', used to be '%s';" 11466 " retry the operation" % 11467 (utils.CommaJoin(wanted_groups), 11468 utils.CommaJoin(owned_groups))) 11469 11470 # Determine affected instances 11471 self.instances = self._DetermineInstances() 11472 self.instance_names = [i.name for i in self.instances] 11473 11474 if set(self.instance_names) != owned_instances: 11475 raise errors.OpExecError("Instances on node '%s' changed since locks" 11476 " were acquired, current instances are '%s'," 11477 " used to be '%s'; retry the operation" % 11478 (self.op.node_name, 11479 utils.CommaJoin(self.instance_names), 11480 utils.CommaJoin(owned_instances))) 11481 11482 if self.instance_names: 11483 self.LogInfo("Evacuating instances from node '%s': %s", 11484 self.op.node_name, 11485 utils.CommaJoin(utils.NiceSort(self.instance_names))) 11486 else: 11487 self.LogInfo("No instances to evacuate from node '%s'", 11488 self.op.node_name) 11489 11490 if self.op.remote_node is not None: 11491 for i in self.instances: 11492 if i.primary_node == self.op.remote_node: 11493 raise errors.OpPrereqError("Node %s is the primary node of" 11494 " instance %s, cannot use it as" 11495 " secondary" % 11496 (self.op.remote_node, i.name), 11497 errors.ECODE_INVAL)
11498
11499 - def Exec(self, feedback_fn):
11500 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None) 11501 11502 if not self.instance_names: 11503 # No instances to evacuate 11504 jobs = [] 11505 11506 elif self.op.iallocator is not None: 11507 # TODO: Implement relocation to other group 11508 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC, 11509 evac_mode=self._MODE2IALLOCATOR[self.op.mode], 11510 instances=list(self.instance_names)) 11511 11512 ial.Run(self.op.iallocator) 11513 11514 if not ial.success: 11515 raise errors.OpPrereqError("Can't compute node evacuation using" 11516 " iallocator '%s': %s" % 11517 (self.op.iallocator, ial.info), 11518 errors.ECODE_NORES) 11519 11520 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True) 11521 11522 elif self.op.remote_node is not None: 11523 assert self.op.mode == constants.NODE_EVAC_SEC 11524 jobs = [ 11525 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name, 11526 remote_node=self.op.remote_node, 11527 disks=[], 11528 mode=constants.REPLACE_DISK_CHG, 11529 early_release=self.op.early_release)] 11530 for instance_name in self.instance_names 11531 ] 11532 11533 else: 11534 raise errors.ProgrammerError("No iallocator or remote node") 11535 11536 return ResultWithJobs(jobs)
11537
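# As an illustration of the result built above: evacuating the secondaries of
# two instances onto an explicitly given remote node yields one single-opcode
# job per instance (instance and node names are examples only):
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                     remote_node="node9.example.com",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com",
#                                     ...)],
#   ]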
11538 11539 -def _SetOpEarlyRelease(early_release, op):
11540 """Sets C{early_release} flag on opcodes if available. 11541 11542 """ 11543 try: 11544 op.early_release = early_release 11545 except AttributeError: 11546 assert not isinstance(op, opcodes.OpInstanceReplaceDisks) 11547 11548 return op
11549
11550 11551 -def _NodeEvacDest(use_nodes, group, nodes):
11552 """Returns group or nodes depending on caller's choice. 11553 11554 """ 11555 if use_nodes: 11556 return utils.CommaJoin(nodes) 11557 else: 11558 return group
11559
11560 11561 -def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11562 """Unpacks the result of change-group and node-evacuate iallocator requests. 11563 11564 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and 11565 L{constants.IALLOCATOR_MODE_CHG_GROUP}. 11566 11567 @type lu: L{LogicalUnit} 11568 @param lu: Logical unit instance 11569 @type alloc_result: tuple/list 11570 @param alloc_result: Result from iallocator 11571 @type early_release: bool 11572 @param early_release: Whether to release locks early if possible 11573 @type use_nodes: bool 11574 @param use_nodes: Whether to display node names instead of groups 11575 11576 """ 11577 (moved, failed, jobs) = alloc_result 11578 11579 if failed: 11580 failreason = utils.CommaJoin("%s (%s)" % (name, reason) 11581 for (name, reason) in failed) 11582 lu.LogWarning("Unable to evacuate instances %s", failreason) 11583 raise errors.OpExecError("Unable to evacuate instances %s" % failreason) 11584 11585 if moved: 11586 lu.LogInfo("Instances to be moved: %s", 11587 utils.CommaJoin("%s (to %s)" % 11588 (name, _NodeEvacDest(use_nodes, group, nodes)) 11589 for (name, group, nodes) in moved)) 11590 11591 return [map(compat.partial(_SetOpEarlyRelease, early_release), 11592 map(opcodes.OpCode.LoadOpCode, ops)) 11593 for ops in jobs]
11594
11595 11596 -class LUInstanceGrowDisk(LogicalUnit):
11597 """Grow a disk of an instance. 11598 11599 """ 11600 HPATH = "disk-grow" 11601 HTYPE = constants.HTYPE_INSTANCE 11602 REQ_BGL = False 11603
11604 - def ExpandNames(self):
11605 self._ExpandAndLockInstance() 11606 self.needed_locks[locking.LEVEL_NODE] = [] 11607 self.needed_locks[locking.LEVEL_NODE_RES] = [] 11608 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 11609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11610
11611 - def DeclareLocks(self, level):
11612 if level == locking.LEVEL_NODE: 11613 self._LockInstancesNodes() 11614 elif level == locking.LEVEL_NODE_RES: 11615 # Copy node locks 11616 self.needed_locks[locking.LEVEL_NODE_RES] = \ 11617 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11618
11619 - def BuildHooksEnv(self):
11620 """Build hooks env. 11621 11622 This runs on the master, the primary and all the secondaries. 11623 11624 """ 11625 env = { 11626 "DISK": self.op.disk, 11627 "AMOUNT": self.op.amount, 11628 "ABSOLUTE": self.op.absolute, 11629 } 11630 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 11631 return env
11632
11633 - def BuildHooksNodes(self):
11634 """Build hooks nodes. 11635 11636 """ 11637 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 11638 return (nl, nl)
11639
11640 - def CheckPrereq(self):
11641 """Check prerequisites. 11642 11643 This checks that the instance is in the cluster. 11644 11645 """ 11646 instance = self.cfg.GetInstanceInfo(self.op.instance_name) 11647 assert instance is not None, \ 11648 "Cannot retrieve locked instance %s" % self.op.instance_name 11649 nodenames = list(instance.all_nodes) 11650 for node in nodenames: 11651 _CheckNodeOnline(self, node) 11652 11653 self.instance = instance 11654 11655 if instance.disk_template not in constants.DTS_GROWABLE: 11656 raise errors.OpPrereqError("Instance's disk layout does not support" 11657 " growing", errors.ECODE_INVAL) 11658 11659 self.disk = instance.FindDisk(self.op.disk) 11660 11661 if self.op.absolute: 11662 self.target = self.op.amount 11663 self.delta = self.target - self.disk.size 11664 if self.delta < 0: 11665 raise errors.OpPrereqError("Requested size (%s) is smaller than " 11666 "current disk size (%s)" % 11667 (utils.FormatUnit(self.target, "h"), 11668 utils.FormatUnit(self.disk.size, "h")), 11669 errors.ECODE_STATE) 11670 else: 11671 self.delta = self.op.amount 11672 self.target = self.disk.size + self.delta 11673 if self.delta < 0: 11674 raise errors.OpPrereqError("Requested increment (%s) is negative" % 11675 utils.FormatUnit(self.delta, "h"), 11676 errors.ECODE_INVAL) 11677 11678 if instance.disk_template not in (constants.DT_FILE, 11679 constants.DT_SHARED_FILE, 11680 constants.DT_RBD): 11681 # TODO: check the free disk space for file, when that feature will be 11682 # supported 11683 _CheckNodesFreeDiskPerVG(self, nodenames, 11684 self.disk.ComputeGrowth(self.delta))
11685
11686 - def Exec(self, feedback_fn):
11687 """Execute disk grow. 11688 11689 """ 11690 instance = self.instance 11691 disk = self.disk 11692 11693 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE) 11694 assert (self.owned_locks(locking.LEVEL_NODE) == 11695 self.owned_locks(locking.LEVEL_NODE_RES)) 11696 11697 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk]) 11698 if not disks_ok: 11699 raise errors.OpExecError("Cannot activate block device to grow") 11700 11701 feedback_fn("Growing disk %s of instance '%s' by %s to %s" % 11702 (self.op.disk, instance.name, 11703 utils.FormatUnit(self.delta, "h"), 11704 utils.FormatUnit(self.target, "h"))) 11705 11706 # First run all grow ops in dry-run mode 11707 for node in instance.all_nodes: 11708 self.cfg.SetDiskID(disk, node) 11709 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta, 11710 True) 11711 result.Raise("Grow request failed to node %s" % node) 11712 11713 # We know that (as far as we can test) operations across different 11714 # nodes will succeed, time to run it for real 11715 for node in instance.all_nodes: 11716 self.cfg.SetDiskID(disk, node) 11717 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta, 11718 False) 11719 result.Raise("Grow request failed to node %s" % node) 11720 11721 # TODO: Rewrite code to work properly 11722 # DRBD goes into sync mode for a short amount of time after executing the 11723 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby 11724 # calling "resize" in sync mode fails. Sleeping for a short amount of 11725 # time is a work-around. 11726 time.sleep(5) 11727 11728 disk.RecordGrow(self.delta) 11729 self.cfg.Update(instance, feedback_fn) 11730 11731 # Changes have been recorded, release node lock 11732 _ReleaseLocks(self, locking.LEVEL_NODE) 11733 11734 # Downgrade lock while waiting for sync 11735 self.glm.downgrade(locking.LEVEL_INSTANCE) 11736 11737 if self.op.wait_for_sync: 11738 disk_abort = not _WaitForSync(self, instance, disks=[disk]) 11739 if disk_abort: 11740 self.proc.LogWarning("Disk sync-ing has not returned a good" 11741 " status; please check the instance") 11742 if instance.admin_state != constants.ADMINST_UP: 11743 _SafeShutdownInstanceDisks(self, instance, disks=[disk]) 11744 elif instance.admin_state != constants.ADMINST_UP: 11745 self.proc.LogWarning("Not shutting down the disk even if the instance is" 11746 " not supposed to be running because no wait for" 11747 " sync mode was requested") 11748 11749 assert self.owned_locks(locking.LEVEL_NODE_RES) 11750 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11751
11752 11753 -class LUInstanceQueryData(NoHooksLU):
11754 """Query runtime instance data. 11755 11756 """ 11757 REQ_BGL = False 11758
11759 - def ExpandNames(self):
11760 self.needed_locks = {} 11761 11762 # Use locking if requested or when non-static information is wanted 11763 if not (self.op.static or self.op.use_locking): 11764 self.LogWarning("Non-static data requested, locks need to be acquired") 11765 self.op.use_locking = True 11766 11767 if self.op.instances or not self.op.use_locking: 11768 # Expand instance names right here 11769 self.wanted_names = _GetWantedInstances(self, self.op.instances) 11770 else: 11771 # Will use acquired locks 11772 self.wanted_names = None 11773 11774 if self.op.use_locking: 11775 self.share_locks = _ShareAll() 11776 11777 if self.wanted_names is None: 11778 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET 11779 else: 11780 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names 11781 11782 self.needed_locks[locking.LEVEL_NODEGROUP] = [] 11783 self.needed_locks[locking.LEVEL_NODE] = [] 11784 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11785
11786 - def DeclareLocks(self, level):
11787 if self.op.use_locking: 11788 if level == locking.LEVEL_NODEGROUP: 11789 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE) 11790 11791 # Lock all groups used by instances optimistically; this requires going 11792 # via the node before it's locked, requiring verification later on 11793 self.needed_locks[locking.LEVEL_NODEGROUP] = \ 11794 frozenset(group_uuid 11795 for instance_name in owned_instances 11796 for group_uuid in 11797 self.cfg.GetInstanceNodeGroups(instance_name)) 11798 11799 elif level == locking.LEVEL_NODE: 11800 self._LockInstancesNodes()
11801
11802 - def CheckPrereq(self):
11803 """Check prerequisites. 11804 11805 This only checks the optional instance list against the existing names. 11806 11807 """ 11808 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 11809 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 11810 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 11811 11812 if self.wanted_names is None: 11813 assert self.op.use_locking, "Locking was not used" 11814 self.wanted_names = owned_instances 11815 11816 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names)) 11817 11818 if self.op.use_locking: 11819 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes, 11820 None) 11821 else: 11822 assert not (owned_instances or owned_groups or owned_nodes) 11823 11824 self.wanted_instances = instances.values()
11825
11826 - def _ComputeBlockdevStatus(self, node, instance, dev):
11827 """Returns the status of a block device 11828 11829 """ 11830 if self.op.static or not node: 11831 return None 11832 11833 self.cfg.SetDiskID(dev, node) 11834 11835 result = self.rpc.call_blockdev_find(node, dev) 11836 if result.offline: 11837 return None 11838 11839 result.Raise("Can't compute disk status for %s" % instance.name) 11840 11841 status = result.payload 11842 if status is None: 11843 return None 11844 11845 return (status.dev_path, status.major, status.minor, 11846 status.sync_percent, status.estimated_time, 11847 status.is_degraded, status.ldisk_status)
11848
11849 - def _ComputeDiskStatus(self, instance, snode, dev):
11850 """Compute block device status. 11851 11852 """ 11853 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg) 11854 11855 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11856
11857 - def _ComputeDiskStatusInner(self, instance, snode, dev):
11858 """Compute block device status. 11859 11860 @attention: The device has to be annotated already. 11861 11862 """ 11863 if dev.dev_type in constants.LDS_DRBD: 11864 # we change the snode then (otherwise we use the one passed in) 11865 if dev.logical_id[0] == instance.primary_node: 11866 snode = dev.logical_id[1] 11867 else: 11868 snode = dev.logical_id[0] 11869 11870 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node, 11871 instance, dev) 11872 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev) 11873 11874 if dev.children: 11875 dev_children = map(compat.partial(self._ComputeDiskStatusInner, 11876 instance, snode), 11877 dev.children) 11878 else: 11879 dev_children = [] 11880 11881 return { 11882 "iv_name": dev.iv_name, 11883 "dev_type": dev.dev_type, 11884 "logical_id": dev.logical_id, 11885 "physical_id": dev.physical_id, 11886 "pstatus": dev_pstatus, 11887 "sstatus": dev_sstatus, 11888 "children": dev_children, 11889 "mode": dev.mode, 11890 "size": dev.size, 11891 }
11892
11893 - def Exec(self, feedback_fn):
11894 """Gather and return data""" 11895 result = {} 11896 11897 cluster = self.cfg.GetClusterInfo() 11898 11899 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances)) 11900 nodes = dict(self.cfg.GetMultiNodeInfo(node_names)) 11901 11902 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group 11903 for node in nodes.values())) 11904 11905 group2name_fn = lambda uuid: groups[uuid].name 11906 11907 for instance in self.wanted_instances: 11908 pnode = nodes[instance.primary_node] 11909 11910 if self.op.static or pnode.offline: 11911 remote_state = None 11912 if pnode.offline: 11913 self.LogWarning("Primary node %s is marked offline, returning static" 11914 " information only for instance %s" % 11915 (pnode.name, instance.name)) 11916 else: 11917 remote_info = self.rpc.call_instance_info(instance.primary_node, 11918 instance.name, 11919 instance.hypervisor) 11920 remote_info.Raise("Error checking node %s" % instance.primary_node) 11921 remote_info = remote_info.payload 11922 if remote_info and "state" in remote_info: 11923 remote_state = "up" 11924 else: 11925 if instance.admin_state == constants.ADMINST_UP: 11926 remote_state = "down" 11927 else: 11928 remote_state = instance.admin_state 11929 11930 disks = map(compat.partial(self._ComputeDiskStatus, instance, None), 11931 instance.disks) 11932 11933 snodes_group_uuids = [nodes[snode_name].group 11934 for snode_name in instance.secondary_nodes] 11935 11936 result[instance.name] = { 11937 "name": instance.name, 11938 "config_state": instance.admin_state, 11939 "run_state": remote_state, 11940 "pnode": instance.primary_node, 11941 "pnode_group_uuid": pnode.group, 11942 "pnode_group_name": group2name_fn(pnode.group), 11943 "snodes": instance.secondary_nodes, 11944 "snodes_group_uuids": snodes_group_uuids, 11945 "snodes_group_names": map(group2name_fn, snodes_group_uuids), 11946 "os": instance.os, 11947 # this happens to be the same format used for hooks 11948 "nics": _NICListToTuple(self, instance.nics), 11949 "disk_template": instance.disk_template, 11950 "disks": disks, 11951 "hypervisor": instance.hypervisor, 11952 "network_port": instance.network_port, 11953 "hv_instance": instance.hvparams, 11954 "hv_actual": cluster.FillHV(instance, skip_globals=True), 11955 "be_instance": instance.beparams, 11956 "be_actual": cluster.FillBE(instance), 11957 "os_instance": instance.osparams, 11958 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams), 11959 "serial_no": instance.serial_no, 11960 "mtime": instance.mtime, 11961 "ctime": instance.ctime, 11962 "uuid": instance.uuid, 11963 } 11964 11965 return result
11966
11967 11968 -def PrepareContainerMods(mods, private_fn):
11969 """Prepares a list of container modifications by adding a private data field. 11970 11971 @type mods: list of tuples; (operation, index, parameters) 11972 @param mods: List of modifications 11973 @type private_fn: callable or None 11974 @param private_fn: Callable for constructing a private data field for a 11975 modification 11976 @rtype: list 11977 11978 """ 11979 if private_fn is None: 11980 fn = lambda: None 11981 else: 11982 fn = private_fn 11983 11984 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11985 11986 11987 #: Type description for changes as returned by L{ApplyContainerMods}'s 11988 #: callbacks 11989 _TApplyContModsCbChanges = \ 11990 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([ 11991 ht.TNonEmptyString, 11992 ht.TAny, 11993 ])))
11994 11995 11996 -def ApplyContainerMods(kind, container, chgdesc, mods, 11997 create_fn, modify_fn, remove_fn):
11998 """Applies descriptions in C{mods} to C{container}. 11999 12000 @type kind: string 12001 @param kind: One-word item description 12002 @type container: list 12003 @param container: Container to modify 12004 @type chgdesc: None or list 12005 @param chgdesc: List of applied changes 12006 @type mods: list 12007 @param mods: Modifications as returned by L{PrepareContainerMods} 12008 @type create_fn: callable 12009 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD}); 12010 receives absolute item index, parameters and private data object as added 12011 by L{PrepareContainerMods}, returns tuple containing new item and changes 12012 as list 12013 @type modify_fn: callable 12014 @param modify_fn: Callback for modifying an existing item 12015 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters 12016 and private data object as added by L{PrepareContainerMods}, returns 12017 changes as list 12018 @type remove_fn: callable 12019 @param remove_fn: Callback on removing item; receives absolute item index, 12020 item and private data object as added by L{PrepareContainerMods} 12021 12022 """ 12023 for (op, idx, params, private) in mods: 12024 if idx == -1: 12025 # Append 12026 absidx = len(container) - 1 12027 elif idx < 0: 12028 raise IndexError("Not accepting negative indices other than -1") 12029 elif idx > len(container): 12030 raise IndexError("Got %s index %s, but there are only %s" % 12031 (kind, idx, len(container))) 12032 else: 12033 absidx = idx 12034 12035 changes = None 12036 12037 if op == constants.DDM_ADD: 12038 # Calculate where item will be added 12039 if idx == -1: 12040 addidx = len(container) 12041 else: 12042 addidx = idx 12043 12044 if create_fn is None: 12045 item = params 12046 else: 12047 (item, changes) = create_fn(addidx, params, private) 12048 12049 if idx == -1: 12050 container.append(item) 12051 else: 12052 assert idx >= 0 12053 assert idx <= len(container) 12054 # list.insert does so before the specified index 12055 container.insert(idx, item) 12056 else: 12057 # Retrieve existing item 12058 try: 12059 item = container[absidx] 12060 except IndexError: 12061 raise IndexError("Invalid %s index %s" % (kind, idx)) 12062 12063 if op == constants.DDM_REMOVE: 12064 assert not params 12065 12066 if remove_fn is not None: 12067 remove_fn(absidx, item, private) 12068 12069 changes = [("%s/%s" % (kind, absidx), "remove")] 12070 12071 assert container[absidx] == item 12072 del container[absidx] 12073 elif op == constants.DDM_MODIFY: 12074 if modify_fn is not None: 12075 changes = modify_fn(absidx, item, params, private) 12076 else: 12077 raise errors.ProgrammerError("Unhandled operation '%s'" % op) 12078 12079 assert _TApplyContModsCbChanges(changes) 12080 12081 if not (chgdesc is None or changes is None): 12082 chgdesc.extend(changes)
12083
12084 12085 -def _UpdateIvNames(base_index, disks):
12086 """Updates the C{iv_name} attribute of disks. 12087 12088 @type disks: list of L{objects.Disk} 12089 12090 """ 12091 for (idx, disk) in enumerate(disks): 12092 disk.iv_name = "disk/%s" % (base_index + idx, )
12093
12094 12095 -class _InstNicModPrivate:
12096 """Data structure for network interface modifications. 12097 12098 Used by L{LUInstanceSetParams}. 12099 12100 """
12101 - def __init__(self):
12102 self.params = None 12103 self.filled = None
12104
12105 12106 -class LUInstanceSetParams(LogicalUnit):
12107 """Modifies an instances's parameters. 12108 12109 """ 12110 HPATH = "instance-modify" 12111 HTYPE = constants.HTYPE_INSTANCE 12112 REQ_BGL = False 12113 12114 @staticmethod
12115 - def _UpgradeDiskNicMods(kind, mods, verify_fn):
12116 assert ht.TList(mods) 12117 assert not mods or len(mods[0]) in (2, 3) 12118 12119 if mods and len(mods[0]) == 2: 12120 result = [] 12121 12122 addremove = 0 12123 for op, params in mods: 12124 if op in (constants.DDM_ADD, constants.DDM_REMOVE): 12125 result.append((op, -1, params)) 12126 addremove += 1 12127 12128 if addremove > 1: 12129 raise errors.OpPrereqError("Only one %s add or remove operation is" 12130 " supported at a time" % kind, 12131 errors.ECODE_INVAL) 12132 else: 12133 result.append((constants.DDM_MODIFY, op, params)) 12134 12135 assert verify_fn(result) 12136 else: 12137 result = mods 12138 12139 return result
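For reference, a sketch of the legacy two-tuple format being upgraded here; note that in the legacy modify form the first element is the item index (parameter dicts are illustrative placeholders):

    legacy = [(constants.DDM_ADD, {"size": 1024}),
              (2, {"mode": "ro"})]
    # _UpgradeDiskNicMods("disk", legacy, verify_fn) would return:
    #   [(constants.DDM_ADD, -1, {"size": 1024}),
    #    (constants.DDM_MODIFY, 2, {"mode": "ro"})]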
12140 12141 @staticmethod
12142 - def _CheckMods(kind, mods, key_types, item_fn):
12143 """Ensures requested disk/NIC modifications are valid. 12144 12145 """ 12146 for (op, _, params) in mods: 12147 assert ht.TDict(params) 12148 12149 utils.ForceDictType(params, key_types) 12150 12151 if op == constants.DDM_REMOVE: 12152 if params: 12153 raise errors.OpPrereqError("No settings should be passed when" 12154 " removing a %s" % kind, 12155 errors.ECODE_INVAL) 12156 elif op in (constants.DDM_ADD, constants.DDM_MODIFY): 12157 item_fn(op, params) 12158 else: 12159 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12160 12161 @staticmethod
12162 - def _VerifyDiskModification(op, params):
12163 """Verifies a disk modification. 12164 12165 """ 12166 if op == constants.DDM_ADD: 12167 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR) 12168 if mode not in constants.DISK_ACCESS_SET: 12169 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode, 12170 errors.ECODE_INVAL) 12171 12172 size = params.get(constants.IDISK_SIZE, None) 12173 if size is None: 12174 raise errors.OpPrereqError("Required disk parameter '%s' missing" % 12175 constants.IDISK_SIZE, errors.ECODE_INVAL) 12176 12177 try: 12178 size = int(size) 12179 except (TypeError, ValueError), err: 12180 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err, 12181 errors.ECODE_INVAL) 12182 12183 params[constants.IDISK_SIZE] = size 12184 12185 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params: 12186 raise errors.OpPrereqError("Disk size change not possible, use" 12187 " grow-disk", errors.ECODE_INVAL)
12188 12189 @staticmethod
12190 - def _VerifyNicModification(op, params):
12191 """Verifies a network interface modification. 12192 12193 """ 12194 if op in (constants.DDM_ADD, constants.DDM_MODIFY): 12195 ip = params.get(constants.INIC_IP, None) 12196 if ip is None: 12197 pass 12198 elif ip.lower() == constants.VALUE_NONE: 12199 params[constants.INIC_IP] = None 12200 elif not netutils.IPAddress.IsValid(ip): 12201 raise errors.OpPrereqError("Invalid IP address '%s'" % ip, 12202 errors.ECODE_INVAL) 12203 12204 bridge = params.get("bridge", None) 12205 link = params.get(constants.INIC_LINK, None) 12206 if bridge and link: 12207 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'" 12208 " at the same time", errors.ECODE_INVAL) 12209 elif bridge and bridge.lower() == constants.VALUE_NONE: 12210 params["bridge"] = None 12211 elif link and link.lower() == constants.VALUE_NONE: 12212 params[constants.INIC_LINK] = None 12213 12214 if op == constants.DDM_ADD: 12215 macaddr = params.get(constants.INIC_MAC, None) 12216 if macaddr is None: 12217 params[constants.INIC_MAC] = constants.VALUE_AUTO 12218 12219 if constants.INIC_MAC in params: 12220 macaddr = params[constants.INIC_MAC] 12221 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 12222 macaddr = utils.NormalizeAndValidateMac(macaddr) 12223 12224 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO: 12225 raise errors.OpPrereqError("'auto' is not a valid MAC address when" 12226 " modifying an existing NIC", 12227 errors.ECODE_INVAL)
12228
12229 - def CheckArguments(self):
12230 if not (self.op.nics or self.op.disks or self.op.disk_template or 12231 self.op.hvparams or self.op.beparams or self.op.os_name or 12232 self.op.offline is not None or self.op.runtime_mem): 12233 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL) 12234 12235 if self.op.hvparams: 12236 _CheckGlobalHvParams(self.op.hvparams) 12237 12238 self.op.disks = \ 12239 self._UpgradeDiskNicMods("disk", self.op.disks, 12240 opcodes.OpInstanceSetParams.TestDiskModifications) 12241 self.op.nics = \ 12242 self._UpgradeDiskNicMods("NIC", self.op.nics, 12243 opcodes.OpInstanceSetParams.TestNicModifications) 12244 12245 # Check disk modifications 12246 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES, 12247 self._VerifyDiskModification) 12248 12249 if self.op.disks and self.op.disk_template is not None: 12250 raise errors.OpPrereqError("Disk template conversion and other disk" 12251 " changes not supported at the same time", 12252 errors.ECODE_INVAL) 12253 12254 if (self.op.disk_template and 12255 self.op.disk_template in constants.DTS_INT_MIRROR and 12256 self.op.remote_node is None): 12257 raise errors.OpPrereqError("Changing the disk template to a mirrored" 12258 " one requires specifying a secondary node", 12259 errors.ECODE_INVAL) 12260 12261 # Check NIC modifications 12262 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES, 12263 self._VerifyNicModification)
12264
12265 - def ExpandNames(self):
12266 self._ExpandAndLockInstance() 12267 # Can't even acquire node locks in shared mode as upcoming changes in 12268 # Ganeti 2.6 will start to modify the node object on disk conversion 12269 self.needed_locks[locking.LEVEL_NODE] = [] 12270 self.needed_locks[locking.LEVEL_NODE_RES] = [] 12271 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12272
12273 - def DeclareLocks(self, level):
12274 # TODO: Acquire group lock in shared mode (disk parameters) 12275 if level == locking.LEVEL_NODE: 12276 self._LockInstancesNodes() 12277 if self.op.disk_template and self.op.remote_node: 12278 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node) 12279 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node) 12280 elif level == locking.LEVEL_NODE_RES and self.op.disk_template: 12281 # Copy node locks 12282 self.needed_locks[locking.LEVEL_NODE_RES] = \ 12283 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12284
12285 - def BuildHooksEnv(self):
12286 """Build hooks env. 12287 12288 This runs on the master, primary and secondaries. 12289 12290 """ 12291 args = dict() 12292 if constants.BE_MINMEM in self.be_new: 12293 args["minmem"] = self.be_new[constants.BE_MINMEM] 12294 if constants.BE_MAXMEM in self.be_new: 12295 args["maxmem"] = self.be_new[constants.BE_MAXMEM] 12296 if constants.BE_VCPUS in self.be_new: 12297 args["vcpus"] = self.be_new[constants.BE_VCPUS] 12298 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk 12299 # information at all. 12300 12301 if self._new_nics is not None: 12302 nics = [] 12303 12304 for nic in self._new_nics: 12305 nicparams = self.cluster.SimpleFillNIC(nic.nicparams) 12306 mode = nicparams[constants.NIC_MODE] 12307 link = nicparams[constants.NIC_LINK] 12308 nics.append((nic.ip, nic.mac, mode, link)) 12309 12310 args["nics"] = nics 12311 12312 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args) 12313 if self.op.disk_template: 12314 env["NEW_DISK_TEMPLATE"] = self.op.disk_template 12315 if self.op.runtime_mem: 12316 env["RUNTIME_MEMORY"] = self.op.runtime_mem 12317 12318 return env
12319
12320 - def BuildHooksNodes(self):
12321 """Build hooks nodes. 12322 12323 """ 12324 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes) 12325 return (nl, nl)
12326
12327 - def _PrepareNicModification(self, params, private, old_ip, old_params, 12328 cluster, pnode):
12329 update_params_dict = dict([(key, params[key]) 12330 for key in constants.NICS_PARAMETERS 12331 if key in params]) 12332 12333 if "bridge" in params: 12334 update_params_dict[constants.NIC_LINK] = params["bridge"] 12335 12336 new_params = _GetUpdatedParams(old_params, update_params_dict) 12337 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES) 12338 12339 new_filled_params = cluster.SimpleFillNIC(new_params) 12340 objects.NIC.CheckParameterSyntax(new_filled_params) 12341 12342 new_mode = new_filled_params[constants.NIC_MODE] 12343 if new_mode == constants.NIC_MODE_BRIDGED: 12344 bridge = new_filled_params[constants.NIC_LINK] 12345 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg 12346 if msg: 12347 msg = "Error checking bridges on node '%s': %s" % (pnode, msg) 12348 if self.op.force: 12349 self.warn.append(msg) 12350 else: 12351 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 12352 12353 elif new_mode == constants.NIC_MODE_ROUTED: 12354 ip = params.get(constants.INIC_IP, old_ip) 12355 if ip is None: 12356 raise errors.OpPrereqError("Cannot set the NIC IP address to None" 12357 " on a routed NIC", errors.ECODE_INVAL) 12358 12359 if constants.INIC_MAC in params: 12360 mac = params[constants.INIC_MAC] 12361 if mac is None: 12362 raise errors.OpPrereqError("Cannot unset the NIC MAC address", 12363 errors.ECODE_INVAL) 12364 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE): 12365 # otherwise generate the MAC address 12366 params[constants.INIC_MAC] = \ 12367 self.cfg.GenerateMAC(self.proc.GetECId()) 12368 else: 12369 # or validate/reserve the current one 12370 try: 12371 self.cfg.ReserveMAC(mac, self.proc.GetECId()) 12372 except errors.ReservationError: 12373 raise errors.OpPrereqError("MAC address '%s' already in use" 12374 " in cluster" % mac, 12375 errors.ECODE_NOTUNIQUE) 12376 12377 private.params = new_params 12378 private.filled = new_filled_params
12379
12380 - def CheckPrereq(self):
12381 """Check prerequisites. 12382 12383 This only checks the instance list against the existing names. 12384 12385 """ 12386 # checking the new params on the primary/secondary nodes 12387 12388 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 12389 cluster = self.cluster = self.cfg.GetClusterInfo() 12390 assert self.instance is not None, \ 12391 "Cannot retrieve locked instance %s" % self.op.instance_name 12392 pnode = instance.primary_node 12393 nodelist = list(instance.all_nodes) 12394 pnode_info = self.cfg.GetNodeInfo(pnode) 12395 self.diskparams = self.cfg.GetInstanceDiskParams(instance) 12396 12397 # Prepare disk/NIC modifications 12398 self.diskmod = PrepareContainerMods(self.op.disks, None) 12399 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate) 12400 12401 # OS change 12402 if self.op.os_name and not self.op.force: 12403 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name, 12404 self.op.force_variant) 12405 instance_os = self.op.os_name 12406 else: 12407 instance_os = instance.os 12408 12409 assert not (self.op.disk_template and self.op.disks), \ 12410 "Can't modify disk template and apply disk changes at the same time" 12411 12412 if self.op.disk_template: 12413 if instance.disk_template == self.op.disk_template: 12414 raise errors.OpPrereqError("Instance already has disk template %s" % 12415 instance.disk_template, errors.ECODE_INVAL) 12416 12417 if (instance.disk_template, 12418 self.op.disk_template) not in self._DISK_CONVERSIONS: 12419 raise errors.OpPrereqError("Unsupported disk template conversion from" 12420 " %s to %s" % (instance.disk_template, 12421 self.op.disk_template), 12422 errors.ECODE_INVAL) 12423 _CheckInstanceState(self, instance, INSTANCE_DOWN, 12424 msg="cannot change disk template") 12425 if self.op.disk_template in constants.DTS_INT_MIRROR: 12426 if self.op.remote_node == pnode: 12427 raise errors.OpPrereqError("Given new secondary node %s is the same" 12428 " as the primary node of the instance" % 12429 self.op.remote_node, errors.ECODE_STATE) 12430 _CheckNodeOnline(self, self.op.remote_node) 12431 _CheckNodeNotDrained(self, self.op.remote_node) 12432 # FIXME: here we assume that the old instance type is DT_PLAIN 12433 assert instance.disk_template == constants.DT_PLAIN 12434 disks = [{constants.IDISK_SIZE: d.size, 12435 constants.IDISK_VG: d.logical_id[0]} 12436 for d in instance.disks] 12437 required = _ComputeDiskSizePerVG(self.op.disk_template, disks) 12438 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required) 12439 12440 snode_info = self.cfg.GetNodeInfo(self.op.remote_node) 12441 snode_group = self.cfg.GetNodeGroup(snode_info.group) 12442 ipolicy = _CalculateGroupIPolicy(cluster, snode_group) 12443 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, 12444 ignore=self.op.ignore_ipolicy) 12445 if pnode_info.group != snode_info.group: 12446 self.LogWarning("The primary and secondary nodes are in two" 12447 " different node groups; the disk parameters" 12448 " from the first disk's node group will be" 12449 " used") 12450 12451 # hvparams processing 12452 if self.op.hvparams: 12453 hv_type = instance.hypervisor 12454 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams) 12455 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES) 12456 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict) 12457 12458 # local check 12459 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new) 12460 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new) 
12461 self.hv_proposed = self.hv_new = hv_new # the new actual values 12462 self.hv_inst = i_hvdict # the new dict (without defaults) 12463 else: 12464 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os, 12465 instance.hvparams) 12466 self.hv_new = self.hv_inst = {} 12467 12468 # beparams processing 12469 if self.op.beparams: 12470 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams, 12471 use_none=True) 12472 objects.UpgradeBeParams(i_bedict) 12473 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES) 12474 be_new = cluster.SimpleFillBE(i_bedict) 12475 self.be_proposed = self.be_new = be_new # the new actual values 12476 self.be_inst = i_bedict # the new dict (without defaults) 12477 else: 12478 self.be_new = self.be_inst = {} 12479 self.be_proposed = cluster.SimpleFillBE(instance.beparams) 12480 be_old = cluster.FillBE(instance) 12481 12482 # CPU param validation -- checking every time a parameter is 12483 # changed to cover all cases where either CPU mask or vcpus have 12484 # changed 12485 if (constants.BE_VCPUS in self.be_proposed and 12486 constants.HV_CPU_MASK in self.hv_proposed): 12487 cpu_list = \ 12488 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK]) 12489 # Verify mask is consistent with number of vCPUs. Can skip this 12490 # test if only 1 entry in the CPU mask, which means same mask 12491 # is applied to all vCPUs. 12492 if (len(cpu_list) > 1 and 12493 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]): 12494 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the" 12495 " CPU mask [%s]" % 12496 (self.be_proposed[constants.BE_VCPUS], 12497 self.hv_proposed[constants.HV_CPU_MASK]), 12498 errors.ECODE_INVAL) 12499 12500 # Only perform this test if a new CPU mask is given 12501 if constants.HV_CPU_MASK in self.hv_new: 12502 # Calculate the largest CPU number requested 12503 max_requested_cpu = max(map(max, cpu_list)) 12504 # Check that all of the instance's nodes have enough physical CPUs to 12505 # satisfy the requested CPU mask 12506 _CheckNodesPhysicalCPUs(self, instance.all_nodes, 12507 max_requested_cpu + 1, instance.hypervisor) 12508 12509 # osparams processing 12510 if self.op.osparams: 12511 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) 12512 _CheckOSParams(self, True, nodelist, instance_os, i_osdict) 12513 self.os_inst = i_osdict # the new dict (without defaults) 12514 else: 12515 self.os_inst = {} 12516 12517 self.warn = [] 12518 12519 #TODO(dynmem): do the appropriate check involving MINMEM 12520 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and 12521 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]): 12522 mem_check_list = [pnode] 12523 if be_new[constants.BE_AUTO_BALANCE]: 12524 # either we changed auto_balance to yes or it was from before 12525 mem_check_list.extend(instance.secondary_nodes) 12526 instance_info = self.rpc.call_instance_info(pnode, instance.name, 12527 instance.hypervisor) 12528 nodeinfo = self.rpc.call_node_info(mem_check_list, None, 12529 [instance.hypervisor]) 12530 pninfo = nodeinfo[pnode] 12531 msg = pninfo.fail_msg 12532 if msg: 12533 # Assume the primary node is unreachable and go ahead 12534 self.warn.append("Can't get info from primary node %s: %s" % 12535 (pnode, msg)) 12536 else: 12537 (_, _, (pnhvinfo, )) = pninfo.payload 12538 if not isinstance(pnhvinfo.get("memory_free", None), int): 12539 self.warn.append("Node data from primary node %s doesn't contain" 12540 " free memory information" % pnode) 12541 elif 
instance_info.fail_msg: 12542 self.warn.append("Can't get instance runtime information: %s" % 12543 instance_info.fail_msg) 12544 else: 12545 if instance_info.payload: 12546 current_mem = int(instance_info.payload["memory"]) 12547 else: 12548 # Assume instance not running 12549 # (there is a slight race condition here, but it's not very 12550 # probable, and we have no other way to check) 12551 # TODO: Describe race condition 12552 current_mem = 0 12553 #TODO(dynmem): do the appropriate check involving MINMEM 12554 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem - 12555 pnhvinfo["memory_free"]) 12556 if miss_mem > 0: 12557 raise errors.OpPrereqError("This change will prevent the instance" 12558 " from starting, due to %d MB of memory" 12559 " missing on its primary node" % 12560 miss_mem, 12561 errors.ECODE_NORES) 12562 12563 if be_new[constants.BE_AUTO_BALANCE]: 12564 for node, nres in nodeinfo.items(): 12565 if node not in instance.secondary_nodes: 12566 continue 12567 nres.Raise("Can't get info from secondary node %s" % node, 12568 prereq=True, ecode=errors.ECODE_STATE) 12569 (_, _, (nhvinfo, )) = nres.payload 12570 if not isinstance(nhvinfo.get("memory_free", None), int): 12571 raise errors.OpPrereqError("Secondary node %s didn't return free" 12572 " memory information" % node, 12573 errors.ECODE_STATE) 12574 #TODO(dynmem): do the appropriate check involving MINMEM 12575 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]: 12576 raise errors.OpPrereqError("This change will prevent the instance" 12577 " from failover to its secondary node" 12578 " %s, due to not enough memory" % node, 12579 errors.ECODE_STATE) 12580 12581 if self.op.runtime_mem: 12582 remote_info = self.rpc.call_instance_info(instance.primary_node, 12583 instance.name, 12584 instance.hypervisor) 12585 remote_info.Raise("Error checking node %s" % instance.primary_node) 12586 if not remote_info.payload: # not running already 12587 raise errors.OpPrereqError("Instance %s is not running" % instance.name, 12588 errors.ECODE_STATE) 12589 12590 current_memory = remote_info.payload["memory"] 12591 if (not self.op.force and 12592 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or 12593 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])): 12594 raise errors.OpPrereqError("Instance %s must have memory between %d" 12595 " and %d MB of memory unless --force is" 12596 " given" % (instance.name, 12597 self.be_proposed[constants.BE_MINMEM], 12598 self.be_proposed[constants.BE_MAXMEM]), 12599 errors.ECODE_INVAL) 12600 12601 delta = self.op.runtime_mem - current_memory 12602 if delta > 0: 12603 _CheckNodeFreeMemory(self, instance.primary_node, 12604 "ballooning memory for instance %s" % 12605 instance.name, delta, instance.hypervisor) 12606 12607 if self.op.disks and instance.disk_template == constants.DT_DISKLESS: 12608 raise errors.OpPrereqError("Disk operations not supported for" 12609 " diskless instances", 12610 errors.ECODE_INVAL) 12611 12612 def _PrepareNicCreate(_, params, private): 12613 self._PrepareNicModification(params, private, None, {}, cluster, pnode) 12614 return (None, None)
12615 12616 def _PrepareNicMod(_, nic, params, private): 12617 self._PrepareNicModification(params, private, nic.ip, 12618 nic.nicparams, cluster, pnode) 12619 return None
12620 12621 # Verify NIC changes (operating on copy) 12622 nics = instance.nics[:] 12623 ApplyContainerMods("NIC", nics, None, self.nicmod, 12624 _PrepareNicCreate, _PrepareNicMod, None) 12625 if len(nics) > constants.MAX_NICS: 12626 raise errors.OpPrereqError("Instance has too many network interfaces" 12627 " (%d), cannot add more" % constants.MAX_NICS, 12628 errors.ECODE_STATE) 12629 12630 # Verify disk changes (operating on a copy) 12631 disks = instance.disks[:] 12632 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None) 12633 if len(disks) > constants.MAX_DISKS: 12634 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add" 12635 " more" % constants.MAX_DISKS, 12636 errors.ECODE_STATE) 12637 12638 if self.op.offline is not None: 12639 if self.op.offline: 12640 msg = "can't change to offline" 12641 else: 12642 msg = "can't change to online" 12643 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg) 12644 12645 # Pre-compute NIC changes (necessary to use result in hooks) 12646 self._nic_chgdesc = [] 12647 if self.nicmod: 12648 # Operate on copies as this is still in prereq 12649 nics = [nic.Copy() for nic in instance.nics] 12650 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod, 12651 self._CreateNewNic, self._ApplyNicMods, None) 12652 self._new_nics = nics 12653 else: 12654 self._new_nics = None 12655
12656 - def _ConvertPlainToDrbd(self, feedback_fn):
12657 """Converts an instance from plain to drbd. 12658 12659 """ 12660 feedback_fn("Converting template to drbd") 12661 instance = self.instance 12662 pnode = instance.primary_node 12663 snode = self.op.remote_node 12664 12665 assert instance.disk_template == constants.DT_PLAIN 12666 12667 # create a fake disk info for _GenerateDiskTemplate 12668 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode, 12669 constants.IDISK_VG: d.logical_id[0]} 12670 for d in instance.disks] 12671 new_disks = _GenerateDiskTemplate(self, self.op.disk_template, 12672 instance.name, pnode, [snode], 12673 disk_info, None, None, 0, feedback_fn, 12674 self.diskparams) 12675 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks, 12676 self.diskparams) 12677 info = _GetInstanceInfoText(instance) 12678 feedback_fn("Creating additional volumes...") 12679 # first, create the missing data and meta devices 12680 for disk in anno_disks: 12681 # unfortunately this is... not too nice 12682 _CreateSingleBlockDev(self, pnode, instance, disk.children[1], 12683 info, True) 12684 for child in disk.children: 12685 _CreateSingleBlockDev(self, snode, instance, child, info, True) 12686 # at this stage, all new LVs have been created, we can rename the 12687 # old ones 12688 feedback_fn("Renaming original volumes...") 12689 rename_list = [(o, n.children[0].logical_id) 12690 for (o, n) in zip(instance.disks, new_disks)] 12691 result = self.rpc.call_blockdev_rename(pnode, rename_list) 12692 result.Raise("Failed to rename original LVs") 12693 12694 feedback_fn("Initializing DRBD devices...") 12695 # all child devices are in place, we can now create the DRBD devices 12696 for disk in anno_disks: 12697 for node in [pnode, snode]: 12698 f_create = node == pnode 12699 _CreateSingleBlockDev(self, node, instance, disk, info, f_create) 12700 12701 # at this point, the instance has been modified 12702 instance.disk_template = constants.DT_DRBD8 12703 instance.disks = new_disks 12704 self.cfg.Update(instance, feedback_fn) 12705 12706 # Release node locks while waiting for sync 12707 _ReleaseLocks(self, locking.LEVEL_NODE) 12708 12709 # disks are created, waiting for sync 12710 disk_abort = not _WaitForSync(self, instance, 12711 oneshot=not self.op.wait_for_sync) 12712 if disk_abort: 12713 raise errors.OpExecError("There are some degraded disks for" 12714 " this instance, please cleanup manually")
12715 12716 # Node resource locks will be released by caller 12717
12718 - def _ConvertDrbdToPlain(self, feedback_fn):
12719 """Converts an instance from drbd to plain. 12720 12721 """ 12722 instance = self.instance 12723 12724 assert len(instance.secondary_nodes) == 1 12725 assert instance.disk_template == constants.DT_DRBD8 12726 12727 pnode = instance.primary_node 12728 snode = instance.secondary_nodes[0] 12729 feedback_fn("Converting template to plain") 12730 12731 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg) 12732 new_disks = [d.children[0] for d in instance.disks] 12733 12734 # copy over size and mode 12735 for parent, child in zip(old_disks, new_disks): 12736 child.size = parent.size 12737 child.mode = parent.mode 12738 12739 # this is a DRBD disk, return its port to the pool 12740 # NOTE: this must be done right before the call to cfg.Update! 12741 for disk in old_disks: 12742 tcp_port = disk.logical_id[2] 12743 self.cfg.AddTcpUdpPort(tcp_port) 12744 12745 # update instance structure 12746 instance.disks = new_disks 12747 instance.disk_template = constants.DT_PLAIN 12748 self.cfg.Update(instance, feedback_fn) 12749 12750 # Release locks in case removing disks takes a while 12751 _ReleaseLocks(self, locking.LEVEL_NODE) 12752 12753 feedback_fn("Removing volumes on the secondary node...") 12754 for disk in old_disks: 12755 self.cfg.SetDiskID(disk, snode) 12756 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg 12757 if msg: 12758 self.LogWarning("Could not remove block device %s on node %s," 12759 " continuing anyway: %s", disk.iv_name, snode, msg) 12760 12761 feedback_fn("Removing unneeded volumes on the primary node...") 12762 for idx, disk in enumerate(old_disks): 12763 meta = disk.children[1] 12764 self.cfg.SetDiskID(meta, pnode) 12765 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg 12766 if msg: 12767 self.LogWarning("Could not remove metadata for disk %d on node %s," 12768 " continuing anyway: %s", idx, pnode, msg)
12769
12770 - def _CreateNewDisk(self, idx, params, _):
12771 """Creates a new disk. 12772 12773 """ 12774 instance = self.instance 12775 12776 # add a new disk 12777 if instance.disk_template in constants.DTS_FILEBASED: 12778 (file_driver, file_path) = instance.disks[0].logical_id 12779 file_path = os.path.dirname(file_path) 12780 else: 12781 file_driver = file_path = None 12782 12783 disk = \ 12784 _GenerateDiskTemplate(self, instance.disk_template, instance.name, 12785 instance.primary_node, instance.secondary_nodes, 12786 [params], file_path, file_driver, idx, 12787 self.Log, self.diskparams)[0] 12788 12789 info = _GetInstanceInfoText(instance) 12790 12791 logging.info("Creating volume %s for instance %s", 12792 disk.iv_name, instance.name) 12793 # Note: this needs to be kept in sync with _CreateDisks 12794 #HARDCODE 12795 for node in instance.all_nodes: 12796 f_create = (node == instance.primary_node) 12797 try: 12798 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create) 12799 except errors.OpExecError, err: 12800 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s", 12801 disk.iv_name, disk, node, err) 12802 12803 return (disk, [ 12804 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)), 12805 ])
12806 12807 @staticmethod
12808 - def _ModifyDisk(idx, disk, params, _):
12809 """Modifies a disk. 12810 12811 """ 12812 disk.mode = params[constants.IDISK_MODE] 12813 12814 return [ 12815 ("disk.mode/%d" % idx, disk.mode), 12816 ]
12817
12818 - def _RemoveDisk(self, idx, root, _):
12819 """Removes a disk. 12820 12821 """ 12822 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg) 12823 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node): 12824 self.cfg.SetDiskID(disk, node) 12825 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg 12826 if msg: 12827 self.LogWarning("Could not remove disk/%d on node '%s': %s," 12828 " continuing anyway", idx, node, msg) 12829 12830 # if this is a DRBD disk, return its port to the pool 12831 if root.dev_type in constants.LDS_DRBD: 12832 self.cfg.AddTcpUdpPort(root.logical_id[2])
12833 12834 @staticmethod
12835 - def _CreateNewNic(idx, params, private):
12836 """Creates data structure for a new network interface. 12837 12838 """ 12839 mac = params[constants.INIC_MAC] 12840 ip = params.get(constants.INIC_IP, None) 12841 nicparams = private.params 12842 12843 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [ 12844 ("nic.%d" % idx, 12845 "add:mac=%s,ip=%s,mode=%s,link=%s" % 12846 (mac, ip, private.filled[constants.NIC_MODE], 12847 private.filled[constants.NIC_LINK])), 12848 ])
12849 12850 @staticmethod
12851 - def _ApplyNicMods(idx, nic, params, private):
12852 """Modifies a network interface. 12853 12854 """ 12855 changes = [] 12856 12857 for key in [constants.INIC_MAC, constants.INIC_IP]: 12858 if key in params: 12859 changes.append(("nic.%s/%d" % (key, idx), params[key])) 12860 setattr(nic, key, params[key]) 12861 12862 if private.params: 12863 nic.nicparams = private.params 12864 12865 for (key, val) in params.items(): 12866 changes.append(("nic.%s/%d" % (key, idx), val)) 12867 12868 return changes
12869
12870 - def Exec(self, feedback_fn):
12871 """Modifies an instance. 12872 12873 All parameters take effect only at the next restart of the instance. 12874 12875 """ 12876 # Process here the warnings from CheckPrereq, as we don't have a 12877 # feedback_fn there. 12878 # TODO: Replace with self.LogWarning 12879 for warn in self.warn: 12880 feedback_fn("WARNING: %s" % warn) 12881 12882 assert ((self.op.disk_template is None) ^ 12883 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \ 12884 "Not owning any node resource locks" 12885 12886 result = [] 12887 instance = self.instance 12888 12889 # runtime memory 12890 if self.op.runtime_mem: 12891 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node, 12892 instance, 12893 self.op.runtime_mem) 12894 rpcres.Raise("Cannot modify instance runtime memory") 12895 result.append(("runtime_memory", self.op.runtime_mem)) 12896 12897 # Apply disk changes 12898 ApplyContainerMods("disk", instance.disks, result, self.diskmod, 12899 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk) 12900 _UpdateIvNames(0, instance.disks) 12901 12902 if self.op.disk_template: 12903 if __debug__: 12904 check_nodes = set(instance.all_nodes) 12905 if self.op.remote_node: 12906 check_nodes.add(self.op.remote_node) 12907 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]: 12908 owned = self.owned_locks(level) 12909 assert not (check_nodes - owned), \ 12910 ("Not owning the correct locks, owning %r, expected at least %r" % 12911 (owned, check_nodes)) 12912 12913 r_shut = _ShutdownInstanceDisks(self, instance) 12914 if not r_shut: 12915 raise errors.OpExecError("Cannot shutdown instance disks, unable to" 12916 " proceed with disk template conversion") 12917 mode = (instance.disk_template, self.op.disk_template) 12918 try: 12919 self._DISK_CONVERSIONS[mode](self, feedback_fn) 12920 except: 12921 self.cfg.ReleaseDRBDMinors(instance.name) 12922 raise 12923 result.append(("disk_template", self.op.disk_template)) 12924 12925 assert instance.disk_template == self.op.disk_template, \ 12926 ("Expected disk template '%s', found '%s'" % 12927 (self.op.disk_template, instance.disk_template)) 12928 12929 # Release node and resource locks if there are any (they might already have 12930 # been released during disk conversion) 12931 _ReleaseLocks(self, locking.LEVEL_NODE) 12932 _ReleaseLocks(self, locking.LEVEL_NODE_RES) 12933 12934 # Apply NIC changes 12935 if self._new_nics is not None: 12936 instance.nics = self._new_nics 12937 result.extend(self._nic_chgdesc) 12938 12939 # hvparams changes 12940 if self.op.hvparams: 12941 instance.hvparams = self.hv_inst 12942 for key, val in self.op.hvparams.iteritems(): 12943 result.append(("hv/%s" % key, val)) 12944 12945 # beparams changes 12946 if self.op.beparams: 12947 instance.beparams = self.be_inst 12948 for key, val in self.op.beparams.iteritems(): 12949 result.append(("be/%s" % key, val)) 12950 12951 # OS change 12952 if self.op.os_name: 12953 instance.os = self.op.os_name 12954 12955 # osparams changes 12956 if self.op.osparams: 12957 instance.osparams = self.os_inst 12958 for key, val in self.op.osparams.iteritems(): 12959 result.append(("os/%s" % key, val)) 12960 12961 if self.op.offline is None: 12962 # Ignore 12963 pass 12964 elif self.op.offline: 12965 # Mark instance as offline 12966 self.cfg.MarkInstanceOffline(instance.name) 12967 result.append(("admin_state", constants.ADMINST_OFFLINE)) 12968 else: 12969 # Mark instance as online, but stopped 12970 self.cfg.MarkInstanceDown(instance.name) 12971 result.append(("admin_state", 
constants.ADMINST_DOWN)) 12972 12973 self.cfg.Update(instance, feedback_fn) 12974 12975 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or 12976 self.owned_locks(locking.LEVEL_NODE)), \ 12977 "All node locks should have been released by now" 12978 12979 return result
12980 12981 _DISK_CONVERSIONS = { 12982 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd, 12983 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain, 12984 } 12985
12986 12987 -class LUInstanceChangeGroup(LogicalUnit):
12988 HPATH = "instance-change-group" 12989 HTYPE = constants.HTYPE_INSTANCE 12990 REQ_BGL = False 12991
12992 - def ExpandNames(self):
12993 self.share_locks = _ShareAll() 12994 self.needed_locks = { 12995 locking.LEVEL_NODEGROUP: [], 12996 locking.LEVEL_NODE: [], 12997 } 12998 12999 self._ExpandAndLockInstance() 13000 13001 if self.op.target_groups: 13002 self.req_target_uuids = map(self.cfg.LookupNodeGroup, 13003 self.op.target_groups) 13004 else: 13005 self.req_target_uuids = None 13006 13007 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13008
13009 - def DeclareLocks(self, level):
13010 if level == locking.LEVEL_NODEGROUP: 13011 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 13012 13013 if self.req_target_uuids: 13014 lock_groups = set(self.req_target_uuids) 13015 13016 # Lock all groups used by instance optimistically; this requires going 13017 # via the node before it's locked, requiring verification later on 13018 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name) 13019 lock_groups.update(instance_groups) 13020 else: 13021 # No target groups, need to lock all of them 13022 lock_groups = locking.ALL_SET 13023 13024 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups 13025 13026 elif level == locking.LEVEL_NODE: 13027 if self.req_target_uuids: 13028 # Lock all nodes used by instances 13029 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 13030 self._LockInstancesNodes() 13031 13032 # Lock all nodes in all potential target groups 13033 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) - 13034 self.cfg.GetInstanceNodeGroups(self.op.instance_name)) 13035 member_nodes = [node_name 13036 for group in lock_groups 13037 for node_name in self.cfg.GetNodeGroup(group).members] 13038 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes) 13039 else: 13040 # Lock all nodes as all groups are potential targets 13041 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13042
13043 - def CheckPrereq(self):
13044 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 13045 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 13046 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 13047 13048 assert (self.req_target_uuids is None or 13049 owned_groups.issuperset(self.req_target_uuids)) 13050 assert owned_instances == set([self.op.instance_name]) 13051 13052 # Get instance information 13053 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) 13054 13055 # Check if node groups for locked instance are still correct 13056 assert owned_nodes.issuperset(self.instance.all_nodes), \ 13057 ("Instance %s's nodes changed while we kept the lock" % 13058 self.op.instance_name) 13059 13060 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, 13061 owned_groups) 13062 13063 if self.req_target_uuids: 13064 # User requested specific target groups 13065 self.target_uuids = frozenset(self.req_target_uuids) 13066 else: 13067 # All groups except those used by the instance are potential targets 13068 self.target_uuids = owned_groups - inst_groups 13069 13070 conflicting_groups = self.target_uuids & inst_groups 13071 if conflicting_groups: 13072 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are" 13073 " used by the instance '%s'" % 13074 (utils.CommaJoin(conflicting_groups), 13075 self.op.instance_name), 13076 errors.ECODE_INVAL) 13077 13078 if not self.target_uuids: 13079 raise errors.OpPrereqError("There are no possible target groups", 13080 errors.ECODE_INVAL)
13081
13082 - def BuildHooksEnv(self):
13083 """Build hooks env. 13084 13085 """ 13086 assert self.target_uuids 13087 13088 env = { 13089 "TARGET_GROUPS": " ".join(self.target_uuids), 13090 } 13091 13092 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 13093 13094 return env
13095
13096 - def BuildHooksNodes(self):
13097 """Build hooks nodes. 13098 13099 """ 13100 mn = self.cfg.GetMasterNode() 13101 return ([mn], [mn])
13102
13103 - def Exec(self, feedback_fn):
13104 instances = list(self.owned_locks(locking.LEVEL_INSTANCE)) 13105 13106 assert instances == [self.op.instance_name], "Instance not locked" 13107 13108 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP, 13109 instances=instances, target_groups=list(self.target_uuids)) 13110 13111 ial.Run(self.op.iallocator) 13112 13113 if not ial.success: 13114 raise errors.OpPrereqError("Can't compute solution for changing group of" 13115 " instance '%s' using iallocator '%s': %s" % 13116 (self.op.instance_name, self.op.iallocator, 13117 ial.info), 13118 errors.ECODE_NORES) 13119 13120 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False) 13121 13122 self.LogInfo("Iallocator returned %s job(s) for changing group of" 13123 " instance '%s'", len(jobs), self.op.instance_name) 13124 13125 return ResultWithJobs(jobs)
13126
13127 13128 -class LUBackupQuery(NoHooksLU):
13129 """Query the exports list 13130 13131 """ 13132 REQ_BGL = False 13133
13134 - def CheckArguments(self):
13135 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes), 13136 ["node", "export"], self.op.use_locking)
13137
13138 - def ExpandNames(self):
13139 self.expq.ExpandNames(self)
13140
13141 - def DeclareLocks(self, level):
13142 self.expq.DeclareLocks(self, level)
13143
13144 - def Exec(self, feedback_fn):
13145 result = {} 13146 13147 for (node, expname) in self.expq.OldStyleQuery(self): 13148 if expname is None: 13149 result[node] = False 13150 else: 13151 result.setdefault(node, []).append(expname) 13152 13153 return result
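The old-style result built above maps each node either to False (the export list could not be retrieved) or to a list of export names; for example (names illustrative):

    example = {
        "node1.example.com": ["export-of-inst1"],  # exports found on the node
        "node2.example.com": False,                # call_export_list failed
    }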
13154
13155 13156 -class _ExportQuery(_QueryBase):
13157 FIELDS = query.EXPORT_FIELDS 13158 13159 #: The node name is not a unique key for this query 13160 SORT_FIELD = "node" 13161
13162 - def ExpandNames(self, lu):
13163 lu.needed_locks = {} 13164 13165 # The following variables interact with _QueryBase._GetNames 13166 if self.names: 13167 self.wanted = _GetWantedNodes(lu, self.names) 13168 else: 13169 self.wanted = locking.ALL_SET 13170 13171 self.do_locking = self.use_locking 13172 13173 if self.do_locking: 13174 lu.share_locks = _ShareAll() 13175 lu.needed_locks = { 13176 locking.LEVEL_NODE: self.wanted, 13177 }
13178
13179 - def DeclareLocks(self, lu, level):
13180 pass
13181
13182 - def _GetQueryData(self, lu):
13183 """Computes the list of nodes and their attributes. 13184 13185 """ 13186 # Locking is not used 13187 # TODO 13188 assert not (compat.any(lu.glm.is_owned(level) 13189 for level in locking.LEVELS 13190 if level != locking.LEVEL_CLUSTER) or 13191 self.do_locking or self.use_locking) 13192 13193 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE) 13194 13195 result = [] 13196 13197 for (node, nres) in lu.rpc.call_export_list(nodes).items(): 13198 if nres.fail_msg: 13199 result.append((node, None)) 13200 else: 13201 result.extend((node, expname) for expname in nres.payload) 13202 13203 return result
13204
13205 13206 -class LUBackupPrepare(NoHooksLU):
13207 """Prepares an instance for an export and returns useful information. 13208 13209 """ 13210 REQ_BGL = False 13211
13212 - def ExpandNames(self):
13213 self._ExpandAndLockInstance() 13214
13215 - def CheckPrereq(self):
13216 """Check prerequisites. 13217 13218 """ 13219 instance_name = self.op.instance_name 13220 13221 self.instance = self.cfg.GetInstanceInfo(instance_name) 13222 assert self.instance is not None, \ 13223 "Cannot retrieve locked instance %s" % self.op.instance_name 13224 _CheckNodeOnline(self, self.instance.primary_node) 13225 13226 self._cds = _GetClusterDomainSecret()
13227
13228 - def Exec(self, feedback_fn):
13229 """Prepares an instance for an export. 13230 13231 """ 13232 instance = self.instance 13233 13234 if self.op.mode == constants.EXPORT_MODE_REMOTE: 13235 salt = utils.GenerateSecret(8) 13236 13237 feedback_fn("Generating X509 certificate on %s" % instance.primary_node) 13238 result = self.rpc.call_x509_cert_create(instance.primary_node, 13239 constants.RIE_CERT_VALIDITY) 13240 result.Raise("Can't create X509 key and certificate on %s" % result.node) 13241 13242 (name, cert_pem) = result.payload 13243 13244 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 13245 cert_pem) 13246 13247 return { 13248 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds), 13249 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt), 13250 salt), 13251 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt), 13252 } 13253 13254 return None
13255
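For remote exports, LUBackupPrepare.Exec above returns a handshake, a signed key-name triple and a signed CA certificate; for local exports it returns None. A sketch of the dictionary's shape with placeholder values (the real values are opaque and derived from the cluster domain secret):

    # Shape only; every value below is a placeholder.
    example_prepare_result = {
        "handshake": "<opaque handshake data>",               # from ComputeRemoteExportHandshake
        "x509_key_name": ("<key name>", "<hmac>", "<salt>"),  # verified later by LUBackupExport
        "x509_ca": "<signed X509 CA, PEM encoded>",
    }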
13256 13257 -class LUBackupExport(LogicalUnit):
13258 """Export an instance to an image in the cluster. 13259 13260 """ 13261 HPATH = "instance-export" 13262 HTYPE = constants.HTYPE_INSTANCE 13263 REQ_BGL = False 13264
13265 - def CheckArguments(self):
13266 """Check the arguments. 13267 13268 """ 13269 self.x509_key_name = self.op.x509_key_name 13270 self.dest_x509_ca_pem = self.op.destination_x509_ca 13271 13272 if self.op.mode == constants.EXPORT_MODE_REMOTE: 13273 if not self.x509_key_name: 13274 raise errors.OpPrereqError("Missing X509 key name for encryption", 13275 errors.ECODE_INVAL) 13276 13277 if not self.dest_x509_ca_pem: 13278 raise errors.OpPrereqError("Missing destination X509 CA", 13279 errors.ECODE_INVAL)
13280
13281 - def ExpandNames(self):
13282 self._ExpandAndLockInstance() 13283 13284 # Lock all nodes for local exports 13285 if self.op.mode == constants.EXPORT_MODE_LOCAL: 13286 # FIXME: lock only instance primary and destination node 13287 # 13288 # Sad but true, for now we have to lock all nodes, as we don't know where 13289 # the previous export might be, and in this LU we search for it and 13290 # remove it from its current node. In the future we could fix this by: 13291 # - making a tasklet to search (share-lock all), then one to create the 13292 # new export, and one to remove the old one afterwards 13293 # - removing the removal operation altogether 13294 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13295
13296 - def DeclareLocks(self, level):
13297 """Last minute lock declaration."""
13298 # All nodes are locked anyway, so nothing to do here. 13299
13300 - def BuildHooksEnv(self):
13301 """Build hooks env. 13302 13303 This will run on the master, primary node and target node. 13304 13305 """ 13306 env = { 13307 "EXPORT_MODE": self.op.mode, 13308 "EXPORT_NODE": self.op.target_node, 13309 "EXPORT_DO_SHUTDOWN": self.op.shutdown, 13310 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, 13311 # TODO: Generic function for boolean env variables 13312 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)), 13313 } 13314 13315 env.update(_BuildInstanceHookEnvByObject(self, self.instance)) 13316 13317 return env
13318
13319 - def BuildHooksNodes(self):
13320 """Build hooks nodes. 13321 13322 """ 13323 nl = [self.cfg.GetMasterNode(), self.instance.primary_node] 13324 13325 if self.op.mode == constants.EXPORT_MODE_LOCAL: 13326 nl.append(self.op.target_node) 13327 13328 return (nl, nl)
13329
13330 - def CheckPrereq(self):
13331 """Check prerequisites. 13332 13333 This checks that the instance and node names are valid. 13334 13335 """ 13336 instance_name = self.op.instance_name 13337 13338 self.instance = self.cfg.GetInstanceInfo(instance_name) 13339 assert self.instance is not None, \ 13340 "Cannot retrieve locked instance %s" % self.op.instance_name 13341 _CheckNodeOnline(self, self.instance.primary_node) 13342 13343 if (self.op.remove_instance and 13344 self.instance.admin_state == constants.ADMINST_UP and 13345 not self.op.shutdown): 13346 raise errors.OpPrereqError("Can not remove instance without shutting it" 13347 " down before") 13348 13349 if self.op.mode == constants.EXPORT_MODE_LOCAL: 13350 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node) 13351 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) 13352 assert self.dst_node is not None 13353 13354 _CheckNodeOnline(self, self.dst_node.name) 13355 _CheckNodeNotDrained(self, self.dst_node.name) 13356 13357 self._cds = None 13358 self.dest_disk_info = None 13359 self.dest_x509_ca = None 13360 13361 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 13362 self.dst_node = None 13363 13364 if len(self.op.target_node) != len(self.instance.disks): 13365 raise errors.OpPrereqError(("Received destination information for %s" 13366 " disks, but instance %s has %s disks") % 13367 (len(self.op.target_node), instance_name, 13368 len(self.instance.disks)), 13369 errors.ECODE_INVAL) 13370 13371 cds = _GetClusterDomainSecret() 13372 13373 # Check X509 key name 13374 try: 13375 (key_name, hmac_digest, hmac_salt) = self.x509_key_name 13376 except (TypeError, ValueError), err: 13377 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err) 13378 13379 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt): 13380 raise errors.OpPrereqError("HMAC for X509 key name is wrong", 13381 errors.ECODE_INVAL) 13382 13383 # Load and verify CA 13384 try: 13385 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds) 13386 except OpenSSL.crypto.Error, err: 13387 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" % 13388 (err, ), errors.ECODE_INVAL) 13389 13390 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None) 13391 if errcode is not None: 13392 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % 13393 (msg, ), errors.ECODE_INVAL) 13394 13395 self.dest_x509_ca = cert 13396 13397 # Verify target information 13398 disk_info = [] 13399 for idx, disk_data in enumerate(self.op.target_node): 13400 try: 13401 (host, port, magic) = \ 13402 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data) 13403 except errors.GenericError, err: 13404 raise errors.OpPrereqError("Target info for disk %s: %s" % 13405 (idx, err), errors.ECODE_INVAL) 13406 13407 disk_info.append((host, port, magic)) 13408 13409 assert len(disk_info) == len(self.op.target_node) 13410 self.dest_disk_info = disk_info 13411 13412 else: 13413 raise errors.ProgrammerError("Unhandled export mode %r" % 13414 self.op.mode) 13415 13416 # instance disk type verification 13417 # TODO: Implement export support for file-based disks 13418 for disk in self.instance.disks: 13419 if disk.dev_type == constants.LD_FILE: 13420 raise errors.OpPrereqError("Export not supported for instances with" 13421 " file-based disks", errors.ECODE_INVAL)
13422
13423 - def _CleanupExports(self, feedback_fn):
13424 """Removes exports of the current instance from all other nodes. 13425 13426 If an instance in a cluster with nodes A..D was exported to node C, its 13427 exports will be removed from the nodes A, B and D. 13428 13429 """ 13430 assert self.op.mode != constants.EXPORT_MODE_REMOTE 13431 13432 nodelist = self.cfg.GetNodeList() 13433 nodelist.remove(self.dst_node.name) 13434 13435 # on one-node clusters nodelist will be empty after the removal; 13436 # if we proceeded, the backup would be removed because OpBackupQuery 13437 # substitutes an empty list with the full cluster node list. 13438 iname = self.instance.name 13439 if nodelist: 13440 feedback_fn("Removing old exports for instance %s" % iname) 13441 exportlist = self.rpc.call_export_list(nodelist) 13442 for node in exportlist: 13443 if exportlist[node].fail_msg: 13444 continue 13445 if iname in exportlist[node].payload: 13446 msg = self.rpc.call_export_remove(node, iname).fail_msg 13447 if msg: 13448 self.LogWarning("Could not remove older export for instance %s" 13449 " on node %s: %s", iname, node, msg)
13450
13451 - def Exec(self, feedback_fn):
13452 """Export an instance to an image in the cluster. 13453 13454 """ 13455 assert self.op.mode in constants.EXPORT_MODES 13456 13457 instance = self.instance 13458 src_node = instance.primary_node 13459 13460 if self.op.shutdown: 13461 # shutdown the instance, but not the disks 13462 feedback_fn("Shutting down instance %s" % instance.name) 13463 result = self.rpc.call_instance_shutdown(src_node, instance, 13464 self.op.shutdown_timeout) 13465 # TODO: Maybe ignore failures if ignore_remove_failures is set 13466 result.Raise("Could not shutdown instance %s on" 13467 " node %s" % (instance.name, src_node)) 13468 13469 # set the disks ID correctly since call_instance_start needs the 13470 # correct drbd minor to create the symlinks 13471 for disk in instance.disks: 13472 self.cfg.SetDiskID(disk, src_node) 13473 13474 activate_disks = (instance.admin_state != constants.ADMINST_UP) 13475 13476 if activate_disks: 13477 # Activate the instance disks if we're exporting a stopped instance 13478 feedback_fn("Activating disks for %s" % instance.name) 13479 _StartInstanceDisks(self, instance, None) 13480 13481 try: 13482 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, 13483 instance) 13484 13485 helper.CreateSnapshots() 13486 try: 13487 if (self.op.shutdown and 13488 instance.admin_state == constants.ADMINST_UP and 13489 not self.op.remove_instance): 13490 assert not activate_disks 13491 feedback_fn("Starting instance %s" % instance.name) 13492 result = self.rpc.call_instance_start(src_node, 13493 (instance, None, None), False) 13494 msg = result.fail_msg 13495 if msg: 13496 feedback_fn("Failed to start instance: %s" % msg) 13497 _ShutdownInstanceDisks(self, instance) 13498 raise errors.OpExecError("Could not start instance: %s" % msg) 13499 13500 if self.op.mode == constants.EXPORT_MODE_LOCAL: 13501 (fin_resu, dresults) = helper.LocalExport(self.dst_node) 13502 elif self.op.mode == constants.EXPORT_MODE_REMOTE: 13503 connect_timeout = constants.RIE_CONNECT_TIMEOUT 13504 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout) 13505 13506 (key_name, _, _) = self.x509_key_name 13507 13508 dest_ca_pem = \ 13509 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, 13510 self.dest_x509_ca) 13511 13512 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info, 13513 key_name, dest_ca_pem, 13514 timeouts) 13515 finally: 13516 helper.Cleanup() 13517 13518 # Check for backwards compatibility 13519 assert len(dresults) == len(instance.disks) 13520 assert compat.all(isinstance(i, bool) for i in dresults), \ 13521 "Not all results are boolean: %r" % dresults 13522 13523 finally: 13524 if activate_disks: 13525 feedback_fn("Deactivating disks for %s" % instance.name) 13526 _ShutdownInstanceDisks(self, instance) 13527 13528 if not (compat.all(dresults) and fin_resu): 13529 failures = [] 13530 if not fin_resu: 13531 failures.append("export finalization") 13532 if not compat.all(dresults): 13533 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) 13534 if not dsk) 13535 failures.append("disk export: disk(s) %s" % fdsk) 13536 13537 raise errors.OpExecError("Export failed, errors in %s" % 13538 utils.CommaJoin(failures)) 13539 13540 # At this point, the export was successful, we can cleanup/finish 13541 13542 # Remove instance if requested 13543 if self.op.remove_instance: 13544 feedback_fn("Removing instance %s" % instance.name) 13545 _RemoveInstance(self, feedback_fn, instance, 13546 self.op.ignore_remove_failures) 13547
13548 if self.op.mode == constants.EXPORT_MODE_LOCAL: 13549 self._CleanupExports(feedback_fn) 13550 13551 return fin_resu, dresults
13552
13553 13554 -class LUBackupRemove(NoHooksLU):
13555 """Remove exports related to the named instance. 13556 13557 """ 13558 REQ_BGL = False 13559
13560 - def ExpandNames(self):
13561 self.needed_locks = {} 13562 # We need all nodes to be locked in order for RemoveExport to work, but we 13563 # don't need to lock the instance itself, as nothing will happen to it (and 13564 # we can also remove exports for an instance that no longer exists) 13565 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13566
13567 - def Exec(self, feedback_fn):
13568 """Remove any export. 13569 13570 """ 13571 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name) 13572 # If the instance was not found we'll try with the name that was passed in. 13573 # This will only work if it was an FQDN, though. 13574 fqdn_warn = False 13575 if not instance_name: 13576 fqdn_warn = True 13577 instance_name = self.op.instance_name 13578 13579 locked_nodes = self.owned_locks(locking.LEVEL_NODE) 13580 exportlist = self.rpc.call_export_list(locked_nodes) 13581 found = False 13582 for node in exportlist: 13583 msg = exportlist[node].fail_msg 13584 if msg: 13585 self.LogWarning("Failed to query node %s (continuing): %s", node, msg) 13586 continue 13587 if instance_name in exportlist[node].payload: 13588 found = True 13589 result = self.rpc.call_export_remove(node, instance_name) 13590 msg = result.fail_msg 13591 if msg: 13592 logging.error("Could not remove export for instance %s" 13593 " on node %s: %s", instance_name, node, msg) 13594 13595 if fqdn_warn and not found: 13596 feedback_fn("Export not found. If trying to remove an export belonging" 13597 " to a deleted instance please use its Fully Qualified" 13598 " Domain Name.")
13599
13600 13601 -class LUGroupAdd(LogicalUnit):
13602 """Logical unit for creating node groups. 13603 13604 """ 13605 HPATH = "group-add" 13606 HTYPE = constants.HTYPE_GROUP 13607 REQ_BGL = False 13608
13609 - def ExpandNames(self):
13610 # We need the new group's UUID here so that we can create and acquire the 13611 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup 13612 # that it should not check whether the UUID exists in the configuration. 13613 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId()) 13614 self.needed_locks = {} 13615 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13616
13617 - def CheckPrereq(self):
13618 """Check prerequisites. 13619 13620 This checks that the given group name is not an existing node group 13621 already. 13622 13623 """ 13624 try: 13625 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 13626 except errors.OpPrereqError: 13627 pass 13628 else: 13629 raise errors.OpPrereqError("Desired group name '%s' already exists as a" 13630 " node group (UUID: %s)" % 13631 (self.op.group_name, existing_uuid), 13632 errors.ECODE_EXISTS) 13633 13634 if self.op.ndparams: 13635 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES) 13636 13637 if self.op.hv_state: 13638 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None) 13639 else: 13640 self.new_hv_state = None 13641 13642 if self.op.disk_state: 13643 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None) 13644 else: 13645 self.new_disk_state = None 13646 13647 if self.op.diskparams: 13648 for templ in constants.DISK_TEMPLATES: 13649 if templ in self.op.diskparams: 13650 utils.ForceDictType(self.op.diskparams[templ], 13651 constants.DISK_DT_TYPES) 13652 self.new_diskparams = self.op.diskparams 13653 try: 13654 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) 13655 except errors.OpPrereqError, err: 13656 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 13657 errors.ECODE_INVAL) 13658 else: 13659 self.new_diskparams = {} 13660 13661 if self.op.ipolicy: 13662 cluster = self.cfg.GetClusterInfo() 13663 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy) 13664 try: 13665 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False) 13666 except errors.ConfigurationError, err: 13667 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 13668 errors.ECODE_INVAL)
13669
13670 - def BuildHooksEnv(self):
13671 """Build hooks env. 13672 13673 """ 13674 return { 13675 "GROUP_NAME": self.op.group_name, 13676 }
13677
13678 - def BuildHooksNodes(self):
13679 """Build hooks nodes. 13680 13681 """ 13682 mn = self.cfg.GetMasterNode() 13683 return ([mn], [mn])
13684
13685 - def Exec(self, feedback_fn):
13686 """Add the node group to the cluster. 13687 13688 """ 13689 group_obj = objects.NodeGroup(name=self.op.group_name, members=[], 13690 uuid=self.group_uuid, 13691 alloc_policy=self.op.alloc_policy, 13692 ndparams=self.op.ndparams, 13693 diskparams=self.new_diskparams, 13694 ipolicy=self.op.ipolicy, 13695 hv_state_static=self.new_hv_state, 13696 disk_state_static=self.new_disk_state) 13697 13698 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False) 13699 del self.remove_locks[locking.LEVEL_NODEGROUP]
13700
13701 13702 -class LUGroupAssignNodes(NoHooksLU):
13703 """Logical unit for assigning nodes to groups. 13704 13705 """ 13706 REQ_BGL = False 13707
13708 - def ExpandNames(self):
13709 # These raise errors.OpPrereqError on their own: 13710 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 13711 self.op.nodes = _GetWantedNodes(self, self.op.nodes) 13712 13713 # We want to lock all the affected nodes and groups. We have readily 13714 # available the list of nodes, and the *destination* group. To gather the 13715 # list of "source" groups, we need to fetch node information later on. 13716 self.needed_locks = { 13717 locking.LEVEL_NODEGROUP: set([self.group_uuid]), 13718 locking.LEVEL_NODE: self.op.nodes, 13719 }
13720
13721 - def DeclareLocks(self, level):
13722 if level == locking.LEVEL_NODEGROUP: 13723 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1 13724 13725 # Try to get all affected nodes' groups without having the group or node 13726 # lock yet. Needs verification later in the code flow. 13727 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes) 13728 13729 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13730
13731 - def CheckPrereq(self):
13732 """Check prerequisites. 13733 13734 """ 13735 assert self.needed_locks[locking.LEVEL_NODEGROUP] 13736 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == 13737 frozenset(self.op.nodes)) 13738 13739 expected_locks = (set([self.group_uuid]) | 13740 self.cfg.GetNodeGroupsFromNodes(self.op.nodes)) 13741 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP) 13742 if actual_locks != expected_locks: 13743 raise errors.OpExecError("Nodes changed groups since locks were acquired," 13744 " current groups are '%s', used to be '%s'" % 13745 (utils.CommaJoin(expected_locks), 13746 utils.CommaJoin(actual_locks))) 13747 13748 self.node_data = self.cfg.GetAllNodesInfo() 13749 self.group = self.cfg.GetNodeGroup(self.group_uuid) 13750 instance_data = self.cfg.GetAllInstancesInfo() 13751 13752 if self.group is None: 13753 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 13754 (self.op.group_name, self.group_uuid)) 13755 13756 (new_splits, previous_splits) = \ 13757 self.CheckAssignmentForSplitInstances([(node, self.group_uuid) 13758 for node in self.op.nodes], 13759 self.node_data, instance_data) 13760 13761 if new_splits: 13762 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits)) 13763 13764 if not self.op.force: 13765 raise errors.OpExecError("The following instances get split by this" 13766 " change and --force was not given: %s" % 13767 fmt_new_splits) 13768 else: 13769 self.LogWarning("This operation will split the following instances: %s", 13770 fmt_new_splits) 13771 13772 if previous_splits: 13773 self.LogWarning("In addition, these already-split instances continue" 13774 " to be split across groups: %s", 13775 utils.CommaJoin(utils.NiceSort(previous_splits)))
13776
13777 - def Exec(self, feedback_fn):
13778 """Assign nodes to a new group. 13779 13780 """ 13781 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes] 13782 13783 self.cfg.AssignGroupNodes(mods)
13784 13785 @staticmethod
13786 - def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13787 """Check for split instances after a node assignment. 13788 13789 This method considers a series of node assignments as an atomic operation, 13790 and returns information about split instances after applying the set of 13791 changes. 13792 13793 In particular, it returns information about newly split instances, and about 13794 instances that were already split and remain so after the change. 13795 13796 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are 13797 considered. 13798 13799 @type changes: list of (node_name, new_group_uuid) pairs. 13800 @param changes: list of node assignments to consider. 13801 @param node_data: a dict with data for all nodes 13802 @param instance_data: a dict with all instances to consider 13803 @rtype: a two-tuple 13804 @return: a list of instances that were previously whole and end up split as a 13805 consequence of this change, and a list of instances that were previously 13806 split and remain so after the change. 13807 13808 """ 13809 changed_nodes = dict((node, group) for node, group in changes 13810 if node_data[node].group != group) 13811 13812 all_split_instances = set() 13813 previously_split_instances = set() 13814 13815 def InstanceNodes(instance): 13816 return [instance.primary_node] + list(instance.secondary_nodes)
13817 13818 for inst in instance_data.values(): 13819 if inst.disk_template not in constants.DTS_INT_MIRROR: 13820 continue 13821 13822 instance_nodes = InstanceNodes(inst) 13823 13824 if len(set(node_data[node].group for node in instance_nodes)) > 1: 13825 previously_split_instances.add(inst.name) 13826 13827 if len(set(changed_nodes.get(node, node_data[node].group) 13828 for node in instance_nodes)) > 1: 13829 all_split_instances.add(inst.name) 13830 13831 return (list(all_split_instances - previously_split_instances), 13832 list(previously_split_instances & all_split_instances))
13833
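As a worked example of the split check above, consider a hypothetical DRBD instance on node1/node2 while node2 is proposed to move into another group. The following self-contained sketch uses stand-in namedtuples rather than the real config objects (node1, node2, node3, g1, g2 and inst1 are made up) and omits the DTS_INT_MIRROR filter that the real method applies:

    # Self-contained illustration with stand-in objects; names are hypothetical.
    import collections

    Node = collections.namedtuple("Node", ["group"])
    Inst = collections.namedtuple("Inst", ["name", "disk_template", "primary_node",
                                           "secondary_nodes"])

    node_data = {"node1": Node("g1"), "node2": Node("g1"), "node3": Node("g2")}
    instance_data = {"inst1": Inst("inst1", "drbd", "node1", ["node2"])}

    # Proposed assignment: move node2 into group g2.
    changes = [("node2", "g2")]

    # Same core set logic as CheckAssignmentForSplitInstances, on plain dicts.
    changed = dict((n, g) for (n, g) in changes if node_data[n].group != g)
    for inst in instance_data.values():
        nodes = [inst.primary_node] + list(inst.secondary_nodes)
        groups_after = set(changed.get(n, node_data[n].group) for n in nodes)
        assert len(groups_after) > 1  # inst1 ends up split across g1 and g2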
13834 13835 -class _GroupQuery(_QueryBase):
13836 FIELDS = query.GROUP_FIELDS 13837
13838 - def ExpandNames(self, lu):
13839 lu.needed_locks = {} 13840 13841 self._all_groups = lu.cfg.GetAllNodeGroupsInfo() 13842 self._cluster = lu.cfg.GetClusterInfo() 13843 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values()) 13844 13845 if not self.names: 13846 self.wanted = [name_to_uuid[name] 13847 for name in utils.NiceSort(name_to_uuid.keys())] 13848 else: 13849 # Accept names to be either names or UUIDs. 13850 missing = [] 13851 self.wanted = [] 13852 all_uuid = frozenset(self._all_groups.keys()) 13853 13854 for name in self.names: 13855 if name in all_uuid: 13856 self.wanted.append(name) 13857 elif name in name_to_uuid: 13858 self.wanted.append(name_to_uuid[name]) 13859 else: 13860 missing.append(name) 13861 13862 if missing: 13863 raise errors.OpPrereqError("Some groups do not exist: %s" % 13864 utils.CommaJoin(missing), 13865 errors.ECODE_NOENT)
13866
13867 - def DeclareLocks(self, lu, level):
13868 pass
13869
13870 - def _GetQueryData(self, lu):
13871 """Computes the list of node groups and their attributes. 13872 13873 """ 13874 do_nodes = query.GQ_NODE in self.requested_data 13875 do_instances = query.GQ_INST in self.requested_data 13876 13877 group_to_nodes = None 13878 group_to_instances = None 13879 13880 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for 13881 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the 13882 # latter GetAllInstancesInfo() is not enough, for we have to go through 13883 # instance->node. Hence, we will need to process nodes even if we only need 13884 # instance information. 13885 if do_nodes or do_instances: 13886 all_nodes = lu.cfg.GetAllNodesInfo() 13887 group_to_nodes = dict((uuid, []) for uuid in self.wanted) 13888 node_to_group = {} 13889 13890 for node in all_nodes.values(): 13891 if node.group in group_to_nodes: 13892 group_to_nodes[node.group].append(node.name) 13893 node_to_group[node.name] = node.group 13894 13895 if do_instances: 13896 all_instances = lu.cfg.GetAllInstancesInfo() 13897 group_to_instances = dict((uuid, []) for uuid in self.wanted) 13898 13899 for instance in all_instances.values(): 13900 node = instance.primary_node 13901 if node in node_to_group: 13902 group_to_instances[node_to_group[node]].append(instance.name) 13903 13904 if not do_nodes: 13905 # Do not pass on node information if it was not requested. 13906 group_to_nodes = None 13907 13908 return query.GroupQueryData(self._cluster, 13909 [self._all_groups[uuid] 13910 for uuid in self.wanted], 13911 group_to_nodes, group_to_instances, 13912 query.GQ_DISKPARAMS in self.requested_data)
13913
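Because instances do not record their node group directly, the group-to-instance mapping above is derived through each instance's primary node. A stand-alone sketch of the same two-step lookup with made-up names:

    # Hypothetical data; mirrors the node_to_group/group_to_instances logic above.
    node_group = {"node1": "g-uuid-1", "node2": "g-uuid-1", "node3": "g-uuid-2"}
    primary_node = {"inst1": "node1", "inst2": "node3"}

    group_to_instances = {}
    for inst, pnode in primary_node.items():
        group_to_instances.setdefault(node_group[pnode], []).append(inst)
    # group_to_instances == {"g-uuid-1": ["inst1"], "g-uuid-2": ["inst2"]}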
13914 13915 -class LUGroupQuery(NoHooksLU):
13916 """Logical unit for querying node groups. 13917 13918 """ 13919 REQ_BGL = False 13920
13921 - def CheckArguments(self):
13922 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names), 13923 self.op.output_fields, False)
13924
13925 - def ExpandNames(self):
13926 self.gq.ExpandNames(self)
13927
13928 - def DeclareLocks(self, level):
13929 self.gq.DeclareLocks(self, level)
13930
13931 - def Exec(self, feedback_fn):
13932 return self.gq.OldStyleQuery(self)
13933
13934 13935 -class LUGroupSetParams(LogicalUnit):
13936 """Modifies the parameters of a node group. 13937 13938 """ 13939 HPATH = "group-modify" 13940 HTYPE = constants.HTYPE_GROUP 13941 REQ_BGL = False 13942
13943 - def CheckArguments(self):
13944 all_changes = [ 13945 self.op.ndparams, 13946 self.op.diskparams, 13947 self.op.alloc_policy, 13948 self.op.hv_state, 13949 self.op.disk_state, 13950 self.op.ipolicy, 13951 ] 13952 13953 if all_changes.count(None) == len(all_changes): 13954 raise errors.OpPrereqError("Please pass at least one modification", 13955 errors.ECODE_INVAL)
13956
13957 - def ExpandNames(self):
13958 # This raises errors.OpPrereqError on its own: 13959 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 13960 13961 self.needed_locks = { 13962 locking.LEVEL_INSTANCE: [], 13963 locking.LEVEL_NODEGROUP: [self.group_uuid], 13964 } 13965 13966 self.share_locks[locking.LEVEL_INSTANCE] = 1
13967
13968 - def DeclareLocks(self, level):
13969 if level == locking.LEVEL_INSTANCE: 13970 assert not self.needed_locks[locking.LEVEL_INSTANCE] 13971 13972 # Lock instances optimistically, needs verification once group lock has 13973 # been acquired 13974 self.needed_locks[locking.LEVEL_INSTANCE] = \ 13975 self.cfg.GetNodeGroupInstances(self.group_uuid)
13976 13977 @staticmethod
13978 - def _UpdateAndVerifyDiskParams(old, new):
13979 """Updates and verifies disk parameters. 13980 13981 """ 13982 new_params = _GetUpdatedParams(old, new) 13983 utils.ForceDictType(new_params, constants.DISK_DT_TYPES) 13984 return new_params
13985
13986 - def CheckPrereq(self):
13987 """Check prerequisites. 13988 13989 """ 13990 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 13991 13992 # Check if locked instances are still correct 13993 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 13994 13995 self.group = self.cfg.GetNodeGroup(self.group_uuid) 13996 cluster = self.cfg.GetClusterInfo() 13997 13998 if self.group is None: 13999 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 14000 (self.op.group_name, self.group_uuid)) 14001 14002 if self.op.ndparams: 14003 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams) 14004 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES) 14005 self.new_ndparams = new_ndparams 14006 14007 if self.op.diskparams: 14008 diskparams = self.group.diskparams 14009 uavdp = self._UpdateAndVerifyDiskParams 14010 # For each disktemplate subdict update and verify the values 14011 new_diskparams = dict((dt, 14012 uavdp(diskparams.get(dt, {}), 14013 self.op.diskparams[dt])) 14014 for dt in constants.DISK_TEMPLATES 14015 if dt in self.op.diskparams) 14016 # As we've all subdicts of diskparams ready, lets merge the actual 14017 # dict with all updated subdicts 14018 self.new_diskparams = objects.FillDict(diskparams, new_diskparams) 14019 try: 14020 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS) 14021 except errors.OpPrereqError, err: 14022 raise errors.OpPrereqError("While verify diskparams options: %s" % err, 14023 errors.ECODE_INVAL) 14024 14025 if self.op.hv_state: 14026 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, 14027 self.group.hv_state_static) 14028 14029 if self.op.disk_state: 14030 self.new_disk_state = \ 14031 _MergeAndVerifyDiskState(self.op.disk_state, 14032 self.group.disk_state_static) 14033 14034 if self.op.ipolicy: 14035 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy, 14036 self.op.ipolicy, 14037 group_policy=True) 14038 14039 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy) 14040 inst_filter = lambda inst: inst.name in owned_instances 14041 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values() 14042 violations = \ 14043 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster, 14044 self.group), 14045 new_ipolicy, instances) 14046 14047 if violations: 14048 self.LogWarning("After the ipolicy change the following instances" 14049 " violate them: %s", 14050 utils.CommaJoin(violations))
14051
14052 - def BuildHooksEnv(self):
14053 """Build hooks env. 14054 14055 """ 14056 return { 14057 "GROUP_NAME": self.op.group_name, 14058 "NEW_ALLOC_POLICY": self.op.alloc_policy, 14059 }
14060
14061 - def BuildHooksNodes(self):
14062 """Build hooks nodes. 14063 14064 """ 14065 mn = self.cfg.GetMasterNode() 14066 return ([mn], [mn])
14067
14068 - def Exec(self, feedback_fn):
14069 """Modifies the node group. 14070 14071 """ 14072 result = [] 14073 14074 if self.op.ndparams: 14075 self.group.ndparams = self.new_ndparams 14076 result.append(("ndparams", str(self.group.ndparams))) 14077 14078 if self.op.diskparams: 14079 self.group.diskparams = self.new_diskparams 14080 result.append(("diskparams", str(self.group.diskparams))) 14081 14082 if self.op.alloc_policy: 14083 self.group.alloc_policy = self.op.alloc_policy 14084 14085 if self.op.hv_state: 14086 self.group.hv_state_static = self.new_hv_state 14087 14088 if self.op.disk_state: 14089 self.group.disk_state_static = self.new_disk_state 14090 14091 if self.op.ipolicy: 14092 self.group.ipolicy = self.new_ipolicy 14093 14094 self.cfg.Update(self.group, feedback_fn) 14095 return result
14096
14097 14098 -class LUGroupRemove(LogicalUnit):
14099 HPATH = "group-remove" 14100 HTYPE = constants.HTYPE_GROUP 14101 REQ_BGL = False 14102
14103 - def ExpandNames(self):
14104 # This will raise errors.OpPrereqError on its own: 14105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 14106 self.needed_locks = { 14107 locking.LEVEL_NODEGROUP: [self.group_uuid], 14108 }
14109
14110 - def CheckPrereq(self):
14111 """Check prerequisites. 14112 14113 This checks that the given group name exists as a node group, that it is 14114 empty (i.e., contains no nodes), and that it is not the last group of the 14115 cluster. 14116 14117 """ 14118 # Verify that the group is empty. 14119 group_nodes = [node.name 14120 for node in self.cfg.GetAllNodesInfo().values() 14121 if node.group == self.group_uuid] 14122 14123 if group_nodes: 14124 raise errors.OpPrereqError("Group '%s' not empty, has the following" 14125 " nodes: %s" % 14126 (self.op.group_name, 14127 utils.CommaJoin(utils.NiceSort(group_nodes))), 14128 errors.ECODE_STATE) 14129 14130 # Verify the cluster would not be left group-less. 14131 if len(self.cfg.GetNodeGroupList()) == 1: 14132 raise errors.OpPrereqError("Group '%s' is the only group," 14133 " cannot be removed" % 14134 self.op.group_name, 14135 errors.ECODE_STATE)
14136
14137 - def BuildHooksEnv(self):
14138 """Build hooks env. 14139 14140 """ 14141 return { 14142 "GROUP_NAME": self.op.group_name, 14143 }
14144
14145 - def BuildHooksNodes(self):
14146 """Build hooks nodes. 14147 14148 """ 14149 mn = self.cfg.GetMasterNode() 14150 return ([mn], [mn])
14151
14152 - def Exec(self, feedback_fn):
14153 """Remove the node group. 14154 14155 """ 14156 try: 14157 self.cfg.RemoveNodeGroup(self.group_uuid) 14158 except errors.ConfigurationError: 14159 raise errors.OpExecError("Group '%s' with UUID %s disappeared" % 14160 (self.op.group_name, self.group_uuid)) 14161 14162 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14163
14164 14165 -class LUGroupRename(LogicalUnit):
14166 HPATH = "group-rename" 14167 HTYPE = constants.HTYPE_GROUP 14168 REQ_BGL = False 14169
14170 - def ExpandNames(self):
14171 # This raises errors.OpPrereqError on its own: 14172 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 14173 14174 self.needed_locks = { 14175 locking.LEVEL_NODEGROUP: [self.group_uuid], 14176 }
14177
14178 - def CheckPrereq(self):
14179 """Check prerequisites. 14180 14181 Ensures requested new name is not yet used. 14182 14183 """ 14184 try: 14185 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name) 14186 except errors.OpPrereqError: 14187 pass 14188 else: 14189 raise errors.OpPrereqError("Desired new name '%s' clashes with existing" 14190 " node group (UUID: %s)" % 14191 (self.op.new_name, new_name_uuid), 14192 errors.ECODE_EXISTS)
14193
14194 - def BuildHooksEnv(self):
14195 """Build hooks env. 14196 14197 """ 14198 return { 14199 "OLD_NAME": self.op.group_name, 14200 "NEW_NAME": self.op.new_name, 14201 }
14202
14203 - def BuildHooksNodes(self):
14204 """Build hooks nodes. 14205 14206 """ 14207 mn = self.cfg.GetMasterNode() 14208 14209 all_nodes = self.cfg.GetAllNodesInfo() 14210 all_nodes.pop(mn, None) 14211 14212 run_nodes = [mn] 14213 run_nodes.extend(node.name for node in all_nodes.values() 14214 if node.group == self.group_uuid) 14215 14216 return (run_nodes, run_nodes)
14217
14218 - def Exec(self, feedback_fn):
14219 """Rename the node group. 14220 14221 """ 14222 group = self.cfg.GetNodeGroup(self.group_uuid) 14223 14224 if group is None: 14225 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" % 14226 (self.op.group_name, self.group_uuid)) 14227 14228 group.name = self.op.new_name 14229 self.cfg.Update(group, feedback_fn) 14230 14231 return self.op.new_name
14232
14233 14234 -class LUGroupEvacuate(LogicalUnit):
14235 HPATH = "group-evacuate" 14236 HTYPE = constants.HTYPE_GROUP 14237 REQ_BGL = False 14238
14239 - def ExpandNames(self):
14240 # This raises errors.OpPrereqError on its own: 14241 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 14242 14243 if self.op.target_groups: 14244 self.req_target_uuids = map(self.cfg.LookupNodeGroup, 14245 self.op.target_groups) 14246 else: 14247 self.req_target_uuids = [] 14248 14249 if self.group_uuid in self.req_target_uuids: 14250 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used" 14251 " as a target group (targets are %s)" % 14252 (self.group_uuid, 14253 utils.CommaJoin(self.req_target_uuids)), 14254 errors.ECODE_INVAL) 14255 14256 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator) 14257 14258 self.share_locks = _ShareAll() 14259 self.needed_locks = { 14260 locking.LEVEL_INSTANCE: [], 14261 locking.LEVEL_NODEGROUP: [], 14262 locking.LEVEL_NODE: [], 14263 }
14264
14265 - def DeclareLocks(self, level):
14266 if level == locking.LEVEL_INSTANCE: 14267 assert not self.needed_locks[locking.LEVEL_INSTANCE] 14268 14269 # Lock instances optimistically, needs verification once node and group 14270 # locks have been acquired 14271 self.needed_locks[locking.LEVEL_INSTANCE] = \ 14272 self.cfg.GetNodeGroupInstances(self.group_uuid) 14273 14274 elif level == locking.LEVEL_NODEGROUP: 14275 assert not self.needed_locks[locking.LEVEL_NODEGROUP] 14276 14277 if self.req_target_uuids: 14278 lock_groups = set([self.group_uuid] + self.req_target_uuids) 14279 14280 # Lock all groups used by instances optimistically; this requires going 14281 # via the node before it's locked, requiring verification later on 14282 lock_groups.update(group_uuid 14283 for instance_name in 14284 self.owned_locks(locking.LEVEL_INSTANCE) 14285 for group_uuid in 14286 self.cfg.GetInstanceNodeGroups(instance_name)) 14287 else: 14288 # No target groups, need to lock all of them 14289 lock_groups = locking.ALL_SET 14290 14291 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups 14292 14293 elif level == locking.LEVEL_NODE: 14294 # This will only lock the nodes in the group to be evacuated which 14295 # contain actual instances 14296 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND 14297 self._LockInstancesNodes() 14298 14299 # Lock all nodes in group to be evacuated and target groups 14300 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 14301 assert self.group_uuid in owned_groups 14302 member_nodes = [node_name 14303 for group in owned_groups 14304 for node_name in self.cfg.GetNodeGroup(group).members] 14305 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14306
14307 - def CheckPrereq(self):
14308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE)) 14309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) 14310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE)) 14311 14312 assert owned_groups.issuperset(self.req_target_uuids) 14313 assert self.group_uuid in owned_groups 14314 14315 # Check if locked instances are still correct 14316 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances) 14317 14318 # Get instance information 14319 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances)) 14320 14321 # Check if node groups for locked instances are still correct 14322 _CheckInstancesNodeGroups(self.cfg, self.instances, 14323 owned_groups, owned_nodes, self.group_uuid) 14324 14325 if self.req_target_uuids: 14326 # User requested specific target groups 14327 self.target_uuids = self.req_target_uuids 14328 else: 14329 # All groups except the one to be evacuated are potential targets 14330 self.target_uuids = [group_uuid for group_uuid in owned_groups 14331 if group_uuid != self.group_uuid] 14332 14333 if not self.target_uuids: 14334 raise errors.OpPrereqError("There are no possible target groups", 14335 errors.ECODE_INVAL)
14336
14337 - def BuildHooksEnv(self):
14338 """Build hooks env. 14339 14340 """ 14341 return { 14342 "GROUP_NAME": self.op.group_name, 14343 "TARGET_GROUPS": " ".join(self.target_uuids), 14344 }
14345
14346 - def BuildHooksNodes(self):
14347 """Build hooks nodes. 14348 14349 """ 14350 mn = self.cfg.GetMasterNode() 14351 14352 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 14353 14354 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members 14355 14356 return (run_nodes, run_nodes)
14357
14358 - def Exec(self, feedback_fn):
14359 instances = list(self.owned_locks(locking.LEVEL_INSTANCE)) 14360 14361 assert self.group_uuid not in self.target_uuids 14362 14363 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP, 14364 instances=instances, target_groups=self.target_uuids) 14365 14366 ial.Run(self.op.iallocator) 14367 14368 if not ial.success: 14369 raise errors.OpPrereqError("Can't compute group evacuation using" 14370 " iallocator '%s': %s" % 14371 (self.op.iallocator, ial.info), 14372 errors.ECODE_NORES) 14373 14374 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False) 14375 14376 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s", 14377 len(jobs), self.op.group_name) 14378 14379 return ResultWithJobs(jobs)
14380
14381 14382 -class TagsLU(NoHooksLU): # pylint: disable=W0223
14383 """Generic tags LU. 14384 14385 This is an abstract class which is the parent of all the other tags LUs. 14386 14387 """
14388 - def ExpandNames(self):
14389 self.group_uuid = None 14390 self.needed_locks = {} 14391 14392 if self.op.kind == constants.TAG_NODE: 14393 self.op.name = _ExpandNodeName(self.cfg, self.op.name) 14394 lock_level = locking.LEVEL_NODE 14395 lock_name = self.op.name 14396 elif self.op.kind == constants.TAG_INSTANCE: 14397 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) 14398 lock_level = locking.LEVEL_INSTANCE 14399 lock_name = self.op.name 14400 elif self.op.kind == constants.TAG_NODEGROUP: 14401 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name) 14402 lock_level = locking.LEVEL_NODEGROUP 14403 lock_name = self.group_uuid 14404 else: 14405 lock_level = None 14406 lock_name = None 14407 14408 if lock_level and getattr(self.op, "use_locking", True): 14409 self.needed_locks[lock_level] = lock_name
14410 14411 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's 14412 # not possible to acquire the BGL based on opcode parameters) 14413
14414 - def CheckPrereq(self):
14415 """Check prerequisites. 14416 14417 """ 14418 if self.op.kind == constants.TAG_CLUSTER: 14419 self.target = self.cfg.GetClusterInfo() 14420 elif self.op.kind == constants.TAG_NODE: 14421 self.target = self.cfg.GetNodeInfo(self.op.name) 14422 elif self.op.kind == constants.TAG_INSTANCE: 14423 self.target = self.cfg.GetInstanceInfo(self.op.name) 14424 elif self.op.kind == constants.TAG_NODEGROUP: 14425 self.target = self.cfg.GetNodeGroup(self.group_uuid) 14426 else: 14427 raise errors.OpPrereqError("Wrong tag type requested (%s)" % 14428 str(self.op.kind), errors.ECODE_INVAL)
14429
14430 14431 -class LUTagsGet(TagsLU):
14432 """Returns the tags of a given object. 14433 14434 """ 14435 REQ_BGL = False 14436
14437 - def ExpandNames(self):
14438 TagsLU.ExpandNames(self) 14439 14440 # Share locks as this is only a read operation 14441 self.share_locks = _ShareAll()
14442
14443 - def Exec(self, feedback_fn):
14444 """Returns the tag list. 14445 14446 """ 14447 return list(self.target.GetTags())
14448
14449 14450 -class LUTagsSearch(NoHooksLU):
14451 """Searches the tags for a given pattern. 14452 14453 """ 14454 REQ_BGL = False 14455
14456 - def ExpandNames(self):
14457 self.needed_locks = {}
14458
14459 - def CheckPrereq(self):
14460 """Check prerequisites. 14461 14462 This checks the pattern passed for validity by compiling it. 14463 14464 """ 14465 try: 14466 self.re = re.compile(self.op.pattern) 14467 except re.error, err: 14468 raise errors.OpPrereqError("Invalid search pattern '%s': %s" % 14469 (self.op.pattern, err), errors.ECODE_INVAL)
14470
14471 - def Exec(self, feedback_fn):
14472 """Returns the tag list. 14473 14474 """ 14475 cfg = self.cfg 14476 tgts = [("/cluster", cfg.GetClusterInfo())] 14477 ilist = cfg.GetAllInstancesInfo().values() 14478 tgts.extend([("/instances/%s" % i.name, i) for i in ilist]) 14479 nlist = cfg.GetAllNodesInfo().values() 14480 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist]) 14481 tgts.extend(("/nodegroup/%s" % n.name, n) 14482 for n in cfg.GetAllNodeGroupsInfo().values()) 14483 results = [] 14484 for path, target in tgts: 14485 for tag in target.GetTags(): 14486 if self.re.search(tag): 14487 results.append((path, tag)) 14488 return results
14489
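The search result above is a flat list of (path, tag) pairs covering the cluster, all instances, all nodes and all node groups. For a pattern such as "^prod", a result could look like the following (all names hypothetical):

    example_results = [
        ("/cluster", "production"),
        ("/instances/inst1.example.com", "production"),
        ("/nodes/node1.example.com", "prod-rack-7"),
        ("/nodegroup/default", "production"),
    ]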
14490 14491 -class LUTagsSet(TagsLU):
14492 """Sets a tag on a given object. 14493 14494 """ 14495 REQ_BGL = False 14496
14497 - def CheckPrereq(self):
14498 """Check prerequisites. 14499 14500 This checks the type and length of the tag name and value. 14501 14502 """ 14503 TagsLU.CheckPrereq(self) 14504 for tag in self.op.tags: 14505 objects.TaggableObject.ValidateTag(tag)
14506
14507 - def Exec(self, feedback_fn):
14508 """Sets the tag. 14509 14510 """ 14511 try: 14512 for tag in self.op.tags: 14513 self.target.AddTag(tag) 14514 except errors.TagError, err: 14515 raise errors.OpExecError("Error while setting tag: %s" % str(err)) 14516 self.cfg.Update(self.target, feedback_fn)
14517
14518 14519 -class LUTagsDel(TagsLU):
14520 """Delete a list of tags from a given object. 14521 14522 """ 14523 REQ_BGL = False 14524
14525 - def CheckPrereq(self):
14526 """Check prerequisites. 14527 14528 This checks that we have the given tag. 14529 14530 """ 14531 TagsLU.CheckPrereq(self) 14532 for tag in self.op.tags: 14533 objects.TaggableObject.ValidateTag(tag) 14534 del_tags = frozenset(self.op.tags) 14535 cur_tags = self.target.GetTags() 14536 14537 diff_tags = del_tags - cur_tags 14538 if diff_tags: 14539 diff_names = ("'%s'" % i for i in sorted(diff_tags)) 14540 raise errors.OpPrereqError("Tag(s) %s not found" % 14541 (utils.CommaJoin(diff_names), ), 14542 errors.ECODE_NOENT)
14543
14544 - def Exec(self, feedback_fn):
14545 """Remove the tag from the object. 14546 14547 """ 14548 for tag in self.op.tags: 14549 self.target.RemoveTag(tag) 14550 self.cfg.Update(self.target, feedback_fn)
14551
14552 14553 -class LUTestDelay(NoHooksLU):
14554 """Sleep for a specified amount of time. 14555 14556 This LU sleeps on the master and/or nodes for a specified amount of 14557 time. 14558 14559 """ 14560 REQ_BGL = False 14561
14562 - def ExpandNames(self):
14563 """Expand names and set required locks. 14564 14565 This expands the node list, if any. 14566 14567 """ 14568 self.needed_locks = {} 14569 if self.op.on_nodes: 14570 # _GetWantedNodes can be used here, but is not always appropriate to use 14571 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for 14572 # more information. 14573 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes) 14574 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14575
14576 - def _TestDelay(self):
14577 """Do the actual sleep. 14578 14579 """ 14580 if self.op.on_master: 14581 if not utils.TestDelay(self.op.duration): 14582 raise errors.OpExecError("Error during master delay test") 14583 if self.op.on_nodes: 14584 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration) 14585 for node, node_result in result.items(): 14586 node_result.Raise("Failure during rpc call to node %s" % node)
14587
14588 - def Exec(self, feedback_fn):
14589 """Execute the test delay opcode, with the wanted repetitions. 14590 14591 """ 14592 if self.op.repeat == 0: 14593 self._TestDelay() 14594 else: 14595 top_value = self.op.repeat - 1 14596 for i in range(self.op.repeat): 14597 self.LogInfo("Test delay iteration %d/%d" % (i, top_value)) 14598 self._TestDelay()
14599
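LUTestDelay only reads duration, on_master, on_nodes and repeat from its opcode. A hedged example of building such an opcode, assuming opcodes.OpTestDelay accepts exactly the fields accessed above:

    # Sketch only; field names are taken from the self.op accesses in LUTestDelay.
    from ganeti import opcodes

    op = opcodes.OpTestDelay(duration=5.0,    # seconds to sleep per iteration
                             on_master=True,  # sleep in the master daemon
                             on_nodes=[],     # optionally also sleep on these nodes
                             repeat=0)        # 0 means a single delay, N means N runs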
14600 14601 -class LUTestJqueue(NoHooksLU):
14602 """Utility LU to test some aspects of the job queue. 14603 14604 """ 14605 REQ_BGL = False 14606 14607 # Must be lower than default timeout for WaitForJobChange to see whether it 14608 # notices changed jobs 14609 _CLIENT_CONNECT_TIMEOUT = 20.0 14610 _CLIENT_CONFIRM_TIMEOUT = 60.0 14611 14612 @classmethod
14613 - def _NotifyUsingSocket(cls, cb, errcls):
14614 """Opens a Unix socket and waits for another program to connect. 14615 14616 @type cb: callable 14617 @param cb: Callback to send socket name to client 14618 @type errcls: class 14619 @param errcls: Exception class to use for errors 14620 14621 """ 14622 # Using a temporary directory as there's no easy way to create temporary 14623 # sockets without writing a custom loop around tempfile.mktemp and 14624 # socket.bind 14625 tmpdir = tempfile.mkdtemp() 14626 try: 14627 tmpsock = utils.PathJoin(tmpdir, "sock") 14628 14629 logging.debug("Creating temporary socket at %s", tmpsock) 14630 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 14631 try: 14632 sock.bind(tmpsock) 14633 sock.listen(1) 14634 14635 # Send details to client 14636 cb(tmpsock) 14637 14638 # Wait for client to connect before continuing 14639 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) 14640 try: 14641 (conn, _) = sock.accept() 14642 except socket.error, err: 14643 raise errcls("Client didn't connect in time (%s)" % err) 14644 finally: 14645 sock.close() 14646 finally: 14647 # Remove as soon as client is connected 14648 shutil.rmtree(tmpdir) 14649 14650 # Wait for client to close 14651 try: 14652 try: 14653 # pylint: disable=E1101 14654 # Instance of '_socketobject' has no ... member 14655 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) 14656 conn.recv(1) 14657 except socket.error, err: 14658 raise errcls("Client failed to confirm notification (%s)" % err) 14659 finally: 14660 conn.close()
14661
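The client side of the notification protocol above is deliberately small: connect to the advertised socket path within the connect timeout, then send a single byte so that the conn.recv(1) call returns. A minimal sketch of such a test client (confirm_notification is a hypothetical helper, not part of this module):

    # Minimal test-client sketch for the notification protocol above.
    import socket

    def confirm_notification(sockname):
        """Connects to the LU's temporary socket and confirms the notification."""
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.connect(sockname)  # must happen within _CLIENT_CONNECT_TIMEOUT
            sock.sendall(b"x")      # any single byte satisfies conn.recv(1) above
        finally:
            sock.close()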
14662 - def _SendNotification(self, test, arg, sockname):
14663 """Sends a notification to the client. 14664 14665 @type test: string 14666 @param test: Test name 14667 @param arg: Test argument (depends on test) 14668 @type sockname: string 14669 @param sockname: Socket path 14670 14671 """ 14672 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14673
14674 - def _Notify(self, prereq, test, arg):
14675 """Notifies the client of a test. 14676 14677 @type prereq: bool 14678 @param prereq: Whether this is a prereq-phase test 14679 @type test: string 14680 @param test: Test name 14681 @param arg: Test argument (depends on test) 14682 14683 """ 14684 if prereq: 14685 errcls = errors.OpPrereqError 14686 else: 14687 errcls = errors.OpExecError 14688 14689 return self._NotifyUsingSocket(compat.partial(self._SendNotification, 14690 test, arg), 14691 errcls)
14692
14693 - def CheckArguments(self):
14694 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 14695 self.expandnames_calls = 0
14696
14697 - def ExpandNames(self):
14698 checkargs_calls = getattr(self, "checkargs_calls", 0) 14699 if checkargs_calls < 1: 14700 raise errors.ProgrammerError("CheckArguments was not called") 14701 14702 self.expandnames_calls += 1 14703 14704 if self.op.notify_waitlock: 14705 self._Notify(True, constants.JQT_EXPANDNAMES, None) 14706 14707 self.LogInfo("Expanding names") 14708 14709 # Get lock on master node (just to get a lock, not for a particular reason) 14710 self.needed_locks = { 14711 locking.LEVEL_NODE: self.cfg.GetMasterNode(), 14712 }
14713
14714 - def Exec(self, feedback_fn):
14715 if self.expandnames_calls < 1: 14716 raise errors.ProgrammerError("ExpandNames was not called") 14717 14718 if self.op.notify_exec: 14719 self._Notify(False, constants.JQT_EXEC, None) 14720 14721 self.LogInfo("Executing") 14722 14723 if self.op.log_messages: 14724 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) 14725 for idx, msg in enumerate(self.op.log_messages): 14726 self.LogInfo("Sending log message %s", idx + 1) 14727 feedback_fn(constants.JQT_MSGPREFIX + msg) 14728 # Report how many test messages have been sent 14729 self._Notify(False, constants.JQT_LOGMSG, idx + 1) 14730 14731 if self.op.fail: 14732 raise errors.OpExecError("Opcode failure was requested") 14733 14734 return True
14735
14736 14737 -class IAllocator(object):
14738 """IAllocator framework. 14739 14740 An IAllocator instance has four sets of attributes: 14741 - cfg that is needed to query the cluster 14742 - input data (all members of the _KEYS class attribute are required) 14743 - four buffer attributes (in|out_data|text), that represent the 14744 input (to the external script) in text and data structure format, 14745 and the output from it, again in two formats 14746 - the result variables from the script (success, info, result) for 14747 easy usage 14748 14749 """ 14750 # pylint: disable=R0902 14751 # lots of instance attributes 14752
14753 - def __init__(self, cfg, rpc_runner, mode, **kwargs):
14754 self.cfg = cfg 14755 self.rpc = rpc_runner 14756 # init buffer variables 14757 self.in_text = self.out_text = self.in_data = self.out_data = None 14758 # init all input fields so that pylint is happy 14759 self.mode = mode 14760 self.memory = self.disks = self.disk_template = self.spindle_use = None 14761 self.os = self.tags = self.nics = self.vcpus = None 14762 self.hypervisor = None 14763 self.relocate_from = None 14764 self.name = None 14765 self.instances = None 14766 self.evac_mode = None 14767 self.target_groups = [] 14768 # computed fields 14769 self.required_nodes = None 14770 # init result fields 14771 self.success = self.info = self.result = None 14772 14773 try: 14774 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode] 14775 except KeyError: 14776 raise errors.ProgrammerError("Unknown mode '%s' passed to the" 14777 " IAllocator" % self.mode) 14778 14779 keyset = [n for (n, _) in keydata] 14780 14781 for key in kwargs: 14782 if key not in keyset: 14783 raise errors.ProgrammerError("Invalid input parameter '%s' to" 14784 " IAllocator" % key) 14785 setattr(self, key, kwargs[key]) 14786 14787 for key in keyset: 14788 if key not in kwargs: 14789 raise errors.ProgrammerError("Missing input parameter '%s' to" 14790 " IAllocator" % key) 14791 self._BuildInputData(compat.partial(fn, self), keydata)
14792
14793 - def _ComputeClusterData(self):
14794 """Compute the generic allocator input data. 14795 14796 This is the data that is independent of the actual operation. 14797 14798 """ 14799 cfg = self.cfg 14800 cluster_info = cfg.GetClusterInfo() 14801 # cluster data 14802 data = { 14803 "version": constants.IALLOCATOR_VERSION, 14804 "cluster_name": cfg.GetClusterName(), 14805 "cluster_tags": list(cluster_info.GetTags()), 14806 "enabled_hypervisors": list(cluster_info.enabled_hypervisors), 14807 "ipolicy": cluster_info.ipolicy, 14808 } 14809 ninfo = cfg.GetAllNodesInfo() 14810 iinfo = cfg.GetAllInstancesInfo().values() 14811 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] 14812 14813 # node data 14814 node_list = [n.name for n in ninfo.values() if n.vm_capable] 14815 14816 if self.mode == constants.IALLOCATOR_MODE_ALLOC: 14817 hypervisor_name = self.hypervisor 14818 elif self.mode == constants.IALLOCATOR_MODE_RELOC: 14819 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor 14820 else: 14821 hypervisor_name = cluster_info.primary_hypervisor 14822 14823 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()], 14824 [hypervisor_name]) 14825 node_iinfo = \ 14826 self.rpc.call_all_instances_info(node_list, 14827 cluster_info.enabled_hypervisors) 14828 14829 data["nodegroups"] = self._ComputeNodeGroupData(cfg) 14830 14831 config_ndata = self._ComputeBasicNodeData(cfg, ninfo) 14832 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo, 14833 i_list, config_ndata) 14834 assert len(data["nodes"]) == len(ninfo), \ 14835 "Incomplete node data computed" 14836 14837 data["instances"] = self._ComputeInstanceData(cluster_info, i_list) 14838 14839 self.in_data = data
14840 14841 @staticmethod
14842 - def _ComputeNodeGroupData(cfg):
14843 """Compute node groups data. 14844 14845 """ 14846 cluster = cfg.GetClusterInfo() 14847 ng = dict((guuid, { 14848 "name": gdata.name, 14849 "alloc_policy": gdata.alloc_policy, 14850 "ipolicy": _CalculateGroupIPolicy(cluster, gdata), 14851 }) 14852 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items()) 14853 14854 return ng
14855 14856 @staticmethod
14857 - def _ComputeBasicNodeData(cfg, node_cfg):
14858 """Compute global node data. 14859 14860 @rtype: dict 14861 @returns: a dict of name: (node dict, node config) 14862 14863 """ 14864 # fill in static (config-based) values 14865 node_results = dict((ninfo.name, { 14866 "tags": list(ninfo.GetTags()), 14867 "primary_ip": ninfo.primary_ip, 14868 "secondary_ip": ninfo.secondary_ip, 14869 "offline": ninfo.offline, 14870 "drained": ninfo.drained, 14871 "master_candidate": ninfo.master_candidate, 14872 "group": ninfo.group, 14873 "master_capable": ninfo.master_capable, 14874 "vm_capable": ninfo.vm_capable, 14875 "ndparams": cfg.GetNdParams(ninfo), 14876 }) 14877 for ninfo in node_cfg.values()) 14878 14879 return node_results
14880 14881 @staticmethod
14882 - def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list, 14883 node_results):
14884 """Compute global node data. 14885 14886 @param node_results: the basic node structures as filled from the config 14887 14888 """ 14889 #TODO(dynmem): compute the right data on MAX and MIN memory 14890 # make a copy of the current dict 14891 node_results = dict(node_results) 14892 for nname, nresult in node_data.items(): 14893 assert nname in node_results, "Missing basic data for node %s" % nname 14894 ninfo = node_cfg[nname] 14895 14896 if not (ninfo.offline or ninfo.drained): 14897 nresult.Raise("Can't get data for node %s" % nname) 14898 node_iinfo[nname].Raise("Can't get node instance info from node %s" % 14899 nname) 14900 remote_info = _MakeLegacyNodeInfo(nresult.payload) 14901 14902 for attr in ["memory_total", "memory_free", "memory_dom0", 14903 "vg_size", "vg_free", "cpu_total"]: 14904 if attr not in remote_info: 14905 raise errors.OpExecError("Node '%s' didn't return attribute" 14906 " '%s'" % (nname, attr)) 14907 if not isinstance(remote_info[attr], int): 14908 raise errors.OpExecError("Node '%s' returned invalid value" 14909 " for '%s': %s" % 14910 (nname, attr, remote_info[attr])) 14911 # compute memory used by primary instances 14912 i_p_mem = i_p_up_mem = 0 14913 for iinfo, beinfo in i_list: 14914 if iinfo.primary_node == nname: 14915 i_p_mem += beinfo[constants.BE_MAXMEM] 14916 if iinfo.name not in node_iinfo[nname].payload: 14917 i_used_mem = 0 14918 else: 14919 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"]) 14920 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem 14921 remote_info["memory_free"] -= max(0, i_mem_diff) 14922 14923 if iinfo.admin_state == constants.ADMINST_UP: 14924 i_p_up_mem += beinfo[constants.BE_MAXMEM] 14925 14926 # compute memory used by instances 14927 pnr_dyn = { 14928 "total_memory": remote_info["memory_total"], 14929 "reserved_memory": remote_info["memory_dom0"], 14930 "free_memory": remote_info["memory_free"], 14931 "total_disk": remote_info["vg_size"], 14932 "free_disk": remote_info["vg_free"], 14933 "total_cpus": remote_info["cpu_total"], 14934 "i_pri_memory": i_p_mem, 14935 "i_pri_up_memory": i_p_up_mem, 14936 } 14937 pnr_dyn.update(node_results[nname]) 14938 node_results[nname] = pnr_dyn 14939 14940 return node_results
14941 14942 @staticmethod
14943 - def _ComputeInstanceData(cluster_info, i_list):
14944 """Compute global instance data. 14945 14946 """ 14947 instance_data = {} 14948 for iinfo, beinfo in i_list: 14949 nic_data = [] 14950 for nic in iinfo.nics: 14951 filled_params = cluster_info.SimpleFillNIC(nic.nicparams) 14952 nic_dict = { 14953 "mac": nic.mac, 14954 "ip": nic.ip, 14955 "mode": filled_params[constants.NIC_MODE], 14956 "link": filled_params[constants.NIC_LINK], 14957 } 14958 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 14959 nic_dict["bridge"] = filled_params[constants.NIC_LINK] 14960 nic_data.append(nic_dict) 14961 pir = { 14962 "tags": list(iinfo.GetTags()), 14963 "admin_state": iinfo.admin_state, 14964 "vcpus": beinfo[constants.BE_VCPUS], 14965 "memory": beinfo[constants.BE_MAXMEM], 14966 "spindle_use": beinfo[constants.BE_SPINDLE_USE], 14967 "os": iinfo.os, 14968 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes), 14969 "nics": nic_data, 14970 "disks": [{constants.IDISK_SIZE: dsk.size, 14971 constants.IDISK_MODE: dsk.mode} 14972 for dsk in iinfo.disks], 14973 "disk_template": iinfo.disk_template, 14974 "hypervisor": iinfo.hypervisor, 14975 } 14976 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template, 14977 pir["disks"]) 14978 instance_data[iinfo.name] = pir 14979 14980 return instance_data
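# Illustrative sketch (made-up values, hypothetical names): a single entry of
# the "instances" dict built above, keyed by instance name.  The "size"/"mode"
# keys correspond to constants.IDISK_SIZE/constants.IDISK_MODE.
_EXAMPLE_INSTANCE_ENTRY = {
  "tags": [],
  "admin_state": "up",
  "vcpus": 2,
  "memory": 1024,                     # BE_MAXMEM, in MiB
  "spindle_use": 1,
  "os": "debootstrap+default",
  "nodes": ["node1.example.com", "node2.example.com"],   # primary node first
  "nics": [{"mac": "aa:00:00:35:ac:13", "ip": None,
            "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  "disks": [{"size": 10240, "mode": "rw"}],
  "disk_template": "drbd",
  "hypervisor": "xen-pvm",
  "disk_space_total": 10368,          # as computed by _ComputeDiskSize
  }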
14981
14982 - def _AddNewInstance(self):
14983 """Add new instance data to allocator structure. 14984 14985 This in combination with _ComputeClusterData will create the 14986 correct structure needed as input for the allocator. 14987 14988 The checks for the completeness of the opcode must have already been 14989 done. 14990 14991 """ 14992 disk_space = _ComputeDiskSize(self.disk_template, self.disks) 14993 14994 if self.disk_template in constants.DTS_INT_MIRROR: 14995 self.required_nodes = 2 14996 else: 14997 self.required_nodes = 1 14998 14999 request = { 15000 "name": self.name, 15001 "disk_template": self.disk_template, 15002 "tags": self.tags, 15003 "os": self.os, 15004 "vcpus": self.vcpus, 15005 "memory": self.memory, 15006 "spindle_use": self.spindle_use, 15007 "disks": self.disks, 15008 "disk_space_total": disk_space, 15009 "nics": self.nics, 15010 "required_nodes": self.required_nodes, 15011 "hypervisor": self.hypervisor, 15012 } 15013 15014 return request
15015
15016 - def _AddRelocateInstance(self):
15017 """Add relocate instance data to allocator structure. 15018 15019 This in combination with _ComputeClusterData will create the 15020 correct structure needed as input for the allocator. 15021 15022 The checks for the completeness of the opcode must have already been 15023 done. 15024 15025 """ 15026 instance = self.cfg.GetInstanceInfo(self.name) 15027 if instance is None: 15028 raise errors.ProgrammerError("Unknown instance '%s' passed to" 15029 " IAllocator" % self.name) 15030 15031 if instance.disk_template not in constants.DTS_MIRRORED: 15032 raise errors.OpPrereqError("Can't relocate non-mirrored instances", 15033 errors.ECODE_INVAL) 15034 15035 if instance.disk_template in constants.DTS_INT_MIRROR and \ 15036 len(instance.secondary_nodes) != 1: 15037 raise errors.OpPrereqError("Instance does not have exactly one" 15038 " secondary node", errors.ECODE_STATE) 15039 15040 self.required_nodes = 1 15041 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks] 15042 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes) 15043 15044 request = { 15045 "name": self.name, 15046 "disk_space_total": disk_space, 15047 "required_nodes": self.required_nodes, 15048 "relocate_from": self.relocate_from, 15049 } 15050 return request
15051
15052 - def _AddNodeEvacuate(self):
15053 """Get data for node-evacuate requests. 15054 15055 """ 15056 return { 15057 "instances": self.instances, 15058 "evac_mode": self.evac_mode, 15059 }
15060
15061 - def _AddChangeGroup(self):
15062 """Get data for change-group requests. 15063 15064 """ 15065 return { 15066 "instances": self.instances, 15067 "target_groups": self.target_groups, 15068 }
15069
15070 - def _BuildInputData(self, fn, keydata):
15071 """Build input data structures. 15072 15073 """ 15074 self._ComputeClusterData() 15075 15076 request = fn() 15077 request["type"] = self.mode 15078 for keyname, keytype in keydata: 15079 if keyname not in request: 15080 raise errors.ProgrammerError("Request parameter %s is missing" % 15081 keyname) 15082 val = request[keyname] 15083 if not keytype(val): 15084 raise errors.ProgrammerError("Request parameter %s doesn't pass" 15085 " validation, value %s, expected" 15086 " type %s" % (keyname, val, keytype)) 15087 self.in_data["request"] = request 15088 15089 self.in_text = serializer.Dump(self.in_data)
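# Minimal standalone sketch of the request validation loop above, with plain
# lambdas standing in for the ht.T* predicates used in the keydata tables
# below (illustrative only).
keydata = [("name", lambda v: isinstance(v, basestring)),
           ("vcpus", lambda v: isinstance(v, int)),
           ("relocate_from", lambda v: isinstance(v, list))]
request = {"name": "instance1.example.com", "vcpus": 2, "relocate_from": []}

for keyname, keytype in keydata:
  if keyname not in request:
    raise AssertionError("Request parameter %s is missing" % keyname)
  if not keytype(request[keyname]):
    raise AssertionError("Request parameter %s fails validation" % keyname)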
15090 15091 _STRING_LIST = ht.TListOf(ht.TString) 15092 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, { 15093 # pylint: disable=E1101 15094 # Class '...' has no 'OP_ID' member 15095 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID, 15096 opcodes.OpInstanceMigrate.OP_ID, 15097 opcodes.OpInstanceReplaceDisks.OP_ID]) 15098 }))) 15099 15100 _NEVAC_MOVED = \ 15101 ht.TListOf(ht.TAnd(ht.TIsLength(3), 15102 ht.TItems([ht.TNonEmptyString, 15103 ht.TNonEmptyString, 15104 ht.TListOf(ht.TNonEmptyString), 15105 ]))) 15106 _NEVAC_FAILED = \ 15107 ht.TListOf(ht.TAnd(ht.TIsLength(2), 15108 ht.TItems([ht.TNonEmptyString, 15109 ht.TMaybeString, 15110 ]))) 15111 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3), 15112 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST])) 15113 15114 _MODE_DATA = { 15115 constants.IALLOCATOR_MODE_ALLOC: 15116 (_AddNewInstance, 15117 [ 15118 ("name", ht.TString), 15119 ("memory", ht.TInt), 15120 ("spindle_use", ht.TInt), 15121 ("disks", ht.TListOf(ht.TDict)), 15122 ("disk_template", ht.TString), 15123 ("os", ht.TString), 15124 ("tags", _STRING_LIST), 15125 ("nics", ht.TListOf(ht.TDict)), 15126 ("vcpus", ht.TInt), 15127 ("hypervisor", ht.TString), 15128 ], ht.TList), 15129 constants.IALLOCATOR_MODE_RELOC: 15130 (_AddRelocateInstance, 15131 [("name", ht.TString), ("relocate_from", _STRING_LIST)], 15132 ht.TList), 15133 constants.IALLOCATOR_MODE_NODE_EVAC: 15134 (_AddNodeEvacuate, [ 15135 ("instances", _STRING_LIST), 15136 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)), 15137 ], _NEVAC_RESULT), 15138 constants.IALLOCATOR_MODE_CHG_GROUP: 15139 (_AddChangeGroup, [ 15140 ("instances", _STRING_LIST), 15141 ("target_groups", _STRING_LIST), 15142 ], _NEVAC_RESULT), 15143 } 15144
15145 - def Run(self, name, validate=True, call_fn=None):
15146 """Run an instance allocator and return the results. 15147 15148 """ 15149 if call_fn is None: 15150 call_fn = self.rpc.call_iallocator_runner 15151 15152 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text) 15153 result.Raise("Failure while running the iallocator script") 15154 15155 self.out_text = result.payload 15156 if validate: 15157 self._ValidateResult()
15158
15159 - def _ValidateResult(self):
15160 """Process the allocator results. 15161 15162 This will process and, if successful, save the result in 15163 self.out_data and the other parameters. 15164 15165 """ 15166 try: 15167 rdict = serializer.Load(self.out_text) 15168 except Exception, err: 15169 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err)) 15170 15171 if not isinstance(rdict, dict): 15172 raise errors.OpExecError("Can't parse iallocator results: not a dict") 15173 15174 # TODO: remove backwards compatibility in later versions 15175 if "nodes" in rdict and "result" not in rdict: 15176 rdict["result"] = rdict["nodes"] 15177 del rdict["nodes"] 15178 15179 for key in "success", "info", "result": 15180 if key not in rdict: 15181 raise errors.OpExecError("Can't parse iallocator results:" 15182 " missing key '%s'" % key) 15183 setattr(self, key, rdict[key]) 15184 15185 if not self._result_check(self.result): 15186 raise errors.OpExecError("Iallocator returned invalid result," 15187 " expected %s, got %s" % 15188 (self._result_check, self.result), 15189 errors.ECODE_INVAL) 15190 15191 if self.mode == constants.IALLOCATOR_MODE_RELOC: 15192 assert self.relocate_from is not None 15193 assert self.required_nodes == 1 15194 15195 node2group = dict((name, ndata["group"]) 15196 for (name, ndata) in self.in_data["nodes"].items()) 15197 15198 fn = compat.partial(self._NodesToGroups, node2group, 15199 self.in_data["nodegroups"]) 15200 15201 instance = self.cfg.GetInstanceInfo(self.name) 15202 request_groups = fn(self.relocate_from + [instance.primary_node]) 15203 result_groups = fn(rdict["result"] + [instance.primary_node]) 15204 15205 if self.success and not set(result_groups).issubset(request_groups): 15206 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)" 15207 " differ from original groups (%s)" % 15208 (utils.CommaJoin(result_groups), 15209 utils.CommaJoin(request_groups))) 15210 15211 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC: 15212 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES 15213 15214 self.out_data = rdict
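# Illustrative sketch (made-up content) of a well-formed iallocator reply for
# an allocation request, as accepted by _ValidateResult above: the three
# mandatory keys must be present and "result" must satisfy the mode's
# _result_check (a plain list for IALLOCATOR_MODE_ALLOC).
_EXAMPLE_IALLOCATOR_REPLY = {
  "success": True,
  "info": "allocation successful",
  "result": ["node2.example.com", "node3.example.com"],   # chosen nodes
  }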
15215 15216 @staticmethod
15217 - def _NodesToGroups(node2group, groups, nodes):
15218 """Returns a list of unique group names for a list of nodes. 15219 15220 @type node2group: dict 15221 @param node2group: Map from node name to group UUID 15222 @type groups: dict 15223 @param groups: Group information 15224 @type nodes: list 15225 @param nodes: Node names 15226 15227 """ 15228 result = set() 15229 15230 for node in nodes: 15231 try: 15232 group_uuid = node2group[node] 15233 except KeyError: 15234 # Ignore unknown node 15235 pass 15236 else: 15237 try: 15238 group = groups[group_uuid] 15239 except KeyError: 15240 # Can't find group, let's use UUID 15241 group_name = group_uuid 15242 else: 15243 group_name = group["name"] 15244 15245 result.add(group_name) 15246 15247 return sorted(result)
15248
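# Usage sketch for IAllocator._NodesToGroups above (hypothetical data):
# unknown nodes are silently skipped, and a group UUID with no entry in
# "groups" is used verbatim as the group name.
node2group = {"node1": "uuid-a", "node2": "uuid-b"}
groups = {"uuid-a": {"name": "default"}}
print IAllocator._NodesToGroups(node2group, groups,
                                ["node1", "node2", "unknown-node"])
# -> ['default', 'uuid-b']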
15249 15250 -class LUTestAllocator(NoHooksLU):
15251 """Run allocator tests. 15252 15253 This LU runs the allocator tests. 15254 15255 """
15256 - def CheckPrereq(self):
15257 """Check prerequisites. 15258 15259 This checks the opcode parameters depending on the direction and test mode. 15260 15261 """ 15262 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 15263 for attr in ["memory", "disks", "disk_template", 15264 "os", "tags", "nics", "vcpus"]: 15265 if not hasattr(self.op, attr): 15266 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" % 15267 attr, errors.ECODE_INVAL) 15268 iname = self.cfg.ExpandInstanceName(self.op.name) 15269 if iname is not None: 15270 raise errors.OpPrereqError("Instance '%s' already in the cluster" % 15271 iname, errors.ECODE_EXISTS) 15272 if not isinstance(self.op.nics, list): 15273 raise errors.OpPrereqError("Invalid parameter 'nics'", 15274 errors.ECODE_INVAL) 15275 if not isinstance(self.op.disks, list): 15276 raise errors.OpPrereqError("Invalid parameter 'disks'", 15277 errors.ECODE_INVAL) 15278 for row in self.op.disks: 15279 if (not isinstance(row, dict) or 15280 constants.IDISK_SIZE not in row or 15281 not isinstance(row[constants.IDISK_SIZE], int) or 15282 constants.IDISK_MODE not in row or 15283 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET): 15284 raise errors.OpPrereqError("Invalid contents of the 'disks'" 15285 " parameter", errors.ECODE_INVAL) 15286 if self.op.hypervisor is None: 15287 self.op.hypervisor = self.cfg.GetHypervisorType() 15288 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 15289 fname = _ExpandInstanceName(self.cfg, self.op.name) 15290 self.op.name = fname 15291 self.relocate_from = \ 15292 list(self.cfg.GetInstanceInfo(fname).secondary_nodes) 15293 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP, 15294 constants.IALLOCATOR_MODE_NODE_EVAC): 15295 if not self.op.instances: 15296 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL) 15297 self.op.instances = _GetWantedInstances(self, self.op.instances) 15298 else: 15299 raise errors.OpPrereqError("Invalid test allocator mode '%s'" % 15300 self.op.mode, errors.ECODE_INVAL) 15301 15302 if self.op.direction == constants.IALLOCATOR_DIR_OUT: 15303 if self.op.allocator is None: 15304 raise errors.OpPrereqError("Missing allocator name", 15305 errors.ECODE_INVAL) 15306 elif self.op.direction != constants.IALLOCATOR_DIR_IN: 15307 raise errors.OpPrereqError("Wrong allocator test '%s'" % 15308 self.op.direction, errors.ECODE_INVAL)
15309
15310 - def Exec(self, feedback_fn):
15311 """Run the allocator test. 15312 15313 """ 15314 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC: 15315 ial = IAllocator(self.cfg, self.rpc, 15316 mode=self.op.mode, 15317 name=self.op.name, 15318 memory=self.op.memory, 15319 disks=self.op.disks, 15320 disk_template=self.op.disk_template, 15321 os=self.op.os, 15322 tags=self.op.tags, 15323 nics=self.op.nics, 15324 vcpus=self.op.vcpus, 15325 hypervisor=self.op.hypervisor, 15326 spindle_use=self.op.spindle_use, 15327 ) 15328 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC: 15329 ial = IAllocator(self.cfg, self.rpc, 15330 mode=self.op.mode, 15331 name=self.op.name, 15332 relocate_from=list(self.relocate_from), 15333 ) 15334 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP: 15335 ial = IAllocator(self.cfg, self.rpc, 15336 mode=self.op.mode, 15337 instances=self.op.instances, 15338 target_groups=self.op.target_groups) 15339 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC: 15340 ial = IAllocator(self.cfg, self.rpc, 15341 mode=self.op.mode, 15342 instances=self.op.instances, 15343 evac_mode=self.op.evac_mode) 15344 else: 15345 raise errors.ProgrammerError("Unhandled mode %s in" 15346 " LUTestAllocator.Exec" % self.op.mode) 15347 15348 if self.op.direction == constants.IALLOCATOR_DIR_IN: 15349 result = ial.in_text 15350 else: 15351 ial.Run(self.op.allocator, validate=False) 15352 result = ial.out_text 15353 return result
15354 15355 15356 #: Query type implementations 15357 _QUERY_IMPL = { 15358 constants.QR_CLUSTER: _ClusterQuery, 15359 constants.QR_INSTANCE: _InstanceQuery, 15360 constants.QR_NODE: _NodeQuery, 15361 constants.QR_GROUP: _GroupQuery, 15362 constants.QR_OS: _OsQuery, 15363 constants.QR_EXPORT: _ExportQuery, 15364 } 15365 15366 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15367 15368 15369 -def _GetQueryImplementation(name):
15370 """Returns the implementation for a query type. 15371 15372 @param name: Query type, must be one of L{constants.QR_VIA_OP} 15373 15374 """ 15375 try: 15376 return _QUERY_IMPL[name] 15377 except KeyError: 15378 raise errors.OpPrereqError("Unknown query resource '%s'" % name, 15379 errors.ECODE_INVAL)
15380
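# Usage sketch (hypothetical caller): query implementations are looked up
# through the table above, and an unknown resource name surfaces as
# OpPrereqError rather than a bare KeyError.
impl_cls = _GetQueryImplementation(constants.QR_NODE)    # -> _NodeQuery
try:
  _GetQueryImplementation("no-such-resource")
except errors.OpPrereqError, err:
  print "rejected: %s" % err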