
Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import logging 
   36  import copy 
   37  import OpenSSL 
   38  import socket 
   39  import tempfile 
   40  import shutil 
   41  import itertools 
   42  import operator 
   43   
   44  from ganeti import ssh 
   45  from ganeti import utils 
   46  from ganeti import errors 
   47  from ganeti import hypervisor 
   48  from ganeti import locking 
   49  from ganeti import constants 
   50  from ganeti import objects 
   51  from ganeti import serializer 
   52  from ganeti import ssconf 
   53  from ganeti import uidpool 
   54  from ganeti import compat 
   55  from ganeti import masterd 
   56  from ganeti import netutils 
   57  from ganeti import query 
   58  from ganeti import qlang 
   59  from ganeti import opcodes 
   60  from ganeti import ht 
   61  from ganeti import rpc 
   62  from ganeti import runtime 
   63   
   64  import ganeti.masterd.instance # pylint: disable=W0611 
   65   
   66   
   67  #: Size of DRBD meta block device 
   68  DRBD_META_SIZE = 128 
   69   
   70  # States of instance 
   71  INSTANCE_DOWN = [constants.ADMINST_DOWN] 
   72  INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP] 
   73  INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE] 
   74   
   75  #: Instance status in which an instance can be marked as offline/online 
   76  CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([ 
   77    constants.ADMINST_OFFLINE, 
   78    ])) 
79 80 81 -class ResultWithJobs:
82 """Data container for LU results with jobs. 83 84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized 85 by L{mcpu._ProcessResult}. The latter will then submit the jobs 86 contained in the C{jobs} attribute and include the job IDs in the opcode 87 result. 88 89 """
90 - def __init__(self, jobs, **kwargs):
91 """Initializes this class. 92 93 Additional return values can be specified as keyword arguments. 94 95 @type jobs: list of lists of L{opcode.OpCode} 96 @param jobs: A list of lists of opcode objects 97 98 """ 99 self.jobs = jobs 100 self.other = kwargs
101
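
As a minimal usage sketch (the opcode and the extra keyword are illustrative; it only assumes the ganeti package and its opcodes module are importable), an LU's Exec could hand follow-up jobs back to the master like this:

    from ganeti import opcodes

    def Exec(self, feedback_fn):  # body of a hypothetical LU
        # One follow-up job consisting of a single opcode; each inner list
        # becomes a separate job submitted by mcpu._ProcessResult.
        jobs = [[opcodes.OpTestDelay(duration=1.0)]]
        # Extra keyword arguments are collected in the result's "other" dict.
        return ResultWithJobs(jobs, cleanup_done=True)
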
102 103 -class LogicalUnit(object):
104 """Logical Unit base class. 105 106 Subclasses must follow these rules: 107 - implement ExpandNames 108 - implement CheckPrereq (except when tasklets are used) 109 - implement Exec (except when tasklets are used) 110 - implement BuildHooksEnv 111 - implement BuildHooksNodes 112 - redefine HPATH and HTYPE 113 - optionally redefine their run requirements: 114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 115 116 Note that all commands require root permissions. 117 118 @ivar dry_run_result: the value (if any) that will be returned to the caller 119 in dry-run mode (signalled by opcode dry_run parameter) 120 121 """ 122 HPATH = None 123 HTYPE = None 124 REQ_BGL = True 125
126 - def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit. 128 129 This needs to be overridden in derived classes in order to check op 130 validity. 131 132 """ 133 self.proc = processor 134 self.op = op 135 self.cfg = context.cfg 136 self.glm = context.glm 137 # readability alias 138 self.owned_locks = context.glm.list_owned 139 self.context = context 140 self.rpc = rpc_runner 141 # Dicts used to declare locking needs to mcpu 142 self.needed_locks = None 143 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 144 self.add_locks = {} 145 self.remove_locks = {} 146 # Used to force good behavior when calling helper functions 147 self.recalculate_locks = {} 148 # logging 149 self.Log = processor.Log # pylint: disable=C0103 150 self.LogWarning = processor.LogWarning # pylint: disable=C0103 151 self.LogInfo = processor.LogInfo # pylint: disable=C0103 152 self.LogStep = processor.LogStep # pylint: disable=C0103 153 # support for dry-run 154 self.dry_run_result = None 155 # support for generic debug attribute 156 if (not hasattr(self.op, "debug_level") or 157 not isinstance(self.op.debug_level, int)): 158 self.op.debug_level = 0 159 160 # Tasklets 161 self.tasklets = None 162 163 # Validate opcode parameters and set defaults 164 self.op.Validate(True) 165 166 self.CheckArguments()
167
168 - def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments. 170 171 This method is for doing a simple syntactic check and ensure 172 validity of opcode parameters, without any cluster-related 173 checks. While the same can be accomplished in ExpandNames and/or 174 CheckPrereq, doing these separate is better because: 175 176 - ExpandNames is left as as purely a lock-related function 177 - CheckPrereq is run after we have acquired locks (and possible 178 waited for them) 179 180 The function is allowed to change the self.op attribute so that 181 later methods can no longer worry about missing parameters. 182 183 """ 184 pass
185
186 - def ExpandNames(self):
187 """Expand names for this LU. 188 189 This method is called before starting to execute the opcode, and it should 190 update all the parameters of the opcode to their canonical form (e.g. a 191 short node name must be fully expanded after this method has successfully 192 completed). This way locking, hooks, logging, etc. can work correctly. 193 194 LUs which implement this method must also populate the self.needed_locks 195 member, as a dict with lock levels as keys, and a list of needed lock names 196 as values. Rules: 197 198 - use an empty dict if you don't need any lock 199 - if you don't need any lock at a particular level omit that 200 level (note that in this case C{DeclareLocks} won't be called 201 at all for that level) 202 - if you need locks at a level, but you can't calculate it in 203 this function, initialise that level with an empty list and do 204 further processing in L{LogicalUnit.DeclareLocks} (see that 205 function's docstring) 206 - don't put anything for the BGL level 207 - if you want all locks at a level use L{locking.ALL_SET} as a value 208 209 If you need to share locks (rather than acquire them exclusively) at one 210 level you can modify self.share_locks, setting a true value (usually 1) for 211 that level. By default locks are not shared. 212 213 This function can also define a list of tasklets, which then will be 214 executed in order instead of the usual LU-level CheckPrereq and Exec 215 functions, if those are not defined by the LU. 216 217 Examples:: 218 219 # Acquire all nodes and one instance 220 self.needed_locks = { 221 locking.LEVEL_NODE: locking.ALL_SET, 222 locking.LEVEL_INSTANCE: ['instance1.example.com'], 223 } 224 # Acquire just two nodes 225 self.needed_locks = { 226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 227 } 228 # Acquire no locks 229 self.needed_locks = {} # No, you can't leave it to the default value None 230 231 """ 232 # The implementation of this method is mandatory only if the new LU is 233 # concurrent, so that old LUs don't need to be changed all at the same 234 # time. 235 if self.REQ_BGL: 236 self.needed_locks = {} # Exclusive LUs don't need locks. 237 else: 238 raise NotImplementedError
239
240 - def DeclareLocks(self, level):
241 """Declare LU locking needs for a level 242 243 While most LUs can just declare their locking needs at ExpandNames time, 244 sometimes there's the need to calculate some locks after having acquired 245 the ones before. This function is called just before acquiring locks at a 246 particular level, but after acquiring the ones at lower levels, and permits 247 such calculations. It can be used to modify self.needed_locks, and by 248 default it does nothing. 249 250 This function is only called if you have something already set in 251 self.needed_locks for the level. 252 253 @param level: Locking level which is going to be locked 254 @type level: member of L{ganeti.locking.LEVELS} 255 256 """
257
258 - def CheckPrereq(self):
259 """Check prerequisites for this LU. 260 261 This method should check that the prerequisites for the execution 262 of this LU are fulfilled. It can do internode communication, but 263 it should be idempotent - no cluster or system changes are 264 allowed. 265 266 The method should raise errors.OpPrereqError in case something is 267 not fulfilled. Its return value is ignored. 268 269 This method should also update all the parameters of the opcode to 270 their canonical form if it hasn't been done by ExpandNames before. 271 272 """ 273 if self.tasklets is not None: 274 for (idx, tl) in enumerate(self.tasklets): 275 logging.debug("Checking prerequisites for tasklet %s/%s", 276 idx + 1, len(self.tasklets)) 277 tl.CheckPrereq() 278 else: 279 pass
280
281 - def Exec(self, feedback_fn):
282 """Execute the LU. 283 284 This method should implement the actual work. It should raise 285 errors.OpExecError for failures that are somewhat dealt with in 286 code, or expected. 287 288 """ 289 if self.tasklets is not None: 290 for (idx, tl) in enumerate(self.tasklets): 291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 292 tl.Exec(feedback_fn) 293 else: 294 raise NotImplementedError
295
296 - def BuildHooksEnv(self):
297 """Build hooks environment for this LU. 298 299 @rtype: dict 300 @return: Dictionary containing the environment that will be used for 301 running the hooks for this LU. The keys of the dict must not be prefixed 302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner 303 will extend the environment with additional variables. If no environment 304 should be defined, an empty dictionary should be returned (not C{None}). 305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 306 will not be called. 307 308 """ 309 raise NotImplementedError
310
311 - def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks. 313 314 @rtype: tuple; (list, list) 315 @return: Tuple containing a list of node names on which the hook 316 should run before the execution and a list of node names on which the 317 hook should run after the execution. No nodes should be returned as an 318 empty list (and not None). 319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 320 will not be called. 321 322 """ 323 raise NotImplementedError
324
325 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks. 327 328 This method is called every time a hooks phase is executed, and notifies 329 the Logical Unit about the hooks' result. The LU can then use it to alter 330 its result based on the hooks. By default the method does nothing and the 331 previous result is passed back unchanged but any LU can define it if it 332 wants to use the local cluster hook-scripts somehow. 333 334 @param phase: one of L{constants.HOOKS_PHASE_POST} or 335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 336 @param hook_results: the results of the multi-node hooks rpc call 337 @param feedback_fn: function used send feedback back to the caller 338 @param lu_result: the previous Exec result this LU had, or None 339 in the PRE phase 340 @return: the new Exec result, based on the previous result 341 and hook results 342 343 """ 344 # API must be kept, thus we ignore the unused argument and could 345 # be a function warnings 346 # pylint: disable=W0613,R0201 347 return lu_result
348
349 - def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance. 351 352 Many LUs that work on an instance take its name in self.op.instance_name 353 and need to expand it and then declare the expanded name for locking. This 354 function does it, and then updates self.op.instance_name to the expanded 355 name. It also initializes needed_locks as a dict, if this hasn't been done 356 before. 357 358 """ 359 if self.needed_locks is None: 360 self.needed_locks = {} 361 else: 362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 363 "_ExpandAndLockInstance called with instance-level locks set" 364 self.op.instance_name = _ExpandInstanceName(self.cfg, 365 self.op.instance_name) 366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367
368 - def _LockInstancesNodes(self, primary_only=False, 369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking. 371 372 This function should be called after locking one or more instances to lock 373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 374 with all primary or secondary nodes for instances already locked and 375 present in self.needed_locks[locking.LEVEL_INSTANCE]. 376 377 It should be called from DeclareLocks, and for safety only works if 378 self.recalculate_locks[locking.LEVEL_NODE] is set. 379 380 In the future it may grow parameters to just lock some instance's nodes, or 381 to just lock primaries or secondary nodes, if needed. 382 383 If should be called in DeclareLocks in a way similar to:: 384 385 if level == locking.LEVEL_NODE: 386 self._LockInstancesNodes() 387 388 @type primary_only: boolean 389 @param primary_only: only lock primary nodes of locked instances 390 @param level: Which lock level to use for locking nodes 391 392 """ 393 assert level in self.recalculate_locks, \ 394 "_LockInstancesNodes helper function called with no nodes to recalculate" 395 396 # TODO: check if we're really been called with the instance locks held 397 398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 399 # future we might want to have different behaviors depending on the value 400 # of self.recalculate_locks[locking.LEVEL_NODE] 401 wanted_nodes = [] 402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE) 403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i): 404 wanted_nodes.append(instance.primary_node) 405 if not primary_only: 406 wanted_nodes.extend(instance.secondary_nodes) 407 408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE: 409 self.needed_locks[level] = wanted_nodes 410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND: 411 self.needed_locks[level].extend(wanted_nodes) 412 else: 413 raise errors.ProgrammerError("Unknown recalculation mode") 414 415 del self.recalculate_locks[level]
416
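
Putting the contract above together, a minimal LU skeleton could look as follows; the class name and hook path are illustrative, and the pattern mirrors LUClusterPostInit defined later in this module:

    class LUExampleNoop(LogicalUnit):        # hypothetical example
      HPATH = "example-noop"                 # illustrative hook path
      HTYPE = constants.HTYPE_CLUSTER
      REQ_BGL = False                        # concurrent LU, must declare locks

      def ExpandNames(self):
        self.needed_locks = {}               # no locks needed; None is invalid

      def CheckPrereq(self):
        pass                                 # idempotent checks only

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        master = self.cfg.GetMasterNode()
        return ([master], [master])

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")
        return True
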
417 418 -class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks. 420 421 This LU is intended as a parent for other LogicalUnits which will 422 run no hooks, in order to reduce duplicate code. 423 424 """ 425 HPATH = None 426 HTYPE = None 427
428 - def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu. 430 431 This just raises an error. 432 433 """ 434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
435
436 - def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU. 438 439 """ 440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441
442 443 -class Tasklet:
444 """Tasklet base class. 445 446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 447 they can mix legacy code with tasklets. Locking needs to be done in the LU, 448 tasklets know nothing about locks. 449 450 Subclasses must follow these rules: 451 - Implement CheckPrereq 452 - Implement Exec 453 454 """
455 - def __init__(self, lu):
456 self.lu = lu 457 458 # Shortcuts 459 self.cfg = lu.cfg 460 self.rpc = lu.rpc
461
462 - def CheckPrereq(self):
463 """Check prerequisites for this tasklets. 464 465 This method should check whether the prerequisites for the execution of 466 this tasklet are fulfilled. It can do internode communication, but it 467 should be idempotent - no cluster or system changes are allowed. 468 469 The method should raise errors.OpPrereqError in case something is not 470 fulfilled. Its return value is ignored. 471 472 This method should also update all parameters to their canonical form if it 473 hasn't been done before. 474 475 """ 476 pass
477
478 - def Exec(self, feedback_fn):
479 """Execute the tasklet. 480 481 This method should implement the actual work. It should raise 482 errors.OpExecError for failures that are somewhat dealt with in code, or 483 expected. 484 485 """ 486 raise NotImplementedError
487
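
A hedged sketch of the tasklet pattern (the class and its instance-name handling are illustrative): the owning LU builds the list in ExpandNames, and the default CheckPrereq/Exec of LogicalUnit then iterate over it:

    class _ExampleTasklet(Tasklet):          # hypothetical tasklet
      def __init__(self, lu, instance_name):
        Tasklet.__init__(self, lu)
        self.instance_name = instance_name

      def CheckPrereq(self):
        # Idempotent checks only; all locking was already done by the LU
        if self.instance_name not in self.cfg.GetInstanceList():
          raise errors.OpPrereqError("Instance '%s' is unknown" %
                                     self.instance_name, errors.ECODE_NOENT)

      def Exec(self, feedback_fn):
        feedback_fn("Processing %s" % self.instance_name)

    # In the owning LU's ExpandNames:
    #   self.tasklets = [_ExampleTasklet(self, name) for name in wanted_names]
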
488 489 -class _QueryBase:
490 """Base for query utility classes. 491 492 """ 493 #: Attribute holding field definitions 494 FIELDS = None 495 496 #: Field to sort by 497 SORT_FIELD = "name" 498
499 - def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class. 501 502 """ 503 self.use_locking = use_locking 504 505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter, 506 namefield=self.SORT_FIELD) 507 self.requested_data = self.query.RequestedData() 508 self.names = self.query.RequestedNames() 509 510 # Sort only if no names were requested 511 self.sort_by_name = not self.names 512 513 self.do_locking = None 514 self.wanted = None
515
516 - def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query. 518 519 """ 520 if self.do_locking: 521 names = lu.owned_locks(lock_level) 522 else: 523 names = all_names 524 525 if self.wanted == locking.ALL_SET: 526 assert not self.names 527 # caller didn't specify names, so ordering is not important 528 return utils.NiceSort(names) 529 530 # caller specified names and we must keep the same order 531 assert self.names 532 assert not self.do_locking or lu.glm.is_owned(lock_level) 533 534 missing = set(self.wanted).difference(names) 535 if missing: 536 raise errors.OpExecError("Some items were removed before retrieving" 537 " their data: %s" % missing) 538 539 # Return expanded names 540 return self.wanted
541
542 - def ExpandNames(self, lu):
543 """Expand names for this query. 544 545 See L{LogicalUnit.ExpandNames}. 546 547 """ 548 raise NotImplementedError()
549
550 - def DeclareLocks(self, lu, level):
551 """Declare locks for this query. 552 553 See L{LogicalUnit.DeclareLocks}. 554 555 """ 556 raise NotImplementedError()
557
558 - def _GetQueryData(self, lu):
559 """Collects all data for this query. 560 561 @return: Query data object 562 563 """ 564 raise NotImplementedError()
565
566 - def NewStyleQuery(self, lu):
567 """Collect data and execute query. 568 569 """ 570 return query.GetQueryResponse(self.query, self._GetQueryData(lu), 571 sort_by_name=self.sort_by_name)
572
573 - def OldStyleQuery(self, lu):
574 """Collect data and execute query. 575 576 """ 577 return self.query.OldStyleQuery(self._GetQueryData(lu), 578 sort_by_name=self.sort_by_name)
579
580 581 -def _ShareAll():
582 """Returns a dict declaring all lock levels shared. 583 584 """ 585 return dict.fromkeys(locking.LEVELS, 1)
586
587 588 -def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}. 590 591 Converts the data into a single dictionary. This is fine for most use cases, 592 but some require information from more than one volume group or hypervisor. 593 594 """ 595 (bootid, (vg_info, ), (hv_info, )) = data 596 597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), { 598 "bootid": bootid, 599 })
600
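
For illustration, with made-up values in the shape returned by call_node_info (a boot ID plus one volume-group result and one hypervisor result), the conversion flattens everything into a single dict:

    data = ("f9d2a1c0-boot",                                    # made-up boot ID
            ({"vg_size": 102400, "vg_free": 51200},),           # single VG result
            ({"memory_total": 32768, "memory_free": 16384},))   # single HV result
    info = _MakeLegacyNodeInfo(data)
    # info == {"vg_size": 102400, "vg_free": 51200,
    #          "memory_total": 32768, "memory_free": 16384,
    #          "bootid": "f9d2a1c0-boot"}
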
601 602 -def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method. 604 605 @param instance: The instance object 606 @type devs: List of L{objects.Disk} 607 @param devs: The root devices (not any of its children!) 608 @param cfg: The config object 609 @returns The annotated disk copies 610 @see L{rpc.AnnotateDiskParams} 611 612 """ 613 return rpc.AnnotateDiskParams(instance.disk_template, devs, 614 cfg.GetInstanceDiskParams(instance))
615
616 617 -def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes, 618 cur_group_uuid):
619 """Checks if node groups for locked instances are still correct. 620 621 @type cfg: L{config.ConfigWriter} 622 @param cfg: Cluster configuration 623 @type instances: dict; string as key, L{objects.Instance} as value 624 @param instances: Dictionary, instance name as key, instance object as value 625 @type owned_groups: iterable of string 626 @param owned_groups: List of owned groups 627 @type owned_nodes: iterable of string 628 @param owned_nodes: List of owned nodes 629 @type cur_group_uuid: string or None 630 @param cur_group_uuid: Optional group UUID to check against instance's groups 631 632 """ 633 for (name, inst) in instances.items(): 634 assert owned_nodes.issuperset(inst.all_nodes), \ 635 "Instance %s's nodes changed while we kept the lock" % name 636 637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups) 638 639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \ 640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
641
642 643 -def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance. 645 646 @type cfg: L{config.ConfigWriter} 647 @param cfg: The cluster configuration 648 @type instance_name: string 649 @param instance_name: Instance name 650 @type owned_groups: set or frozenset 651 @param owned_groups: List of currently owned node groups 652 653 """ 654 inst_groups = cfg.GetInstanceNodeGroups(instance_name) 655 656 if not owned_groups.issuperset(inst_groups): 657 raise errors.OpPrereqError("Instance %s's node groups changed since" 658 " locks were acquired, current groups are" 659 " are '%s', owning groups '%s'; retry the" 660 " operation" % 661 (instance_name, 662 utils.CommaJoin(inst_groups), 663 utils.CommaJoin(owned_groups)), 664 errors.ECODE_STATE) 665 666 return inst_groups
667
668 669 -def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct. 671 672 @type cfg: L{config.ConfigWriter} 673 @param cfg: The cluster configuration 674 @type group_uuid: string 675 @param group_uuid: Node group UUID 676 @type owned_instances: set or frozenset 677 @param owned_instances: List of currently owned instances 678 679 """ 680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid) 681 if owned_instances != wanted_instances: 682 raise errors.OpPrereqError("Instances in node group '%s' changed since" 683 " locks were acquired, wanted '%s', have '%s';" 684 " retry the operation" % 685 (group_uuid, 686 utils.CommaJoin(wanted_instances), 687 utils.CommaJoin(owned_instances)), 688 errors.ECODE_STATE) 689 690 return wanted_instances
691
692 693 -def _SupportsOob(cfg, node):
694 """Tells if node supports OOB. 695 696 @type cfg: L{config.ConfigWriter} 697 @param cfg: The cluster configuration 698 @type node: L{objects.Node} 699 @param node: The node 700 @return: The OOB script if supported or an empty string otherwise 701 702 """ 703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
704
705 706 -def _CopyLockList(names):
707 """Makes a copy of a list of lock names. 708 709 Handles L{locking.ALL_SET} correctly. 710 711 """ 712 if names == locking.ALL_SET: 713 return locking.ALL_SET 714 else: 715 return names[:]
716
717 718 -def _GetWantedNodes(lu, nodes):
719 """Returns list of checked and expanded node names. 720 721 @type lu: L{LogicalUnit} 722 @param lu: the logical unit on whose behalf we execute 723 @type nodes: list 724 @param nodes: list of node names or None for all nodes 725 @rtype: list 726 @return: the list of nodes, sorted 727 @raise errors.ProgrammerError: if the nodes parameter is wrong type 728 729 """ 730 if nodes: 731 return [_ExpandNodeName(lu.cfg, name) for name in nodes] 732 733 return utils.NiceSort(lu.cfg.GetNodeList())
734
735 736 -def _GetWantedInstances(lu, instances):
737 """Returns list of checked and expanded instance names. 738 739 @type lu: L{LogicalUnit} 740 @param lu: the logical unit on whose behalf we execute 741 @type instances: list 742 @param instances: list of instance names or None for all instances 743 @rtype: list 744 @return: the list of instances, sorted 745 @raise errors.OpPrereqError: if the instances parameter is wrong type 746 @raise errors.OpPrereqError: if any of the passed instances is not found 747 748 """ 749 if instances: 750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 751 else: 752 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 753 return wanted
754
755 756 -def _GetUpdatedParams(old_params, update_dict, 757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary. 759 760 @type old_params: dict 761 @param old_params: old parameters 762 @type update_dict: dict 763 @param update_dict: dict containing new parameter values, or 764 constants.VALUE_DEFAULT to reset the parameter to its default 765 value 766 @param use_default: boolean 767 @type use_default: whether to recognise L{constants.VALUE_DEFAULT} 768 values as 'to be deleted' values 769 @param use_none: boolean 770 @type use_none: whether to recognise C{None} values as 'to be 771 deleted' values 772 @rtype: dict 773 @return: the new parameter dictionary 774 775 """ 776 params_copy = copy.deepcopy(old_params) 777 for key, val in update_dict.iteritems(): 778 if ((use_default and val == constants.VALUE_DEFAULT) or 779 (use_none and val is None)): 780 try: 781 del params_copy[key] 782 except KeyError: 783 pass 784 else: 785 params_copy[key] = val 786 return params_copy
787
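
A small self-contained example of the merge semantics (parameter names are made up): values equal to constants.VALUE_DEFAULT are dropped by default, C{None} is only dropped when use_none=True, and everything else overrides or extends the old dict:

    old = {"mem": 512, "vcpus": 2, "kernel_path": "/boot/vmlinuz"}
    update = {"mem": 1024,                              # overridden
              "kernel_path": constants.VALUE_DEFAULT,   # reset to default
              "nic_type": None}                         # kept (use_none=False)
    new = _GetUpdatedParams(old, update)
    # new == {"mem": 1024, "vcpus": 2, "nic_type": None}
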
788 789 -def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy. 791 792 @param group_policy: whether this policy applies to a group and thus 793 we should support removal of policy entries 794 795 """ 796 use_none = use_default = group_policy 797 ipolicy = copy.deepcopy(old_ipolicy) 798 for key, value in new_ipolicy.items(): 799 if key not in constants.IPOLICY_ALL_KEYS: 800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key, 801 errors.ECODE_INVAL) 802 if key in constants.IPOLICY_ISPECS: 803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES) 804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value, 805 use_none=use_none, 806 use_default=use_default) 807 else: 808 if (not value or value == [constants.VALUE_DEFAULT] or 809 value == constants.VALUE_DEFAULT): 810 if group_policy: 811 del ipolicy[key] 812 else: 813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'" 814 " on the cluster'" % key, 815 errors.ECODE_INVAL) 816 else: 817 if key in constants.IPOLICY_PARAMETERS: 818 # FIXME: we assume all such values are float 819 try: 820 ipolicy[key] = float(value) 821 except (TypeError, ValueError), err: 822 raise errors.OpPrereqError("Invalid value for attribute" 823 " '%s': '%s', error: %s" % 824 (key, value, err), errors.ECODE_INVAL) 825 else: 826 # FIXME: we assume all others are lists; this should be redone 827 # in a nicer way 828 ipolicy[key] = list(value) 829 try: 830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy) 831 except errors.ConfigurationError, err: 832 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 833 errors.ECODE_INVAL) 834 return ipolicy
835
836 837 -def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type. 839 840 @param base: The dict with the old data 841 @param updates: The dict with the new data 842 @param type_check: Dict suitable to ForceDictType to verify correct types 843 @returns: A new dict with updated and verified values 844 845 """ 846 def fn(old, value): 847 new = _GetUpdatedParams(old, value) 848 utils.ForceDictType(new, type_check) 849 return new
850 851 ret = copy.deepcopy(base) 852 ret.update(dict((key, fn(base.get(key, {}), value)) 853 for key, value in updates.items())) 854 return ret 855
856 857 -def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object 859 860 @param op_input: The input dict from the opcode 861 @param obj_input: The input dict from the objects 862 @return: The verified and updated dict 863 864 """ 865 if op_input: 866 invalid_hvs = set(op_input) - constants.HYPER_TYPES 867 if invalid_hvs: 868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:" 869 " %s" % utils.CommaJoin(invalid_hvs), 870 errors.ECODE_INVAL) 871 if obj_input is None: 872 obj_input = {} 873 type_check = constants.HVSTS_PARAMETER_TYPES 874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check) 875 876 return None
877
878 879 -def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object 881 882 @param op_input: The input dict from the opcode 883 @param obj_input: The input dict from the objects 884 @return: The verified and updated dict 885 """ 886 if op_input: 887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES 888 if invalid_dst: 889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" % 890 utils.CommaJoin(invalid_dst), 891 errors.ECODE_INVAL) 892 type_check = constants.DSS_PARAMETER_TYPES 893 if obj_input is None: 894 obj_input = {} 895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value, 896 type_check)) 897 for key, value in op_input.items()) 898 899 return None
900
901 902 -def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU. 904 905 @type lu: L{LogicalUnit} 906 @param level: Lock level 907 @type names: list or None 908 @param names: Names of locks to release 909 @type keep: list or None 910 @param keep: Names of locks to retain 911 912 """ 913 assert not (keep is not None and names is not None), \ 914 "Only one of the 'names' and the 'keep' parameters can be given" 915 916 if names is not None: 917 should_release = names.__contains__ 918 elif keep: 919 should_release = lambda name: name not in keep 920 else: 921 should_release = None 922 923 owned = lu.owned_locks(level) 924 if not owned: 925 # Not owning any lock at this level, do nothing 926 pass 927 928 elif should_release: 929 retain = [] 930 release = [] 931 932 # Determine which locks to release 933 for name in owned: 934 if should_release(name): 935 release.append(name) 936 else: 937 retain.append(name) 938 939 assert len(lu.owned_locks(level)) == (len(retain) + len(release)) 940 941 # Release just some locks 942 lu.glm.release(level, names=release) 943 944 assert frozenset(lu.owned_locks(level)) == frozenset(retain) 945 else: 946 # Release everything 947 lu.glm.release(level) 948 949 assert not lu.glm.is_owned(level), "No locks should be owned"
950
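
A typical (illustrative) call from inside an LU, once most per-node work is finished: keep only the locks that are still required and release the rest at that level; the attribute names on self.op are hypothetical.

    # Keep the primary and secondary node locks, release all other node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=[self.op.pnode, self.op.snode])

    # Or release an explicit list of lock names instead:
    #   _ReleaseLocks(self, locking.LEVEL_NODE, names=nodes_done)
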
951 952 -def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name. 954 955 @type instances: list of L{objects.Instance} 956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value 957 958 """ 959 return dict(((node, vol), inst.name) 960 for inst in instances 961 for (node, vols) in inst.MapLVsByNode().items() 962 for vol in vols)
963
964 965 -def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node. 967 968 """ 969 hm = lu.proc.BuildHooksManager(lu) 970 try: 971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name]) 972 except Exception, err: # pylint: disable=W0703 973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
974
975 976 -def _CheckOutputFields(static, dynamic, selected):
977 """Checks whether all selected fields are valid. 978 979 @type static: L{utils.FieldSet} 980 @param static: static fields set 981 @type dynamic: L{utils.FieldSet} 982 @param dynamic: dynamic fields set 983 984 """ 985 f = utils.FieldSet() 986 f.Extend(static) 987 f.Extend(dynamic) 988 989 delta = f.NonMatching(selected) 990 if delta: 991 raise errors.OpPrereqError("Unknown output fields selected: %s" 992 % ",".join(delta), errors.ECODE_INVAL)
993
994 995 -def _CheckGlobalHvParams(params):
996 """Validates that given hypervisor params are not global ones. 997 998 This will ensure that instances don't get customised versions of 999 global params. 1000 1001 """ 1002 used_globals = constants.HVC_GLOBALS.intersection(params) 1003 if used_globals: 1004 msg = ("The following hypervisor parameters are global and cannot" 1005 " be customized at instance level, please modify them at" 1006 " cluster level: %s" % utils.CommaJoin(used_globals)) 1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1008
1009 1010 -def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online. 1012 1013 @param lu: the LU on behalf of which we make the check 1014 @param node: the node to check 1015 @param msg: if passed, should be a message to replace the default one 1016 @raise errors.OpPrereqError: if the node is offline 1017 1018 """ 1019 if msg is None: 1020 msg = "Can't use offline node" 1021 if lu.cfg.GetNodeInfo(node).offline: 1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1023
1024 1025 -def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained. 1027 1028 @param lu: the LU on behalf of which we make the check 1029 @param node: the node to check 1030 @raise errors.OpPrereqError: if the node is drained 1031 1032 """ 1033 if lu.cfg.GetNodeInfo(node).drained: 1034 raise errors.OpPrereqError("Can't use drained node %s" % node, 1035 errors.ECODE_STATE)
1036
1037 1038 -def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable. 1040 1041 @param lu: the LU on behalf of which we make the check 1042 @param node: the node to check 1043 @raise errors.OpPrereqError: if the node is not vm capable 1044 1045 """ 1046 if not lu.cfg.GetNodeInfo(node).vm_capable: 1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 1048 errors.ECODE_STATE)
1049
1050 1051 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
1052 """Ensure that a node supports a given OS. 1053 1054 @param lu: the LU on behalf of which we make the check 1055 @param node: the node to check 1056 @param os_name: the OS to query about 1057 @param force_variant: whether to ignore variant errors 1058 @raise errors.OpPrereqError: if the node is not supporting the OS 1059 1060 """ 1061 result = lu.rpc.call_os_get(node, os_name) 1062 result.Raise("OS '%s' not in supported OS list for node %s" % 1063 (os_name, node), 1064 prereq=True, ecode=errors.ECODE_INVAL) 1065 if not force_variant: 1066 _CheckOSVariant(result.payload, os_name)
1067
1068 1069 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1070 """Ensure that a node has the given secondary ip. 1071 1072 @type lu: L{LogicalUnit} 1073 @param lu: the LU on behalf of which we make the check 1074 @type node: string 1075 @param node: the node to check 1076 @type secondary_ip: string 1077 @param secondary_ip: the ip to check 1078 @type prereq: boolean 1079 @param prereq: whether to throw a prerequisite or an execute error 1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 1082 1083 """ 1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 1085 result.Raise("Failure checking secondary ip on node %s" % node, 1086 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1087 if not result.payload: 1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 1089 " please fix and re-run this command" % secondary_ip) 1090 if prereq: 1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 1092 else: 1093 raise errors.OpExecError(msg)
1094
1095 1096 -def _GetClusterDomainSecret():
1097 """Reads the cluster domain secret. 1098 1099 """ 1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 1101 strict=True)
1102
1103 1104 -def _CheckInstanceState(lu, instance, req_states, msg=None):
1105 """Ensure that an instance is in one of the required states. 1106 1107 @param lu: the LU on behalf of which we make the check 1108 @param instance: the instance to check 1109 @param msg: if passed, should be a message to replace the default one 1110 @raise errors.OpPrereqError: if the instance is not in the required state 1111 1112 """ 1113 if msg is None: 1114 msg = "can't use instance from outside %s states" % ", ".join(req_states) 1115 if instance.admin_state not in req_states: 1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" % 1117 (instance.name, instance.admin_state, msg), 1118 errors.ECODE_STATE) 1119 1120 if constants.ADMINST_UP not in req_states: 1121 pnode = instance.primary_node 1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 1123 ins_l.Raise("Can't contact node %s for instance information" % pnode, 1124 prereq=True, ecode=errors.ECODE_ENVIRON) 1125 1126 if instance.name in ins_l.payload: 1127 raise errors.OpPrereqError("Instance %s is running, %s" % 1128 (instance.name, msg), errors.ECODE_STATE)
1129
1130 1131 -def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1132 """Computes if value is in the desired range. 1133 1134 @param name: name of the parameter for which we perform the check 1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1', 1136 not just 'disk') 1137 @param ipolicy: dictionary containing min, max and std values 1138 @param value: actual value that we want to use 1139 @return: None or element not meeting the criteria 1140 1141 1142 """ 1143 if value in [None, constants.VALUE_AUTO]: 1144 return None 1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value) 1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value) 1147 if value > max_v or min_v > value: 1148 if qualifier: 1149 fqn = "%s/%s" % (name, qualifier) 1150 else: 1151 fqn = name 1152 return ("%s value %s is not in range [%s, %s]" % 1153 (fqn, value, min_v, max_v)) 1154 return None
1155
1156 1157 -def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, 1158 nic_count, disk_sizes, spindle_use, 1159 _compute_fn=_ComputeMinMaxSpec):
1160 """Verifies ipolicy against provided specs. 1161 1162 @type ipolicy: dict 1163 @param ipolicy: The ipolicy 1164 @type mem_size: int 1165 @param mem_size: The memory size 1166 @type cpu_count: int 1167 @param cpu_count: Used cpu cores 1168 @type disk_count: int 1169 @param disk_count: Number of disks used 1170 @type nic_count: int 1171 @param nic_count: Number of nics used 1172 @type disk_sizes: list of ints 1173 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count}) 1174 @type spindle_use: int 1175 @param spindle_use: The number of spindles this instance uses 1176 @param _compute_fn: The compute function (unittest only) 1177 @return: A list of violations, or an empty list of no violations are found 1178 1179 """ 1180 assert disk_count == len(disk_sizes) 1181 1182 test_settings = [ 1183 (constants.ISPEC_MEM_SIZE, "", mem_size), 1184 (constants.ISPEC_CPU_COUNT, "", cpu_count), 1185 (constants.ISPEC_DISK_COUNT, "", disk_count), 1186 (constants.ISPEC_NIC_COUNT, "", nic_count), 1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use), 1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d) 1189 for idx, d in enumerate(disk_sizes)] 1190 1191 return filter(None, 1192 (_compute_fn(name, qualifier, ipolicy, value) 1193 for (name, qualifier, value) in test_settings))
1194
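
An illustrative check against a hand-written ipolicy fragment (real policies come from the cluster/group objects, e.g. via _CalculateGroupIPolicy below); specs missing from the min/max dicts simply pass:

    ipolicy = {
      constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128,
                             constants.ISPEC_DISK_SIZE: 1024},
      constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768,
                             constants.ISPEC_DISK_SIZE: 1048576},
      }
    res = _ComputeIPolicySpecViolation(ipolicy, mem_size=64, cpu_count=1,
                                       disk_count=1, nic_count=1,
                                       disk_sizes=[2048], spindle_use=1)
    # res contains one message about the memory size being below the minimum
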
1195 1196 -def _ComputeIPolicyInstanceViolation(ipolicy, instance, 1197 _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance meets the specs of ipolicy. 1199 1200 @type ipolicy: dict 1201 @param ipolicy: The ipolicy to verify against 1202 @type instance: L{objects.Instance} 1203 @param instance: The instance to verify 1204 @param _compute_fn: The function to verify ipolicy (unittest only) 1205 @see: L{_ComputeIPolicySpecViolation} 1206 1207 """ 1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None) 1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None) 1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None) 1211 disk_count = len(instance.disks) 1212 disk_sizes = [disk.size for disk in instance.disks] 1213 nic_count = len(instance.nics) 1214 1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1216 disk_sizes, spindle_use)
1217
1218 1219 -def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec, 1220 _compute_fn=_ComputeIPolicySpecViolation):
1221 """Compute if instance specs meets the specs of ipolicy. 1222 1223 @type ipolicy: dict 1224 @param ipolicy: The ipolicy to verify against 1225 @param instance_spec: dict 1226 @param instance_spec: The instance spec to verify 1227 @param _compute_fn: The function to verify ipolicy (unittest only) 1228 @see: L{_ComputeIPolicySpecViolation} 1229 1230 """ 1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None) 1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None) 1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0) 1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, []) 1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0) 1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None) 1237 1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1239 disk_sizes, spindle_use)
1240
1241 1242 -def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group, 1243 target_group, 1244 _compute_fn=_ComputeIPolicyInstanceViolation):
1245 """Compute if instance meets the specs of the new target group. 1246 1247 @param ipolicy: The ipolicy to verify 1248 @param instance: The instance object to verify 1249 @param current_group: The current group of the instance 1250 @param target_group: The new group of the instance 1251 @param _compute_fn: The function to verify ipolicy (unittest only) 1252 @see: L{_ComputeIPolicySpecViolation} 1253 1254 """ 1255 if current_group == target_group: 1256 return [] 1257 else: 1258 return _compute_fn(ipolicy, instance)
1259
1260 1261 -def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False, 1262 _compute_fn=_ComputeIPolicyNodeViolation):
1263 """Checks that the target node is correct in terms of instance policy. 1264 1265 @param ipolicy: The ipolicy to verify 1266 @param instance: The instance object to verify 1267 @param node: The new node to relocate 1268 @param ignore: Ignore violations of the ipolicy 1269 @param _compute_fn: The function to verify ipolicy (unittest only) 1270 @see: L{_ComputeIPolicySpecViolation} 1271 1272 """ 1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node) 1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group) 1275 1276 if res: 1277 msg = ("Instance does not meet target node group's (%s) instance" 1278 " policy: %s") % (node.group, utils.CommaJoin(res)) 1279 if ignore: 1280 lu.LogWarning(msg) 1281 else: 1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1283
1284 1285 -def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1286 """Computes a set of any instances that would violate the new ipolicy. 1287 1288 @param old_ipolicy: The current (still in-place) ipolicy 1289 @param new_ipolicy: The new (to become) ipolicy 1290 @param instances: List of instances to verify 1291 @return: A list of instances which violates the new ipolicy but 1292 did not before 1293 1294 """ 1295 return (_ComputeViolatingInstances(new_ipolicy, instances) - 1296 _ComputeViolatingInstances(old_ipolicy, instances))
1297
1298 1299 -def _ExpandItemName(fn, name, kind):
1300 """Expand an item name. 1301 1302 @param fn: the function to use for expansion 1303 @param name: requested item name 1304 @param kind: text description ('Node' or 'Instance') 1305 @return: the resolved (full) name 1306 @raise errors.OpPrereqError: if the item is not found 1307 1308 """ 1309 full_name = fn(name) 1310 if full_name is None: 1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 1312 errors.ECODE_NOENT) 1313 return full_name
1314
1315 1316 -def _ExpandNodeName(cfg, name):
1317 """Wrapper over L{_ExpandItemName} for nodes.""" 1318 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1319
1320 1321 -def _ExpandInstanceName(cfg, name):
1322 """Wrapper over L{_ExpandItemName} for instance.""" 1323 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1324
1325 1326 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 1327 minmem, maxmem, vcpus, nics, disk_template, disks, 1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks 1330 1331 This builds the hook environment from individual variables. 1332 1333 @type name: string 1334 @param name: the name of the instance 1335 @type primary_node: string 1336 @param primary_node: the name of the instance's primary node 1337 @type secondary_nodes: list 1338 @param secondary_nodes: list of secondary nodes as strings 1339 @type os_type: string 1340 @param os_type: the name of the instance's OS 1341 @type status: string 1342 @param status: the desired status of the instance 1343 @type minmem: string 1344 @param minmem: the minimum memory size of the instance 1345 @type maxmem: string 1346 @param maxmem: the maximum memory size of the instance 1347 @type vcpus: string 1348 @param vcpus: the count of VCPUs the instance has 1349 @type nics: list 1350 @param nics: list of tuples (ip, mac, mode, link) representing 1351 the NICs the instance has 1352 @type disk_template: string 1353 @param disk_template: the disk template of the instance 1354 @type disks: list 1355 @param disks: the list of (size, mode) pairs 1356 @type bep: dict 1357 @param bep: the backend parameters for the instance 1358 @type hvp: dict 1359 @param hvp: the hypervisor parameters for the instance 1360 @type hypervisor_name: string 1361 @param hypervisor_name: the hypervisor for the instance 1362 @type tags: list 1363 @param tags: list of instance tags as strings 1364 @rtype: dict 1365 @return: the hook environment for this instance 1366 1367 """ 1368 env = { 1369 "OP_TARGET": name, 1370 "INSTANCE_NAME": name, 1371 "INSTANCE_PRIMARY": primary_node, 1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 1373 "INSTANCE_OS_TYPE": os_type, 1374 "INSTANCE_STATUS": status, 1375 "INSTANCE_MINMEM": minmem, 1376 "INSTANCE_MAXMEM": maxmem, 1377 # TODO(2.7) remove deprecated "memory" value 1378 "INSTANCE_MEMORY": maxmem, 1379 "INSTANCE_VCPUS": vcpus, 1380 "INSTANCE_DISK_TEMPLATE": disk_template, 1381 "INSTANCE_HYPERVISOR": hypervisor_name, 1382 } 1383 if nics: 1384 nic_count = len(nics) 1385 for idx, (ip, mac, mode, link) in enumerate(nics): 1386 if ip is None: 1387 ip = "" 1388 env["INSTANCE_NIC%d_IP" % idx] = ip 1389 env["INSTANCE_NIC%d_MAC" % idx] = mac 1390 env["INSTANCE_NIC%d_MODE" % idx] = mode 1391 env["INSTANCE_NIC%d_LINK" % idx] = link 1392 if mode == constants.NIC_MODE_BRIDGED: 1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 1394 else: 1395 nic_count = 0 1396 1397 env["INSTANCE_NIC_COUNT"] = nic_count 1398 1399 if disks: 1400 disk_count = len(disks) 1401 for idx, (size, mode) in enumerate(disks): 1402 env["INSTANCE_DISK%d_SIZE" % idx] = size 1403 env["INSTANCE_DISK%d_MODE" % idx] = mode 1404 else: 1405 disk_count = 0 1406 1407 env["INSTANCE_DISK_COUNT"] = disk_count 1408 1409 if not tags: 1410 tags = [] 1411 1412 env["INSTANCE_TAGS"] = " ".join(tags) 1413 1414 for source, kind in [(bep, "BE"), (hvp, "HV")]: 1415 for key, value in source.items(): 1416 env["INSTANCE_%s_%s" % (kind, key)] = value 1417 1418 return env
1419
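
For illustration, a direct call with made-up values (in practice the arguments come from _BuildInstanceHookEnvByObject below) and a few of the resulting keys:

    env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                                "debian-image", constants.ADMINST_UP,
                                128, 512, 1,
                                [("198.51.100.10", "aa:00:00:11:22:33",
                                  constants.NIC_MODE_BRIDGED, "br0")],
                                constants.DT_PLAIN, [(10240, "rw")],
                                {}, {}, constants.HT_XEN_PVM, ["web"])
    # env["INSTANCE_PRIMARY"] == "node1.example.com"
    # env["INSTANCE_NIC0_BRIDGE"] == "br0"
    # env["INSTANCE_DISK_COUNT"] == 1
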
1420 1421 -def _NICListToTuple(lu, nics):
1422 """Build a list of nic information tuples. 1423 1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 1425 value in LUInstanceQueryData. 1426 1427 @type lu: L{LogicalUnit} 1428 @param lu: the logical unit on whose behalf we execute 1429 @type nics: list of L{objects.NIC} 1430 @param nics: list of nics to convert to hooks tuples 1431 1432 """ 1433 hooks_nics = [] 1434 cluster = lu.cfg.GetClusterInfo() 1435 for nic in nics: 1436 ip = nic.ip 1437 mac = nic.mac 1438 filled_params = cluster.SimpleFillNIC(nic.nicparams) 1439 mode = filled_params[constants.NIC_MODE] 1440 link = filled_params[constants.NIC_LINK] 1441 hooks_nics.append((ip, mac, mode, link)) 1442 return hooks_nics
1443
1444 1445 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1446 """Builds instance related env variables for hooks from an object. 1447 1448 @type lu: L{LogicalUnit} 1449 @param lu: the logical unit on whose behalf we execute 1450 @type instance: L{objects.Instance} 1451 @param instance: the instance for which we should build the 1452 environment 1453 @type override: dict 1454 @param override: dictionary with key/values that will override 1455 our values 1456 @rtype: dict 1457 @return: the hook environment dictionary 1458 1459 """ 1460 cluster = lu.cfg.GetClusterInfo() 1461 bep = cluster.FillBE(instance) 1462 hvp = cluster.FillHV(instance) 1463 args = { 1464 "name": instance.name, 1465 "primary_node": instance.primary_node, 1466 "secondary_nodes": instance.secondary_nodes, 1467 "os_type": instance.os, 1468 "status": instance.admin_state, 1469 "maxmem": bep[constants.BE_MAXMEM], 1470 "minmem": bep[constants.BE_MINMEM], 1471 "vcpus": bep[constants.BE_VCPUS], 1472 "nics": _NICListToTuple(lu, instance.nics), 1473 "disk_template": instance.disk_template, 1474 "disks": [(disk.size, disk.mode) for disk in instance.disks], 1475 "bep": bep, 1476 "hvp": hvp, 1477 "hypervisor_name": instance.hypervisor, 1478 "tags": instance.tags, 1479 } 1480 if override: 1481 args.update(override) 1482 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1483
1484 1485 -def _AdjustCandidatePool(lu, exceptions):
1486 """Adjust the candidate pool after node operations. 1487 1488 """ 1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 1490 if mod_list: 1491 lu.LogInfo("Promoted nodes to master candidate role: %s", 1492 utils.CommaJoin(node.name for node in mod_list)) 1493 for name in mod_list: 1494 lu.context.ReaddNode(name) 1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1496 if mc_now > mc_max: 1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 1498 (mc_now, mc_max))
1499
1500 1501 -def _DecideSelfPromotion(lu, exceptions=None):
1502 """Decide whether I should promote myself as a master candidate. 1503 1504 """ 1505 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 1506 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1507 # the new node will increase mc_max with one, so: 1508 mc_should = min(mc_should + 1, cp_size) 1509 return mc_now < mc_should
1510
1511 1512 -def _CalculateGroupIPolicy(cluster, group):
1513 """Calculate instance policy for group. 1514 1515 """ 1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1517
1518 1519 -def _ComputeViolatingInstances(ipolicy, instances):
1520 """Computes a set of instances who violates given ipolicy. 1521 1522 @param ipolicy: The ipolicy to verify 1523 @type instances: object.Instance 1524 @param instances: List of instances to verify 1525 @return: A frozenset of instance names violating the ipolicy 1526 1527 """ 1528 return frozenset([inst.name for inst in instances 1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1530
1531 1532 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
1533 """Check that the brigdes needed by a list of nics exist. 1534 1535 """ 1536 cluster = lu.cfg.GetClusterInfo() 1537 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics] 1538 brlist = [params[constants.NIC_LINK] for params in paramslist 1539 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED] 1540 if brlist: 1541 result = lu.rpc.call_bridges_exist(target_node, brlist) 1542 result.Raise("Error checking bridges on destination node '%s'" % 1543 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1544
1545 1546 -def _CheckInstanceBridgesExist(lu, instance, node=None):
1547 """Check that the brigdes needed by an instance exist. 1548 1549 """ 1550 if node is None: 1551 node = instance.primary_node 1552 _CheckNicsBridgesExist(lu, instance.nics, node)
1553
1554 1555 -def _CheckOSVariant(os_obj, name):
1556 """Check whether an OS name conforms to the os variants specification. 1557 1558 @type os_obj: L{objects.OS} 1559 @param os_obj: OS object to check 1560 @type name: string 1561 @param name: OS name passed by the user, to check for validity 1562 1563 """ 1564 variant = objects.OS.GetVariant(name) 1565 if not os_obj.supported_variants: 1566 if variant: 1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'" 1568 " passed)" % (os_obj.name, variant), 1569 errors.ECODE_INVAL) 1570 return 1571 if not variant: 1572 raise errors.OpPrereqError("OS name must include a variant", 1573 errors.ECODE_INVAL) 1574 1575 if variant not in os_obj.supported_variants: 1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1577
1578 1579 -def _GetNodeInstancesInner(cfg, fn):
1580 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1581
1582 1583 -def _GetNodeInstances(cfg, node_name):
1584 """Returns a list of all primary and secondary instances on a node. 1585 1586 """ 1587 1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1589
1590 1591 -def _GetNodePrimaryInstances(cfg, node_name):
1592 """Returns primary instances on a node. 1593 1594 """ 1595 return _GetNodeInstancesInner(cfg, 1596 lambda inst: node_name == inst.primary_node)
1597
1598 1599 -def _GetNodeSecondaryInstances(cfg, node_name):
1600 """Returns secondary instances on a node. 1601 1602 """ 1603 return _GetNodeInstancesInner(cfg, 1604 lambda inst: node_name in inst.secondary_nodes)
1605
1606 1607 -def _GetStorageTypeArgs(cfg, storage_type):
1608 """Returns the arguments for a storage type. 1609 1610 """ 1611 # Special case for file storage 1612 if storage_type == constants.ST_FILE: 1613 # storage.FileStorage wants a list of storage directories 1614 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]] 1615 1616 return []
1617
1618 1619 -def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1620 faulty = [] 1621 1622 for dev in instance.disks: 1623 cfg.SetDiskID(dev, node_name) 1624 1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks, 1626 instance)) 1627 result.Raise("Failed to get disk status from node %s" % node_name, 1628 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1629 1630 for idx, bdev_status in enumerate(result.payload): 1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1632 faulty.append(idx) 1633 1634 return faulty
1635
1636 1637 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1638 """Check the sanity of iallocator and node arguments and use the 1639 cluster-wide iallocator if appropriate. 1640 1641 Check that at most one of (iallocator, node) is specified. If none is 1642 specified, then the LU's opcode's iallocator slot is filled with the 1643 cluster-wide default iallocator. 1644 1645 @type iallocator_slot: string 1646 @param iallocator_slot: the name of the opcode iallocator slot 1647 @type node_slot: string 1648 @param node_slot: the name of the opcode target node slot 1649 1650 """ 1651 node = getattr(lu.op, node_slot, None) 1652 iallocator = getattr(lu.op, iallocator_slot, None) 1653 1654 if node is not None and iallocator is not None: 1655 raise errors.OpPrereqError("Do not specify both, iallocator and node", 1656 errors.ECODE_INVAL) 1657 elif node is None and iallocator is None: 1658 default_iallocator = lu.cfg.GetDefaultIAllocator() 1659 if default_iallocator: 1660 setattr(lu.op, iallocator_slot, default_iallocator) 1661 else: 1662 raise errors.OpPrereqError("No iallocator or node given and no" 1663 " cluster-wide default iallocator found;" 1664 " please specify either an iallocator or a" 1665 " node, or set a cluster-wide default" 1666 " iallocator")
1667
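# Illustrative sketch (not part of the original module): the decision rules
# applied by _CheckIAllocatorOrNode above, written as a standalone function.
# "default" stands in for cfg.GetDefaultIAllocator(); all names are
# placeholders.
def _example_pick_allocation(iallocator, node, default):
  if iallocator is not None and node is not None:
    raise ValueError("Do not specify both an iallocator and a node")
  if iallocator is None and node is None:
    if not default:
      raise ValueError("No iallocator or node given and no cluster-wide"
                       " default iallocator found")
    # fall back to the cluster-wide default iallocator
    return (default, None)
  return (iallocator, node)

# _example_pick_allocation(None, None, "hail") -> ("hail", None)
# _example_pick_allocation(None, "node3.example.com", "hail")
#   -> (None, "node3.example.com")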
1668 1669 -def _GetDefaultIAllocator(cfg, iallocator):
1670 """Decides on which iallocator to use. 1671 1672 @type cfg: L{config.ConfigWriter} 1673 @param cfg: Cluster configuration object 1674 @type iallocator: string or None 1675 @param iallocator: Iallocator specified in opcode 1676 @rtype: string 1677 @return: Iallocator name 1678 1679 """ 1680 if not iallocator: 1681 # Use default iallocator 1682 iallocator = cfg.GetDefaultIAllocator() 1683 1684 if not iallocator: 1685 raise errors.OpPrereqError("No iallocator was specified, neither in the" 1686 " opcode nor as a cluster-wide default", 1687 errors.ECODE_INVAL) 1688 1689 return iallocator
1690
1691 1692 -def _CheckHostnameSane(lu, name):
1693 """Ensures that a given hostname resolves to a 'sane' name. 1694 1695 The given name is required to be a prefix of the resolved hostname, 1696 to prevent accidental mismatches. 1697 1698 @param lu: the logical unit on behalf of which we're checking 1699 @param name: the name we should resolve and check 1700 @return: the resolved hostname object 1701 1702 """ 1703 hostname = netutils.GetHostname(name=name) 1704 if hostname.name != name: 1705 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) 1706 if not utils.MatchNameComponent(name, [hostname.name]): 1707 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" 1708 " same as given hostname '%s'") % 1709 (hostname.name, name), errors.ECODE_INVAL) 1710 return hostname
1711
1712 1713 -class LUClusterPostInit(LogicalUnit):
1714 """Logical unit for running hooks after cluster initialization. 1715 1716 """ 1717 HPATH = "cluster-init" 1718 HTYPE = constants.HTYPE_CLUSTER 1719
1720 - def BuildHooksEnv(self):
1721 """Build hooks env. 1722 1723 """ 1724 return { 1725 "OP_TARGET": self.cfg.GetClusterName(), 1726 }
1727
1728 - def BuildHooksNodes(self):
1729 """Build hooks nodes. 1730 1731 """ 1732 return ([], [self.cfg.GetMasterNode()])
1733
1734 - def Exec(self, feedback_fn):
1735 """Nothing to do. 1736 1737 """ 1738 return True
1739
1740 1741 -class LUClusterDestroy(LogicalUnit):
1742 """Logical unit for destroying the cluster. 1743 1744 """ 1745 HPATH = "cluster-destroy" 1746 HTYPE = constants.HTYPE_CLUSTER 1747
1748 - def BuildHooksEnv(self):
1749 """Build hooks env. 1750 1751 """ 1752 return { 1753 "OP_TARGET": self.cfg.GetClusterName(), 1754 }
1755
1756 - def BuildHooksNodes(self):
1757 """Build hooks nodes. 1758 1759 """ 1760 return ([], [])
1761
1762 - def CheckPrereq(self):
1763 """Check prerequisites. 1764 1765 This checks whether the cluster is empty. 1766 1767 Any errors are signaled by raising errors.OpPrereqError. 1768 1769 """ 1770 master = self.cfg.GetMasterNode() 1771 1772 nodelist = self.cfg.GetNodeList() 1773 if len(nodelist) != 1 or nodelist[0] != master: 1774 raise errors.OpPrereqError("There are still %d node(s) in" 1775 " this cluster." % (len(nodelist) - 1), 1776 errors.ECODE_INVAL) 1777 instancelist = self.cfg.GetInstanceList() 1778 if instancelist: 1779 raise errors.OpPrereqError("There are still %d instance(s) in" 1780 " this cluster." % len(instancelist), 1781 errors.ECODE_INVAL)
1782
1783 - def Exec(self, feedback_fn):
1784 """Destroys the cluster. 1785 1786 """ 1787 master_params = self.cfg.GetMasterNetworkParameters() 1788 1789 # Run post hooks on master node before it's removed 1790 _RunPostHook(self, master_params.name) 1791 1792 ems = self.cfg.GetUseExternalMipScript() 1793 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 1794 master_params, ems) 1795 if result.fail_msg: 1796 self.LogWarning("Error disabling the master IP address: %s", 1797 result.fail_msg) 1798 1799 return master_params.name
1800
1801 1802 -def _VerifyCertificate(filename):
1803 """Verifies a certificate for L{LUClusterVerifyConfig}. 1804 1805 @type filename: string 1806 @param filename: Path to PEM file 1807 1808 """ 1809 try: 1810 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1811 utils.ReadFile(filename)) 1812 except Exception, err: # pylint: disable=W0703 1813 return (LUClusterVerifyConfig.ETYPE_ERROR, 1814 "Failed to load X509 certificate %s: %s" % (filename, err)) 1815 1816 (errcode, msg) = \ 1817 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1818 constants.SSL_CERT_EXPIRATION_ERROR) 1819 1820 if msg: 1821 fnamemsg = "While verifying %s: %s" % (filename, msg) 1822 else: 1823 fnamemsg = None 1824 1825 if errcode is None: 1826 return (None, fnamemsg) 1827 elif errcode == utils.CERT_WARNING: 1828 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) 1829 elif errcode == utils.CERT_ERROR: 1830 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) 1831 1832 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1833
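# Illustrative sketch (not part of the original module): the three-way outcome
# of the expiry check above, reduced to plain numbers. The thresholds used here
# are placeholders; the real ones are constants.SSL_CERT_EXPIRATION_WARN and
# constants.SSL_CERT_EXPIRATION_ERROR, evaluated by utils.VerifyX509Certificate.
def _example_classify_expiry(days_left, warn_days=30, error_days=7):
  # returns the error type the certificate check would report
  if days_left <= error_days:
    return "ERROR"
  if days_left <= warn_days:
    return "WARNING"
  return None

# _example_classify_expiry(3) -> "ERROR"
# _example_classify_expiry(20) -> "WARNING"
# _example_classify_expiry(200) -> None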
1834 1835 -def _GetAllHypervisorParameters(cluster, instances):
1836 """Compute the set of all hypervisor parameters. 1837 1838 @type cluster: L{objects.Cluster} 1839 @param cluster: the cluster object 1840 @param instances: list of L{objects.Instance} 1841 @param instances: additional instances from which to obtain parameters 1842 @rtype: list of (origin, hypervisor, parameters) 1843 @return: a list with all parameters found, indicating the hypervisor they 1844 apply to, and the origin (can be "cluster", "os X", or "instance Y") 1845 1846 """ 1847 hvp_data = [] 1848 1849 for hv_name in cluster.enabled_hypervisors: 1850 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) 1851 1852 for os_name, os_hvp in cluster.os_hvp.items(): 1853 for hv_name, hv_params in os_hvp.items(): 1854 if hv_params: 1855 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) 1856 hvp_data.append(("os %s" % os_name, hv_name, full_params)) 1857 1858 # TODO: collapse identical parameter values in a single one 1859 for instance in instances: 1860 if instance.hvparams: 1861 hvp_data.append(("instance %s" % instance.name, instance.hypervisor, 1862 cluster.FillHV(instance))) 1863 1864 return hvp_data
1865
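# Illustrative example (not part of the original module): the shape of the
# list built by _GetAllHypervisorParameters. Hypervisor names, OS names and
# parameter values below are placeholders.
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz-xenU"}),
#    ("os debootstrap", "xen-pvm", {"kernel_path": "/boot/vmlinuz-os"}),
#    ("instance web1", "kvm", {"acpi": True})]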
1866 1867 -class _VerifyErrors(object):
1868 """Mix-in for cluster/group verify LUs. 1869 1870 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 1871 self.op and self._feedback_fn to be available.) 1872 1873 """ 1874 1875 ETYPE_FIELD = "code" 1876 ETYPE_ERROR = "ERROR" 1877 ETYPE_WARNING = "WARNING" 1878
1879 - def _Error(self, ecode, item, msg, *args, **kwargs):
1880 """Format an error message. 1881 1882 Based on the opcode's error_codes parameter, either format a 1883 parseable error code, or a simpler error string. 1884 1885 This must be called only from Exec and functions called from Exec. 1886 1887 """ 1888 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1889 itype, etxt, _ = ecode 1890 # first complete the msg 1891 if args: 1892 msg = msg % args 1893 # then format the whole message 1894 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 1895 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1896 else: 1897 if item: 1898 item = " " + item 1899 else: 1900 item = "" 1901 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1902 # and finally report it via the feedback_fn 1903 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1904
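# Illustrative sketch (not part of the original module): the two message
# layouts produced by _Error above, with placeholder error codes and names.
def _example_format_verify_msg(parseable, ltype, etxt, itype, item, msg):
  if parseable:
    # machine-readable form used when op.error_codes is set
    return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  if item:
    item = " " + item
  else:
    item = ""
  return "%s: %s%s: %s" % (ltype, itype, item, msg)

# parseable: "ERROR:ENODESSH:node:node2:ssh verification failed"
# plain:     "ERROR: node node2: ssh verification failed"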
1905 - def _ErrorIf(self, cond, ecode, *args, **kwargs):
1906 """Log an error message if the passed condition is True. 1907 1908 """ 1909 cond = (bool(cond) 1910 or self.op.debug_simulate_errors) # pylint: disable=E1101 1911 1912 # If the error code is in the list of ignored errors, demote the error to a 1913 # warning 1914 (_, etxt, _) = ecode 1915 if etxt in self.op.ignore_errors: # pylint: disable=E1101 1916 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING 1917 1918 if cond: 1919 self._Error(ecode, *args, **kwargs) 1920 1921 # do not mark the operation as failed for WARN cases only 1922 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1923 self.bad = self.bad or cond
1924
1925 1926 -class LUClusterVerify(NoHooksLU):
1927 """Submits all jobs necessary to verify the cluster. 1928 1929 """ 1930 REQ_BGL = False 1931
1932 - def ExpandNames(self):
1933 self.needed_locks = {}
1934
1935 - def Exec(self, feedback_fn):
1936 jobs = [] 1937 1938 if self.op.group_name: 1939 groups = [self.op.group_name] 1940 depends_fn = lambda: None 1941 else: 1942 groups = self.cfg.GetNodeGroupList() 1943 1944 # Verify global configuration 1945 jobs.append([ 1946 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors) 1947 ]) 1948 1949 # Always depend on global verification 1950 depends_fn = lambda: [(-len(jobs), [])] 1951 1952 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group, 1953 ignore_errors=self.op.ignore_errors, 1954 depends=depends_fn())] 1955 for group in groups) 1956 1957 # Fix up all parameters 1958 for op in itertools.chain(*jobs): # pylint: disable=W0142 1959 op.debug_simulate_errors = self.op.debug_simulate_errors 1960 op.verbose = self.op.verbose 1961 op.error_codes = self.op.error_codes 1962 try: 1963 op.skip_checks = self.op.skip_checks 1964 except AttributeError: 1965 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 1966 1967 return ResultWithJobs(jobs)
1968
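# Illustrative example (not part of the original module): roughly the job list
# submitted by LUClusterVerify for a cluster with two node groups when no
# group name is given (group names are placeholders). Each inner list is one
# job; the relative "depends" entry produced by depends_fn points back at the
# configuration-verification job, so every group job waits for it.
#
#   jobs = [
#     [OpClusterVerifyConfig(...)],
#     [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])], ...)],
#     [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])], ...)],
#   ]
#
# (the exact relative indices depend on how many jobs precede each group job)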
1969 1970 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1971 """Verifies the cluster config. 1972 1973 """ 1974 REQ_BGL = False 1975
1976 - def _VerifyHVP(self, hvp_data):
1977 """Verifies locally the syntax of the hypervisor parameters. 1978 1979 """ 1980 for item, hv_name, hv_params in hvp_data: 1981 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 1982 (item, hv_name)) 1983 try: 1984 hv_class = hypervisor.GetHypervisorClass(hv_name) 1985 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1986 hv_class.CheckParameterSyntax(hv_params) 1987 except errors.GenericError, err: 1988 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1989
1990 - def ExpandNames(self):
1991 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 1992 self.share_locks = _ShareAll()
1993
1994 - def CheckPrereq(self):
1995 """Check prerequisites. 1996 1997 """ 1998 # Retrieve all information 1999 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 2000 self.all_node_info = self.cfg.GetAllNodesInfo() 2001 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2002
2003 - def Exec(self, feedback_fn):
2004 """Verify integrity of cluster, performing various test on nodes. 2005 2006 """ 2007 self.bad = False 2008 self._feedback_fn = feedback_fn 2009 2010 feedback_fn("* Verifying cluster config") 2011 2012 for msg in self.cfg.VerifyConfig(): 2013 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 2014 2015 feedback_fn("* Verifying cluster certificate files") 2016 2017 for cert_filename in constants.ALL_CERT_FILES: 2018 (errcode, msg) = _VerifyCertificate(cert_filename) 2019 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 2020 2021 feedback_fn("* Verifying hypervisor parameters") 2022 2023 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 2024 self.all_inst_info.values())) 2025 2026 feedback_fn("* Verifying all nodes belong to an existing group") 2027 2028 # We do this verification here because, should this bogus circumstance 2029 # occur, it would never be caught by VerifyGroup, which only acts on 2030 # nodes/instances reachable from existing node groups. 2031 2032 dangling_nodes = set(node.name for node in self.all_node_info.values() 2033 if node.group not in self.all_group_info) 2034 2035 dangling_instances = {} 2036 no_node_instances = [] 2037 2038 for inst in self.all_inst_info.values(): 2039 if inst.primary_node in dangling_nodes: 2040 dangling_instances.setdefault(inst.primary_node, []).append(inst.name) 2041 elif inst.primary_node not in self.all_node_info: 2042 no_node_instances.append(inst.name) 2043 2044 pretty_dangling = [ 2045 "%s (%s)" % 2046 (node.name, 2047 utils.CommaJoin(dangling_instances.get(node.name, 2048 ["no instances"]))) 2049 for node in dangling_nodes] 2050 2051 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 2052 None, 2053 "the following nodes (and their instances) belong to a non" 2054 " existing group: %s", utils.CommaJoin(pretty_dangling)) 2055 2056 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 2057 None, 2058 "the following instances have a non-existing primary-node:" 2059 " %s", utils.CommaJoin(no_node_instances)) 2060 2061 return not self.bad
2062
2063 2064 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2065 """Verifies the status of a node group. 2066 2067 """ 2068 HPATH = "cluster-verify" 2069 HTYPE = constants.HTYPE_CLUSTER 2070 REQ_BGL = False 2071 2072 _HOOKS_INDENT_RE = re.compile("^", re.M) 2073
2074 - class NodeImage(object):
2075 """A class representing the logical and physical status of a node. 2076 2077 @type name: string 2078 @ivar name: the node name to which this object refers 2079 @ivar volumes: a structure as returned from 2080 L{ganeti.backend.GetVolumeList} (runtime) 2081 @ivar instances: a list of running instances (runtime) 2082 @ivar pinst: list of configured primary instances (config) 2083 @ivar sinst: list of configured secondary instances (config) 2084 @ivar sbp: dictionary of {primary-node: list of instances} for all 2085 instances for which this node is secondary (config) 2086 @ivar mfree: free memory, as reported by hypervisor (runtime) 2087 @ivar dfree: free disk, as reported by the node (runtime) 2088 @ivar offline: the offline status (config) 2089 @type rpc_fail: boolean 2090 @ivar rpc_fail: whether the RPC verify call was successfull (overall, 2091 not whether the individual keys were correct) (runtime) 2092 @type lvm_fail: boolean 2093 @ivar lvm_fail: whether the RPC call didn't return valid LVM data 2094 @type hyp_fail: boolean 2095 @ivar hyp_fail: whether the RPC call didn't return the instance list 2096 @type ghost: boolean 2097 @ivar ghost: whether this is a known node or not (config) 2098 @type os_fail: boolean 2099 @ivar os_fail: whether the RPC call didn't return valid OS data 2100 @type oslist: list 2101 @ivar oslist: list of OSes as diagnosed by DiagnoseOS 2102 @type vm_capable: boolean 2103 @ivar vm_capable: whether the node can host instances 2104 2105 """
2106 - def __init__(self, offline=False, name=None, vm_capable=True):
2107 self.name = name 2108 self.volumes = {} 2109 self.instances = [] 2110 self.pinst = [] 2111 self.sinst = [] 2112 self.sbp = {} 2113 self.mfree = 0 2114 self.dfree = 0 2115 self.offline = offline 2116 self.vm_capable = vm_capable 2117 self.rpc_fail = False 2118 self.lvm_fail = False 2119 self.hyp_fail = False 2120 self.ghost = False 2121 self.os_fail = False 2122 self.oslist = {}
2123
2124 - def ExpandNames(self):
2125 # This raises errors.OpPrereqError on its own: 2126 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 2127 2128 # Get instances in node group; this is unsafe and needs verification later 2129 inst_names = \ 2130 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2131 2132 self.needed_locks = { 2133 locking.LEVEL_INSTANCE: inst_names, 2134 locking.LEVEL_NODEGROUP: [self.group_uuid], 2135 locking.LEVEL_NODE: [], 2136 } 2137 2138 self.share_locks = _ShareAll()
2139
2140 - def DeclareLocks(self, level):
2141 if level == locking.LEVEL_NODE: 2142 # Get members of node group; this is unsafe and needs verification later 2143 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 2144 2145 all_inst_info = self.cfg.GetAllInstancesInfo() 2146 2147 # In Exec(), we warn about mirrored instances that have primary and 2148 # secondary living in separate node groups. To fully verify that 2149 # volumes for these instances are healthy, we will need to do an 2150 # extra call to their secondaries. We ensure here those nodes will 2151 # be locked. 2152 for inst in self.owned_locks(locking.LEVEL_INSTANCE): 2153 # Important: access only the instances whose lock is owned 2154 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR: 2155 nodes.update(all_inst_info[inst].secondary_nodes) 2156 2157 self.needed_locks[locking.LEVEL_NODE] = nodes
2158
2159 - def CheckPrereq(self):
2160 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 2161 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 2162 2163 group_nodes = set(self.group_info.members) 2164 group_instances = \ 2165 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2166 2167 unlocked_nodes = \ 2168 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2169 2170 unlocked_instances = \ 2171 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) 2172 2173 if unlocked_nodes: 2174 raise errors.OpPrereqError("Missing lock for nodes: %s" % 2175 utils.CommaJoin(unlocked_nodes), 2176 errors.ECODE_STATE) 2177 2178 if unlocked_instances: 2179 raise errors.OpPrereqError("Missing lock for instances: %s" % 2180 utils.CommaJoin(unlocked_instances), 2181 errors.ECODE_STATE) 2182 2183 self.all_node_info = self.cfg.GetAllNodesInfo() 2184 self.all_inst_info = self.cfg.GetAllInstancesInfo() 2185 2186 self.my_node_names = utils.NiceSort(group_nodes) 2187 self.my_inst_names = utils.NiceSort(group_instances) 2188 2189 self.my_node_info = dict((name, self.all_node_info[name]) 2190 for name in self.my_node_names) 2191 2192 self.my_inst_info = dict((name, self.all_inst_info[name]) 2193 for name in self.my_inst_names) 2194 2195 # We detect here the nodes that will need the extra RPC calls for verifying 2196 # split LV volumes; they should be locked. 2197 extra_lv_nodes = set() 2198 2199 for inst in self.my_inst_info.values(): 2200 if inst.disk_template in constants.DTS_INT_MIRROR: 2201 for nname in inst.all_nodes: 2202 if self.all_node_info[nname].group != self.group_uuid: 2203 extra_lv_nodes.add(nname) 2204 2205 unlocked_lv_nodes = \ 2206 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2207 2208 if unlocked_lv_nodes: 2209 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 2210 utils.CommaJoin(unlocked_lv_nodes), 2211 errors.ECODE_STATE) 2212 self.extra_lv_nodes = list(extra_lv_nodes)
2213
2214 - def _VerifyNode(self, ninfo, nresult):
2215 """Perform some basic validation on data returned from a node. 2216 2217 - check the result data structure is well formed and has all the 2218 mandatory fields 2219 - check ganeti version 2220 2221 @type ninfo: L{objects.Node} 2222 @param ninfo: the node to check 2223 @param nresult: the results from the node 2224 @rtype: boolean 2225 @return: whether overall this call was successful (and we can expect 2226 reasonable values in the respose) 2227 2228 """ 2229 node = ninfo.name 2230 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2231 2232 # main result, nresult should be a non-empty dict 2233 test = not nresult or not isinstance(nresult, dict) 2234 _ErrorIf(test, constants.CV_ENODERPC, node, 2235 "unable to verify node: no data returned") 2236 if test: 2237 return False 2238 2239 # compares ganeti version 2240 local_version = constants.PROTOCOL_VERSION 2241 remote_version = nresult.get("version", None) 2242 test = not (remote_version and 2243 isinstance(remote_version, (list, tuple)) and 2244 len(remote_version) == 2) 2245 _ErrorIf(test, constants.CV_ENODERPC, node, 2246 "connection to node returned invalid data") 2247 if test: 2248 return False 2249 2250 test = local_version != remote_version[0] 2251 _ErrorIf(test, constants.CV_ENODEVERSION, node, 2252 "incompatible protocol versions: master %s," 2253 " node %s", local_version, remote_version[0]) 2254 if test: 2255 return False 2256 2257 # node seems compatible, we can actually try to look into its results 2258 2259 # full package version 2260 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2261 constants.CV_ENODEVERSION, node, 2262 "software version mismatch: master %s, node %s", 2263 constants.RELEASE_VERSION, remote_version[1], 2264 code=self.ETYPE_WARNING) 2265 2266 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2267 if ninfo.vm_capable and isinstance(hyp_result, dict): 2268 for hv_name, hv_result in hyp_result.iteritems(): 2269 test = hv_result is not None 2270 _ErrorIf(test, constants.CV_ENODEHV, node, 2271 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2272 2273 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2274 if ninfo.vm_capable and isinstance(hvp_result, list): 2275 for item, hv_name, hv_result in hvp_result: 2276 _ErrorIf(True, constants.CV_ENODEHV, node, 2277 "hypervisor %s parameter verify failure (source %s): %s", 2278 hv_name, item, hv_result) 2279 2280 test = nresult.get(constants.NV_NODESETUP, 2281 ["Missing NODESETUP results"]) 2282 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", 2283 "; ".join(test)) 2284 2285 return True
2286
2287 - def _VerifyNodeTime(self, ninfo, nresult, 2288 nvinfo_starttime, nvinfo_endtime):
2289 """Check the node time. 2290 2291 @type ninfo: L{objects.Node} 2292 @param ninfo: the node to check 2293 @param nresult: the remote results for the node 2294 @param nvinfo_starttime: the start time of the RPC call 2295 @param nvinfo_endtime: the end time of the RPC call 2296 2297 """ 2298 node = ninfo.name 2299 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2300 2301 ntime = nresult.get(constants.NV_TIME, None) 2302 try: 2303 ntime_merged = utils.MergeTime(ntime) 2304 except (ValueError, TypeError): 2305 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") 2306 return 2307 2308 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2309 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2310 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2311 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2312 else: 2313 ntime_diff = None 2314 2315 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, 2316 "Node time diverges by at least %s from master node time", 2317 ntime_diff)
2318
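# Illustrative example (not part of the original module): the clock-skew
# window checked above, with a hypothetical NODE_MAX_CLOCK_SKEW of 150
# seconds. For an RPC issued at t=1000 and answered at t=1002, node times
# between 850 and 1152 are accepted; a node reporting t=1300 is flagged as
# diverging by at least 298s (abs(1300 - 1002), measured against the RPC end
# time).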
2319 - def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2320 """Check the node LVM results. 2321 2322 @type ninfo: L{objects.Node} 2323 @param ninfo: the node to check 2324 @param nresult: the remote results for the node 2325 @param vg_name: the configured VG name 2326 2327 """ 2328 if vg_name is None: 2329 return 2330 2331 node = ninfo.name 2332 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2333 2334 # checks vg existence and size > 20G 2335 vglist = nresult.get(constants.NV_VGLIST, None) 2336 test = not vglist 2337 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") 2338 if not test: 2339 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2340 constants.MIN_VG_SIZE) 2341 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) 2342 2343 # check pv names 2344 pvlist = nresult.get(constants.NV_PVLIST, None) 2345 test = pvlist is None 2346 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node") 2347 if not test: 2348 # check that ':' is not present in PV names, since it's a 2349 # special character for lvcreate (denotes the range of PEs to 2350 # use on the PV) 2351 for _, pvname, owner_vg in pvlist: 2352 test = ":" in pvname 2353 _ErrorIf(test, constants.CV_ENODELVM, node, 2354 "Invalid character ':' in PV '%s' of VG '%s'", 2355 pvname, owner_vg)
2356
2357 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2358 """Check the node bridges. 2359 2360 @type ninfo: L{objects.Node} 2361 @param ninfo: the node to check 2362 @param nresult: the remote results for the node 2363 @param bridges: the expected list of bridges 2364 2365 """ 2366 if not bridges: 2367 return 2368 2369 node = ninfo.name 2370 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2371 2372 missing = nresult.get(constants.NV_BRIDGES, None) 2373 test = not isinstance(missing, list) 2374 _ErrorIf(test, constants.CV_ENODENET, node, 2375 "did not return valid bridge information") 2376 if not test: 2377 _ErrorIf(bool(missing), constants.CV_ENODENET, node, 2378 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2379
2380 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2381 """Check the results of user scripts presence and executability on the node 2382 2383 @type ninfo: L{objects.Node} 2384 @param ninfo: the node to check 2385 @param nresult: the remote results for the node 2386 2387 """ 2388 node = ninfo.name 2389 2390 test = not constants.NV_USERSCRIPTS in nresult 2391 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node, 2392 "did not return user scripts information") 2393 2394 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) 2395 if not test: 2396 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node, 2397 "user scripts not present or not executable: %s" % 2398 utils.CommaJoin(sorted(broken_scripts)))
2399
2400 - def _VerifyNodeNetwork(self, ninfo, nresult):
2401 """Check the node network connectivity results. 2402 2403 @type ninfo: L{objects.Node} 2404 @param ninfo: the node to check 2405 @param nresult: the remote results for the node 2406 2407 """ 2408 node = ninfo.name 2409 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2410 2411 test = constants.NV_NODELIST not in nresult 2412 _ErrorIf(test, constants.CV_ENODESSH, node, 2413 "node hasn't returned node ssh connectivity data") 2414 if not test: 2415 if nresult[constants.NV_NODELIST]: 2416 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2417 _ErrorIf(True, constants.CV_ENODESSH, node, 2418 "ssh communication with node '%s': %s", a_node, a_msg) 2419 2420 test = constants.NV_NODENETTEST not in nresult 2421 _ErrorIf(test, constants.CV_ENODENET, node, 2422 "node hasn't returned node tcp connectivity data") 2423 if not test: 2424 if nresult[constants.NV_NODENETTEST]: 2425 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2426 for anode in nlist: 2427 _ErrorIf(True, constants.CV_ENODENET, node, 2428 "tcp communication with node '%s': %s", 2429 anode, nresult[constants.NV_NODENETTEST][anode]) 2430 2431 test = constants.NV_MASTERIP not in nresult 2432 _ErrorIf(test, constants.CV_ENODENET, node, 2433 "node hasn't returned node master IP reachability data") 2434 if not test: 2435 if not nresult[constants.NV_MASTERIP]: 2436 if node == self.master_node: 2437 msg = "the master node cannot reach the master IP (not configured?)" 2438 else: 2439 msg = "cannot reach the master IP" 2440 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2441
2442 - def _VerifyInstance(self, instance, instanceconfig, node_image, 2443 diskstatus):
2444 """Verify an instance. 2445 2446 This function checks to see if the required block devices are 2447 available on the instance's node. 2448 2449 """ 2450 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2451 node_current = instanceconfig.primary_node 2452 2453 node_vol_should = {} 2454 instanceconfig.MapLVsByNode(node_vol_should) 2455 2456 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info) 2457 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig) 2458 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err)) 2459 2460 for node in node_vol_should: 2461 n_img = node_image[node] 2462 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2463 # ignore missing volumes on offline or broken nodes 2464 continue 2465 for volume in node_vol_should[node]: 2466 test = volume not in n_img.volumes 2467 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, 2468 "volume %s missing on node %s", volume, node) 2469 2470 if instanceconfig.admin_state == constants.ADMINST_UP: 2471 pri_img = node_image[node_current] 2472 test = instance not in pri_img.instances and not pri_img.offline 2473 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, 2474 "instance not running on its primary node %s", 2475 node_current) 2476 2477 diskdata = [(nname, success, status, idx) 2478 for (nname, disks) in diskstatus.items() 2479 for idx, (success, status) in enumerate(disks)] 2480 2481 for nname, success, bdev_status, idx in diskdata: 2482 # the 'ghost node' construction in Exec() ensures that we have a 2483 # node here 2484 snode = node_image[nname] 2485 bad_snode = snode.ghost or snode.offline 2486 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and 2487 not success and not bad_snode, 2488 constants.CV_EINSTANCEFAULTYDISK, instance, 2489 "couldn't retrieve status for disk/%s on %s: %s", 2490 idx, nname, bdev_status) 2491 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and 2492 success and bdev_status.ldisk_status == constants.LDS_FAULTY), 2493 constants.CV_EINSTANCEFAULTYDISK, instance, 2494 "disk/%s on %s is faulty", idx, nname)
2495
2496 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2497 """Verify if there are any unknown volumes in the cluster. 2498 2499 The .os, .swap and backup volumes are ignored. All other volumes are 2500 reported as unknown. 2501 2502 @type reserved: L{ganeti.utils.FieldSet} 2503 @param reserved: a FieldSet of reserved volume names 2504 2505 """ 2506 for node, n_img in node_image.items(): 2507 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2508 self.all_node_info[node].group != self.group_uuid): 2509 # skip non-healthy nodes 2510 continue 2511 for volume in n_img.volumes: 2512 test = ((node not in node_vol_should or 2513 volume not in node_vol_should[node]) and 2514 not reserved.Matches(volume)) 2515 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, 2516 "volume %s is unknown", volume)
2517
2518 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2519 """Verify N+1 Memory Resilience. 2520 2521 Check that if one single node dies we can still start all the 2522 instances it was primary for. 2523 2524 """ 2525 cluster_info = self.cfg.GetClusterInfo() 2526 for node, n_img in node_image.items(): 2527 # This code checks that every node which is now listed as 2528 # secondary has enough memory to host all instances it is 2529 # supposed to should a single other node in the cluster fail. 2530 # FIXME: not ready for failover to an arbitrary node 2531 # FIXME: does not support file-backed instances 2532 # WARNING: we currently take into account down instances as well 2533 # as up ones, considering that even if they're down someone 2534 # might want to start them even in the event of a node failure. 2535 if n_img.offline or self.all_node_info[node].group != self.group_uuid: 2536 # we're skipping nodes marked offline and nodes in other groups from 2537 # the N+1 warning, since most likely we don't have good memory 2538 # infromation from them; we already list instances living on such 2539 # nodes, and that's enough warning 2540 continue 2541 #TODO(dynmem): also consider ballooning out other instances 2542 for prinode, instances in n_img.sbp.items(): 2543 needed_mem = 0 2544 for instance in instances: 2545 bep = cluster_info.FillBE(instance_cfg[instance]) 2546 if bep[constants.BE_AUTO_BALANCE]: 2547 needed_mem += bep[constants.BE_MINMEM] 2548 test = n_img.mfree < needed_mem 2549 self._ErrorIf(test, constants.CV_ENODEN1, node, 2550 "not enough memory to accomodate instance failovers" 2551 " should node %s fail (%dMiB needed, %dMiB available)", 2552 prinode, needed_mem, n_img.mfree)
2553 2554 @classmethod
2555 - def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo, 2556 (files_all, files_opt, files_mc, files_vm)):
2557 """Verifies file checksums collected from all nodes. 2558 2559 @param errorif: Callback for reporting errors 2560 @param nodeinfo: List of L{objects.Node} objects 2561 @param master_node: Name of master node 2562 @param all_nvinfo: RPC results 2563 2564 """ 2565 # Define functions determining which nodes to consider for a file 2566 files2nodefn = [ 2567 (files_all, None), 2568 (files_mc, lambda node: (node.master_candidate or 2569 node.name == master_node)), 2570 (files_vm, lambda node: node.vm_capable), 2571 ] 2572 2573 # Build mapping from filename to list of nodes which should have the file 2574 nodefiles = {} 2575 for (files, fn) in files2nodefn: 2576 if fn is None: 2577 filenodes = nodeinfo 2578 else: 2579 filenodes = filter(fn, nodeinfo) 2580 nodefiles.update((filename, 2581 frozenset(map(operator.attrgetter("name"), filenodes))) 2582 for filename in files) 2583 2584 assert set(nodefiles) == (files_all | files_mc | files_vm) 2585 2586 fileinfo = dict((filename, {}) for filename in nodefiles) 2587 ignore_nodes = set() 2588 2589 for node in nodeinfo: 2590 if node.offline: 2591 ignore_nodes.add(node.name) 2592 continue 2593 2594 nresult = all_nvinfo[node.name] 2595 2596 if nresult.fail_msg or not nresult.payload: 2597 node_files = None 2598 else: 2599 node_files = nresult.payload.get(constants.NV_FILELIST, None) 2600 2601 test = not (node_files and isinstance(node_files, dict)) 2602 errorif(test, constants.CV_ENODEFILECHECK, node.name, 2603 "Node did not return file checksum data") 2604 if test: 2605 ignore_nodes.add(node.name) 2606 continue 2607 2608 # Build per-checksum mapping from filename to nodes having it 2609 for (filename, checksum) in node_files.items(): 2610 assert filename in nodefiles 2611 fileinfo[filename].setdefault(checksum, set()).add(node.name) 2612 2613 for (filename, checksums) in fileinfo.items(): 2614 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2615 2616 # Nodes having the file 2617 with_file = frozenset(node_name 2618 for nodes in fileinfo[filename].values() 2619 for node_name in nodes) - ignore_nodes 2620 2621 expected_nodes = nodefiles[filename] - ignore_nodes 2622 2623 # Nodes missing file 2624 missing_file = expected_nodes - with_file 2625 2626 if filename in files_opt: 2627 # All or no nodes 2628 errorif(missing_file and missing_file != expected_nodes, 2629 constants.CV_ECLUSTERFILECHECK, None, 2630 "File %s is optional, but it must exist on all or no" 2631 " nodes (not found on %s)", 2632 filename, utils.CommaJoin(utils.NiceSort(missing_file))) 2633 else: 2634 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2635 "File %s is missing from node(s) %s", filename, 2636 utils.CommaJoin(utils.NiceSort(missing_file))) 2637 2638 # Warn if a node has a file it shouldn't 2639 unexpected = with_file - expected_nodes 2640 errorif(unexpected, 2641 constants.CV_ECLUSTERFILECHECK, None, 2642 "File %s should not exist on node(s) %s", 2643 filename, utils.CommaJoin(utils.NiceSort(unexpected))) 2644 2645 # See if there are multiple versions of the file 2646 test = len(checksums) > 1 2647 if test: 2648 variants = ["variant %s on %s" % 2649 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes))) 2650 for (idx, (checksum, nodes)) in 2651 enumerate(sorted(checksums.items()))] 2652 else: 2653 variants = [] 2654 2655 errorif(test, constants.CV_ECLUSTERFILECHECK, None, 2656 "File %s found with %s different checksums (%s)", 2657 filename, len(checksums), "; ".join(variants))
2658
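# Illustrative example (not part of the original module): the per-file
# bookkeeping kept by _VerifyFiles. For a file that was found with two
# different checksums the structure looks roughly like this (path, checksum
# prefixes and node names are placeholders):
#
#   fileinfo["/var/lib/ganeti/example.conf"] = {
#     "0f3a...": set(["node1", "node2"]),
#     "9c7b...": set(["node3"]),
#   }
#
# which is then reported as the file being found with 2 different checksums.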
2659 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2660 drbd_map):
2661 """Verifies and the node DRBD status. 2662 2663 @type ninfo: L{objects.Node} 2664 @param ninfo: the node to check 2665 @param nresult: the remote results for the node 2666 @param instanceinfo: the dict of instances 2667 @param drbd_helper: the configured DRBD usermode helper 2668 @param drbd_map: the DRBD map as returned by 2669 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 2670 2671 """ 2672 node = ninfo.name 2673 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2674 2675 if drbd_helper: 2676 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 2677 test = (helper_result == None) 2678 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2679 "no drbd usermode helper returned") 2680 if helper_result: 2681 status, payload = helper_result 2682 test = not status 2683 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2684 "drbd usermode helper check unsuccessful: %s", payload) 2685 test = status and (payload != drbd_helper) 2686 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2687 "wrong drbd usermode helper: %s", payload) 2688 2689 # compute the DRBD minors 2690 node_drbd = {} 2691 for minor, instance in drbd_map[node].items(): 2692 test = instance not in instanceinfo 2693 _ErrorIf(test, constants.CV_ECLUSTERCFG, None, 2694 "ghost instance '%s' in temporary DRBD map", instance) 2695 # ghost instance should not be running, but otherwise we 2696 # don't give double warnings (both ghost instance and 2697 # unallocated minor in use) 2698 if test: 2699 node_drbd[minor] = (instance, False) 2700 else: 2701 instance = instanceinfo[instance] 2702 node_drbd[minor] = (instance.name, 2703 instance.admin_state == constants.ADMINST_UP) 2704 2705 # and now check them 2706 used_minors = nresult.get(constants.NV_DRBDLIST, []) 2707 test = not isinstance(used_minors, (tuple, list)) 2708 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2709 "cannot parse drbd status file: %s", str(used_minors)) 2710 if test: 2711 # we cannot check drbd status 2712 return 2713 2714 for minor, (iname, must_exist) in node_drbd.items(): 2715 test = minor not in used_minors and must_exist 2716 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2717 "drbd minor %d of instance %s is not active", minor, iname) 2718 for minor in used_minors: 2719 test = minor not in node_drbd 2720 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2721 "unallocated drbd minor %d is in use", minor)
2722
2723 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2724 """Builds the node OS structures. 2725 2726 @type ninfo: L{objects.Node} 2727 @param ninfo: the node to check 2728 @param nresult: the remote results for the node 2729 @param nimg: the node image object 2730 2731 """ 2732 node = ninfo.name 2733 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2734 2735 remote_os = nresult.get(constants.NV_OSLIST, None) 2736 test = (not isinstance(remote_os, list) or 2737 not compat.all(isinstance(v, list) and len(v) == 7 2738 for v in remote_os)) 2739 2740 _ErrorIf(test, constants.CV_ENODEOS, node, 2741 "node hasn't returned valid OS data") 2742 2743 nimg.os_fail = test 2744 2745 if test: 2746 return 2747 2748 os_dict = {} 2749 2750 for (name, os_path, status, diagnose, 2751 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 2752 2753 if name not in os_dict: 2754 os_dict[name] = [] 2755 2756 # parameters is a list of lists instead of list of tuples due to 2757 # JSON lacking a real tuple type, fix it: 2758 parameters = [tuple(v) for v in parameters] 2759 os_dict[name].append((os_path, status, diagnose, 2760 set(variants), set(parameters), set(api_ver))) 2761 2762 nimg.oslist = os_dict
2763
2764 - def _VerifyNodeOS(self, ninfo, nimg, base):
2765 """Verifies the node OS list. 2766 2767 @type ninfo: L{objects.Node} 2768 @param ninfo: the node to check 2769 @param nimg: the node image object 2770 @param base: the 'template' node we match against (e.g. from the master) 2771 2772 """ 2773 node = ninfo.name 2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2775 2776 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 2777 2778 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 2779 for os_name, os_data in nimg.oslist.items(): 2780 assert os_data, "Empty OS status for OS %s?!" % os_name 2781 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 2782 _ErrorIf(not f_status, constants.CV_ENODEOS, node, 2783 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 2784 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node, 2785 "OS '%s' has multiple entries (first one shadows the rest): %s", 2786 os_name, utils.CommaJoin([v[0] for v in os_data])) 2787 # comparisons with the 'base' image 2788 test = os_name not in base.oslist 2789 _ErrorIf(test, constants.CV_ENODEOS, node, 2790 "Extra OS %s not present on reference node (%s)", 2791 os_name, base.name) 2792 if test: 2793 continue 2794 assert base.oslist[os_name], "Base node has empty OS status?" 2795 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 2796 if not b_status: 2797 # base OS is invalid, skipping 2798 continue 2799 for kind, a, b in [("API version", f_api, b_api), 2800 ("variants list", f_var, b_var), 2801 ("parameters", beautify_params(f_param), 2802 beautify_params(b_param))]: 2803 _ErrorIf(a != b, constants.CV_ENODEOS, node, 2804 "OS %s for %s differs from reference node %s: [%s] vs. [%s]", 2805 kind, os_name, base.name, 2806 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 2807 2808 # check any missing OSes 2809 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 2810 _ErrorIf(missing, constants.CV_ENODEOS, node, 2811 "OSes present on reference node %s but missing on this node: %s", 2812 base.name, utils.CommaJoin(missing))
2813
2814 - def _VerifyOob(self, ninfo, nresult):
2815 """Verifies out of band functionality of a node. 2816 2817 @type ninfo: L{objects.Node} 2818 @param ninfo: the node to check 2819 @param nresult: the remote results for the node 2820 2821 """ 2822 node = ninfo.name 2823 # We just have to verify the paths on master and/or master candidates 2824 # as the oob helper is invoked on the master 2825 if ((ninfo.master_candidate or ninfo.master_capable) and 2826 constants.NV_OOB_PATHS in nresult): 2827 for path_result in nresult[constants.NV_OOB_PATHS]: 2828 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2829
2830 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2831 """Verifies and updates the node volume data. 2832 2833 This function will update a L{NodeImage}'s internal structures 2834 with data from the remote call. 2835 2836 @type ninfo: L{objects.Node} 2837 @param ninfo: the node to check 2838 @param nresult: the remote results for the node 2839 @param nimg: the node image object 2840 @param vg_name: the configured VG name 2841 2842 """ 2843 node = ninfo.name 2844 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2845 2846 nimg.lvm_fail = True 2847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 2848 if vg_name is None: 2849 pass 2850 elif isinstance(lvdata, basestring): 2851 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s", 2852 utils.SafeEncode(lvdata)) 2853 elif not isinstance(lvdata, dict): 2854 _ErrorIf(True, constants.CV_ENODELVM, node, 2855 "rpc call to node failed (lvlist)") 2856 else: 2857 nimg.volumes = lvdata 2858 nimg.lvm_fail = False
2859
2860 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2861 """Verifies and updates the node instance list. 2862 2863 If the listing was successful, then updates this node's instance 2864 list. Otherwise, it marks the RPC call as failed for the instance 2865 list key. 2866 2867 @type ninfo: L{objects.Node} 2868 @param ninfo: the node to check 2869 @param nresult: the remote results for the node 2870 @param nimg: the node image object 2871 2872 """ 2873 idata = nresult.get(constants.NV_INSTANCELIST, None) 2874 test = not isinstance(idata, list) 2875 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 2876 "rpc call to node failed (instancelist): %s", 2877 utils.SafeEncode(str(idata))) 2878 if test: 2879 nimg.hyp_fail = True 2880 else: 2881 nimg.instances = idata
2882
2883 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2884 """Verifies and computes a node information map 2885 2886 @type ninfo: L{objects.Node} 2887 @param ninfo: the node to check 2888 @param nresult: the remote results for the node 2889 @param nimg: the node image object 2890 @param vg_name: the configured VG name 2891 2892 """ 2893 node = ninfo.name 2894 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2895 2896 # try to read free memory (from the hypervisor) 2897 hv_info = nresult.get(constants.NV_HVINFO, None) 2898 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 2899 _ErrorIf(test, constants.CV_ENODEHV, node, 2900 "rpc call to node failed (hvinfo)") 2901 if not test: 2902 try: 2903 nimg.mfree = int(hv_info["memory_free"]) 2904 except (ValueError, TypeError): 2905 _ErrorIf(True, constants.CV_ENODERPC, node, 2906 "node returned invalid nodeinfo, check hypervisor") 2907 2908 # FIXME: devise a free space model for file based instances as well 2909 if vg_name is not None: 2910 test = (constants.NV_VGLIST not in nresult or 2911 vg_name not in nresult[constants.NV_VGLIST]) 2912 _ErrorIf(test, constants.CV_ENODELVM, node, 2913 "node didn't return data for the volume group '%s'" 2914 " - it is either missing or broken", vg_name) 2915 if not test: 2916 try: 2917 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 2918 except (ValueError, TypeError): 2919 _ErrorIf(True, constants.CV_ENODERPC, node, 2920 "node returned invalid LVM info, check LVM status")
2921
2922 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2923 """Gets per-disk status information for all instances. 2924 2925 @type nodelist: list of strings 2926 @param nodelist: Node names 2927 @type node_image: dict of (name, L{objects.Node}) 2928 @param node_image: Node objects 2929 @type instanceinfo: dict of (name, L{objects.Instance}) 2930 @param instanceinfo: Instance objects 2931 @rtype: {instance: {node: [(succes, payload)]}} 2932 @return: a dictionary of per-instance dictionaries with nodes as 2933 keys and disk information as values; the disk information is a 2934 list of tuples (success, payload) 2935 2936 """ 2937 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2938 2939 node_disks = {} 2940 node_disks_devonly = {} 2941 diskless_instances = set() 2942 diskless = constants.DT_DISKLESS 2943 2944 for nname in nodelist: 2945 node_instances = list(itertools.chain(node_image[nname].pinst, 2946 node_image[nname].sinst)) 2947 diskless_instances.update(inst for inst in node_instances 2948 if instanceinfo[inst].disk_template == diskless) 2949 disks = [(inst, disk) 2950 for inst in node_instances 2951 for disk in instanceinfo[inst].disks] 2952 2953 if not disks: 2954 # No need to collect data 2955 continue 2956 2957 node_disks[nname] = disks 2958 2959 # _AnnotateDiskParams makes already copies of the disks 2960 devonly = [] 2961 for (inst, dev) in disks: 2962 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg) 2963 self.cfg.SetDiskID(anno_disk, nname) 2964 devonly.append(anno_disk) 2965 2966 node_disks_devonly[nname] = devonly 2967 2968 assert len(node_disks) == len(node_disks_devonly) 2969 2970 # Collect data from all nodes with disks 2971 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 2972 node_disks_devonly) 2973 2974 assert len(result) == len(node_disks) 2975 2976 instdisk = {} 2977 2978 for (nname, nres) in result.items(): 2979 disks = node_disks[nname] 2980 2981 if nres.offline: 2982 # No data from this node 2983 data = len(disks) * [(False, "node offline")] 2984 else: 2985 msg = nres.fail_msg 2986 _ErrorIf(msg, constants.CV_ENODERPC, nname, 2987 "while getting disk information: %s", msg) 2988 if msg: 2989 # No data from this node 2990 data = len(disks) * [(False, msg)] 2991 else: 2992 data = [] 2993 for idx, i in enumerate(nres.payload): 2994 if isinstance(i, (tuple, list)) and len(i) == 2: 2995 data.append(i) 2996 else: 2997 logging.warning("Invalid result from node %s, entry %d: %s", 2998 nname, idx, i) 2999 data.append((False, "Invalid result from the remote node")) 3000 3001 for ((inst, _), status) in zip(disks, data): 3002 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 3003 3004 # Add empty entries for diskless instances. 3005 for inst in diskless_instances: 3006 assert inst not in instdisk 3007 instdisk[inst] = {} 3008 3009 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 3010 len(nnames) <= len(instanceinfo[inst].all_nodes) and 3011 compat.all(isinstance(s, (tuple, list)) and 3012 len(s) == 2 for s in statuses) 3013 for inst, nnames in instdisk.items() 3014 for nname, statuses in nnames.items()) 3015 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure" 3016 3017 return instdisk
3018 3019 @staticmethod
3020 - def _SshNodeSelector(group_uuid, all_nodes):
3021 """Create endless iterators for all potential SSH check hosts. 3022 3023 """ 3024 nodes = [node for node in all_nodes 3025 if (node.group != group_uuid and 3026 not node.offline)] 3027 keyfunc = operator.attrgetter("group") 3028 3029 return map(itertools.cycle, 3030 [sorted(map(operator.attrgetter("name"), names)) 3031 for _, names in itertools.groupby(sorted(nodes, key=keyfunc), 3032 keyfunc)])
3033 3034 @classmethod
3035 - def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3036 """Choose which nodes should talk to which other nodes. 3037 3038 We will make nodes contact all nodes in their group, and one node from 3039 every other group. 3040 3041 @warning: This algorithm has a known issue if one node group is much 3042 smaller than others (e.g. just one node). In such a case all other 3043 nodes will talk to the single node. 3044 3045 """ 3046 online_nodes = sorted(node.name for node in group_nodes if not node.offline) 3047 sel = cls._SshNodeSelector(group_uuid, all_nodes) 3048 3049 return (online_nodes, 3050 dict((name, sorted([i.next() for i in sel])) 3051 for name in online_nodes))
3052
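# Illustrative example (not part of the original module): the value returned
# by _SelectSshCheckNodes for a two-node group, assuming exactly one other
# group whose online members are "other1" and "other2" (all names are
# placeholders). Each node checks every node of its own group plus one node
# per other group, cycling through that group's members:
#
#   (["node1", "node2"],
#    {"node1": ["other1"],
#     "node2": ["other2"]})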
3053 - def BuildHooksEnv(self):
3054 """Build hooks env. 3055 3056 Cluster-Verify hooks just ran in the post phase and their failure makes 3057 the output be logged in the verify output and the verification to fail. 3058 3059 """ 3060 env = { 3061 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()) 3062 } 3063 3064 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags())) 3065 for node in self.my_node_info.values()) 3066 3067 return env
3068
3069 - def BuildHooksNodes(self):
3070 """Build hooks nodes. 3071 3072 """ 3073 return ([], self.my_node_names)
3074
3075 - def Exec(self, feedback_fn):
3076 """Verify integrity of the node group, performing various test on nodes. 3077 3078 """ 3079 # This method has too many local variables. pylint: disable=R0914 3080 feedback_fn("* Verifying group '%s'" % self.group_info.name) 3081 3082 if not self.my_node_names: 3083 # empty node group 3084 feedback_fn("* Empty node group, skipping verification") 3085 return True 3086 3087 self.bad = False 3088 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3089 verbose = self.op.verbose 3090 self._feedback_fn = feedback_fn 3091 3092 vg_name = self.cfg.GetVGName() 3093 drbd_helper = self.cfg.GetDRBDHelper() 3094 cluster = self.cfg.GetClusterInfo() 3095 groupinfo = self.cfg.GetAllNodeGroupsInfo() 3096 hypervisors = cluster.enabled_hypervisors 3097 node_data_list = [self.my_node_info[name] for name in self.my_node_names] 3098 3099 i_non_redundant = [] # Non redundant instances 3100 i_non_a_balanced = [] # Non auto-balanced instances 3101 i_offline = 0 # Count of offline instances 3102 n_offline = 0 # Count of offline nodes 3103 n_drained = 0 # Count of nodes being drained 3104 node_vol_should = {} 3105 3106 # FIXME: verify OS list 3107 3108 # File verification 3109 filemap = _ComputeAncillaryFiles(cluster, False) 3110 3111 # do local checksums 3112 master_node = self.master_node = self.cfg.GetMasterNode() 3113 master_ip = self.cfg.GetMasterIP() 3114 3115 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names)) 3116 3117 user_scripts = [] 3118 if self.cfg.GetUseExternalMipScript(): 3119 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT) 3120 3121 node_verify_param = { 3122 constants.NV_FILELIST: 3123 utils.UniqueSequence(filename 3124 for files in filemap 3125 for filename in files), 3126 constants.NV_NODELIST: 3127 self._SelectSshCheckNodes(node_data_list, self.group_uuid, 3128 self.all_node_info.values()), 3129 constants.NV_HYPERVISOR: hypervisors, 3130 constants.NV_HVPARAMS: 3131 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()), 3132 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip) 3133 for node in node_data_list 3134 if not node.offline], 3135 constants.NV_INSTANCELIST: hypervisors, 3136 constants.NV_VERSION: None, 3137 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 3138 constants.NV_NODESETUP: None, 3139 constants.NV_TIME: None, 3140 constants.NV_MASTERIP: (master_node, master_ip), 3141 constants.NV_OSLIST: None, 3142 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 3143 constants.NV_USERSCRIPTS: user_scripts, 3144 } 3145 3146 if vg_name is not None: 3147 node_verify_param[constants.NV_VGLIST] = None 3148 node_verify_param[constants.NV_LVLIST] = vg_name 3149 node_verify_param[constants.NV_PVLIST] = [vg_name] 3150 3151 if drbd_helper: 3152 node_verify_param[constants.NV_DRBDLIST] = None 3153 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 3154 3155 # bridge checks 3156 # FIXME: this needs to be changed per node-group, not cluster-wide 3157 bridges = set() 3158 default_nicpp = cluster.nicparams[constants.PP_DEFAULT] 3159 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3160 bridges.add(default_nicpp[constants.NIC_LINK]) 3161 for instance in self.my_inst_info.values(): 3162 for nic in instance.nics: 3163 full_nic = cluster.SimpleFillNIC(nic.nicparams) 3164 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 3165 bridges.add(full_nic[constants.NIC_LINK]) 3166 3167 if bridges: 3168 node_verify_param[constants.NV_BRIDGES] = list(bridges) 3169 3170 # Build our expected cluster state 3171 
node_image = dict((node.name, self.NodeImage(offline=node.offline, 3172 name=node.name, 3173 vm_capable=node.vm_capable)) 3174 for node in node_data_list) 3175 3176 # Gather OOB paths 3177 oob_paths = [] 3178 for node in self.all_node_info.values(): 3179 path = _SupportsOob(self.cfg, node) 3180 if path and path not in oob_paths: 3181 oob_paths.append(path) 3182 3183 if oob_paths: 3184 node_verify_param[constants.NV_OOB_PATHS] = oob_paths 3185 3186 for instance in self.my_inst_names: 3187 inst_config = self.my_inst_info[instance] 3188 if inst_config.admin_state == constants.ADMINST_OFFLINE: 3189 i_offline += 1 3190 3191 for nname in inst_config.all_nodes: 3192 if nname not in node_image: 3193 gnode = self.NodeImage(name=nname) 3194 gnode.ghost = (nname not in self.all_node_info) 3195 node_image[nname] = gnode 3196 3197 inst_config.MapLVsByNode(node_vol_should) 3198 3199 pnode = inst_config.primary_node 3200 node_image[pnode].pinst.append(instance) 3201 3202 for snode in inst_config.secondary_nodes: 3203 nimg = node_image[snode] 3204 nimg.sinst.append(instance) 3205 if pnode not in nimg.sbp: 3206 nimg.sbp[pnode] = [] 3207 nimg.sbp[pnode].append(instance) 3208 3209 # At this point, we have the in-memory data structures complete, 3210 # except for the runtime information, which we'll gather next 3211 3212 # Due to the way our RPC system works, exact response times cannot be 3213 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 3214 # time before and after executing the request, we can at least have a time 3215 # window. 3216 nvinfo_starttime = time.time() 3217 all_nvinfo = self.rpc.call_node_verify(self.my_node_names, 3218 node_verify_param, 3219 self.cfg.GetClusterName()) 3220 nvinfo_endtime = time.time() 3221 3222 if self.extra_lv_nodes and vg_name is not None: 3223 extra_lv_nvinfo = \ 3224 self.rpc.call_node_verify(self.extra_lv_nodes, 3225 {constants.NV_LVLIST: vg_name}, 3226 self.cfg.GetClusterName()) 3227 else: 3228 extra_lv_nvinfo = {} 3229 3230 all_drbd_map = self.cfg.ComputeDRBDMap() 3231 3232 feedback_fn("* Gathering disk information (%s nodes)" % 3233 len(self.my_node_names)) 3234 instdisk = self._CollectDiskInfo(self.my_node_names, node_image, 3235 self.my_inst_info) 3236 3237 feedback_fn("* Verifying configuration file consistency") 3238 3239 # If not all nodes are being checked, we need to make sure the master node 3240 # and a non-checked vm_capable node are in the list. 3241 absent_nodes = set(self.all_node_info).difference(self.my_node_info) 3242 if absent_nodes: 3243 vf_nvinfo = all_nvinfo.copy() 3244 vf_node_info = list(self.my_node_info.values()) 3245 additional_nodes = [] 3246 if master_node not in self.my_node_info: 3247 additional_nodes.append(master_node) 3248 vf_node_info.append(self.all_node_info[master_node]) 3249 # Add the first vm_capable node we find which is not included, 3250 # excluding the master node (which we already have) 3251 for node in absent_nodes: 3252 nodeinfo = self.all_node_info[node] 3253 if (nodeinfo.vm_capable