Package ganeti :: Module cmdlib

Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
   29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import logging 
   36  import copy 
   37  import OpenSSL 
   38  import socket 
   39  import tempfile 
   40  import shutil 
   41  import itertools 
   42  import operator 
   43   
   44  from ganeti import ssh 
   45  from ganeti import utils 
   46  from ganeti import errors 
   47  from ganeti import hypervisor 
   48  from ganeti import locking 
   49  from ganeti import constants 
   50  from ganeti import objects 
   51  from ganeti import ssconf 
   52  from ganeti import uidpool 
   53  from ganeti import compat 
   54  from ganeti import masterd 
   55  from ganeti import netutils 
   56  from ganeti import query 
   57  from ganeti import qlang 
   58  from ganeti import opcodes 
   59  from ganeti import ht 
   60  from ganeti import rpc 
   61  from ganeti import runtime 
   62  from ganeti import pathutils 
   63  from ganeti import vcluster 
   64  from ganeti import network 
   65  from ganeti.masterd import iallocator 
   66   
   67  import ganeti.masterd.instance # pylint: disable=W0611 
   68   
   69   
   70  # States of instance 
   71  INSTANCE_DOWN = [constants.ADMINST_DOWN] 
   72  INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP] 
   73  INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE] 
   74   
   75  #: Instance status in which an instance can be marked as offline/online 
   76  CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([ 
   77    constants.ADMINST_OFFLINE, 
   78    ])) 
79 80 81 -class ResultWithJobs:
82 """Data container for LU results with jobs. 83 84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized 85 by L{mcpu._ProcessResult}. The latter will then submit the jobs 86 contained in the C{jobs} attribute and include the job IDs in the opcode 87 result. 88 89 """
90 - def __init__(self, jobs, **kwargs):
91 """Initializes this class. 92 93 Additional return values can be specified as keyword arguments. 94 95 @type jobs: list of lists of L{opcode.OpCode} 96 @param jobs: A list of lists of opcode objects 97 98 """ 99 self.jobs = jobs 100 self.other = kwargs
101
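# Illustrative sketch (not part of ganeti.cmdlib proper): how an LU's Exec
# method might use ResultWithJobs. The opcode and the extra keyword argument
# below are assumptions made purely for the example.
#
#   def Exec(self, feedback_fn):
#     # ... do the LU's own work first ...
#     follow_up = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
#     # Keyword arguments end up in the "other" attribute and are merged into
#     # the opcode result by mcpu._ProcessResult, together with the job IDs.
#     return ResultWithJobs(follow_up, warnings=[])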
102 103 -class LogicalUnit(object):
104 """Logical Unit base class. 105 106 Subclasses must follow these rules: 107 - implement ExpandNames 108 - implement CheckPrereq (except when tasklets are used) 109 - implement Exec (except when tasklets are used) 110 - implement BuildHooksEnv 111 - implement BuildHooksNodes 112 - redefine HPATH and HTYPE 113 - optionally redefine their run requirements: 114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively 115 116 Note that all commands require root permissions. 117 118 @ivar dry_run_result: the value (if any) that will be returned to the caller 119 in dry-run mode (signalled by opcode dry_run parameter) 120 121 """ 122 HPATH = None 123 HTYPE = None 124 REQ_BGL = True 125
126 - def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit. 128 129 This needs to be overridden in derived classes in order to check op 130 validity. 131 132 """ 133 self.proc = processor 134 self.op = op 135 self.cfg = context.cfg 136 self.glm = context.glm 137 # readability alias 138 self.owned_locks = context.glm.list_owned 139 self.context = context 140 self.rpc = rpc_runner 141 142 # Dictionaries used to declare locking needs to mcpu 143 self.needed_locks = None 144 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False) 146 147 self.add_locks = {} 148 self.remove_locks = {} 149 150 # Used to force good behavior when calling helper functions 151 self.recalculate_locks = {} 152 153 # logging 154 self.Log = processor.Log # pylint: disable=C0103 155 self.LogWarning = processor.LogWarning # pylint: disable=C0103 156 self.LogInfo = processor.LogInfo # pylint: disable=C0103 157 self.LogStep = processor.LogStep # pylint: disable=C0103 158 # support for dry-run 159 self.dry_run_result = None 160 # support for generic debug attribute 161 if (not hasattr(self.op, "debug_level") or 162 not isinstance(self.op.debug_level, int)): 163 self.op.debug_level = 0 164 165 # Tasklets 166 self.tasklets = None 167 168 # Validate opcode parameters and set defaults 169 self.op.Validate(True) 170 171 self.CheckArguments()
172
173 - def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments. 175 176 This method is for doing a simple syntactic check and ensure 177 validity of opcode parameters, without any cluster-related 178 checks. While the same can be accomplished in ExpandNames and/or 179 CheckPrereq, doing these separate is better because: 180 181 - ExpandNames is left as as purely a lock-related function 182 - CheckPrereq is run after we have acquired locks (and possible 183 waited for them) 184 185 The function is allowed to change the self.op attribute so that 186 later methods can no longer worry about missing parameters. 187 188 """ 189 pass
190
191 - def ExpandNames(self):
192 """Expand names for this LU. 193 194 This method is called before starting to execute the opcode, and it should 195 update all the parameters of the opcode to their canonical form (e.g. a 196 short node name must be fully expanded after this method has successfully 197 completed). This way locking, hooks, logging, etc. can work correctly. 198 199 LUs which implement this method must also populate the self.needed_locks 200 member, as a dict with lock levels as keys, and a list of needed lock names 201 as values. Rules: 202 203 - use an empty dict if you don't need any lock 204 - if you don't need any lock at a particular level omit that 205 level (note that in this case C{DeclareLocks} won't be called 206 at all for that level) 207 - if you need locks at a level, but you can't calculate it in 208 this function, initialise that level with an empty list and do 209 further processing in L{LogicalUnit.DeclareLocks} (see that 210 function's docstring) 211 - don't put anything for the BGL level 212 - if you want all locks at a level use L{locking.ALL_SET} as a value 213 214 If you need to share locks (rather than acquire them exclusively) at one 215 level you can modify self.share_locks, setting a true value (usually 1) for 216 that level. By default locks are not shared. 217 218 This function can also define a list of tasklets, which then will be 219 executed in order instead of the usual LU-level CheckPrereq and Exec 220 functions, if those are not defined by the LU. 221 222 Examples:: 223 224 # Acquire all nodes and one instance 225 self.needed_locks = { 226 locking.LEVEL_NODE: locking.ALL_SET, 227 locking.LEVEL_INSTANCE: ['instance1.example.com'], 228 } 229 # Acquire just two nodes 230 self.needed_locks = { 231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 232 } 233 # Acquire no locks 234 self.needed_locks = {} # No, you can't leave it to the default value None 235 236 """ 237 # The implementation of this method is mandatory only if the new LU is 238 # concurrent, so that old LUs don't need to be changed all at the same 239 # time. 240 if self.REQ_BGL: 241 self.needed_locks = {} # Exclusive LUs don't need locks. 242 else: 243 raise NotImplementedError
244
245 - def DeclareLocks(self, level):
246 """Declare LU locking needs for a level 247 248 While most LUs can just declare their locking needs at ExpandNames time, 249 sometimes there's the need to calculate some locks after having acquired 250 the ones before. This function is called just before acquiring locks at a 251 particular level, but after acquiring the ones at lower levels, and permits 252 such calculations. It can be used to modify self.needed_locks, and by 253 default it does nothing. 254 255 This function is only called if you have something already set in 256 self.needed_locks for the level. 257 258 @param level: Locking level which is going to be locked 259 @type level: member of L{ganeti.locking.LEVELS} 260 261 """
262
263 - def CheckPrereq(self):
264 """Check prerequisites for this LU. 265 266 This method should check that the prerequisites for the execution 267 of this LU are fulfilled. It can do internode communication, but 268 it should be idempotent - no cluster or system changes are 269 allowed. 270 271 The method should raise errors.OpPrereqError in case something is 272 not fulfilled. Its return value is ignored. 273 274 This method should also update all the parameters of the opcode to 275 their canonical form if it hasn't been done by ExpandNames before. 276 277 """ 278 if self.tasklets is not None: 279 for (idx, tl) in enumerate(self.tasklets): 280 logging.debug("Checking prerequisites for tasklet %s/%s", 281 idx + 1, len(self.tasklets)) 282 tl.CheckPrereq() 283 else: 284 pass
285
286 - def Exec(self, feedback_fn):
287 """Execute the LU. 288 289 This method should implement the actual work. It should raise 290 errors.OpExecError for failures that are somewhat dealt with in 291 code, or expected. 292 293 """ 294 if self.tasklets is not None: 295 for (idx, tl) in enumerate(self.tasklets): 296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 297 tl.Exec(feedback_fn) 298 else: 299 raise NotImplementedError
300
301 - def BuildHooksEnv(self):
302 """Build hooks environment for this LU. 303 304 @rtype: dict 305 @return: Dictionary containing the environment that will be used for 306 running the hooks for this LU. The keys of the dict must not be prefixed 307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner 308 will extend the environment with additional variables. If no environment 309 should be defined, an empty dictionary should be returned (not C{None}). 310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 311 will not be called. 312 313 """ 314 raise NotImplementedError
315
316 - def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks. 318 319 @rtype: tuple; (list, list) 320 @return: Tuple containing a list of node names on which the hook 321 should run before the execution and a list of node names on which the 322 hook should run after the execution. No nodes should be returned as an 323 empty list (and not None). 324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function 325 will not be called. 326 327 """ 328 raise NotImplementedError
329
330 - def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks. 332 333 This method is called every time a hooks phase is executed, and notifies 334 the Logical Unit about the hooks' result. The LU can then use it to alter 335 its result based on the hooks. By default the method does nothing and the 336 previous result is passed back unchanged but any LU can define it if it 337 wants to use the local cluster hook-scripts somehow. 338 339 @param phase: one of L{constants.HOOKS_PHASE_POST} or 340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 341 @param hook_results: the results of the multi-node hooks rpc call 342 @param feedback_fn: function used send feedback back to the caller 343 @param lu_result: the previous Exec result this LU had, or None 344 in the PRE phase 345 @return: the new Exec result, based on the previous result 346 and hook results 347 348 """ 349 # API must be kept, thus we ignore the unused argument and could 350 # be a function warnings 351 # pylint: disable=W0613,R0201 352 return lu_result
353
354 - def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance. 356 357 Many LUs that work on an instance take its name in self.op.instance_name 358 and need to expand it and then declare the expanded name for locking. This 359 function does it, and then updates self.op.instance_name to the expanded 360 name. It also initializes needed_locks as a dict, if this hasn't been done 361 before. 362 363 """ 364 if self.needed_locks is None: 365 self.needed_locks = {} 366 else: 367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 368 "_ExpandAndLockInstance called with instance-level locks set" 369 self.op.instance_name = _ExpandInstanceName(self.cfg, 370 self.op.instance_name) 371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
372
373 - def _LockInstancesNodes(self, primary_only=False, 374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking. 376 377 This function should be called after locking one or more instances to lock 378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 379 with all primary or secondary nodes for instances already locked and 380 present in self.needed_locks[locking.LEVEL_INSTANCE]. 381 382 It should be called from DeclareLocks, and for safety only works if 383 self.recalculate_locks[locking.LEVEL_NODE] is set. 384 385 In the future it may grow parameters to just lock some instance's nodes, or 386 to just lock primaries or secondary nodes, if needed. 387 388 If should be called in DeclareLocks in a way similar to:: 389 390 if level == locking.LEVEL_NODE: 391 self._LockInstancesNodes() 392 393 @type primary_only: boolean 394 @param primary_only: only lock primary nodes of locked instances 395 @param level: Which lock level to use for locking nodes 396 397 """ 398 assert level in self.recalculate_locks, \ 399 "_LockInstancesNodes helper function called with no nodes to recalculate" 400 401 # TODO: check if we're really been called with the instance locks held 402 403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 404 # future we might want to have different behaviors depending on the value 405 # of self.recalculate_locks[locking.LEVEL_NODE] 406 wanted_nodes = [] 407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE) 408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i): 409 wanted_nodes.append(instance.primary_node) 410 if not primary_only: 411 wanted_nodes.extend(instance.secondary_nodes) 412 413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE: 414 self.needed_locks[level] = wanted_nodes 415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND: 416 self.needed_locks[level].extend(wanted_nodes) 417 else: 418 raise errors.ProgrammerError("Unknown recalculation mode") 419 420 del self.recalculate_locks[level]
421
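# Illustrative sketch (not part of ganeti.cmdlib proper): a minimal LU built on
# the helpers above. The hook path, opcode fields and prerequisite checks are
# assumptions for the example; real LUs in this module follow the same shape
# with considerably more validation.
class _LUExampleInstanceNoop(LogicalUnit):
  HPATH = "instance-noop"            # assumed hook path
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    # Expand and lock the instance now; defer node locking to DeclareLocks
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def BuildHooksEnv(self):
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return (nl, nl)

  def Exec(self, feedback_fn):
    feedback_fn("Doing nothing for instance %s" % self.instance.name)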
422 423 -class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks. 425 426 This LU is intended as a parent for other LogicalUnits which will 427 run no hooks, in order to reduce duplicate code. 428 429 """ 430 HPATH = None 431 HTYPE = None 432
433 - def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu. 435 436 This just raises an error. 437 438 """ 439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
440
441 - def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU. 443 444 """ 445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446
447 448 -class Tasklet:
449 """Tasklet base class. 450 451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 452 they can mix legacy code with tasklets. Locking needs to be done in the LU, 453 tasklets know nothing about locks. 454 455 Subclasses must follow these rules: 456 - Implement CheckPrereq 457 - Implement Exec 458 459 """
460 - def __init__(self, lu):
461 self.lu = lu 462 463 # Shortcuts 464 self.cfg = lu.cfg 465 self.rpc = lu.rpc
466
467 - def CheckPrereq(self):
468 """Check prerequisites for this tasklets. 469 470 This method should check whether the prerequisites for the execution of 471 this tasklet are fulfilled. It can do internode communication, but it 472 should be idempotent - no cluster or system changes are allowed. 473 474 The method should raise errors.OpPrereqError in case something is not 475 fulfilled. Its return value is ignored. 476 477 This method should also update all parameters to their canonical form if it 478 hasn't been done before. 479 480 """ 481 pass
482
483 - def Exec(self, feedback_fn):
484 """Execute the tasklet. 485 486 This method should implement the actual work. It should raise 487 errors.OpExecError for failures that are somewhat dealt with in code, or 488 expected. 489 490 """ 491 raise NotImplementedError
492
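# Illustrative sketch (not part of ganeti.cmdlib proper): a tasklet and how an
# LU would hand work over to it. The names and the check performed are
# assumptions for the example only.
class _ExampleTasklet(Tasklet):
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name

  def CheckPrereq(self):
    if self.cfg.GetInstanceInfo(self.instance_name) is None:
      raise errors.OpPrereqError("Instance %s not known" % self.instance_name,
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Processing instance %s" % self.instance_name)

# An LU would typically create its tasklets in ExpandNames, e.g.:
#   self.tasklets = [_ExampleTasklet(self, name) for name in wanted_names]
# after which the base class CheckPrereq and Exec iterate over them.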
493 494 -class _QueryBase:
495 """Base for query utility classes. 496 497 """ 498 #: Attribute holding field definitions 499 FIELDS = None 500 501 #: Field to sort by 502 SORT_FIELD = "name" 503
504 - def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class. 506 507 """ 508 self.use_locking = use_locking 509 510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter, 511 namefield=self.SORT_FIELD) 512 self.requested_data = self.query.RequestedData() 513 self.names = self.query.RequestedNames() 514 515 # Sort only if no names were requested 516 self.sort_by_name = not self.names 517 518 self.do_locking = None 519 self.wanted = None
520
521 - def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query. 523 524 """ 525 if self.do_locking: 526 names = lu.owned_locks(lock_level) 527 else: 528 names = all_names 529 530 if self.wanted == locking.ALL_SET: 531 assert not self.names 532 # caller didn't specify names, so ordering is not important 533 return utils.NiceSort(names) 534 535 # caller specified names and we must keep the same order 536 assert self.names 537 assert not self.do_locking or lu.glm.is_owned(lock_level) 538 539 missing = set(self.wanted).difference(names) 540 if missing: 541 raise errors.OpExecError("Some items were removed before retrieving" 542 " their data: %s" % missing) 543 544 # Return expanded names 545 return self.wanted
546
547 - def ExpandNames(self, lu):
548 """Expand names for this query. 549 550 See L{LogicalUnit.ExpandNames}. 551 552 """ 553 raise NotImplementedError()
554
555 - def DeclareLocks(self, lu, level):
556 """Declare locks for this query. 557 558 See L{LogicalUnit.DeclareLocks}. 559 560 """ 561 raise NotImplementedError()
562
563 - def _GetQueryData(self, lu):
564 """Collects all data for this query. 565 566 @return: Query data object 567 568 """ 569 raise NotImplementedError()
570
571 - def NewStyleQuery(self, lu):
572 """Collect data and execute query. 573 574 """ 575 return query.GetQueryResponse(self.query, self._GetQueryData(lu), 576 sort_by_name=self.sort_by_name)
577
578 - def OldStyleQuery(self, lu):
579 """Collect data and execute query. 580 581 """ 582 return self.query.OldStyleQuery(self._GetQueryData(lu), 583 sort_by_name=self.sort_by_name)
584
585 586 -def _ShareAll():
587 """Returns a dict declaring all lock levels shared. 588 589 """ 590 return dict.fromkeys(locking.LEVELS, 1)
591
592 593 -def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method. 595 596 @param instance: The instance object 597 @type devs: List of L{objects.Disk} 598 @param devs: The root devices (not any of its children!) 599 @param cfg: The config object 600 @returns The annotated disk copies 601 @see L{rpc.AnnotateDiskParams} 602 603 """ 604 return rpc.AnnotateDiskParams(instance.disk_template, devs, 605 cfg.GetInstanceDiskParams(instance))
606
607 608 -def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes, 609 cur_group_uuid):
610 """Checks if node groups for locked instances are still correct. 611 612 @type cfg: L{config.ConfigWriter} 613 @param cfg: Cluster configuration 614 @type instances: dict; string as key, L{objects.Instance} as value 615 @param instances: Dictionary, instance name as key, instance object as value 616 @type owned_groups: iterable of string 617 @param owned_groups: List of owned groups 618 @type owned_nodes: iterable of string 619 @param owned_nodes: List of owned nodes 620 @type cur_group_uuid: string or None 621 @param cur_group_uuid: Optional group UUID to check against instance's groups 622 623 """ 624 for (name, inst) in instances.items(): 625 assert owned_nodes.issuperset(inst.all_nodes), \ 626 "Instance %s's nodes changed while we kept the lock" % name 627 628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups) 629 630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \ 631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
632
633 634 -def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups, 635 primary_only=False):
636 """Checks if the owned node groups are still correct for an instance. 637 638 @type cfg: L{config.ConfigWriter} 639 @param cfg: The cluster configuration 640 @type instance_name: string 641 @param instance_name: Instance name 642 @type owned_groups: set or frozenset 643 @param owned_groups: List of currently owned node groups 644 @type primary_only: boolean 645 @param primary_only: Whether to check node groups for only the primary node 646 647 """ 648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only) 649 650 if not owned_groups.issuperset(inst_groups): 651 raise errors.OpPrereqError("Instance %s's node groups changed since" 652 " locks were acquired, current groups are" 653 " are '%s', owning groups '%s'; retry the" 654 " operation" % 655 (instance_name, 656 utils.CommaJoin(inst_groups), 657 utils.CommaJoin(owned_groups)), 658 errors.ECODE_STATE) 659 660 return inst_groups
661
662 663 -def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct. 665 666 @type cfg: L{config.ConfigWriter} 667 @param cfg: The cluster configuration 668 @type group_uuid: string 669 @param group_uuid: Node group UUID 670 @type owned_instances: set or frozenset 671 @param owned_instances: List of currently owned instances 672 673 """ 674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid) 675 if owned_instances != wanted_instances: 676 raise errors.OpPrereqError("Instances in node group '%s' changed since" 677 " locks were acquired, wanted '%s', have '%s';" 678 " retry the operation" % 679 (group_uuid, 680 utils.CommaJoin(wanted_instances), 681 utils.CommaJoin(owned_instances)), 682 errors.ECODE_STATE) 683 684 return wanted_instances
685
686 687 -def _SupportsOob(cfg, node):
688 """Tells if node supports OOB. 689 690 @type cfg: L{config.ConfigWriter} 691 @param cfg: The cluster configuration 692 @type node: L{objects.Node} 693 @param node: The node 694 @return: The OOB script if supported or an empty string otherwise 695 696 """ 697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
698
699 700 -def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node. 702 703 @type cfg: L{config.ConfigWriter} 704 @param cfg: The cluster configuration 705 @type node: L{objects.Node} 706 @param node: The node 707 @rtype: bool 708 @return: The effective value of exclusive_storage 709 710 """ 711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
712
713 714 -def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node. 716 717 @type cfg: L{config.ConfigWriter} 718 @param cfg: The cluster configuration 719 @type nodename: string 720 @param nodename: The node 721 @rtype: bool 722 @return: The effective value of exclusive_storage 723 @raise errors.OpPrereqError: if no node exists with the given name 724 725 """ 726 ni = cfg.GetNodeInfo(nodename) 727 if ni is None: 728 raise errors.OpPrereqError("Invalid node name %s" % nodename, 729 errors.ECODE_NOENT) 730 return _IsExclusiveStorageEnabledNode(cfg, ni)
731
732 733 -def _CopyLockList(names):
734 """Makes a copy of a list of lock names. 735 736 Handles L{locking.ALL_SET} correctly. 737 738 """ 739 if names == locking.ALL_SET: 740 return locking.ALL_SET 741 else: 742 return names[:]
743
744 745 -def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names. 747 748 @type lu: L{LogicalUnit} 749 @param lu: the logical unit on whose behalf we execute 750 @type nodes: list 751 @param nodes: list of node names or None for all nodes 752 @rtype: list 753 @return: the list of nodes, sorted 754 @raise errors.ProgrammerError: if the nodes parameter is wrong type 755 756 """ 757 if nodes: 758 return [_ExpandNodeName(lu.cfg, name) for name in nodes] 759 760 return utils.NiceSort(lu.cfg.GetNodeList())
761
762 763 -def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names. 765 766 @type lu: L{LogicalUnit} 767 @param lu: the logical unit on whose behalf we execute 768 @type instances: list 769 @param instances: list of instance names or None for all instances 770 @rtype: list 771 @return: the list of instances, sorted 772 @raise errors.OpPrereqError: if the instances parameter is wrong type 773 @raise errors.OpPrereqError: if any of the passed instances is not found 774 775 """ 776 if instances: 777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 778 else: 779 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 780 return wanted
781
782 783 -def _GetUpdatedParams(old_params, update_dict, 784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary. 786 787 @type old_params: dict 788 @param old_params: old parameters 789 @type update_dict: dict 790 @param update_dict: dict containing new parameter values, or 791 constants.VALUE_DEFAULT to reset the parameter to its default 792 value 793 @param use_default: boolean 794 @type use_default: whether to recognise L{constants.VALUE_DEFAULT} 795 values as 'to be deleted' values 796 @param use_none: boolean 797 @type use_none: whether to recognise C{None} values as 'to be 798 deleted' values 799 @rtype: dict 800 @return: the new parameter dictionary 801 802 """ 803 params_copy = copy.deepcopy(old_params) 804 for key, val in update_dict.iteritems(): 805 if ((use_default and val == constants.VALUE_DEFAULT) or 806 (use_none and val is None)): 807 try: 808 del params_copy[key] 809 except KeyError: 810 pass 811 else: 812 params_copy[key] = val 813 return params_copy
814
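# Worked example (assumed values) for _GetUpdatedParams: a value of
# constants.VALUE_DEFAULT removes a key, other values overwrite or extend the
# original dictionary, which is never modified in place.
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"memory": constants.VALUE_DEFAULT, "disk": 10})
#   => {"vcpus": 2, "disk": 10}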
815 816 -def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy. 818 819 @param group_policy: whether this policy applies to a group and thus 820 we should support removal of policy entries 821 822 """ 823 use_none = use_default = group_policy 824 ipolicy = copy.deepcopy(old_ipolicy) 825 for key, value in new_ipolicy.items(): 826 if key not in constants.IPOLICY_ALL_KEYS: 827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key, 828 errors.ECODE_INVAL) 829 if key in constants.IPOLICY_ISPECS: 830 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value, 831 use_none=use_none, 832 use_default=use_default) 833 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES) 834 else: 835 if (not value or value == [constants.VALUE_DEFAULT] or 836 value == constants.VALUE_DEFAULT): 837 if group_policy: 838 del ipolicy[key] 839 else: 840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'" 841 " on the cluster'" % key, 842 errors.ECODE_INVAL) 843 else: 844 if key in constants.IPOLICY_PARAMETERS: 845 # FIXME: we assume all such values are float 846 try: 847 ipolicy[key] = float(value) 848 except (TypeError, ValueError), err: 849 raise errors.OpPrereqError("Invalid value for attribute" 850 " '%s': '%s', error: %s" % 851 (key, value, err), errors.ECODE_INVAL) 852 else: 853 # FIXME: we assume all others are lists; this should be redone 854 # in a nicer way 855 ipolicy[key] = list(value) 856 try: 857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy) 858 except errors.ConfigurationError, err: 859 raise errors.OpPrereqError("Invalid instance policy: %s" % err, 860 errors.ECODE_INVAL) 861 return ipolicy
862
863 864 -def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type. 866 867 @param base: The dict with the old data 868 @param updates: The dict with the new data 869 @param type_check: Dict suitable to ForceDictType to verify correct types 870 @returns: A new dict with updated and verified values 871 872 """ 873 def fn(old, value): 874 new = _GetUpdatedParams(old, value) 875 utils.ForceDictType(new, type_check) 876 return new
877 878 ret = copy.deepcopy(base) 879 ret.update(dict((key, fn(base.get(key, {}), value)) 880 for key, value in updates.items())) 881 return ret 882
883 884 -def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object 886 887 @param op_input: The input dict from the opcode 888 @param obj_input: The input dict from the objects 889 @return: The verified and updated dict 890 891 """ 892 if op_input: 893 invalid_hvs = set(op_input) - constants.HYPER_TYPES 894 if invalid_hvs: 895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:" 896 " %s" % utils.CommaJoin(invalid_hvs), 897 errors.ECODE_INVAL) 898 if obj_input is None: 899 obj_input = {} 900 type_check = constants.HVSTS_PARAMETER_TYPES 901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check) 902 903 return None
904
905 906 -def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object 908 909 @param op_input: The input dict from the opcode 910 @param obj_input: The input dict from the objects 911 @return: The verified and updated dict 912 """ 913 if op_input: 914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES 915 if invalid_dst: 916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" % 917 utils.CommaJoin(invalid_dst), 918 errors.ECODE_INVAL) 919 type_check = constants.DSS_PARAMETER_TYPES 920 if obj_input is None: 921 obj_input = {} 922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value, 923 type_check)) 924 for key, value in op_input.items()) 925 926 return None
927
928 929 -def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU. 931 932 @type lu: L{LogicalUnit} 933 @param level: Lock level 934 @type names: list or None 935 @param names: Names of locks to release 936 @type keep: list or None 937 @param keep: Names of locks to retain 938 939 """ 940 assert not (keep is not None and names is not None), \ 941 "Only one of the 'names' and the 'keep' parameters can be given" 942 943 if names is not None: 944 should_release = names.__contains__ 945 elif keep: 946 should_release = lambda name: name not in keep 947 else: 948 should_release = None 949 950 owned = lu.owned_locks(level) 951 if not owned: 952 # Not owning any lock at this level, do nothing 953 pass 954 955 elif should_release: 956 retain = [] 957 release = [] 958 959 # Determine which locks to release 960 for name in owned: 961 if should_release(name): 962 release.append(name) 963 else: 964 retain.append(name) 965 966 assert len(lu.owned_locks(level)) == (len(retain) + len(release)) 967 968 # Release just some locks 969 lu.glm.release(level, names=release) 970 971 assert frozenset(lu.owned_locks(level)) == frozenset(retain) 972 else: 973 # Release everything 974 lu.glm.release(level) 975 976 assert not lu.glm.is_owned(level), "No locks should be owned"
977
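# Illustrative sketch (assumed variable names): a typical _ReleaseLocks call
# once an LU has narrowed down the nodes it actually needs, keeping only the
# locks on the instance's own nodes and releasing everything else.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=self.cfg.GetInstanceInfo(self.op.instance_name).all_nodes)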
978 979 -def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name. 981 982 @type instances: list of L{objects.Instance} 983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value 984 985 """ 986 return dict(((node, vol), inst.name) 987 for inst in instances 988 for (node, vols) in inst.MapLVsByNode().items() 989 for vol in vols)
990
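# Worked example (assumed instance layout) for _MapInstanceDisksToNodes: every
# logical volume of every instance is keyed by its (node, volume) pair, e.g.
#
#   _MapInstanceDisksToNodes([inst1])
#   => {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}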
991 992 -def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node. 994 995 """ 996 hm = lu.proc.BuildHooksManager(lu) 997 try: 998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name]) 999 except Exception, err: # pylint: disable=W0703 1000 lu.LogWarning("Errors occurred running hooks on %s: %s", 1001 node_name, err)
1002
1003 1004 -def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid. 1006 1007 @type static: L{utils.FieldSet} 1008 @param static: static fields set 1009 @type dynamic: L{utils.FieldSet} 1010 @param dynamic: dynamic fields set 1011 1012 """ 1013 f = utils.FieldSet() 1014 f.Extend(static) 1015 f.Extend(dynamic) 1016 1017 delta = f.NonMatching(selected) 1018 if delta: 1019 raise errors.OpPrereqError("Unknown output fields selected: %s" 1020 % ",".join(delta), errors.ECODE_INVAL)
1021
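# Illustrative sketch (assumed field names): a query-style LU would call
# _CheckOutputFields from CheckArguments to reject unknown output fields, e.g.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("dfree", "dtotal"),
#                      selected=self.op.output_fields)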
1022 1023 -def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global. 1025 1026 If a global parameter is found, an L{errors.OpPrereqError} exception is 1027 raised. This is used to avoid setting global parameters for individual nodes. 1028 1029 @type params: dictionary 1030 @param params: Parameters to check 1031 @type glob_pars: dictionary 1032 @param glob_pars: Forbidden parameters 1033 @type kind: string 1034 @param kind: Kind of parameters (e.g. "node") 1035 @type bad_levels: string 1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g. 1037 "instance") 1038 @type good_levels: strings 1039 @param good_levels: Level(s) at which the parameters are allowed (e.g. 1040 "cluster or group") 1041 1042 """ 1043 used_globals = glob_pars.intersection(params) 1044 if used_globals: 1045 msg = ("The following %s parameters are global and cannot" 1046 " be customized at %s level, please modify them at" 1047 " %s level: %s" % 1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals))) 1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1050
1051 1052 -def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online. 1054 1055 @param lu: the LU on behalf of which we make the check 1056 @param node: the node to check 1057 @param msg: if passed, should be a message to replace the default one 1058 @raise errors.OpPrereqError: if the node is offline 1059 1060 """ 1061 if msg is None: 1062 msg = "Can't use offline node" 1063 if lu.cfg.GetNodeInfo(node).offline: 1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1065
1066 1067 -def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained. 1069 1070 @param lu: the LU on behalf of which we make the check 1071 @param node: the node to check 1072 @raise errors.OpPrereqError: if the node is drained 1073 1074 """ 1075 if lu.cfg.GetNodeInfo(node).drained: 1076 raise errors.OpPrereqError("Can't use drained node %s" % node, 1077 errors.ECODE_STATE)
1078
1079 1080 -def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable. 1082 1083 @param lu: the LU on behalf of which we make the check 1084 @param node: the node to check 1085 @raise errors.OpPrereqError: if the node is not vm capable 1086 1087 """ 1088 if not lu.cfg.GetNodeInfo(node).vm_capable: 1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 1090 errors.ECODE_STATE)
1091
1092 1093 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS. 1095 1096 @param lu: the LU on behalf of which we make the check 1097 @param node: the node to check 1098 @param os_name: the OS to query about 1099 @param force_variant: whether to ignore variant errors 1100 @raise errors.OpPrereqError: if the node is not supporting the OS 1101 1102 """ 1103 result = lu.rpc.call_os_get(node, os_name) 1104 result.Raise("OS '%s' not in supported OS list for node %s" % 1105 (os_name, node), 1106 prereq=True, ecode=errors.ECODE_INVAL) 1107 if not force_variant: 1108 _CheckOSVariant(result.payload, os_name)
1109
1110 1111 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip. 1113 1114 @type lu: L{LogicalUnit} 1115 @param lu: the LU on behalf of which we make the check 1116 @type node: string 1117 @param node: the node to check 1118 @type secondary_ip: string 1119 @param secondary_ip: the ip to check 1120 @type prereq: boolean 1121 @param prereq: whether to throw a prerequisite or an execute error 1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 1124 1125 """ 1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 1127 result.Raise("Failure checking secondary ip on node %s" % node, 1128 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1129 if not result.payload: 1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 1131 " please fix and re-run this command" % secondary_ip) 1132 if prereq: 1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 1134 else: 1135 raise errors.OpExecError(msg)
1136
1137 1138 -def _CheckNodePVs(nresult, exclusive_storage):
1139 """Check node PVs. 1140 1141 """ 1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None) 1143 if pvlist_dict is None: 1144 return (["Can't get PV list from node"], None) 1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict) 1146 errlist = [] 1147 # check that ':' is not present in PV names, since it's a 1148 # special character for lvcreate (denotes the range of PEs to 1149 # use on the PV) 1150 for pv in pvlist: 1151 if ":" in pv.name: 1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" % 1153 (pv.name, pv.vg_name)) 1154 es_pvinfo = None 1155 if exclusive_storage: 1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist) 1157 errlist.extend(errmsgs) 1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None) 1159 if shared_pvs: 1160 for (pvname, lvlist) in shared_pvs: 1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...) 1162 errlist.append("PV %s is shared among unrelated LVs (%s)" % 1163 (pvname, utils.CommaJoin(lvlist))) 1164 return (errlist, es_pvinfo)
1165
1166 1167 -def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret. 1169 1170 """ 1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE, 1172 strict=True)
1173
1174 1175 -def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states. 1177 1178 @param lu: the LU on behalf of which we make the check 1179 @param instance: the instance to check 1180 @param msg: if passed, should be a message to replace the default one 1181 @raise errors.OpPrereqError: if the instance is not in the required state 1182 1183 """ 1184 if msg is None: 1185 msg = ("can't use instance from outside %s states" % 1186 utils.CommaJoin(req_states)) 1187 if instance.admin_state not in req_states: 1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" % 1189 (instance.name, instance.admin_state, msg), 1190 errors.ECODE_STATE) 1191 1192 if constants.ADMINST_UP not in req_states: 1193 pnode = instance.primary_node 1194 if not lu.cfg.GetNodeInfo(pnode).offline: 1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 1196 ins_l.Raise("Can't contact node %s for instance information" % pnode, 1197 prereq=True, ecode=errors.ECODE_ENVIRON) 1198 if instance.name in ins_l.payload: 1199 raise errors.OpPrereqError("Instance %s is running, %s" % 1200 (instance.name, msg), errors.ECODE_STATE) 1201 else: 1202 lu.LogWarning("Primary node offline, ignoring check that instance" 1203 " is down")
1204
1205 1206 -def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range. 1208 1209 @param name: name of the parameter for which we perform the check 1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1', 1211 not just 'disk') 1212 @param ipolicy: dictionary containing min, max and std values 1213 @param value: actual value that we want to use 1214 @return: None or element not meeting the criteria 1215 1216 1217 """ 1218 if value in [None, constants.VALUE_AUTO]: 1219 return None 1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value) 1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value) 1222 if value > max_v or min_v > value: 1223 if qualifier: 1224 fqn = "%s/%s" % (name, qualifier) 1225 else: 1226 fqn = name 1227 return ("%s value %s is not in range [%s, %s]" % 1228 (fqn, value, min_v, max_v)) 1229 return None
1230
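# Worked example (assumed ipolicy values) for _ComputeMinMaxSpec: a value
# outside [min, max] yields an error message, anything within range (or a
# value of None/VALUE_AUTO) yields None.
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   => an error message like "memory-size value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 512)
#   => None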
1231 1232 -def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count, 1233 nic_count, disk_sizes, spindle_use, 1234 disk_template, 1235 _compute_fn=_ComputeMinMaxSpec):
1236 """Verifies ipolicy against provided specs. 1237 1238 @type ipolicy: dict 1239 @param ipolicy: The ipolicy 1240 @type mem_size: int 1241 @param mem_size: The memory size 1242 @type cpu_count: int 1243 @param cpu_count: Used cpu cores 1244 @type disk_count: int 1245 @param disk_count: Number of disks used 1246 @type nic_count: int 1247 @param nic_count: Number of nics used 1248 @type disk_sizes: list of ints 1249 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count}) 1250 @type spindle_use: int 1251 @param spindle_use: The number of spindles this instance uses 1252 @type disk_template: string 1253 @param disk_template: The disk template of the instance 1254 @param _compute_fn: The compute function (unittest only) 1255 @return: A list of violations, or an empty list of no violations are found 1256 1257 """ 1258 assert disk_count == len(disk_sizes) 1259 1260 test_settings = [ 1261 (constants.ISPEC_MEM_SIZE, "", mem_size), 1262 (constants.ISPEC_CPU_COUNT, "", cpu_count), 1263 (constants.ISPEC_NIC_COUNT, "", nic_count), 1264 (constants.ISPEC_SPINDLE_USE, "", spindle_use), 1265 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d) 1266 for idx, d in enumerate(disk_sizes)] 1267 if disk_template != constants.DT_DISKLESS: 1268 # This check doesn't make sense for diskless instances 1269 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count)) 1270 ret = [] 1271 allowed_dts = ipolicy[constants.IPOLICY_DTS] 1272 if disk_template not in allowed_dts: 1273 ret.append("Disk template %s is not allowed (allowed templates: %s)" % 1274 (disk_template, utils.CommaJoin(allowed_dts))) 1275 1276 return ret + filter(None, 1277 (_compute_fn(name, qualifier, ipolicy, value) 1278 for (name, qualifier, value) in test_settings))
1279
1280 1281 -def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg, 1282 _compute_fn=_ComputeIPolicySpecViolation):
1283 """Compute if instance meets the specs of ipolicy. 1284 1285 @type ipolicy: dict 1286 @param ipolicy: The ipolicy to verify against 1287 @type instance: L{objects.Instance} 1288 @param instance: The instance to verify 1289 @type cfg: L{config.ConfigWriter} 1290 @param cfg: Cluster configuration 1291 @param _compute_fn: The function to verify ipolicy (unittest only) 1292 @see: L{_ComputeIPolicySpecViolation} 1293 1294 """ 1295 be_full = cfg.GetClusterInfo().FillBE(instance) 1296 mem_size = be_full[constants.BE_MAXMEM] 1297 cpu_count = be_full[constants.BE_VCPUS] 1298 spindle_use = be_full[constants.BE_SPINDLE_USE] 1299 disk_count = len(instance.disks) 1300 disk_sizes = [disk.size for disk in instance.disks] 1301 nic_count = len(instance.nics) 1302 disk_template = instance.disk_template 1303 1304 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1305 disk_sizes, spindle_use, disk_template)
1306
1307 1308 -def _ComputeIPolicyInstanceSpecViolation( 1309 ipolicy, instance_spec, disk_template, 1310 _compute_fn=_ComputeIPolicySpecViolation):
1311 """Compute if instance specs meets the specs of ipolicy. 1312 1313 @type ipolicy: dict 1314 @param ipolicy: The ipolicy to verify against 1315 @param instance_spec: dict 1316 @param instance_spec: The instance spec to verify 1317 @type disk_template: string 1318 @param disk_template: the disk template of the instance 1319 @param _compute_fn: The function to verify ipolicy (unittest only) 1320 @see: L{_ComputeIPolicySpecViolation} 1321 1322 """ 1323 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None) 1324 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None) 1325 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0) 1326 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, []) 1327 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0) 1328 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None) 1329 1330 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count, 1331 disk_sizes, spindle_use, disk_template)
1332
1333 1334 -def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group, 1335 target_group, cfg, 1336 _compute_fn=_ComputeIPolicyInstanceViolation):
1337 """Compute if instance meets the specs of the new target group. 1338 1339 @param ipolicy: The ipolicy to verify 1340 @param instance: The instance object to verify 1341 @param current_group: The current group of the instance 1342 @param target_group: The new group of the instance 1343 @type cfg: L{config.ConfigWriter} 1344 @param cfg: Cluster configuration 1345 @param _compute_fn: The function to verify ipolicy (unittest only) 1346 @see: L{_ComputeIPolicySpecViolation} 1347 1348 """ 1349 if current_group == target_group: 1350 return [] 1351 else: 1352 return _compute_fn(ipolicy, instance, cfg)
1353
1354 1355 -def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False, 1356 _compute_fn=_ComputeIPolicyNodeViolation):
1357 """Checks that the target node is correct in terms of instance policy. 1358 1359 @param ipolicy: The ipolicy to verify 1360 @param instance: The instance object to verify 1361 @param node: The new node to relocate 1362 @type cfg: L{config.ConfigWriter} 1363 @param cfg: Cluster configuration 1364 @param ignore: Ignore violations of the ipolicy 1365 @param _compute_fn: The function to verify ipolicy (unittest only) 1366 @see: L{_ComputeIPolicySpecViolation} 1367 1368 """ 1369 primary_node = lu.cfg.GetNodeInfo(instance.primary_node) 1370 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg) 1371 1372 if res: 1373 msg = ("Instance does not meet target node group's (%s) instance" 1374 " policy: %s") % (node.group, utils.CommaJoin(res)) 1375 if ignore: 1376 lu.LogWarning(msg) 1377 else: 1378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1379
1380 1381 -def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
1382 """Computes a set of any instances that would violate the new ipolicy. 1383 1384 @param old_ipolicy: The current (still in-place) ipolicy 1385 @param new_ipolicy: The new (to become) ipolicy 1386 @param instances: List of instances to verify 1387 @type cfg: L{config.ConfigWriter} 1388 @param cfg: Cluster configuration 1389 @return: A list of instances which violates the new ipolicy but 1390 did not before 1391 1392 """ 1393 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) - 1394 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1395
1396 1397 -def _ExpandItemName(fn, name, kind):
1398 """Expand an item name. 1399 1400 @param fn: the function to use for expansion 1401 @param name: requested item name 1402 @param kind: text description ('Node' or 'Instance') 1403 @return: the resolved (full) name 1404 @raise errors.OpPrereqError: if the item is not found 1405 1406 """ 1407 full_name = fn(name) 1408 if full_name is None: 1409 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 1410 errors.ECODE_NOENT) 1411 return full_name
1412
1413 1414 -def _ExpandNodeName(cfg, name):
1415 """Wrapper over L{_ExpandItemName} for nodes.""" 1416 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1417
1418 1419 -def _ExpandInstanceName(cfg, name):
1420 """Wrapper over L{_ExpandItemName} for instance.""" 1421 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1422
1423 1424 -def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6, 1425 mac_prefix, tags):
1426 """Builds network related env variables for hooks 1427 1428 This builds the hook environment from individual variables. 1429 1430 @type name: string 1431 @param name: the name of the network 1432 @type subnet: string 1433 @param subnet: the ipv4 subnet 1434 @type gateway: string 1435 @param gateway: the ipv4 gateway 1436 @type network6: string 1437 @param network6: the ipv6 subnet 1438 @type gateway6: string 1439 @param gateway6: the ipv6 gateway 1440 @type mac_prefix: string 1441 @param mac_prefix: the mac_prefix 1442 @type tags: list 1443 @param tags: the tags of the network 1444 1445 """ 1446 env = {} 1447 if name: 1448 env["NETWORK_NAME"] = name 1449 if subnet: 1450 env["NETWORK_SUBNET"] = subnet 1451 if gateway: 1452 env["NETWORK_GATEWAY"] = gateway 1453 if network6: 1454 env["NETWORK_SUBNET6"] = network6 1455 if gateway6: 1456 env["NETWORK_GATEWAY6"] = gateway6 1457 if mac_prefix: 1458 env["NETWORK_MAC_PREFIX"] = mac_prefix 1459 if tags: 1460 env["NETWORK_TAGS"] = " ".join(tags) 1461 1462 return env
1463
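# Worked example (assumed values) for _BuildNetworkHookEnv: only the variables
# whose inputs are set end up in the hook environment.
#
#   _BuildNetworkHookEnv("net1", "192.0.2.0/24", "192.0.2.1", None, None,
#                        "aa:00:00", ["tag1"])
#   => {"NETWORK_NAME": "net1", "NETWORK_SUBNET": "192.0.2.0/24",
#       "NETWORK_GATEWAY": "192.0.2.1", "NETWORK_MAC_PREFIX": "aa:00:00",
#       "NETWORK_TAGS": "tag1"}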
1464 1465 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 1466 minmem, maxmem, vcpus, nics, disk_template, disks, 1467 bep, hvp, hypervisor_name, tags):
1468 """Builds instance related env variables for hooks 1469 1470 This builds the hook environment from individual variables. 1471 1472 @type name: string 1473 @param name: the name of the instance 1474 @type primary_node: string 1475 @param primary_node: the name of the instance's primary node 1476 @type secondary_nodes: list 1477 @param secondary_nodes: list of secondary nodes as strings 1478 @type os_type: string 1479 @param os_type: the name of the instance's OS 1480 @type status: string 1481 @param status: the desired status of the instance 1482 @type minmem: string 1483 @param minmem: the minimum memory size of the instance 1484 @type maxmem: string 1485 @param maxmem: the maximum memory size of the instance 1486 @type vcpus: string 1487 @param vcpus: the count of VCPUs the instance has 1488 @type nics: list 1489 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing 1490 the NICs the instance has 1491 @type disk_template: string 1492 @param disk_template: the disk template of the instance 1493 @type disks: list 1494 @param disks: the list of (size, mode) pairs 1495 @type bep: dict 1496 @param bep: the backend parameters for the instance 1497 @type hvp: dict 1498 @param hvp: the hypervisor parameters for the instance 1499 @type hypervisor_name: string 1500 @param hypervisor_name: the hypervisor for the instance 1501 @type tags: list 1502 @param tags: list of instance tags as strings 1503 @rtype: dict 1504 @return: the hook environment for this instance 1505 1506 """ 1507 env = { 1508 "OP_TARGET": name, 1509 "INSTANCE_NAME": name, 1510 "INSTANCE_PRIMARY": primary_node, 1511 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 1512 "INSTANCE_OS_TYPE": os_type, 1513 "INSTANCE_STATUS": status, 1514 "INSTANCE_MINMEM": minmem, 1515 "INSTANCE_MAXMEM": maxmem, 1516 # TODO(2.9) remove deprecated "memory" value 1517 "INSTANCE_MEMORY": maxmem, 1518 "INSTANCE_VCPUS": vcpus, 1519 "INSTANCE_DISK_TEMPLATE": disk_template, 1520 "INSTANCE_HYPERVISOR": hypervisor_name, 1521 } 1522 if nics: 1523 nic_count = len(nics) 1524 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics): 1525 if ip is None: 1526 ip = "" 1527 env["INSTANCE_NIC%d_IP" % idx] = ip 1528 env["INSTANCE_NIC%d_MAC" % idx] = mac 1529 env["INSTANCE_NIC%d_MODE" % idx] = mode 1530 env["INSTANCE_NIC%d_LINK" % idx] = link 1531 if netinfo: 1532 nobj = objects.Network.FromDict(netinfo) 1533 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx)) 1534 elif network: 1535 # FIXME: broken network reference: the instance NIC specifies a 1536 # network, but the relevant network entry was not in the config. This 1537 # should be made impossible. 1538 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net 1539 if mode == constants.NIC_MODE_BRIDGED: 1540 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 1541 else: 1542 nic_count = 0 1543 1544 env["INSTANCE_NIC_COUNT"] = nic_count 1545 1546 if disks: 1547 disk_count = len(disks) 1548 for idx, (size, mode) in enumerate(disks): 1549 env["INSTANCE_DISK%d_SIZE" % idx] = size 1550 env["INSTANCE_DISK%d_MODE" % idx] = mode 1551 else: 1552 disk_count = 0 1553 1554 env["INSTANCE_DISK_COUNT"] = disk_count 1555 1556 if not tags: 1557 tags = [] 1558 1559 env["INSTANCE_TAGS"] = " ".join(tags) 1560 1561 for source, kind in [(bep, "BE"), (hvp, "HV")]: 1562 for key, value in source.items(): 1563 env["INSTANCE_%s_%s" % (kind, key)] = value 1564 1565 return env
1566
1567 1568 -def _NICToTuple(lu, nic):
1569 """Build a tupple of nic information. 1570 1571 @type lu: L{LogicalUnit} 1572 @param lu: the logical unit on whose behalf we execute 1573 @type nic: L{objects.NIC} 1574 @param nic: nic to convert to hooks tuple 1575 1576 """ 1577 cluster = lu.cfg.GetClusterInfo() 1578 filled_params = cluster.SimpleFillNIC(nic.nicparams) 1579 mode = filled_params[constants.NIC_MODE] 1580 link = filled_params[constants.NIC_LINK] 1581 netinfo = None 1582 if nic.network: 1583 nobj = lu.cfg.GetNetwork(nic.network) 1584 netinfo = objects.Network.ToDict(nobj) 1585 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1586
1587 1588 -def _NICListToTuple(lu, nics):
1589 """Build a list of nic information tuples. 1590 1591 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 1592 value in LUInstanceQueryData. 1593 1594 @type lu: L{LogicalUnit} 1595 @param lu: the logical unit on whose behalf we execute 1596 @type nics: list of L{objects.NIC} 1597 @param nics: list of nics to convert to hooks tuples 1598 1599 """ 1600 hooks_nics = [] 1601 for nic in nics: 1602 hooks_nics.append(_NICToTuple(lu, nic)) 1603 return hooks_nics
1604
1605 1606 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1607 """Builds instance related env variables for hooks from an object. 1608 1609 @type lu: L{LogicalUnit} 1610 @param lu: the logical unit on whose behalf we execute 1611 @type instance: L{objects.Instance} 1612 @param instance: the instance for which we should build the 1613 environment 1614 @type override: dict 1615 @param override: dictionary with key/values that will override 1616 our values 1617 @rtype: dict 1618 @return: the hook environment dictionary 1619 1620 """ 1621 cluster = lu.cfg.GetClusterInfo() 1622 bep = cluster.FillBE(instance) 1623 hvp = cluster.FillHV(instance) 1624 args = { 1625 "name": instance.name, 1626 "primary_node": instance.primary_node, 1627 "secondary_nodes": instance.secondary_nodes, 1628 "os_type": instance.os, 1629 "status": instance.admin_state, 1630 "maxmem": bep[constants.BE_MAXMEM], 1631 "minmem": bep[constants.BE_MINMEM], 1632 "vcpus": bep[constants.BE_VCPUS], 1633 "nics": _NICListToTuple(lu, instance.nics), 1634 "disk_template": instance.disk_template, 1635 "disks": [(disk.size, disk.mode) for disk in instance.disks], 1636 "bep": bep, 1637 "hvp": hvp, 1638 "hypervisor_name": instance.hypervisor, 1639 "tags": instance.tags, 1640 } 1641 if override: 1642 args.update(override) 1643 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1644
1645 1646 -def _AdjustCandidatePool(lu, exceptions):
1647 """Adjust the candidate pool after node operations. 1648 1649 """ 1650 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 1651 if mod_list: 1652 lu.LogInfo("Promoted nodes to master candidate role: %s", 1653 utils.CommaJoin(node.name for node in mod_list)) 1654 for name in mod_list: 1655 lu.context.ReaddNode(name) 1656 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1657 if mc_now > mc_max: 1658 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 1659 (mc_now, mc_max))
1660
1661 1662 -def _DecideSelfPromotion(lu, exceptions=None):
1663 """Decide whether I should promote myself as a master candidate. 1664 1665 """ 1666 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 1667 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 1668 # the new node will increase mc_max with one, so: 1669 mc_should = min(mc_should + 1, cp_size) 1670 return mc_now < mc_should
1671
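Worked example of the promotion arithmetic above (hypothetical numbers):

  # With candidate_pool_size = 10 and 7 current master candidates, whose
  # theoretical maximum is also 7, the node being added raises the target:
  cp_size = 10
  mc_now, mc_should = 7, 7
  mc_should = min(mc_should + 1, cp_size)   # -> 8
  promote = mc_now < mc_should              # -> True, promote ourselves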
1672 1673 -def _ComputeViolatingInstances(ipolicy, instances, cfg):
1674    """Computes the set of instances that violate the given ipolicy.
1675  
1676    @param ipolicy: The ipolicy to verify
1677    @type instances: list of L{objects.Instance}
1678    @param instances: List of instances to verify
1679    @type cfg: L{config.ConfigWriter}
1680    @param cfg: Cluster configuration
1681    @return: A frozenset of instance names violating the ipolicy
1682  
1683    """
1684    return frozenset([inst.name for inst in instances
1685                      if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1686
1687 1688 -def _CheckNicsBridgesExist(lu, target_nics, target_node):
1689    """Check that the bridges needed by a list of nics exist.
1690  
1691    """
1692    cluster = lu.cfg.GetClusterInfo()
1693    paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1694    brlist = [params[constants.NIC_LINK] for params in paramslist
1695              if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1696    if brlist:
1697      result = lu.rpc.call_bridges_exist(target_node, brlist)
1698      result.Raise("Error checking bridges on destination node '%s'" %
1699                   target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1700
1701 1702 -def _CheckInstanceBridgesExist(lu, instance, node=None):
1703    """Check that the bridges needed by an instance exist.
1704  
1705    """
1706    if node is None:
1707      node = instance.primary_node
1708    _CheckNicsBridgesExist(lu, instance.nics, node)
1709
1710 1711 -def _CheckOSVariant(os_obj, name):
1712 """Check whether an OS name conforms to the os variants specification. 1713 1714 @type os_obj: L{objects.OS} 1715 @param os_obj: OS object to check 1716 @type name: string 1717 @param name: OS name passed by the user, to check for validity 1718 1719 """ 1720 variant = objects.OS.GetVariant(name) 1721 if not os_obj.supported_variants: 1722 if variant: 1723 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'" 1724 " passed)" % (os_obj.name, variant), 1725 errors.ECODE_INVAL) 1726 return 1727 if not variant: 1728 raise errors.OpPrereqError("OS name must include a variant", 1729 errors.ECODE_INVAL) 1730 1731 if variant not in os_obj.supported_variants: 1732 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1733
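Assuming the usual "<os>+<variant>" naming convention (e.g. "debootstrap+default"), the check above boils down to the following sketch; objects.OS.GetVariant() is the authoritative parser:

  # Illustrative only; the OS name and variant are hypothetical.
  name = "debootstrap+default"
  variant = name.split("+", 1)[1] if "+" in name else ""
  # - OS without supported_variants: a non-empty variant is rejected
  # - OS with supported_variants: an empty or unknown variant is rejected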
1734 1735 -def _GetNodeInstancesInner(cfg, fn):
1736 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1737
1738 1739 -def _GetNodeInstances(cfg, node_name):
1740 """Returns a list of all primary and secondary instances on a node. 1741 1742 """ 1743 1744 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1745
1746 1747 -def _GetNodePrimaryInstances(cfg, node_name):
1748 """Returns primary instances on a node. 1749 1750 """ 1751 return _GetNodeInstancesInner(cfg, 1752 lambda inst: node_name == inst.primary_node)
1753
1754 1755 -def _GetNodeSecondaryInstances(cfg, node_name):
1756 """Returns secondary instances on a node. 1757 1758 """ 1759 return _GetNodeInstancesInner(cfg, 1760 lambda inst: node_name in inst.secondary_nodes)
1761
1762 1763 -def _GetStorageTypeArgs(cfg, storage_type):
1764 """Returns the arguments for a storage type. 1765 1766 """ 1767 # Special case for file storage 1768 if storage_type == constants.ST_FILE: 1769 # storage.FileStorage wants a list of storage directories 1770 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]] 1771 1772 return []
1773
1774 1775 -def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1776 faulty = [] 1777 1778 for dev in instance.disks: 1779 cfg.SetDiskID(dev, node_name) 1780 1781 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks, 1782 instance)) 1783 result.Raise("Failed to get disk status from node %s" % node_name, 1784 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1785 1786 for idx, bdev_status in enumerate(result.payload): 1787 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1788 faulty.append(idx) 1789 1790 return faulty
1791
1792 1793 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1794 """Check the sanity of iallocator and node arguments and use the 1795 cluster-wide iallocator if appropriate. 1796 1797 Check that at most one of (iallocator, node) is specified. If none is 1798 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, 1799 then the LU's opcode's iallocator slot is filled with the cluster-wide 1800 default iallocator. 1801 1802 @type iallocator_slot: string 1803 @param iallocator_slot: the name of the opcode iallocator slot 1804 @type node_slot: string 1805 @param node_slot: the name of the opcode target node slot 1806 1807 """ 1808 node = getattr(lu.op, node_slot, None) 1809 ialloc = getattr(lu.op, iallocator_slot, None) 1810 if node == []: 1811 node = None 1812 1813 if node is not None and ialloc is not None: 1814 raise errors.OpPrereqError("Do not specify both, iallocator and node", 1815 errors.ECODE_INVAL) 1816 elif ((node is None and ialloc is None) or 1817 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT): 1818 default_iallocator = lu.cfg.GetDefaultIAllocator() 1819 if default_iallocator: 1820 setattr(lu.op, iallocator_slot, default_iallocator) 1821 else: 1822 raise errors.OpPrereqError("No iallocator or node given and no" 1823 " cluster-wide default iallocator found;" 1824 " please specify either an iallocator or a" 1825 " node, or set a cluster-wide default" 1826 " iallocator", errors.ECODE_INVAL)
1827
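The possible outcomes of the helper above, summarized as an illustrative sketch (the shortcut value is abbreviated here):

  # iallocator set and node set          -> OpPrereqError (mutually exclusive)
  # only node set                        -> opcode left untouched
  # only iallocator set (not shortcut)   -> opcode left untouched
  # neither set, or iallocator shortcut  -> opcode iallocator slot filled with
  #                                         the cluster default, or
  #                                         OpPrereqError if none is configured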
1828 1829 -def _GetDefaultIAllocator(cfg, ialloc):
1830 """Decides on which iallocator to use. 1831 1832 @type cfg: L{config.ConfigWriter} 1833 @param cfg: Cluster configuration object 1834 @type ialloc: string or None 1835 @param ialloc: Iallocator specified in opcode 1836 @rtype: string 1837 @return: Iallocator name 1838 1839 """ 1840 if not ialloc: 1841 # Use default iallocator 1842 ialloc = cfg.GetDefaultIAllocator() 1843 1844 if not ialloc: 1845 raise errors.OpPrereqError("No iallocator was specified, neither in the" 1846 " opcode nor as a cluster-wide default", 1847 errors.ECODE_INVAL) 1848 1849 return ialloc
1850
1851 1852 -def _CheckHostnameSane(lu, name):
1853 """Ensures that a given hostname resolves to a 'sane' name. 1854 1855 The given name is required to be a prefix of the resolved hostname, 1856 to prevent accidental mismatches. 1857 1858 @param lu: the logical unit on behalf of which we're checking 1859 @param name: the name we should resolve and check 1860 @return: the resolved hostname object 1861 1862 """ 1863 hostname = netutils.GetHostname(name=name) 1864 if hostname.name != name: 1865 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) 1866 if not utils.MatchNameComponent(name, [hostname.name]): 1867 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" 1868 " same as given hostname '%s'") % 1869 (hostname.name, name), errors.ECODE_INVAL) 1870 return hostname
1871
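A standalone sketch of the prefix rule enforced above (hypothetical names; utils.MatchNameComponent is the real implementation):

  def _is_prefix_component(given, resolved):
    # "web1" matches "web1.example.com"; "web1" against "mail.example.com"
    # would fail and make _CheckHostnameSane raise OpPrereqError.
    return resolved == given or resolved.startswith(given + ".")

  assert _is_prefix_component("web1", "web1.example.com")
  assert not _is_prefix_component("web1", "mail.example.com")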
1872 1873 -class LUClusterPostInit(LogicalUnit):
1874 """Logical unit for running hooks after cluster initialization. 1875 1876 """ 1877 HPATH = "cluster-init" 1878 HTYPE = constants.HTYPE_CLUSTER 1879
1880 - def BuildHooksEnv(self):
1881 """Build hooks env. 1882 1883 """ 1884 return { 1885 "OP_TARGET": self.cfg.GetClusterName(), 1886 }
1887
1888 - def BuildHooksNodes(self):
1889 """Build hooks nodes. 1890 1891 """ 1892 return ([], [self.cfg.GetMasterNode()])
1893
1894 - def Exec(self, feedback_fn):
1895 """Nothing to do. 1896 1897 """ 1898 return True
1899
1900 1901 -class LUClusterDestroy(LogicalUnit):
1902 """Logical unit for destroying the cluster. 1903 1904 """ 1905 HPATH = "cluster-destroy" 1906 HTYPE = constants.HTYPE_CLUSTER 1907
1908 - def BuildHooksEnv(self):
1909 """Build hooks env. 1910 1911 """ 1912 return { 1913 "OP_TARGET": self.cfg.GetClusterName(), 1914 }
1915
1916 - def BuildHooksNodes(self):
1917 """Build hooks nodes. 1918 1919 """ 1920 return ([], [])
1921
1922 - def CheckPrereq(self):
1923 """Check prerequisites. 1924 1925 This checks whether the cluster is empty. 1926 1927 Any errors are signaled by raising errors.OpPrereqError. 1928 1929 """ 1930 master = self.cfg.GetMasterNode() 1931 1932 nodelist = self.cfg.GetNodeList() 1933 if len(nodelist) != 1 or nodelist[0] != master: 1934 raise errors.OpPrereqError("There are still %d node(s) in" 1935 " this cluster." % (len(nodelist) - 1), 1936 errors.ECODE_INVAL) 1937 instancelist = self.cfg.GetInstanceList() 1938 if instancelist: 1939 raise errors.OpPrereqError("There are still %d instance(s) in" 1940 " this cluster." % len(instancelist), 1941 errors.ECODE_INVAL)
1942
1943 - def Exec(self, feedback_fn):
1944 """Destroys the cluster. 1945 1946 """ 1947 master_params = self.cfg.GetMasterNetworkParameters() 1948 1949 # Run post hooks on master node before it's removed 1950 _RunPostHook(self, master_params.name) 1951 1952 ems = self.cfg.GetUseExternalMipScript() 1953 result = self.rpc.call_node_deactivate_master_ip(master_params.name, 1954 master_params, ems) 1955 if result.fail_msg: 1956 self.LogWarning("Error disabling the master IP address: %s", 1957 result.fail_msg) 1958 1959 return master_params.name
1960
1961 1962 -def _VerifyCertificate(filename):
1963 """Verifies a certificate for L{LUClusterVerifyConfig}. 1964 1965 @type filename: string 1966 @param filename: Path to PEM file 1967 1968 """ 1969 try: 1970 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1971 utils.ReadFile(filename)) 1972 except Exception, err: # pylint: disable=W0703 1973 return (LUClusterVerifyConfig.ETYPE_ERROR, 1974 "Failed to load X509 certificate %s: %s" % (filename, err)) 1975 1976 (errcode, msg) = \ 1977 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1978 constants.SSL_CERT_EXPIRATION_ERROR) 1979 1980 if msg: 1981 fnamemsg = "While verifying %s: %s" % (filename, msg) 1982 else: 1983 fnamemsg = None 1984 1985 if errcode is None: 1986 return (None, fnamemsg) 1987 elif errcode == utils.CERT_WARNING: 1988 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) 1989 elif errcode == utils.CERT_ERROR: 1990 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) 1991 1992 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1993
1994 1995 -def _GetAllHypervisorParameters(cluster, instances):
1996    """Compute the set of all hypervisor parameters.
1997  
1998    @type cluster: L{objects.Cluster}
1999    @param cluster: the cluster object
2000    @type instances: list of L{objects.Instance}
2001    @param instances: additional instances from which to obtain parameters
2002    @rtype: list of (origin, hypervisor, parameters)
2003    @return: a list with all parameters found, indicating the hypervisor they
2004             apply to, and the origin (can be "cluster", "os X", or "instance Y")
2005  
2006    """
2007    hvp_data = []
2008  
2009    for hv_name in cluster.enabled_hypervisors:
2010      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2011  
2012    for os_name, os_hvp in cluster.os_hvp.items():
2013      for hv_name, hv_params in os_hvp.items():
2014        if hv_params:
2015          full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2016          hvp_data.append(("os %s" % os_name, hv_name, full_params))
2017  
2018    # TODO: collapse identical parameter values in a single one
2019    for instance in instances:
2020      if instance.hvparams:
2021        hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2022                         cluster.FillHV(instance)))
2023  
2024    return hvp_data
2025
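The return value of the helper above is a flat list of (origin, hypervisor, parameters) tuples; a hypothetical example (hypervisor and parameter names are illustrative only):

  example_hvp_data = [
    ("cluster", "kvm", {"acpi": True}),
    ("os debootstrap", "kvm", {"acpi": True}),
    ("instance inst1.example.com", "kvm", {"acpi": False}),
  ]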
2026 2027 -class _VerifyErrors(object):
2028 """Mix-in for cluster/group verify LUs. 2029 2030 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects 2031 self.op and self._feedback_fn to be available.) 2032 2033 """ 2034 2035 ETYPE_FIELD = "code" 2036 ETYPE_ERROR = "ERROR" 2037 ETYPE_WARNING = "WARNING" 2038
2039 - def _Error(self, ecode, item, msg, *args, **kwargs):
2040 """Format an error message. 2041 2042 Based on the opcode's error_codes parameter, either format a 2043 parseable error code, or a simpler error string. 2044 2045 This must be called only from Exec and functions called from Exec. 2046 2047 """ 2048 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 2049 itype, etxt, _ = ecode 2050 # If the error code is in the list of ignored errors, demote the error to a 2051 # warning 2052 if etxt in self.op.ignore_errors: # pylint: disable=E1101 2053 ltype = self.ETYPE_WARNING 2054 # first complete the msg 2055 if args: 2056 msg = msg % args 2057 # then format the whole message 2058 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 2059 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 2060 else: 2061 if item: 2062 item = " " + item 2063 else: 2064 item = "" 2065 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 2066 # and finally report it via the feedback_fn 2067 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101 2068 # do not mark the operation as failed for WARN cases only 2069 if ltype == self.ETYPE_ERROR: 2070 self.bad = True
2071
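For illustration, with hypothetical values the two formats produced above look like:

  # self.op.error_codes set (machine parseable, colon separated):
  #   "ERROR:ENODESSH:node:node2.example.com:ssh communication failed"
  # default (human readable):
  #   "ERROR: node node2.example.com: ssh communication failed"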
2072 - def _ErrorIf(self, cond, *args, **kwargs):
2073 """Log an error message if the passed condition is True. 2074 2075 """ 2076 if (bool(cond) 2077 or self.op.debug_simulate_errors): # pylint: disable=E1101 2078 self._Error(*args, **kwargs)
2079
2080 2081 -class LUClusterVerify(NoHooksLU):
2082 """Submits all jobs necessary to verify the cluster. 2083 2084 """ 2085 REQ_BGL = False 2086
2087 - def ExpandNames(self):
2088 self.needed_locks = {}
2089
2090 - def Exec(self, feedback_fn):
2091 jobs = [] 2092 2093 if self.op.group_name: 2094 groups = [self.op.group_name] 2095 depends_fn = lambda: None 2096 else: 2097 groups = self.cfg.GetNodeGroupList() 2098 2099 # Verify global configuration 2100 jobs.append([ 2101 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), 2102 ]) 2103 2104 # Always depend on global verification 2105 depends_fn = lambda: [(-len(jobs), [])] 2106 2107 jobs.extend( 2108 [opcodes.OpClusterVerifyGroup(group_name=group, 2109 ignore_errors=self.op.ignore_errors, 2110 depends=depends_fn())] 2111 for group in groups) 2112 2113 # Fix up all parameters 2114 for op in itertools.chain(*jobs): # pylint: disable=W0142 2115 op.debug_simulate_errors = self.op.debug_simulate_errors 2116 op.verbose = self.op.verbose 2117 op.error_codes = self.op.error_codes 2118 try: 2119 op.skip_checks = self.op.skip_checks 2120 except AttributeError: 2121 assert not isinstance(op, opcodes.OpClusterVerifyGroup) 2122 2123 return ResultWithJobs(jobs)
2124
2125 2126 -class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2127 """Verifies the cluster config. 2128 2129 """ 2130 REQ_BGL = False 2131
2132 - def _VerifyHVP(self, hvp_data):
2133 """Verifies locally the syntax of the hypervisor parameters. 2134 2135 """ 2136 for item, hv_name, hv_params in hvp_data: 2137 msg = ("hypervisor %s parameters syntax check (source %s): %%s" % 2138 (item, hv_name)) 2139 try: 2140 hv_class = hypervisor.GetHypervisorClass(hv_name) 2141 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2142 hv_class.CheckParameterSyntax(hv_params) 2143 except errors.GenericError, err: 2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2145
2146 - def ExpandNames(self):
2147 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) 2148 self.share_locks = _ShareAll()
2149
2150 - def CheckPrereq(self):
2151 """Check prerequisites. 2152 2153 """ 2154 # Retrieve all information 2155 self.all_group_info = self.cfg.GetAllNodeGroupsInfo() 2156 self.all_node_info = self.cfg.GetAllNodesInfo() 2157 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2158
2159 - def Exec(self, feedback_fn):
2160 """Verify integrity of cluster, performing various test on nodes. 2161 2162 """ 2163 self.bad = False 2164 self._feedback_fn = feedback_fn 2165 2166 feedback_fn("* Verifying cluster config") 2167 2168 for msg in self.cfg.VerifyConfig(): 2169 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) 2170 2171 feedback_fn("* Verifying cluster certificate files") 2172 2173 for cert_filename in pathutils.ALL_CERT_FILES: 2174 (errcode, msg) = _VerifyCertificate(cert_filename) 2175 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) 2176 2177 feedback_fn("* Verifying hypervisor parameters") 2178 2179 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), 2180 self.all_inst_info.values())) 2181 2182 feedback_fn("* Verifying all nodes belong to an existing group") 2183 2184 # We do this verification here because, should this bogus circumstance 2185 # occur, it would never be caught by VerifyGroup, which only acts on 2186 # nodes/instances reachable from existing node groups. 2187 2188 dangling_nodes = set(node.name for node in self.all_node_info.values() 2189 if node.group not in self.all_group_info) 2190 2191 dangling_instances = {} 2192 no_node_instances = [] 2193 2194 for inst in self.all_inst_info.values(): 2195 if inst.primary_node in dangling_nodes: 2196 dangling_instances.setdefault(inst.primary_node, []).append(inst.name) 2197 elif inst.primary_node not in self.all_node_info: 2198 no_node_instances.append(inst.name) 2199 2200 pretty_dangling = [ 2201 "%s (%s)" % 2202 (node.name, 2203 utils.CommaJoin(dangling_instances.get(node.name, 2204 ["no instances"]))) 2205 for node in dangling_nodes] 2206 2207 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, 2208 None, 2209 "the following nodes (and their instances) belong to a non" 2210 " existing group: %s", utils.CommaJoin(pretty_dangling)) 2211 2212 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, 2213 None, 2214 "the following instances have a non-existing primary-node:" 2215 " %s", utils.CommaJoin(no_node_instances)) 2216 2217 return not self.bad
2218
2219 2220 -class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2221 """Verifies the status of a node group. 2222 2223 """ 2224 HPATH = "cluster-verify" 2225 HTYPE = constants.HTYPE_CLUSTER 2226 REQ_BGL = False 2227 2228 _HOOKS_INDENT_RE = re.compile("^", re.M) 2229
2230 - class NodeImage(object):
2231      """A class representing the logical and physical status of a node.
2232  
2233      @type name: string
2234      @ivar name: the node name to which this object refers
2235      @ivar volumes: a structure as returned from
2236          L{ganeti.backend.GetVolumeList} (runtime)
2237      @ivar instances: a list of running instances (runtime)
2238      @ivar pinst: list of configured primary instances (config)
2239      @ivar sinst: list of configured secondary instances (config)
2240      @ivar sbp: dictionary of {primary-node: list of instances} for all
2241          instances for which this node is secondary (config)
2242      @ivar mfree: free memory, as reported by hypervisor (runtime)
2243      @ivar dfree: free disk, as reported by the node (runtime)
2244      @ivar offline: the offline status (config)
2245      @type rpc_fail: boolean
2246      @ivar rpc_fail: whether the RPC verify call was successful (overall,
2247          not whether the individual keys were correct) (runtime)
2248      @type lvm_fail: boolean
2249      @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2250      @type hyp_fail: boolean
2251      @ivar hyp_fail: whether the RPC call didn't return the instance list
2252      @type ghost: boolean
2253      @ivar ghost: whether this is a known node or not (config)
2254      @type os_fail: boolean
2255      @ivar os_fail: whether the RPC call didn't return valid OS data
2256      @type oslist: list
2257      @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2258      @type vm_capable: boolean
2259      @ivar vm_capable: whether the node can host instances
2260      @type pv_min: float
2261      @ivar pv_min: size in MiB of the smallest PVs
2262      @type pv_max: float
2263      @ivar pv_max: size in MiB of the biggest PVs
2264  
2265      """
2266 - def __init__(self, offline=False, name=None, vm_capable=True):
2267 self.name = name 2268 self.volumes = {} 2269 self.instances = [] 2270 self.pinst = [] 2271 self.sinst = [] 2272 self.sbp = {} 2273 self.mfree = 0 2274 self.dfree = 0 2275 self.offline = offline 2276 self.vm_capable = vm_capable 2277 self.rpc_fail = False 2278 self.lvm_fail = False 2279 self.hyp_fail = False 2280 self.ghost = False 2281 self.os_fail = False 2282 self.oslist = {} 2283 self.pv_min = None 2284 self.pv_max = None
2285
2286 - def ExpandNames(self):
2287 # This raises errors.OpPrereqError on its own: 2288 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) 2289 2290 # Get instances in node group; this is unsafe and needs verification later 2291 inst_names = \ 2292 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2293 2294 self.needed_locks = { 2295 locking.LEVEL_INSTANCE: inst_names, 2296 locking.LEVEL_NODEGROUP: [self.group_uuid], 2297 locking.LEVEL_NODE: [], 2298 2299 # This opcode is run by watcher every five minutes and acquires all nodes 2300 # for a group. It doesn't run for a long time, so it's better to acquire 2301 # the node allocation lock as well. 2302 locking.LEVEL_NODE_ALLOC: locking.ALL_SET, 2303 } 2304 2305 self.share_locks = _ShareAll()
2306
2307 - def DeclareLocks(self, level):
2308 if level == locking.LEVEL_NODE: 2309 # Get members of node group; this is unsafe and needs verification later 2310 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) 2311 2312 all_inst_info = self.cfg.GetAllInstancesInfo() 2313 2314 # In Exec(), we warn about mirrored instances that have primary and 2315 # secondary living in separate node groups. To fully verify that 2316 # volumes for these instances are healthy, we will need to do an 2317 # extra call to their secondaries. We ensure here those nodes will 2318 # be locked. 2319 for inst in self.owned_locks(locking.LEVEL_INSTANCE): 2320 # Important: access only the instances whose lock is owned 2321 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR: 2322 nodes.update(all_inst_info[inst].secondary_nodes) 2323 2324 self.needed_locks[locking.LEVEL_NODE] = nodes
2325
2326 - def CheckPrereq(self):
2327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) 2328 self.group_info = self.cfg.GetNodeGroup(self.group_uuid) 2329 2330 group_nodes = set(self.group_info.members) 2331 group_instances = \ 2332 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) 2333 2334 unlocked_nodes = \ 2335 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2336 2337 unlocked_instances = \ 2338 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) 2339 2340 if unlocked_nodes: 2341 raise errors.OpPrereqError("Missing lock for nodes: %s" % 2342 utils.CommaJoin(unlocked_nodes), 2343 errors.ECODE_STATE) 2344 2345 if unlocked_instances: 2346 raise errors.OpPrereqError("Missing lock for instances: %s" % 2347 utils.CommaJoin(unlocked_instances), 2348 errors.ECODE_STATE) 2349 2350 self.all_node_info = self.cfg.GetAllNodesInfo() 2351 self.all_inst_info = self.cfg.GetAllInstancesInfo() 2352 2353 self.my_node_names = utils.NiceSort(group_nodes) 2354 self.my_inst_names = utils.NiceSort(group_instances) 2355 2356 self.my_node_info = dict((name, self.all_node_info[name]) 2357 for name in self.my_node_names) 2358 2359 self.my_inst_info = dict((name, self.all_inst_info[name]) 2360 for name in self.my_inst_names) 2361 2362 # We detect here the nodes that will need the extra RPC calls for verifying 2363 # split LV volumes; they should be locked. 2364 extra_lv_nodes = set() 2365 2366 for inst in self.my_inst_info.values(): 2367 if inst.disk_template in constants.DTS_INT_MIRROR: 2368 for nname in inst.all_nodes: 2369 if self.all_node_info[nname].group != self.group_uuid: 2370 extra_lv_nodes.add(nname) 2371 2372 unlocked_lv_nodes = \ 2373 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) 2374 2375 if unlocked_lv_nodes: 2376 raise errors.OpPrereqError("Missing node locks for LV check: %s" % 2377 utils.CommaJoin(unlocked_lv_nodes), 2378 errors.ECODE_STATE) 2379 self.extra_lv_nodes = list(extra_lv_nodes)
2380
2381 - def _VerifyNode(self, ninfo, nresult):
2382 """Perform some basic validation on data returned from a node. 2383 2384 - check the result data structure is well formed and has all the 2385 mandatory fields 2386 - check ganeti version 2387 2388 @type ninfo: L{objects.Node} 2389 @param ninfo: the node to check 2390 @param nresult: the results from the node 2391 @rtype: boolean 2392 @return: whether overall this call was successful (and we can expect 2393 reasonable values in the respose) 2394 2395 """ 2396 node = ninfo.name 2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2398 2399 # main result, nresult should be a non-empty dict 2400 test = not nresult or not isinstance(nresult, dict) 2401 _ErrorIf(test, constants.CV_ENODERPC, node, 2402 "unable to verify node: no data returned") 2403 if test: 2404 return False 2405 2406 # compares ganeti version 2407 local_version = constants.PROTOCOL_VERSION 2408 remote_version = nresult.get("version", None) 2409 test = not (remote_version and 2410 isinstance(remote_version, (list, tuple)) and 2411 len(remote_version) == 2) 2412 _ErrorIf(test, constants.CV_ENODERPC, node, 2413 "connection to node returned invalid data") 2414 if test: 2415 return False 2416 2417 test = local_version != remote_version[0] 2418 _ErrorIf(test, constants.CV_ENODEVERSION, node, 2419 "incompatible protocol versions: master %s," 2420 " node %s", local_version, remote_version[0]) 2421 if test: 2422 return False 2423 2424 # node seems compatible, we can actually try to look into its results 2425 2426 # full package version 2427 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 2428 constants.CV_ENODEVERSION, node, 2429 "software version mismatch: master %s, node %s", 2430 constants.RELEASE_VERSION, remote_version[1], 2431 code=self.ETYPE_WARNING) 2432 2433 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 2434 if ninfo.vm_capable and isinstance(hyp_result, dict): 2435 for hv_name, hv_result in hyp_result.iteritems(): 2436 test = hv_result is not None 2437 _ErrorIf(test, constants.CV_ENODEHV, node, 2438 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 2439 2440 hvp_result = nresult.get(constants.NV_HVPARAMS, None) 2441 if ninfo.vm_capable and isinstance(hvp_result, list): 2442 for item, hv_name, hv_result in hvp_result: 2443 _ErrorIf(True, constants.CV_ENODEHV, node, 2444 "hypervisor %s parameter verify failure (source %s): %s", 2445 hv_name, item, hv_result) 2446 2447 test = nresult.get(constants.NV_NODESETUP, 2448 ["Missing NODESETUP results"]) 2449 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", 2450 "; ".join(test)) 2451 2452 return True
2453
2454 - def _VerifyNodeTime(self, ninfo, nresult, 2455 nvinfo_starttime, nvinfo_endtime):
2456 """Check the node time. 2457 2458 @type ninfo: L{objects.Node} 2459 @param ninfo: the node to check 2460 @param nresult: the remote results for the node 2461 @param nvinfo_starttime: the start time of the RPC call 2462 @param nvinfo_endtime: the end time of the RPC call 2463 2464 """ 2465 node = ninfo.name 2466 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2467 2468 ntime = nresult.get(constants.NV_TIME, None) 2469 try: 2470 ntime_merged = utils.MergeTime(ntime) 2471 except (ValueError, TypeError): 2472 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") 2473 return 2474 2475 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 2476 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 2477 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 2478 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 2479 else: 2480 ntime_diff = None 2481 2482 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, 2483 "Node time diverges by at least %s from master node time", 2484 ntime_diff)
2485
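Worked example of the skew check above (hypothetical numbers; the real bound is constants.NODE_MAX_CLOCK_SKEW):

  max_skew = 150.0                 # assumed value of NODE_MAX_CLOCK_SKEW
  nvinfo_starttime = 1000000200.0  # when the verify RPC started
  nvinfo_endtime = 1000000201.0    # when the verify RPC finished
  ntime_merged = 1000000000.0      # the node's own clock
  # 1000000000.0 < 1000000200.0 - 150.0, so the node is reported as
  # diverging by abs(1000000200.0 - 1000000000.0) = 200.0 seconds.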
2486 - def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2487 """Check the node LVM results and update info for cross-node checks. 2488 2489 @type ninfo: L{objects.Node} 2490 @param ninfo: the node to check 2491 @param nresult: the remote results for the node 2492 @param vg_name: the configured VG name 2493 @type nimg: L{NodeImage} 2494 @param nimg: node image 2495 2496 """ 2497 if vg_name is None: 2498 return 2499 2500 node = ninfo.name 2501 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2502 2503 # checks vg existence and size > 20G 2504 vglist = nresult.get(constants.NV_VGLIST, None) 2505 test = not vglist 2506 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") 2507 if not test: 2508 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, 2509 constants.MIN_VG_SIZE) 2510 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) 2511 2512 # Check PVs 2513 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage) 2514 for em in errmsgs: 2515 self._Error(constants.CV_ENODELVM, node, em) 2516 if pvminmax is not None: 2517 (nimg.pv_min, nimg.pv_max) = pvminmax
2518
2519 - def _VerifyGroupLVM(self, node_image, vg_name):
2520      """Check cross-node consistency in LVM.
2521  
2522      @type node_image: dict
2523      @param node_image: info about nodes, mapping from node names to
2524        L{NodeImage} objects
2525      @param vg_name: the configured VG name
2526  
2527      """
2528      if vg_name is None:
2529        return
2530  
2531      # Only exclusive storage needs this kind of check
2532      if not self._exclusive_storage:
2533        return
2534  
2535      # exclusive_storage wants all PVs to have the same size (approximately),
2536      # if the smallest and the biggest ones are okay, everything is fine.
2537      # pv_min is None iff pv_max is None
2538      vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2539      if not vals:
2540        return
2541      (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2542      (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2543      bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2544      self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2545                    "PV sizes differ too much in the group; smallest (%s MB) is"
2546                    " on %s, biggest (%s MB) is on %s",
2547                    pvmin, minnode, pvmax, maxnode)
2548
2549 - def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2550 """Check the node bridges. 2551 2552 @type ninfo: L{objects.Node} 2553 @param ninfo: the node to check 2554 @param nresult: the remote results for the node 2555 @param bridges: the expected list of bridges 2556 2557 """ 2558 if not bridges: 2559 return 2560 2561 node = ninfo.name 2562 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2563 2564 missing = nresult.get(constants.NV_BRIDGES, None) 2565 test = not isinstance(missing, list) 2566 _ErrorIf(test, constants.CV_ENODENET, node, 2567 "did not return valid bridge information") 2568 if not test: 2569 _ErrorIf(bool(missing), constants.CV_ENODENET, node, 2570 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2571
2572 - def _VerifyNodeUserScripts(self, ninfo, nresult):
2573 """Check the results of user scripts presence and executability on the node 2574 2575 @type ninfo: L{objects.Node} 2576 @param ninfo: the node to check 2577 @param nresult: the remote results for the node 2578 2579 """ 2580 node = ninfo.name 2581 2582 test = not constants.NV_USERSCRIPTS in nresult 2583 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node, 2584 "did not return user scripts information") 2585 2586 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) 2587 if not test: 2588 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node, 2589 "user scripts not present or not executable: %s" % 2590 utils.CommaJoin(sorted(broken_scripts)))
2591
2592 - def _VerifyNodeNetwork(self, ninfo, nresult):
2593 """Check the node network connectivity results. 2594 2595 @type ninfo: L{objects.Node} 2596 @param ninfo: the node to check 2597 @param nresult: the remote results for the node 2598 2599 """ 2600 node = ninfo.name 2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2602 2603 test = constants.NV_NODELIST not in nresult 2604 _ErrorIf(test, constants.CV_ENODESSH, node, 2605 "node hasn't returned node ssh connectivity data") 2606 if not test: 2607 if nresult[constants.NV_NODELIST]: 2608 for a_node, a_msg in nresult[constants.NV_NODELIST].items(): 2609 _ErrorIf(True, constants.CV_ENODESSH, node, 2610 "ssh communication with node '%s': %s", a_node, a_msg) 2611 2612 test = constants.NV_NODENETTEST not in nresult 2613 _ErrorIf(test, constants.CV_ENODENET, node, 2614 "node hasn't returned node tcp connectivity data") 2615 if not test: 2616 if nresult[constants.NV_NODENETTEST]: 2617 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) 2618 for anode in nlist: 2619 _ErrorIf(True, constants.CV_ENODENET, node, 2620 "tcp communication with node '%s': %s", 2621 anode, nresult[constants.NV_NODENETTEST][anode]) 2622 2623 test = constants.NV_MASTERIP not in nresult 2624 _ErrorIf(test, constants.CV_ENODENET, node, 2625 "node hasn't returned node master IP reachability data") 2626 if not test: 2627 if not nresult[constants.NV_MASTERIP]: 2628 if node == self.master_node: 2629 msg = "the master node cannot reach the master IP (not configured?)" 2630 else: 2631 msg = "cannot reach the master IP" 2632 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2633
2634 - def _VerifyInstance(self, instance, inst_config, node_image, 2635 diskstatus):
2636 """Verify an instance. 2637 2638 This function checks to see if the required block devices are 2639 available on the instance's node, and that the nodes are in the correct 2640 state. 2641 2642 """ 2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2644 pnode = inst_config.primary_node 2645 pnode_img = node_image[pnode] 2646 groupinfo = self.cfg.GetAllNodeGroupsInfo() 2647 2648 node_vol_should = {} 2649 inst_config.MapLVsByNode(node_vol_should) 2650 2651 cluster = self.cfg.GetClusterInfo() 2652 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, 2653 self.group_info) 2654 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg) 2655 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err), 2656 code=self.ETYPE_WARNING) 2657 2658 for node in node_vol_should: 2659 n_img = node_image[node] 2660 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 2661 # ignore missing volumes on offline or broken nodes 2662 continue 2663 for volume in node_vol_should[node]: 2664 test = volume not in n_img.volumes 2665 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, 2666 "volume %s missing on node %s", volume, node) 2667 2668 if inst_config.admin_state == constants.ADMINST_UP: 2669 test = instance not in pnode_img.instances and not pnode_img.offline 2670 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, 2671 "instance not running on its primary node %s", 2672 pnode) 2673 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance, 2674 "instance is marked as running and lives on offline node %s", 2675 pnode) 2676 2677 diskdata = [(nname, success, status, idx) 2678 for (nname, disks) in diskstatus.items() 2679 for idx, (success, status) in enumerate(disks)] 2680 2681 for nname, success, bdev_status, idx in diskdata: 2682 # the 'ghost node' construction in Exec() ensures that we have a 2683 # node here 2684 snode = node_image[nname] 2685 bad_snode = snode.ghost or snode.offline 2686 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and 2687 not success and not bad_snode, 2688 constants.CV_EINSTANCEFAULTYDISK, instance, 2689 "couldn't retrieve status for disk/%s on %s: %s", 2690 idx, nname, bdev_status) 2691 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and 2692 success and bdev_status.ldisk_status == constants.LDS_FAULTY), 2693 constants.CV_EINSTANCEFAULTYDISK, instance, 2694 "disk/%s on %s is faulty", idx, nname) 2695 2696 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2697 constants.CV_ENODERPC, pnode, "instance %s, connection to" 2698 " primary node failed", instance) 2699 2700 _ErrorIf(len(inst_config.secondary_nodes) > 1, 2701 constants.CV_EINSTANCELAYOUT, 2702 instance, "instance has multiple secondary nodes: %s", 2703 utils.CommaJoin(inst_config.secondary_nodes), 2704 code=self.ETYPE_WARNING) 2705 2706 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE: 2707 # Disk template not compatible with exclusive_storage: no instance 2708 # node should have the flag set 2709 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, 2710 inst_config.all_nodes) 2711 es_nodes = [n for (n, es) in es_flags.items() 2712 if es] 2713 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance, 2714 "instance has template %s, which is not supported on nodes" 2715 " that have exclusive storage set: %s", 2716 inst_config.disk_template, utils.CommaJoin(es_nodes)) 2717 2718 if inst_config.disk_template in constants.DTS_INT_MIRROR: 2719 instance_nodes = utils.NiceSort(inst_config.all_nodes) 2720 
instance_groups = {} 2721 2722 for node in instance_nodes: 2723 instance_groups.setdefault(self.all_node_info[node].group, 2724 []).append(node) 2725 2726 pretty_list = [ 2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) 2728 # Sort so that we always list the primary node first. 2729 for group, nodes in sorted(instance_groups.items(), 2730 key=lambda (_, nodes): pnode in nodes, 2731 reverse=True)] 2732 2733 self._ErrorIf(len(instance_groups) > 1, 2734 constants.CV_EINSTANCESPLITGROUPS, 2735 instance, "instance has primary and secondary nodes in" 2736 " different groups: %s", utils.CommaJoin(pretty_list), 2737 code=self.ETYPE_WARNING) 2738 2739 inst_nodes_offline = [] 2740 for snode in inst_config.secondary_nodes: 2741 s_img = node_image[snode] 2742 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, 2743 snode, "instance %s, connection to secondary node failed", 2744 instance) 2745 2746 if s_img.offline: 2747 inst_nodes_offline.append(snode) 2748 2749 # warn that the instance lives on offline nodes 2750 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance, 2751 "instance has offline secondary node(s) %s", 2752 utils.CommaJoin(inst_nodes_offline)) 2753 # ... or ghost/non-vm_capable nodes 2754 for node in inst_config.all_nodes: 2755 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE, 2756 instance, "instance lives on ghost node %s", node) 2757 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE, 2758 instance, "instance lives on non-vm_capable node %s", node)
2759
2760 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2761 """Verify if there are any unknown volumes in the cluster. 2762 2763 The .os, .swap and backup volumes are ignored. All other volumes are 2764 reported as unknown. 2765 2766 @type reserved: L{ganeti.utils.FieldSet} 2767 @param reserved: a FieldSet of reserved volume names 2768 2769 """ 2770 for node, n_img in node_image.items(): 2771 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or 2772 self.all_node_info[node].group != self.group_uuid): 2773 # skip non-healthy nodes 2774 continue 2775 for volume in n_img.volumes: 2776 test = ((node not in node_vol_should or 2777 volume not in node_vol_should[node]) and 2778 not reserved.Matches(volume)) 2779 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, 2780 "volume %s is unknown", volume)
2781
2782 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2783      """Verify N+1 Memory Resilience.
2784  
2785      Check that if one single node dies we can still start all the
2786      instances it was primary for.
2787  
2788      """
2789      cluster_info = self.cfg.GetClusterInfo()
2790      for node, n_img in node_image.items():
2791        # This code checks that every node which is now listed as
2792        # secondary has enough memory to host all instances it is
2793        # supposed to should a single other node in the cluster fail.
2794        # FIXME: not ready for failover to an arbitrary node
2795        # FIXME: does not support file-backed instances
2796        # WARNING: we currently take into account down instances as well
2797        # as up ones, considering that even if they're down someone
2798        # might want to start them even in the event of a node failure.
2799        if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2800          # we're skipping nodes marked offline and nodes in other groups from
2801          # the N+1 warning, since most likely we don't have good memory
2802          # information from them; we already list instances living on such
2803          # nodes, and that's enough warning
2804          continue
2805        #TODO(dynmem): also consider ballooning out other instances
2806        for prinode, instances in n_img.sbp.items():
2807          needed_mem = 0
2808          for instance in instances:
2809            bep = cluster_info.FillBE(instance_cfg[instance])
2810            if bep[constants.BE_AUTO_BALANCE]:
2811              needed_mem += bep[constants.BE_MINMEM]
2812          test = n_img.mfree < needed_mem
2813          self._ErrorIf(test, constants.CV_ENODEN1, node,
2814                        "not enough memory to accommodate instance failovers"
2815                        " should node %s fail (%dMiB needed, %dMiB available)",
2816                        prinode, needed_mem, n_img.mfree)
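Worked example of the N+1 computation above (hypothetical numbers):

  # Node A is secondary for two auto-balanced instances whose primary is
  # node B; their minimum memory adds up to more than A's free memory.
  needed_mem = 2048 + 4096        # sum of BE_MINMEM in MiB
  mfree = 4000                    # free memory reported by node A's hypervisor
  n1_error = mfree < needed_mem   # True -> CV_ENODEN1 reported for node A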
2817 2818 @classmethod
2819 - def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo, 2820 (files_all, files_opt, files_mc, files_vm)):
2821 """Verifies file checksums collected from all nodes. 2822 2823 @param errorif: Callback for reporting errors 2824 @param nodeinfo: List of L{objects.Node} objects 2825 @param master_node: Name of master node 2826 @param all_nvinfo: RPC results 2827 2828 """ 2829 # Define functions determining which nodes to consider for a file 2830 files2nodefn = [ 2831 (files_all, None), 2832 (files_mc, lambda node: (node.master_candidate or 2833 node.name == master_node)), 2834 (files_vm, lambda node: node.vm_capable), 2835 ] 2836 2837 # Build mapping from filename to list of nodes which should have the file 2838 nodefiles = {} 2839 for (files, fn) in files2nodefn: 2840 if fn is None: 2841 filenodes = nodeinfo 2842 else: 2843 filenodes = filter(fn, nodeinfo) 2844 nodefiles.update((filename, 2845 frozenset(map(operator.attrgetter("name"), filenodes))) 2846 for filename in files) 2847 2848 assert set(nodefiles) == (files_all | files_mc | files_vm) 2849 2850 fileinfo = dict((filename, {}) for filename in nodefiles) 2851 ignore_nodes = set() 2852 2853 for node in nodeinfo: 2854 if node.offline: 2855 ignore_nodes.add(node.name) 2856 continue 2857 2858 nresult = all_nvinfo[node.name] 2859 2860 if nresult.fail_msg or not nresult.payload: 2861 node_files = None 2862 else: 2863 fingerprints = nresult.payload.get(constants.NV_FILELIST, None) 2864 node_files = dict((vcluster.LocalizeVirtualPath(key), value) 2865 for (key, value) in fingerprints.items()) 2866 del fingerprints 2867 2868 test = not (node_files and isinstance(node_files, dict)) 2869 errorif(test, constants.CV_ENODEFILECHECK, node.name, 2870 "Node did not return file checksum data") 2871 if test: 2872 ignore_nodes.add(node.name) 2873 continue 2874 2875 # Build per-checksum mapping from filename to nodes having it 2876 for (filename, checksum) in node_files.items(): 2877 assert filename in nodefiles 2878 fileinfo[filename].setdefault(checksum, set()).add(node.name) 2879 2880 for (filename, checksums) in fileinfo.items(): 2881 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" 2882 2883 # Nodes having the file 2884 with_file = frozenset(node_name 2885 for nodes in fileinfo[filename].values() 2886 for node_name in nodes) - ignore_nodes 2887 2888 expected_nodes = nodefiles[filename] - ignore_nodes 2889 2890 # Nodes missing file 2891 missing_file = expected_nodes - with_file 2892 2893 if filename in files_opt: 2894 # All or no nodes 2895 errorif(missing_file and missing_file != expected_nodes, 2896 constants.CV_ECLUSTERFILECHECK, None, 2897 "File %s is optional, but it must exist on all or no" 2898 " nodes (not found on %s)", 2899 filename, utils.CommaJoin(utils.NiceSort(missing_file))) 2900 else: 2901 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None, 2902 "File %s is missing from node(s) %s", filename, 2903 utils.CommaJoin(utils.NiceSort(missing_file))) 2904 2905 # Warn if a node has a file it shouldn't 2906 unexpected = with_file - expected_nodes 2907 errorif(unexpected, 2908 constants.CV_ECLUSTERFILECHECK, None, 2909 "File %s should not exist on node(s) %s", 2910 filename, utils.CommaJoin(utils.NiceSort(unexpected))) 2911 2912 # See if there are multiple versions of the file 2913 test = len(checksums) > 1 2914 if test: 2915 variants = ["variant %s on %s" % 2916 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes))) 2917 for (idx, (checksum, nodes)) in 2918 enumerate(sorted(checksums.items()))] 2919 else: 2920 variants = [] 2921 2922 errorif(test, constants.CV_ECLUSTERFILECHECK, None, 2923 "File %s found with %s different 
checksums (%s)", 2924 filename, len(checksums), "; ".join(variants))
2925
2926 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 2927 drbd_map):
2928 """Verifies and the node DRBD status. 2929 2930 @type ninfo: L{objects.Node} 2931 @param ninfo: the node to check 2932 @param nresult: the remote results for the node 2933 @param instanceinfo: the dict of instances 2934 @param drbd_helper: the configured DRBD usermode helper 2935 @param drbd_map: the DRBD map as returned by 2936 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 2937 2938 """ 2939 node = ninfo.name 2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103 2941 2942 if drbd_helper: 2943 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 2944 test = (helper_result is None) 2945 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2946 "no drbd usermode helper returned") 2947 if helper_result: 2948 status, payload = helper_result 2949 test = not status 2950 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2951 "drbd usermode helper check unsuccessful: %s", payload) 2952 test = status and (payload != drbd_helper) 2953 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, 2954 "wrong drbd usermode helper: %s", payload) 2955 2956 # compute the DRBD minors 2957 node_drbd = {} 2958 for minor, instance in drbd_map[node].items(): 2959 test = instance not in instanceinfo 2960 _ErrorIf(test, constants.CV_ECLUSTERCFG, None, 2961 "ghost instance '%s' in temporary DRBD map", instance) 2962 # ghost instance should not be running, but otherwise we 2963 # don't give double warnings (both ghost instance and 2964 # unallocated minor in use) 2965 if test: 2966 node_drbd[minor] = (instance, False) 2967 else: 2968 instance = instanceinfo[instance] 2969 node_drbd[minor] = (instance.name, 2970 instance.admin_state == constants.ADMINST_UP) 2971 2972 # and now check them 2973 used_minors = nresult.get(constants.NV_DRBDLIST, []) 2974 test = not isinstance(used_minors, (tuple, list)) 2975 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2976 "cannot parse drbd status file: %s", str(used_minors)) 2977 if test: 2978 # we cannot check drbd status 2979 return 2980 2981 for minor, (iname, must_exist) in node_drbd.items(): 2982 test = minor not in used_minors and must_exist 2983 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2984 "drbd minor %d of instance %s is not active", minor, iname) 2985 for minor in used_minors: 2986 test = minor not in node_drbd 2987 _ErrorIf(test, constants.CV_ENODEDRBD, node, 2988 "unallocated drbd minor %d is in use", minor)
2989
2990 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
2991 """Builds the node OS structures. 2992 2993 @type ninfo: L{objects.Node} 2994 @param ninfo: the node to check 2995 @param nresult: the remote results for the node 2996 @param nimg: the node image object 2997 2998 """ 2999 node = ninfo.name 3000 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3001 3002 remote_os = nresult.get(constants.NV_OSLIST, None) 3003 test = (not isinstance(remote_os, list) or 3004 not compat.all(isinstance(v, list) and len(v) == 7 3005 for v in remote_os)) 3006 3007 _ErrorIf(test, constants.CV_ENODEOS, node, 3008 "node hasn't returned valid OS data") 3009 3010 nimg.os_fail = test 3011 3012 if test: 3013 return 3014 3015 os_dict = {} 3016 3017 for (name, os_path, status, diagnose, 3018 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 3019 3020 if name not in os_dict: 3021 os_dict[name] = [] 3022 3023 # parameters is a list of lists instead of list of tuples due to 3024 # JSON lacking a real tuple type, fix it: 3025 parameters = [tuple(v) for v in parameters] 3026 os_dict[name].append((os_path, status, diagnose, 3027 set(variants), set(parameters), set(api_ver))) 3028 3029 nimg.oslist = os_dict
3030
3031 - def _VerifyNodeOS(self, ninfo, nimg, base):
3032 """Verifies the node OS list. 3033 3034 @type ninfo: L{objects.Node} 3035 @param ninfo: the node to check 3036 @param nimg: the node image object 3037 @param base: the 'template' node we match against (e.g. from the master) 3038 3039 """ 3040 node = ninfo.name 3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3042 3043 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 3044 3045 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l] 3046 for os_name, os_data in nimg.oslist.items(): 3047 assert os_data, "Empty OS status for OS %s?!" % os_name 3048 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 3049 _ErrorIf(not f_status, constants.CV_ENODEOS, node, 3050 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 3051 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node, 3052 "OS '%s' has multiple entries (first one shadows the rest): %s", 3053 os_name, utils.CommaJoin([v[0] for v in os_data])) 3054 # comparisons with the 'base' image 3055 test = os_name not in base.oslist 3056 _ErrorIf(test, constants.CV_ENODEOS, node, 3057 "Extra OS %s not present on reference node (%s)", 3058 os_name, base.name) 3059 if test: 3060 continue 3061 assert base.oslist[os_name], "Base node has empty OS status?" 3062 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 3063 if not b_status: 3064 # base OS is invalid, skipping 3065 continue 3066 for kind, a, b in [("API version", f_api, b_api), 3067 ("variants list", f_var, b_var), 3068 ("parameters", beautify_params(f_param), 3069 beautify_params(b_param))]: 3070 _ErrorIf(a != b, constants.CV_ENODEOS, node, 3071 "OS %s for %s differs from reference node %s: [%s] vs. [%s]", 3072 kind, os_name, base.name, 3073 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) 3074 3075 # check any missing OSes 3076 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 3077 _ErrorIf(missing, constants.CV_ENODEOS, node, 3078 "OSes present on reference node %s but missing on this node: %s", 3079 base.name, utils.CommaJoin(missing))
3080
3081 - def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3082 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. 3083 3084 @type ninfo: L{objects.Node} 3085 @param ninfo: the node to check 3086 @param nresult: the remote results for the node 3087 @type is_master: bool 3088 @param is_master: Whether node is the master node 3089 3090 """ 3091 node = ninfo.name 3092 3093 if (is_master and 3094 (constants.ENABLE_FILE_STORAGE or 3095 constants.ENABLE_SHARED_FILE_STORAGE)): 3096 try: 3097 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS] 3098 except KeyError: 3099 # This should never happen 3100 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node, 3101 "Node did not return forbidden file storage paths") 3102 else: 3103 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node, 3104 "Found forbidden file storage paths: %s", 3105 utils.CommaJoin(fspaths)) 3106 else: 3107 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult, 3108 constants.CV_ENODEFILESTORAGEPATHS, node, 3109 "Node should not have returned forbidden file storage" 3110 " paths")
3111
3112 - def _VerifyOob(self, ninfo, nresult):
3113 """Verifies out of band functionality of a node. 3114 3115 @type ninfo: L{objects.Node} 3116 @param ninfo: the node to check 3117 @param nresult: the remote results for the node 3118 3119 """ 3120 node = ninfo.name 3121 # We just have to verify the paths on master and/or master candidates 3122 # as the oob helper is invoked on the master 3123 if ((ninfo.master_candidate or ninfo.master_capable) and 3124 constants.NV_OOB_PATHS in nresult): 3125 for path_result in nresult[constants.NV_OOB_PATHS]: 3126 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3127
3128 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3129 """Verifies and updates the node volume data. 3130 3131 This function will update a L{NodeImage}'s internal structures 3132 with data from the remote call. 3133 3134 @type ninfo: L{objects.Node} 3135 @param ninfo: the node to check 3136 @param nresult: the remote results for the node 3137 @param nimg: the node image object 3138 @param vg_name: the configured VG name 3139 3140 """ 3141 node = ninfo.name 3142 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3143 3144 nimg.lvm_fail = True 3145 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 3146 if vg_name is None: 3147 pass 3148 elif isinstance(lvdata, basestring): 3149 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s", 3150 utils.SafeEncode(lvdata)) 3151 elif not isinstance(lvdata, dict): 3152 _ErrorIf(True, constants.CV_ENODELVM, node, 3153 "rpc call to node failed (lvlist)") 3154 else: 3155 nimg.volumes = lvdata 3156 nimg.lvm_fail = False
3157
3158 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3159 """Verifies and updates the node instance list. 3160 3161 If the listing was successful, then updates this node's instance 3162 list. Otherwise, it marks the RPC call as failed for the instance 3163 list key. 3164 3165 @type ninfo: L{objects.Node} 3166 @param ninfo: the node to check 3167 @param nresult: the remote results for the node 3168 @param nimg: the node image object 3169 3170 """ 3171 idata = nresult.get(constants.NV_INSTANCELIST, None) 3172 test = not isinstance(idata, list) 3173 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name, 3174 "rpc call to node failed (instancelist): %s", 3175 utils.SafeEncode(str(idata))) 3176 if test: 3177 nimg.hyp_fail = True 3178 else: 3179 nimg.instances = idata
3180
3181 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3182 """Verifies and computes a node information map 3183 3184 @type ninfo: L{objects.Node} 3185 @param ninfo: the node to check 3186 @param nresult: the remote results for the node 3187 @param nimg: the node image object 3188 @param vg_name: the configured VG name 3189 3190 """ 3191 node = ninfo.name 3192 _ErrorIf = self._ErrorIf # pylint: disable=C0103 3193 3194 # try to read free memory (from the hypervisor) 3195 hv_info = nresult.get(constants.NV_HVINFO, None) 3196 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 3197 _ErrorIf(test, constants.CV_ENODEHV, node, 3198 "rpc call to node failed (hvinfo)") 3199 if not test: 3200 try: 3201 nimg.mfree = int(hv_info["memory_free"]) 3202 except (ValueError, TypeError): 3203 _ErrorIf(True, constants.CV_ENODERPC, node, 3204 "node returned invalid nodeinfo, check hypervisor") 3205 3206 # FIXME: devise a free space model for file based instances as well 3207 if vg_name is not None: 3208 test = (constants.NV_VGLIST not in nresult or 3209 vg_name not in nresult[constants.NV_VGLIST]) 3210 _ErrorIf(test, constants.CV_ENODELVM, node, 3211 "node didn't return data for the volume group '%s'" 3212 " - it is either missing or broken", vg_name) 3213 if not test: 3214 try: 3215 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 3216 except (ValueError, TypeError): 3217 _ErrorIf(True, constants.CV_ENODERPC, node, 3218 "node returned invalid LVM info, check LVM status")
3219
3220 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3221      """Gets per-disk status information for all instances.
3222  
3223      @type nodelist: list of strings
3224      @param nodelist: Node names
3225      @type node_image: dict of (name, L{objects.Node})
3226      @param node_image: Node objects
3227      @type instanceinfo: dict of (name, L{objects.Instance})
3228      @param instanceinfo: Instance objects
3229      @rtype: {instance: {node: [(success, payload)]}}
3230      @return: a dictionary of per-instance dictionaries with nodes as
3231          keys and disk information as values; the disk information is a
3232          list of tuples (success, payload)
3233  
3234      """
3235      _ErrorIf = self._ErrorIf # pylint: disable=C0103
3236  
3237      node_disks = {}
3238      node_disks_devonly = {}
3239      diskless_instances = set()
3240      diskless = constants.DT_DISKLESS
3241  
3242      for nname in nodelist:
3243        node_instances = list(itertools.chain(node_image[nname].pinst,
3244                                              node_image[nname].sinst))
3245        diskless_instances.update(inst for inst in node_instances
3246                                  if instanceinfo[inst].disk_template == diskless)
3247        disks = [(inst, disk)
3248                 for inst in node_instances
3249                 for disk in instanceinfo[inst].disks]
3250  
3251        if not disks:
3252          # No need to collect data
3253          continue
3254  
3255        node_disks[nname] = disks
3256  
3257        # _AnnotateDiskParams already makes copies of the disks
3258        devonly = []
3259        for (inst, dev) in disks:
3260          (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev],