Source Code for Module ganeti.cmdlib

    1  # 
    2  # 
    3   
    4  # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc. 
    5  # 
    6  # This program is free software; you can redistribute it and/or modify 
    7  # it under the terms of the GNU General Public License as published by 
    8  # the Free Software Foundation; either version 2 of the License, or 
    9  # (at your option) any later version. 
   10  # 
   11  # This program is distributed in the hope that it will be useful, but 
   12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
   13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
   14  # General Public License for more details. 
   15  # 
   16  # You should have received a copy of the GNU General Public License 
   17  # along with this program; if not, write to the Free Software 
   18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
   19  # 02110-1301, USA. 
   20   
   21   
   22  """Module implementing the master-side code.""" 
   23   
   24  # pylint: disable-msg=W0201,C0302 
   25   
   26  # W0201 since most LU attributes are defined in CheckPrereq or similar 
   27  # functions 
   28   
    29  # C0302: since we have waaaay too many lines in this module 
   30   
   31  import os 
   32  import os.path 
   33  import time 
   34  import re 
   35  import platform 
   36  import logging 
   37  import copy 
   38  import OpenSSL 
   39  import socket 
   40  import tempfile 
   41  import shutil 
   42   
   43  from ganeti import ssh 
   44  from ganeti import utils 
   45  from ganeti import errors 
   46  from ganeti import hypervisor 
   47  from ganeti import locking 
   48  from ganeti import constants 
   49  from ganeti import objects 
   50  from ganeti import serializer 
   51  from ganeti import ssconf 
   52  from ganeti import uidpool 
   53  from ganeti import compat 
   54  from ganeti import masterd 
   55  from ganeti import netutils 
   56  from ganeti import ht 
   57   
   58  import ganeti.masterd.instance # pylint: disable-msg=W0611 
   59   
   60  # Common opcode attributes 
   61   
   62  #: output fields for a query operation 
   63  _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)) 
   64   
   65   
   66  #: the shutdown timeout 
   67  _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, 
   68                       ht.TPositiveInt) 
   69   
   70  #: the force parameter 
   71  _PForce = ("force", False, ht.TBool) 
   72   
   73  #: a required instance name (for single-instance LUs) 
   74  _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString) 
   75   
   76  #: Whether to ignore offline nodes 
   77  _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool) 
   78   
   79  #: a required node name (for single-node LUs) 
   80  _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString) 
   81   
   82  #: the migration type (live/non-live) 
   83  _PMigrationMode = ("mode", None, 
   84                     ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES))) 
   85   
   86  #: the obsolete 'live' mode (boolean) 
   87  _PMigrationLive = ("live", None, ht.TMaybeBool) 
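# Illustrative sketch (hypothetical opcode, not part of this module): an LU
# combines the common parameter tuples above with its own entries in its
# _OP_PARAMS list, e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     _PShutdownTimeout,
#     ("ignore_failures", False, ht.TBool),
#     ]
#
# Each tuple is (attribute name, default value or ht.NoDefault, type test);
# the defaults and type checks are applied by LogicalUnit.__init__ below.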
    88  
    89  
    90  # End types
    91  class LogicalUnit(object):
    92    """Logical Unit base class.
    93  
    94    Subclasses must follow these rules:
    95      - implement ExpandNames
    96      - implement CheckPrereq (except when tasklets are used)
    97      - implement Exec (except when tasklets are used)
    98      - implement BuildHooksEnv
    99      - redefine HPATH and HTYPE
   100      - optionally redefine their run requirements:
   101          REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
   102  
   103    Note that all commands require root permissions.
   104  
   105    @ivar dry_run_result: the value (if any) that will be returned to the caller
   106      in dry-run mode (signalled by the opcode dry_run parameter)
   107    @cvar _OP_PARAMS: a list of opcode attributes, the default values
   108      they should get if not already defined, and the types they must match
   109  
   110    """
   111    HPATH = None
   112    HTYPE = None
   113    _OP_PARAMS = []
   114    REQ_BGL = True
   115  
116 - def __init__(self, processor, op, context, rpc):
117 """Constructor for LogicalUnit. 118 119 This needs to be overridden in derived classes in order to check op 120 validity. 121 122 """ 123 self.proc = processor 124 self.op = op 125 self.cfg = context.cfg 126 self.context = context 127 self.rpc = rpc 128 # Dicts used to declare locking needs to mcpu 129 self.needed_locks = None 130 self.acquired_locks = {} 131 self.share_locks = dict.fromkeys(locking.LEVELS, 0) 132 self.add_locks = {} 133 self.remove_locks = {} 134 # Used to force good behavior when calling helper functions 135 self.recalculate_locks = {} 136 self.__ssh = None 137 # logging 138 self.Log = processor.Log # pylint: disable-msg=C0103 139 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103 140 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103 141 self.LogStep = processor.LogStep # pylint: disable-msg=C0103 142 # support for dry-run 143 self.dry_run_result = None 144 # support for generic debug attribute 145 if (not hasattr(self.op, "debug_level") or 146 not isinstance(self.op.debug_level, int)): 147 self.op.debug_level = 0 148 149 # Tasklets 150 self.tasklets = None 151 152 # The new kind-of-type-system 153 op_id = self.op.OP_ID 154 for attr_name, aval, test in self._OP_PARAMS: 155 if not hasattr(op, attr_name): 156 if aval == ht.NoDefault: 157 raise errors.OpPrereqError("Required parameter '%s.%s' missing" % 158 (op_id, attr_name), errors.ECODE_INVAL) 159 else: 160 if callable(aval): 161 dval = aval() 162 else: 163 dval = aval 164 setattr(self.op, attr_name, dval) 165 attr_val = getattr(op, attr_name) 166 if test == ht.NoType: 167 # no tests here 168 continue 169 if not callable(test): 170 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed," 171 " given type is not a proper type (%s)" % 172 (op_id, attr_name, test)) 173 if not test(attr_val): 174 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s", 175 self.op.OP_ID, attr_name, type(attr_val), attr_val) 176 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" % 177 (op_id, attr_name), errors.ECODE_INVAL) 178 179 self.CheckArguments()
180
181 - def __GetSSH(self):
182 """Returns the SshRunner object 183 184 """ 185 if not self.__ssh: 186 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName()) 187 return self.__ssh
188 189 ssh = property(fget=__GetSSH) 190
   191    def CheckArguments(self):
   192      """Check the syntactic validity of the opcode arguments.
   193  
   194      This method is for doing a simple syntactic check and ensuring the
   195      validity of opcode parameters, without any cluster-related
   196      checks. While the same can be accomplished in ExpandNames and/or
   197      CheckPrereq, doing these separately is better because:
   198  
   199        - ExpandNames is left as purely a lock-related function
   200        - CheckPrereq is run after we have acquired locks (and possibly
   201          waited for them)
   202  
   203      The function is allowed to change the self.op attribute so that
   204      later methods need no longer worry about missing parameters.
   205  
   206      """
   207      pass
208
209 - def ExpandNames(self):
210 """Expand names for this LU. 211 212 This method is called before starting to execute the opcode, and it should 213 update all the parameters of the opcode to their canonical form (e.g. a 214 short node name must be fully expanded after this method has successfully 215 completed). This way locking, hooks, logging, ecc. can work correctly. 216 217 LUs which implement this method must also populate the self.needed_locks 218 member, as a dict with lock levels as keys, and a list of needed lock names 219 as values. Rules: 220 221 - use an empty dict if you don't need any lock 222 - if you don't need any lock at a particular level omit that level 223 - don't put anything for the BGL level 224 - if you want all locks at a level use locking.ALL_SET as a value 225 226 If you need to share locks (rather than acquire them exclusively) at one 227 level you can modify self.share_locks, setting a true value (usually 1) for 228 that level. By default locks are not shared. 229 230 This function can also define a list of tasklets, which then will be 231 executed in order instead of the usual LU-level CheckPrereq and Exec 232 functions, if those are not defined by the LU. 233 234 Examples:: 235 236 # Acquire all nodes and one instance 237 self.needed_locks = { 238 locking.LEVEL_NODE: locking.ALL_SET, 239 locking.LEVEL_INSTANCE: ['instance1.example.com'], 240 } 241 # Acquire just two nodes 242 self.needed_locks = { 243 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], 244 } 245 # Acquire no locks 246 self.needed_locks = {} # No, you can't leave it to the default value None 247 248 """ 249 # The implementation of this method is mandatory only if the new LU is 250 # concurrent, so that old LUs don't need to be changed all at the same 251 # time. 252 if self.REQ_BGL: 253 self.needed_locks = {} # Exclusive LUs don't need locks. 254 else: 255 raise NotImplementedError
256
257 - def DeclareLocks(self, level):
258 """Declare LU locking needs for a level 259 260 While most LUs can just declare their locking needs at ExpandNames time, 261 sometimes there's the need to calculate some locks after having acquired 262 the ones before. This function is called just before acquiring locks at a 263 particular level, but after acquiring the ones at lower levels, and permits 264 such calculations. It can be used to modify self.needed_locks, and by 265 default it does nothing. 266 267 This function is only called if you have something already set in 268 self.needed_locks for the level. 269 270 @param level: Locking level which is going to be locked 271 @type level: member of ganeti.locking.LEVELS 272 273 """
274
275 - def CheckPrereq(self):
276 """Check prerequisites for this LU. 277 278 This method should check that the prerequisites for the execution 279 of this LU are fulfilled. It can do internode communication, but 280 it should be idempotent - no cluster or system changes are 281 allowed. 282 283 The method should raise errors.OpPrereqError in case something is 284 not fulfilled. Its return value is ignored. 285 286 This method should also update all the parameters of the opcode to 287 their canonical form if it hasn't been done by ExpandNames before. 288 289 """ 290 if self.tasklets is not None: 291 for (idx, tl) in enumerate(self.tasklets): 292 logging.debug("Checking prerequisites for tasklet %s/%s", 293 idx + 1, len(self.tasklets)) 294 tl.CheckPrereq() 295 else: 296 pass
297
298 - def Exec(self, feedback_fn):
299 """Execute the LU. 300 301 This method should implement the actual work. It should raise 302 errors.OpExecError for failures that are somewhat dealt with in 303 code, or expected. 304 305 """ 306 if self.tasklets is not None: 307 for (idx, tl) in enumerate(self.tasklets): 308 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets)) 309 tl.Exec(feedback_fn) 310 else: 311 raise NotImplementedError
312
   313    def BuildHooksEnv(self):
   314      """Build hooks environment for this LU.
   315  
   316      This method should return a three-element tuple consisting of: a dict
   317      containing the environment that will be used for running the
   318      specific hook for this LU, a list of node names on which the hook
   319      should run before the execution, and a list of node names on which
   320      the hook should run after the execution.
   321  
   322      The keys of the dict must not be prefixed with 'GANETI_', as this will
   323      be handled by the hooks runner. Also note that additional keys will be
   324      added by the hooks runner. If the LU doesn't define any
   325      environment, an empty dict (and not None) should be returned.
   326  
   327      'No nodes' should be expressed as an empty list (and not None).
   328  
   329      Note that if the HPATH for a LU class is None, this function will
   330      not be called.
   331  
   332      """
   333      raise NotImplementedError
334
   335    def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
   336      """Notify the LU about the results of its hooks.
   337  
   338      This method is called every time a hooks phase is executed, and notifies
   339      the Logical Unit about the hooks' result. The LU can then use it to alter
   340      its result based on the hooks. By default the method does nothing and the
   341      previous result is passed back unchanged, but any LU can override it if it
   342      wants to use the local cluster hook-scripts somehow.
   343  
   344      @param phase: one of L{constants.HOOKS_PHASE_POST} or
   345        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
   346      @param hook_results: the results of the multi-node hooks rpc call
   347      @param feedback_fn: function used to send feedback back to the caller
   348      @param lu_result: the previous Exec result this LU had, or None
   349        in the PRE phase
   350      @return: the new Exec result, based on the previous result
   351        and hook results
   352  
   353      """
   354      # API must be kept, thus we ignore the "unused argument" and "could
   355      # be a function" warnings
   356      # pylint: disable-msg=W0613,R0201
   357      return lu_result
358
359 - def _ExpandAndLockInstance(self):
360 """Helper function to expand and lock an instance. 361 362 Many LUs that work on an instance take its name in self.op.instance_name 363 and need to expand it and then declare the expanded name for locking. This 364 function does it, and then updates self.op.instance_name to the expanded 365 name. It also initializes needed_locks as a dict, if this hasn't been done 366 before. 367 368 """ 369 if self.needed_locks is None: 370 self.needed_locks = {} 371 else: 372 assert locking.LEVEL_INSTANCE not in self.needed_locks, \ 373 "_ExpandAndLockInstance called with instance-level locks set" 374 self.op.instance_name = _ExpandInstanceName(self.cfg, 375 self.op.instance_name) 376 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
377
378 - def _LockInstancesNodes(self, primary_only=False):
379 """Helper function to declare instances' nodes for locking. 380 381 This function should be called after locking one or more instances to lock 382 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE] 383 with all primary or secondary nodes for instances already locked and 384 present in self.needed_locks[locking.LEVEL_INSTANCE]. 385 386 It should be called from DeclareLocks, and for safety only works if 387 self.recalculate_locks[locking.LEVEL_NODE] is set. 388 389 In the future it may grow parameters to just lock some instance's nodes, or 390 to just lock primaries or secondary nodes, if needed. 391 392 If should be called in DeclareLocks in a way similar to:: 393 394 if level == locking.LEVEL_NODE: 395 self._LockInstancesNodes() 396 397 @type primary_only: boolean 398 @param primary_only: only lock primary nodes of locked instances 399 400 """ 401 assert locking.LEVEL_NODE in self.recalculate_locks, \ 402 "_LockInstancesNodes helper function called with no nodes to recalculate" 403 404 # TODO: check if we're really been called with the instance locks held 405 406 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the 407 # future we might want to have different behaviors depending on the value 408 # of self.recalculate_locks[locking.LEVEL_NODE] 409 wanted_nodes = [] 410 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]: 411 instance = self.context.cfg.GetInstanceInfo(instance_name) 412 wanted_nodes.append(instance.primary_node) 413 if not primary_only: 414 wanted_nodes.extend(instance.secondary_nodes) 415 416 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE: 417 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes 418 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND: 419 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes) 420 421 del self.recalculate_locks[locking.LEVEL_NODE]
422
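# Illustrative sketch of a minimal LU following the rules documented in the
# LogicalUnit docstring (hypothetical class, opcode and hook path; not part
# of the upstream module):
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "instance-noop"
#     HTYPE = constants.HTYPE_INSTANCE
#     _OP_PARAMS = [_PInstanceName]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def BuildHooksEnv(self):
#       env = {"OP_TARGET": self.op.instance_name}
#       return env, [self.cfg.GetMasterNode()], []
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("No-op for instance %s" % self.instance.name)
#       return self.instance.name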
423 424 -class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
425 """Simple LU which runs no hooks. 426 427 This LU is intended as a parent for other LogicalUnits which will 428 run no hooks, in order to reduce duplicate code. 429 430 """ 431 HPATH = None 432 HTYPE = None 433
434 - def BuildHooksEnv(self):
435 """Empty BuildHooksEnv for NoHooksLu. 436 437 This just raises an error. 438 439 """ 440 assert False, "BuildHooksEnv called for NoHooksLUs"
441
442 443 -class Tasklet:
444 """Tasklet base class. 445 446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or 447 they can mix legacy code with tasklets. Locking needs to be done in the LU, 448 tasklets know nothing about locks. 449 450 Subclasses must follow these rules: 451 - Implement CheckPrereq 452 - Implement Exec 453 454 """
455 - def __init__(self, lu):
456 self.lu = lu 457 458 # Shortcuts 459 self.cfg = lu.cfg 460 self.rpc = lu.rpc
461
462 - def CheckPrereq(self):
463 """Check prerequisites for this tasklets. 464 465 This method should check whether the prerequisites for the execution of 466 this tasklet are fulfilled. It can do internode communication, but it 467 should be idempotent - no cluster or system changes are allowed. 468 469 The method should raise errors.OpPrereqError in case something is not 470 fulfilled. Its return value is ignored. 471 472 This method should also update all parameters to their canonical form if it 473 hasn't been done before. 474 475 """ 476 pass
477
478 - def Exec(self, feedback_fn):
479 """Execute the tasklet. 480 481 This method should implement the actual work. It should raise 482 errors.OpExecError for failures that are somewhat dealt with in code, or 483 expected. 484 485 """ 486 raise NotImplementedError
487
488 489 -def _GetWantedNodes(lu, nodes):
490 """Returns list of checked and expanded node names. 491 492 @type lu: L{LogicalUnit} 493 @param lu: the logical unit on whose behalf we execute 494 @type nodes: list 495 @param nodes: list of node names or None for all nodes 496 @rtype: list 497 @return: the list of nodes, sorted 498 @raise errors.ProgrammerError: if the nodes parameter is wrong type 499 500 """ 501 if not nodes: 502 raise errors.ProgrammerError("_GetWantedNodes should only be called with a" 503 " non-empty list of nodes whose name is to be expanded.") 504 505 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes] 506 return utils.NiceSort(wanted)
507
508 509 -def _GetWantedInstances(lu, instances):
510 """Returns list of checked and expanded instance names. 511 512 @type lu: L{LogicalUnit} 513 @param lu: the logical unit on whose behalf we execute 514 @type instances: list 515 @param instances: list of instance names or None for all instances 516 @rtype: list 517 @return: the list of instances, sorted 518 @raise errors.OpPrereqError: if the instances parameter is wrong type 519 @raise errors.OpPrereqError: if any of the passed instances is not found 520 521 """ 522 if instances: 523 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances] 524 else: 525 wanted = utils.NiceSort(lu.cfg.GetInstanceList()) 526 return wanted
527
   528  
   529  def _GetUpdatedParams(old_params, update_dict,
   530                        use_default=True, use_none=False):
   531    """Return the new version of a parameter dictionary.
   532  
   533    @type old_params: dict
   534    @param old_params: old parameters
   535    @type update_dict: dict
   536    @param update_dict: dict containing new parameter values, or
   537      constants.VALUE_DEFAULT to reset the parameter to its default
   538      value
   539    @type use_default: boolean
   540    @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
   541      values as 'to be deleted' values
   542    @type use_none: boolean
   543    @param use_none: whether to recognise C{None} values as 'to be
   544      deleted' values
   545    @rtype: dict
   546    @return: the new parameter dictionary
   547  
   548    """
   549    params_copy = copy.deepcopy(old_params)
   550    for key, val in update_dict.iteritems():
   551      if ((use_default and val == constants.VALUE_DEFAULT) or
   552          (use_none and val is None)):
   553        try:
   554          del params_copy[key]
   555        except KeyError:
   556          pass
   557      else:
   558        params_copy[key] = val
   559    return params_copy
560
561 562 -def _CheckOutputFields(static, dynamic, selected):
563 """Checks whether all selected fields are valid. 564 565 @type static: L{utils.FieldSet} 566 @param static: static fields set 567 @type dynamic: L{utils.FieldSet} 568 @param dynamic: dynamic fields set 569 570 """ 571 f = utils.FieldSet() 572 f.Extend(static) 573 f.Extend(dynamic) 574 575 delta = f.NonMatching(selected) 576 if delta: 577 raise errors.OpPrereqError("Unknown output fields selected: %s" 578 % ",".join(delta), errors.ECODE_INVAL)
579
580 581 -def _CheckGlobalHvParams(params):
582 """Validates that given hypervisor params are not global ones. 583 584 This will ensure that instances don't get customised versions of 585 global params. 586 587 """ 588 used_globals = constants.HVC_GLOBALS.intersection(params) 589 if used_globals: 590 msg = ("The following hypervisor parameters are global and cannot" 591 " be customized at instance level, please modify them at" 592 " cluster level: %s" % utils.CommaJoin(used_globals)) 593 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
594
595 596 -def _CheckNodeOnline(lu, node, msg=None):
597 """Ensure that a given node is online. 598 599 @param lu: the LU on behalf of which we make the check 600 @param node: the node to check 601 @param msg: if passed, should be a message to replace the default one 602 @raise errors.OpPrereqError: if the node is offline 603 604 """ 605 if msg is None: 606 msg = "Can't use offline node" 607 if lu.cfg.GetNodeInfo(node).offline: 608 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
609
610 611 -def _CheckNodeNotDrained(lu, node):
612 """Ensure that a given node is not drained. 613 614 @param lu: the LU on behalf of which we make the check 615 @param node: the node to check 616 @raise errors.OpPrereqError: if the node is drained 617 618 """ 619 if lu.cfg.GetNodeInfo(node).drained: 620 raise errors.OpPrereqError("Can't use drained node %s" % node, 621 errors.ECODE_STATE)
622
623 624 -def _CheckNodeVmCapable(lu, node):
625 """Ensure that a given node is vm capable. 626 627 @param lu: the LU on behalf of which we make the check 628 @param node: the node to check 629 @raise errors.OpPrereqError: if the node is not vm capable 630 631 """ 632 if not lu.cfg.GetNodeInfo(node).vm_capable: 633 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node, 634 errors.ECODE_STATE)
635
636 637 -def _CheckNodeHasOS(lu, node, os_name, force_variant):
638 """Ensure that a node supports a given OS. 639 640 @param lu: the LU on behalf of which we make the check 641 @param node: the node to check 642 @param os_name: the OS to query about 643 @param force_variant: whether to ignore variant errors 644 @raise errors.OpPrereqError: if the node is not supporting the OS 645 646 """ 647 result = lu.rpc.call_os_get(node, os_name) 648 result.Raise("OS '%s' not in supported OS list for node %s" % 649 (os_name, node), 650 prereq=True, ecode=errors.ECODE_INVAL) 651 if not force_variant: 652 _CheckOSVariant(result.payload, os_name)
653
654 655 -def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
656 """Ensure that a node has the given secondary ip. 657 658 @type lu: L{LogicalUnit} 659 @param lu: the LU on behalf of which we make the check 660 @type node: string 661 @param node: the node to check 662 @type secondary_ip: string 663 @param secondary_ip: the ip to check 664 @type prereq: boolean 665 @param prereq: whether to throw a prerequisite or an execute error 666 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True 667 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False 668 669 """ 670 result = lu.rpc.call_node_has_ip_address(node, secondary_ip) 671 result.Raise("Failure checking secondary ip on node %s" % node, 672 prereq=prereq, ecode=errors.ECODE_ENVIRON) 673 if not result.payload: 674 msg = ("Node claims it doesn't have the secondary ip you gave (%s)," 675 " please fix and re-run this command" % secondary_ip) 676 if prereq: 677 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON) 678 else: 679 raise errors.OpExecError(msg)
680
681 682 -def _RequireFileStorage():
683 """Checks that file storage is enabled. 684 685 @raise errors.OpPrereqError: when file storage is disabled 686 687 """ 688 if not constants.ENABLE_FILE_STORAGE: 689 raise errors.OpPrereqError("File storage disabled at configure time", 690 errors.ECODE_INVAL)
691
692 693 -def _CheckDiskTemplate(template):
694 """Ensure a given disk template is valid. 695 696 """ 697 if template not in constants.DISK_TEMPLATES: 698 msg = ("Invalid disk template name '%s', valid templates are: %s" % 699 (template, utils.CommaJoin(constants.DISK_TEMPLATES))) 700 raise errors.OpPrereqError(msg, errors.ECODE_INVAL) 701 if template == constants.DT_FILE: 702 _RequireFileStorage() 703 return True
704
705 706 -def _CheckStorageType(storage_type):
707 """Ensure a given storage type is valid. 708 709 """ 710 if storage_type not in constants.VALID_STORAGE_TYPES: 711 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type, 712 errors.ECODE_INVAL) 713 if storage_type == constants.ST_FILE: 714 _RequireFileStorage() 715 return True
716
717 718 -def _GetClusterDomainSecret():
719 """Reads the cluster domain secret. 720 721 """ 722 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE, 723 strict=True)
724
725 726 -def _CheckInstanceDown(lu, instance, reason):
727 """Ensure that an instance is not running.""" 728 if instance.admin_up: 729 raise errors.OpPrereqError("Instance %s is marked to be up, %s" % 730 (instance.name, reason), errors.ECODE_STATE) 731 732 pnode = instance.primary_node 733 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode] 734 ins_l.Raise("Can't contact node %s for instance information" % pnode, 735 prereq=True, ecode=errors.ECODE_ENVIRON) 736 737 if instance.name in ins_l.payload: 738 raise errors.OpPrereqError("Instance %s is running, %s" % 739 (instance.name, reason), errors.ECODE_STATE)
740
741 742 -def _ExpandItemName(fn, name, kind):
743 """Expand an item name. 744 745 @param fn: the function to use for expansion 746 @param name: requested item name 747 @param kind: text description ('Node' or 'Instance') 748 @return: the resolved (full) name 749 @raise errors.OpPrereqError: if the item is not found 750 751 """ 752 full_name = fn(name) 753 if full_name is None: 754 raise errors.OpPrereqError("%s '%s' not known" % (kind, name), 755 errors.ECODE_NOENT) 756 return full_name
757
758 759 -def _ExpandNodeName(cfg, name):
760 """Wrapper over L{_ExpandItemName} for nodes.""" 761 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
762
763 764 -def _ExpandInstanceName(cfg, name):
765 """Wrapper over L{_ExpandItemName} for instance.""" 766 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
767
768 769 -def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status, 770 memory, vcpus, nics, disk_template, disks, 771 bep, hvp, hypervisor_name):
772 """Builds instance related env variables for hooks 773 774 This builds the hook environment from individual variables. 775 776 @type name: string 777 @param name: the name of the instance 778 @type primary_node: string 779 @param primary_node: the name of the instance's primary node 780 @type secondary_nodes: list 781 @param secondary_nodes: list of secondary nodes as strings 782 @type os_type: string 783 @param os_type: the name of the instance's OS 784 @type status: boolean 785 @param status: the should_run status of the instance 786 @type memory: string 787 @param memory: the memory size of the instance 788 @type vcpus: string 789 @param vcpus: the count of VCPUs the instance has 790 @type nics: list 791 @param nics: list of tuples (ip, mac, mode, link) representing 792 the NICs the instance has 793 @type disk_template: string 794 @param disk_template: the disk template of the instance 795 @type disks: list 796 @param disks: the list of (size, mode) pairs 797 @type bep: dict 798 @param bep: the backend parameters for the instance 799 @type hvp: dict 800 @param hvp: the hypervisor parameters for the instance 801 @type hypervisor_name: string 802 @param hypervisor_name: the hypervisor for the instance 803 @rtype: dict 804 @return: the hook environment for this instance 805 806 """ 807 if status: 808 str_status = "up" 809 else: 810 str_status = "down" 811 env = { 812 "OP_TARGET": name, 813 "INSTANCE_NAME": name, 814 "INSTANCE_PRIMARY": primary_node, 815 "INSTANCE_SECONDARIES": " ".join(secondary_nodes), 816 "INSTANCE_OS_TYPE": os_type, 817 "INSTANCE_STATUS": str_status, 818 "INSTANCE_MEMORY": memory, 819 "INSTANCE_VCPUS": vcpus, 820 "INSTANCE_DISK_TEMPLATE": disk_template, 821 "INSTANCE_HYPERVISOR": hypervisor_name, 822 } 823 824 if nics: 825 nic_count = len(nics) 826 for idx, (ip, mac, mode, link) in enumerate(nics): 827 if ip is None: 828 ip = "" 829 env["INSTANCE_NIC%d_IP" % idx] = ip 830 env["INSTANCE_NIC%d_MAC" % idx] = mac 831 env["INSTANCE_NIC%d_MODE" % idx] = mode 832 env["INSTANCE_NIC%d_LINK" % idx] = link 833 if mode == constants.NIC_MODE_BRIDGED: 834 env["INSTANCE_NIC%d_BRIDGE" % idx] = link 835 else: 836 nic_count = 0 837 838 env["INSTANCE_NIC_COUNT"] = nic_count 839 840 if disks: 841 disk_count = len(disks) 842 for idx, (size, mode) in enumerate(disks): 843 env["INSTANCE_DISK%d_SIZE" % idx] = size 844 env["INSTANCE_DISK%d_MODE" % idx] = mode 845 else: 846 disk_count = 0 847 848 env["INSTANCE_DISK_COUNT"] = disk_count 849 850 for source, kind in [(bep, "BE"), (hvp, "HV")]: 851 for key, value in source.items(): 852 env["INSTANCE_%s_%s" % (kind, key)] = value 853 854 return env
855
856 857 -def _NICListToTuple(lu, nics):
858 """Build a list of nic information tuples. 859 860 This list is suitable to be passed to _BuildInstanceHookEnv or as a return 861 value in LUQueryInstanceData. 862 863 @type lu: L{LogicalUnit} 864 @param lu: the logical unit on whose behalf we execute 865 @type nics: list of L{objects.NIC} 866 @param nics: list of nics to convert to hooks tuples 867 868 """ 869 hooks_nics = [] 870 cluster = lu.cfg.GetClusterInfo() 871 for nic in nics: 872 ip = nic.ip 873 mac = nic.mac 874 filled_params = cluster.SimpleFillNIC(nic.nicparams) 875 mode = filled_params[constants.NIC_MODE] 876 link = filled_params[constants.NIC_LINK] 877 hooks_nics.append((ip, mac, mode, link)) 878 return hooks_nics
879
880 881 -def _BuildInstanceHookEnvByObject(lu, instance, override=None):
882 """Builds instance related env variables for hooks from an object. 883 884 @type lu: L{LogicalUnit} 885 @param lu: the logical unit on whose behalf we execute 886 @type instance: L{objects.Instance} 887 @param instance: the instance for which we should build the 888 environment 889 @type override: dict 890 @param override: dictionary with key/values that will override 891 our values 892 @rtype: dict 893 @return: the hook environment dictionary 894 895 """ 896 cluster = lu.cfg.GetClusterInfo() 897 bep = cluster.FillBE(instance) 898 hvp = cluster.FillHV(instance) 899 args = { 900 'name': instance.name, 901 'primary_node': instance.primary_node, 902 'secondary_nodes': instance.secondary_nodes, 903 'os_type': instance.os, 904 'status': instance.admin_up, 905 'memory': bep[constants.BE_MEMORY], 906 'vcpus': bep[constants.BE_VCPUS], 907 'nics': _NICListToTuple(lu, instance.nics), 908 'disk_template': instance.disk_template, 909 'disks': [(disk.size, disk.mode) for disk in instance.disks], 910 'bep': bep, 911 'hvp': hvp, 912 'hypervisor_name': instance.hypervisor, 913 } 914 if override: 915 args.update(override) 916 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
917
918 919 -def _AdjustCandidatePool(lu, exceptions):
920 """Adjust the candidate pool after node operations. 921 922 """ 923 mod_list = lu.cfg.MaintainCandidatePool(exceptions) 924 if mod_list: 925 lu.LogInfo("Promoted nodes to master candidate role: %s", 926 utils.CommaJoin(node.name for node in mod_list)) 927 for name in mod_list: 928 lu.context.ReaddNode(name) 929 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions) 930 if mc_now > mc_max: 931 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" % 932 (mc_now, mc_max))
933
934 935 -def _DecideSelfPromotion(lu, exceptions=None):
936 """Decide whether I should promote myself as a master candidate. 937 938 """ 939 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size 940 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions) 941 # the new node will increase mc_max with one, so: 942 mc_should = min(mc_should + 1, cp_size) 943 return mc_now < mc_should
944
   945  
   946  def _CheckNicsBridgesExist(lu, target_nics, target_node):
   947    """Check that the bridges needed by a list of nics exist.
   948  
   949    """
   950    cluster = lu.cfg.GetClusterInfo()
   951    paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
   952    brlist = [params[constants.NIC_LINK] for params in paramslist
   953              if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
   954    if brlist:
   955      result = lu.rpc.call_bridges_exist(target_node, brlist)
   956      result.Raise("Error checking bridges on destination node '%s'" %
   957                   target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
958
   959  
   960  def _CheckInstanceBridgesExist(lu, instance, node=None):
   961    """Check that the bridges needed by an instance exist.
   962  
   963    """
   964    if node is None:
   965      node = instance.primary_node
   966    _CheckNicsBridgesExist(lu, instance.nics, node)
967
968 969 -def _CheckOSVariant(os_obj, name):
970 """Check whether an OS name conforms to the os variants specification. 971 972 @type os_obj: L{objects.OS} 973 @param os_obj: OS object to check 974 @type name: string 975 @param name: OS name passed by the user, to check for validity 976 977 """ 978 if not os_obj.supported_variants: 979 return 980 variant = objects.OS.GetVariant(name) 981 if not variant: 982 raise errors.OpPrereqError("OS name must include a variant", 983 errors.ECODE_INVAL) 984 985 if variant not in os_obj.supported_variants: 986 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
987
988 989 -def _GetNodeInstancesInner(cfg, fn):
990 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
991
992 993 -def _GetNodeInstances(cfg, node_name):
994 """Returns a list of all primary and secondary instances on a node. 995 996 """ 997 998 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
999
1000 1001 -def _GetNodePrimaryInstances(cfg, node_name):
1002 """Returns primary instances on a node. 1003 1004 """ 1005 return _GetNodeInstancesInner(cfg, 1006 lambda inst: node_name == inst.primary_node)
1007
1008 1009 -def _GetNodeSecondaryInstances(cfg, node_name):
1010 """Returns secondary instances on a node. 1011 1012 """ 1013 return _GetNodeInstancesInner(cfg, 1014 lambda inst: node_name in inst.secondary_nodes)
1015
1016 1017 -def _GetStorageTypeArgs(cfg, storage_type):
1018 """Returns the arguments for a storage type. 1019 1020 """ 1021 # Special case for file storage 1022 if storage_type == constants.ST_FILE: 1023 # storage.FileStorage wants a list of storage directories 1024 return [[cfg.GetFileStorageDir()]] 1025 1026 return []
1027
1028 1029 -def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1030 faulty = [] 1031 1032 for dev in instance.disks: 1033 cfg.SetDiskID(dev, node_name) 1034 1035 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks) 1036 result.Raise("Failed to get disk status from node %s" % node_name, 1037 prereq=prereq, ecode=errors.ECODE_ENVIRON) 1038 1039 for idx, bdev_status in enumerate(result.payload): 1040 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY: 1041 faulty.append(idx) 1042 1043 return faulty
1044
1045 1046 -def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1047 """Check the sanity of iallocator and node arguments and use the 1048 cluster-wide iallocator if appropriate. 1049 1050 Check that at most one of (iallocator, node) is specified. If none is 1051 specified, then the LU's opcode's iallocator slot is filled with the 1052 cluster-wide default iallocator. 1053 1054 @type iallocator_slot: string 1055 @param iallocator_slot: the name of the opcode iallocator slot 1056 @type node_slot: string 1057 @param node_slot: the name of the opcode target node slot 1058 1059 """ 1060 node = getattr(lu.op, node_slot, None) 1061 iallocator = getattr(lu.op, iallocator_slot, None) 1062 1063 if node is not None and iallocator is not None: 1064 raise errors.OpPrereqError("Do not specify both, iallocator and node.", 1065 errors.ECODE_INVAL) 1066 elif node is None and iallocator is None: 1067 default_iallocator = lu.cfg.GetDefaultIAllocator() 1068 if default_iallocator: 1069 setattr(lu.op, iallocator_slot, default_iallocator) 1070 else: 1071 raise errors.OpPrereqError("No iallocator or node given and no" 1072 " cluster-wide default iallocator found." 1073 " Please specify either an iallocator or a" 1074 " node, or set a cluster-wide default" 1075 " iallocator.")
1076
1077 1078 -class LUPostInitCluster(LogicalUnit):
1079 """Logical unit for running hooks after cluster initialization. 1080 1081 """ 1082 HPATH = "cluster-init" 1083 HTYPE = constants.HTYPE_CLUSTER 1084
1085 - def BuildHooksEnv(self):
1086 """Build hooks env. 1087 1088 """ 1089 env = {"OP_TARGET": self.cfg.GetClusterName()} 1090 mn = self.cfg.GetMasterNode() 1091 return env, [], [mn]
1092
1093 - def Exec(self, feedback_fn):
1094 """Nothing to do. 1095 1096 """ 1097 return True
1098
1099 1100 -class LUDestroyCluster(LogicalUnit):
1101 """Logical unit for destroying the cluster. 1102 1103 """ 1104 HPATH = "cluster-destroy" 1105 HTYPE = constants.HTYPE_CLUSTER 1106
1107 - def BuildHooksEnv(self):
1108 """Build hooks env. 1109 1110 """ 1111 env = {"OP_TARGET": self.cfg.GetClusterName()} 1112 return env, [], []
1113
1114 - def CheckPrereq(self):
1115 """Check prerequisites. 1116 1117 This checks whether the cluster is empty. 1118 1119 Any errors are signaled by raising errors.OpPrereqError. 1120 1121 """ 1122 master = self.cfg.GetMasterNode() 1123 1124 nodelist = self.cfg.GetNodeList() 1125 if len(nodelist) != 1 or nodelist[0] != master: 1126 raise errors.OpPrereqError("There are still %d node(s) in" 1127 " this cluster." % (len(nodelist) - 1), 1128 errors.ECODE_INVAL) 1129 instancelist = self.cfg.GetInstanceList() 1130 if instancelist: 1131 raise errors.OpPrereqError("There are still %d instance(s) in" 1132 " this cluster." % len(instancelist), 1133 errors.ECODE_INVAL)
1134
1135 - def Exec(self, feedback_fn):
1136 """Destroys the cluster. 1137 1138 """ 1139 master = self.cfg.GetMasterNode() 1140 1141 # Run post hooks on master node before it's removed 1142 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) 1143 try: 1144 hm.RunPhase(constants.HOOKS_PHASE_POST, [master]) 1145 except: 1146 # pylint: disable-msg=W0702 1147 self.LogWarning("Errors occurred running hooks on %s" % master) 1148 1149 result = self.rpc.call_node_stop_master(master, False) 1150 result.Raise("Could not disable the master role") 1151 1152 return master
1153
1154 1155 -def _VerifyCertificate(filename):
1156 """Verifies a certificate for LUVerifyCluster. 1157 1158 @type filename: string 1159 @param filename: Path to PEM file 1160 1161 """ 1162 try: 1163 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, 1164 utils.ReadFile(filename)) 1165 except Exception, err: # pylint: disable-msg=W0703 1166 return (LUVerifyCluster.ETYPE_ERROR, 1167 "Failed to load X509 certificate %s: %s" % (filename, err)) 1168 1169 (errcode, msg) = \ 1170 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, 1171 constants.SSL_CERT_EXPIRATION_ERROR) 1172 1173 if msg: 1174 fnamemsg = "While verifying %s: %s" % (filename, msg) 1175 else: 1176 fnamemsg = None 1177 1178 if errcode is None: 1179 return (None, fnamemsg) 1180 elif errcode == utils.CERT_WARNING: 1181 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg) 1182 elif errcode == utils.CERT_ERROR: 1183 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg) 1184 1185 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1186
1187 1188 -class LUVerifyCluster(LogicalUnit):
1189 """Verifies the cluster status. 1190 1191 """ 1192 HPATH = "cluster-verify" 1193 HTYPE = constants.HTYPE_CLUSTER 1194 _OP_PARAMS = [ 1195 ("skip_checks", ht.EmptyList, 1196 ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))), 1197 ("verbose", False, ht.TBool), 1198 ("error_codes", False, ht.TBool), 1199 ("debug_simulate_errors", False, ht.TBool), 1200 ] 1201 REQ_BGL = False 1202 1203 TCLUSTER = "cluster" 1204 TNODE = "node" 1205 TINSTANCE = "instance" 1206 1207 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG") 1208 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT") 1209 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE") 1210 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN") 1211 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT") 1212 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK") 1213 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK") 1214 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE") 1215 ENODEDRBD = (TNODE, "ENODEDRBD") 1216 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER") 1217 ENODEFILECHECK = (TNODE, "ENODEFILECHECK") 1218 ENODEHOOKS = (TNODE, "ENODEHOOKS") 1219 ENODEHV = (TNODE, "ENODEHV") 1220 ENODELVM = (TNODE, "ENODELVM") 1221 ENODEN1 = (TNODE, "ENODEN1") 1222 ENODENET = (TNODE, "ENODENET") 1223 ENODEOS = (TNODE, "ENODEOS") 1224 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE") 1225 ENODEORPHANLV = (TNODE, "ENODEORPHANLV") 1226 ENODERPC = (TNODE, "ENODERPC") 1227 ENODESSH = (TNODE, "ENODESSH") 1228 ENODEVERSION = (TNODE, "ENODEVERSION") 1229 ENODESETUP = (TNODE, "ENODESETUP") 1230 ENODETIME = (TNODE, "ENODETIME") 1231 1232 ETYPE_FIELD = "code" 1233 ETYPE_ERROR = "ERROR" 1234 ETYPE_WARNING = "WARNING" 1235
  1236    class NodeImage(object):
  1237      """A class representing the logical and physical status of a node.
  1238  
  1239      @type name: string
  1240      @ivar name: the node name to which this object refers
  1241      @ivar volumes: a structure as returned from
  1242        L{ganeti.backend.GetVolumeList} (runtime)
  1243      @ivar instances: a list of running instances (runtime)
  1244      @ivar pinst: list of configured primary instances (config)
  1245      @ivar sinst: list of configured secondary instances (config)
  1246      @ivar sbp: dict of {secondary-node: list of instances} of all peers
  1247        of this node (config)
  1248      @ivar mfree: free memory, as reported by the hypervisor (runtime)
  1249      @ivar dfree: free disk, as reported by the node (runtime)
  1250      @ivar offline: the offline status (config)
  1251      @type rpc_fail: boolean
  1252      @ivar rpc_fail: whether the RPC verify call was successful (overall,
  1253        not whether the individual keys were correct) (runtime)
  1254      @type lvm_fail: boolean
  1255      @ivar lvm_fail: whether the RPC call didn't return valid LVM data
  1256      @type hyp_fail: boolean
  1257      @ivar hyp_fail: whether the RPC call didn't return the instance list
  1258      @type ghost: boolean
  1259      @ivar ghost: whether this is a known node or not (config)
  1260      @type os_fail: boolean
  1261      @ivar os_fail: whether the RPC call didn't return valid OS data
  1262      @type oslist: list
  1263      @ivar oslist: list of OSes as diagnosed by DiagnoseOS
  1264      @type vm_capable: boolean
  1265      @ivar vm_capable: whether the node can host instances
  1266  
  1267      """
1268 - def __init__(self, offline=False, name=None, vm_capable=True):
1269 self.name = name 1270 self.volumes = {} 1271 self.instances = [] 1272 self.pinst = [] 1273 self.sinst = [] 1274 self.sbp = {} 1275 self.mfree = 0 1276 self.dfree = 0 1277 self.offline = offline 1278 self.vm_capable = vm_capable 1279 self.rpc_fail = False 1280 self.lvm_fail = False 1281 self.hyp_fail = False 1282 self.ghost = False 1283 self.os_fail = False 1284 self.oslist = {}
1285
1286 - def ExpandNames(self):
1287 self.needed_locks = { 1288 locking.LEVEL_NODE: locking.ALL_SET, 1289 locking.LEVEL_INSTANCE: locking.ALL_SET, 1290 } 1291 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1292
1293 - def _Error(self, ecode, item, msg, *args, **kwargs):
1294 """Format an error message. 1295 1296 Based on the opcode's error_codes parameter, either format a 1297 parseable error code, or a simpler error string. 1298 1299 This must be called only from Exec and functions called from Exec. 1300 1301 """ 1302 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) 1303 itype, etxt = ecode 1304 # first complete the msg 1305 if args: 1306 msg = msg % args 1307 # then format the whole message 1308 if self.op.error_codes: 1309 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) 1310 else: 1311 if item: 1312 item = " " + item 1313 else: 1314 item = "" 1315 msg = "%s: %s%s: %s" % (ltype, itype, item, msg) 1316 # and finally report it via the feedback_fn 1317 self._feedback_fn(" - %s" % msg)
1318
1319 - def _ErrorIf(self, cond, *args, **kwargs):
1320 """Log an error message if the passed condition is True. 1321 1322 """ 1323 cond = bool(cond) or self.op.debug_simulate_errors 1324 if cond: 1325 self._Error(*args, **kwargs) 1326 # do not mark the operation as failed for WARN cases only 1327 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR: 1328 self.bad = self.bad or cond
1329
1330 - def _VerifyNode(self, ninfo, nresult):
1331 """Perform some basic validation on data returned from a node. 1332 1333 - check the result data structure is well formed and has all the 1334 mandatory fields 1335 - check ganeti version 1336 1337 @type ninfo: L{objects.Node} 1338 @param ninfo: the node to check 1339 @param nresult: the results from the node 1340 @rtype: boolean 1341 @return: whether overall this call was successful (and we can expect 1342 reasonable values in the respose) 1343 1344 """ 1345 node = ninfo.name 1346 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1347 1348 # main result, nresult should be a non-empty dict 1349 test = not nresult or not isinstance(nresult, dict) 1350 _ErrorIf(test, self.ENODERPC, node, 1351 "unable to verify node: no data returned") 1352 if test: 1353 return False 1354 1355 # compares ganeti version 1356 local_version = constants.PROTOCOL_VERSION 1357 remote_version = nresult.get("version", None) 1358 test = not (remote_version and 1359 isinstance(remote_version, (list, tuple)) and 1360 len(remote_version) == 2) 1361 _ErrorIf(test, self.ENODERPC, node, 1362 "connection to node returned invalid data") 1363 if test: 1364 return False 1365 1366 test = local_version != remote_version[0] 1367 _ErrorIf(test, self.ENODEVERSION, node, 1368 "incompatible protocol versions: master %s," 1369 " node %s", local_version, remote_version[0]) 1370 if test: 1371 return False 1372 1373 # node seems compatible, we can actually try to look into its results 1374 1375 # full package version 1376 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], 1377 self.ENODEVERSION, node, 1378 "software version mismatch: master %s, node %s", 1379 constants.RELEASE_VERSION, remote_version[1], 1380 code=self.ETYPE_WARNING) 1381 1382 hyp_result = nresult.get(constants.NV_HYPERVISOR, None) 1383 if ninfo.vm_capable and isinstance(hyp_result, dict): 1384 for hv_name, hv_result in hyp_result.iteritems(): 1385 test = hv_result is not None 1386 _ErrorIf(test, self.ENODEHV, node, 1387 "hypervisor %s verify failure: '%s'", hv_name, hv_result) 1388 1389 test = nresult.get(constants.NV_NODESETUP, 1390 ["Missing NODESETUP results"]) 1391 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s", 1392 "; ".join(test)) 1393 1394 return True
1395
1396 - def _VerifyNodeTime(self, ninfo, nresult, 1397 nvinfo_starttime, nvinfo_endtime):
1398 """Check the node time. 1399 1400 @type ninfo: L{objects.Node} 1401 @param ninfo: the node to check 1402 @param nresult: the remote results for the node 1403 @param nvinfo_starttime: the start time of the RPC call 1404 @param nvinfo_endtime: the end time of the RPC call 1405 1406 """ 1407 node = ninfo.name 1408 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1409 1410 ntime = nresult.get(constants.NV_TIME, None) 1411 try: 1412 ntime_merged = utils.MergeTime(ntime) 1413 except (ValueError, TypeError): 1414 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") 1415 return 1416 1417 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): 1418 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) 1419 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): 1420 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) 1421 else: 1422 ntime_diff = None 1423 1424 _ErrorIf(ntime_diff is not None, self.ENODETIME, node, 1425 "Node time diverges by at least %s from master node time", 1426 ntime_diff)
1427
  1428    def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
  1429      """Check the node LVM data.
  1430  
  1431      @type ninfo: L{objects.Node}
  1432      @param ninfo: the node to check
  1433      @param nresult: the remote results for the node
  1434      @param vg_name: the configured VG name
  1435  
  1436      """
  1437      if vg_name is None:
  1438        return
  1439  
  1440      node = ninfo.name
  1441      _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
  1442  
  1443      # checks vg existence and size > 20G
  1444      vglist = nresult.get(constants.NV_VGLIST, None)
  1445      test = not vglist
  1446      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
  1447      if not test:
  1448        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
  1449                                              constants.MIN_VG_SIZE)
  1450        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
  1451  
  1452      # check pv names
  1453      pvlist = nresult.get(constants.NV_PVLIST, None)
  1454      test = pvlist is None
  1455      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
  1456      if not test:
  1457        # check that ':' is not present in PV names, since it's a
  1458        # special character for lvcreate (denotes the range of PEs to
  1459        # use on the PV)
  1460        for _, pvname, owner_vg in pvlist:
  1461          test = ":" in pvname
  1462          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
  1463                   " '%s' of VG '%s'", pvname, owner_vg)
1464
  1465    def _VerifyNodeNetwork(self, ninfo, nresult):
  1466      """Check the node network connectivity.
  1467  
  1468      @type ninfo: L{objects.Node}
  1469      @param ninfo: the node to check
  1470      @param nresult: the remote results for the node
  1471  
  1472      """
  1473      node = ninfo.name
  1474      _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
  1475  
  1476      test = constants.NV_NODELIST not in nresult
  1477      _ErrorIf(test, self.ENODESSH, node,
  1478               "node hasn't returned node ssh connectivity data")
  1479      if not test:
  1480        if nresult[constants.NV_NODELIST]:
  1481          for a_node, a_msg in nresult[constants.NV_NODELIST].items():
  1482            _ErrorIf(True, self.ENODESSH, node,
  1483                     "ssh communication with node '%s': %s", a_node, a_msg)
  1484  
  1485      test = constants.NV_NODENETTEST not in nresult
  1486      _ErrorIf(test, self.ENODENET, node,
  1487               "node hasn't returned node tcp connectivity data")
  1488      if not test:
  1489        if nresult[constants.NV_NODENETTEST]:
  1490          nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
  1491          for anode in nlist:
  1492            _ErrorIf(True, self.ENODENET, node,
  1493                     "tcp communication with node '%s': %s",
  1494                     anode, nresult[constants.NV_NODENETTEST][anode])
  1495  
  1496      test = constants.NV_MASTERIP not in nresult
  1497      _ErrorIf(test, self.ENODENET, node,
  1498               "node hasn't returned node master IP reachability data")
  1499      if not test:
  1500        if not nresult[constants.NV_MASTERIP]:
  1501          if node == self.master_node:
  1502            msg = "the master node cannot reach the master IP (not configured?)"
  1503          else:
  1504            msg = "cannot reach the master IP"
  1505          _ErrorIf(True, self.ENODENET, node, msg)
1506
1507 - def _VerifyInstance(self, instance, instanceconfig, node_image, 1508 diskstatus):
1509 """Verify an instance. 1510 1511 This function checks to see if the required block devices are 1512 available on the instance's node. 1513 1514 """ 1515 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1516 node_current = instanceconfig.primary_node 1517 1518 node_vol_should = {} 1519 instanceconfig.MapLVsByNode(node_vol_should) 1520 1521 for node in node_vol_should: 1522 n_img = node_image[node] 1523 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1524 # ignore missing volumes on offline or broken nodes 1525 continue 1526 for volume in node_vol_should[node]: 1527 test = volume not in n_img.volumes 1528 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance, 1529 "volume %s missing on node %s", volume, node) 1530 1531 if instanceconfig.admin_up: 1532 pri_img = node_image[node_current] 1533 test = instance not in pri_img.instances and not pri_img.offline 1534 _ErrorIf(test, self.EINSTANCEDOWN, instance, 1535 "instance not running on its primary node %s", 1536 node_current) 1537 1538 for node, n_img in node_image.items(): 1539 if (not node == node_current): 1540 test = instance in n_img.instances 1541 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance, 1542 "instance should not run on node %s", node) 1543 1544 diskdata = [(nname, success, status, idx) 1545 for (nname, disks) in diskstatus.items() 1546 for idx, (success, status) in enumerate(disks)] 1547 1548 for nname, success, bdev_status, idx in diskdata: 1549 _ErrorIf(instanceconfig.admin_up and not success, 1550 self.EINSTANCEFAULTYDISK, instance, 1551 "couldn't retrieve status for disk/%s on %s: %s", 1552 idx, nname, bdev_status) 1553 _ErrorIf((instanceconfig.admin_up and success and 1554 bdev_status.ldisk_status == constants.LDS_FAULTY), 1555 self.EINSTANCEFAULTYDISK, instance, 1556 "disk/%s on %s is faulty", idx, nname)
1557
1558 - def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1559 """Verify if there are any unknown volumes in the cluster. 1560 1561 The .os, .swap and backup volumes are ignored. All other volumes are 1562 reported as unknown. 1563 1564 @type reserved: L{ganeti.utils.FieldSet} 1565 @param reserved: a FieldSet of reserved volume names 1566 1567 """ 1568 for node, n_img in node_image.items(): 1569 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: 1570 # skip non-healthy nodes 1571 continue 1572 for volume in n_img.volumes: 1573 test = ((node not in node_vol_should or 1574 volume not in node_vol_should[node]) and 1575 not reserved.Matches(volume)) 1576 self._ErrorIf(test, self.ENODEORPHANLV, node, 1577 "volume %s is unknown", volume)
1578
1579 - def _VerifyOrphanInstances(self, instancelist, node_image):
1580 """Verify the list of running instances. 1581 1582 This checks what instances are running but unknown to the cluster. 1583 1584 """ 1585 for node, n_img in node_image.items(): 1586 for o_inst in n_img.instances: 1587 test = o_inst not in instancelist 1588 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node, 1589 "instance %s on node %s should not exist", o_inst, node)
1590
1591 - def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1592 """Verify N+1 Memory Resilience. 1593 1594 Check that if one single node dies we can still start all the 1595 instances it was primary for. 1596 1597 """ 1598 for node, n_img in node_image.items(): 1599 # This code checks that every node which is now listed as 1600 # secondary has enough memory to host all instances it is 1601 # supposed to should a single other node in the cluster fail. 1602 # FIXME: not ready for failover to an arbitrary node 1603 # FIXME: does not support file-backed instances 1604 # WARNING: we currently take into account down instances as well 1605 # as up ones, considering that even if they're down someone 1606 # might want to start them even in the event of a node failure. 1607 for prinode, instances in n_img.sbp.items(): 1608 needed_mem = 0 1609 for instance in instances: 1610 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance]) 1611 if bep[constants.BE_AUTO_BALANCE]: 1612 needed_mem += bep[constants.BE_MEMORY] 1613 test = n_img.mfree < needed_mem 1614 self._ErrorIf(test, self.ENODEN1, node, 1615 "not enough memory on to accommodate" 1616 " failovers should peer node %s fail", prinode)
1617
1618 - def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum, 1619 master_files):
1620 """Verifies and computes the node required file checksums. 1621 1622 @type ninfo: L{objects.Node} 1623 @param ninfo: the node to check 1624 @param nresult: the remote results for the node 1625 @param file_list: required list of files 1626 @param local_cksum: dictionary of local files and their checksums 1627 @param master_files: list of files that only masters should have 1628 1629 """ 1630 node = ninfo.name 1631 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1632 1633 remote_cksum = nresult.get(constants.NV_FILELIST, None) 1634 test = not isinstance(remote_cksum, dict) 1635 _ErrorIf(test, self.ENODEFILECHECK, node, 1636 "node hasn't returned file checksum data") 1637 if test: 1638 return 1639 1640 for file_name in file_list: 1641 node_is_mc = ninfo.master_candidate 1642 must_have = (file_name not in master_files) or node_is_mc 1643 # missing 1644 test1 = file_name not in remote_cksum 1645 # invalid checksum 1646 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name] 1647 # existing and good 1648 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name] 1649 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node, 1650 "file '%s' missing", file_name) 1651 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node, 1652 "file '%s' has wrong checksum", file_name) 1653 # not candidate and this is not a must-have file 1654 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node, 1655 "file '%s' should not exist on non master" 1656 " candidates (and the file is outdated)", file_name) 1657 # all good, except non-master/non-must have combination 1658 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node, 1659 "file '%s' should not exist" 1660 " on non master candidates", file_name)
1661
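# Illustrative sketch (standalone, not part of the LU above): for a single
# file, _VerifyNodeFiles is a three-way comparison between the local checksum
# and the one the node reported, qualified by whether the node must have the
# file at all. The helper name and return convention are hypothetical.
def _SketchCheckNodeFile(fname, local_cksum, remote_cksum, must_have):
  """Classify one file's state on a node; returns None if everything is fine."""
  missing = fname not in remote_cksum
  wrong = not missing and remote_cksum[fname] != local_cksum[fname]
  good = not missing and remote_cksum[fname] == local_cksum[fname]
  if missing and must_have:
    return "file '%s' missing" % fname
  if wrong and must_have:
    return "file '%s' has wrong checksum" % fname
  if wrong and not must_have:
    return "file '%s' should not exist here (and is outdated)" % fname
  if good and not must_have:
    return "file '%s' should not exist on non master candidates" % fname
  return None

assert _SketchCheckNodeFile("config.data", {"config.data": "abc"},
                            {}, True) == "file 'config.data' missing"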
1662 - def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper, 1663 drbd_map):
1664 """Verifies and the node DRBD status. 1665 1666 @type ninfo: L{objects.Node} 1667 @param ninfo: the node to check 1668 @param nresult: the remote results for the node 1669 @param instanceinfo: the dict of instances 1670 @param drbd_helper: the configured DRBD usermode helper 1671 @param drbd_map: the DRBD map as returned by 1672 L{ganeti.config.ConfigWriter.ComputeDRBDMap} 1673 1674 """ 1675 node = ninfo.name 1676 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1677 1678 if drbd_helper: 1679 helper_result = nresult.get(constants.NV_DRBDHELPER, None) 1680 test = (helper_result == None) 1681 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1682 "no drbd usermode helper returned") 1683 if helper_result: 1684 status, payload = helper_result 1685 test = not status 1686 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1687 "drbd usermode helper check unsuccessful: %s", payload) 1688 test = status and (payload != drbd_helper) 1689 _ErrorIf(test, self.ENODEDRBDHELPER, node, 1690 "wrong drbd usermode helper: %s", payload) 1691 1692 # compute the DRBD minors 1693 node_drbd = {} 1694 for minor, instance in drbd_map[node].items(): 1695 test = instance not in instanceinfo 1696 _ErrorIf(test, self.ECLUSTERCFG, None, 1697 "ghost instance '%s' in temporary DRBD map", instance) 1698 # ghost instance should not be running, but otherwise we 1699 # don't give double warnings (both ghost instance and 1700 # unallocated minor in use) 1701 if test: 1702 node_drbd[minor] = (instance, False) 1703 else: 1704 instance = instanceinfo[instance] 1705 node_drbd[minor] = (instance.name, instance.admin_up) 1706 1707 # and now check them 1708 used_minors = nresult.get(constants.NV_DRBDLIST, []) 1709 test = not isinstance(used_minors, (tuple, list)) 1710 _ErrorIf(test, self.ENODEDRBD, node, 1711 "cannot parse drbd status file: %s", str(used_minors)) 1712 if test: 1713 # we cannot check drbd status 1714 return 1715 1716 for minor, (iname, must_exist) in node_drbd.items(): 1717 test = minor not in used_minors and must_exist 1718 _ErrorIf(test, self.ENODEDRBD, node, 1719 "drbd minor %d of instance %s is not active", minor, iname) 1720 for minor in used_minors: 1721 test = minor not in node_drbd 1722 _ErrorIf(test, self.ENODEDRBD, node, 1723 "unallocated drbd minor %d is in use", minor)
1724
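# Illustrative sketch (standalone, not part of the LU above): once the
# expected minors have been derived from the DRBD map, the verification is a
# comparison between that expectation and the minors the node reports as in
# use. The function name is hypothetical.
def _SketchCompareDrbdMinors(expected, used_minors):
  """Compare {minor: (instance, must_exist)} against the reported minor list."""
  problems = []
  for minor, (iname, must_exist) in expected.items():
    if must_exist and minor not in used_minors:
      problems.append("drbd minor %d of instance %s is not active" %
                      (minor, iname))
  for minor in used_minors:
    if minor not in expected:
      problems.append("unallocated drbd minor %d is in use" % minor)
  return problems

assert _SketchCompareDrbdMinors({0: ("inst1", True)}, [1]) == \
    ["drbd minor 0 of instance inst1 is not active",
     "unallocated drbd minor 1 is in use"]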
1725 - def _UpdateNodeOS(self, ninfo, nresult, nimg):
1726 """Builds the node OS structures. 1727 1728 @type ninfo: L{objects.Node} 1729 @param ninfo: the node to check 1730 @param nresult: the remote results for the node 1731 @param nimg: the node image object 1732 1733 """ 1734 node = ninfo.name 1735 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1736 1737 remote_os = nresult.get(constants.NV_OSLIST, None) 1738 test = (not isinstance(remote_os, list) or 1739 not compat.all(isinstance(v, list) and len(v) == 7 1740 for v in remote_os)) 1741 1742 _ErrorIf(test, self.ENODEOS, node, 1743 "node hasn't returned valid OS data") 1744 1745 nimg.os_fail = test 1746 1747 if test: 1748 return 1749 1750 os_dict = {} 1751 1752 for (name, os_path, status, diagnose, 1753 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]: 1754 1755 if name not in os_dict: 1756 os_dict[name] = [] 1757 1758 # parameters is a list of lists instead of list of tuples due to 1759 # JSON lacking a real tuple type, fix it: 1760 parameters = [tuple(v) for v in parameters] 1761 os_dict[name].append((os_path, status, diagnose, 1762 set(variants), set(parameters), set(api_ver))) 1763 1764 nimg.oslist = os_dict
1765
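# Illustrative sketch (standalone, not part of the LU above): each NV_OSLIST
# entry is a 7-element list; _UpdateNodeOS regroups the entries by OS name and
# turns the variant, parameter and API lists into sets so _VerifyNodeOS can
# compare nodes with plain equality. A minimal version of that reshaping:
def _SketchBuildOsDict(remote_os):
  os_dict = {}
  for (name, os_path, status, diagnose,
       variants, parameters, api_ver) in remote_os:
    entry = (os_path, status, diagnose, set(variants),
             set(tuple(v) for v in parameters), set(api_ver))
    os_dict.setdefault(name, []).append(entry)
  return os_dict

sample = [["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
           ["default"], [["ARCH", "architecture"]], [15]]]
assert list(_SketchBuildOsDict(sample)) == ["debootstrap"]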
1766 - def _VerifyNodeOS(self, ninfo, nimg, base):
1767 """Verifies the node OS list. 1768 1769 @type ninfo: L{objects.Node} 1770 @param ninfo: the node to check 1771 @param nimg: the node image object 1772 @param base: the 'template' node we match against (e.g. from the master) 1773 1774 """ 1775 node = ninfo.name 1776 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1777 1778 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" 1779 1780 for os_name, os_data in nimg.oslist.items(): 1781 assert os_data, "Empty OS status for OS %s?!" % os_name 1782 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] 1783 _ErrorIf(not f_status, self.ENODEOS, node, 1784 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) 1785 _ErrorIf(len(os_data) > 1, self.ENODEOS, node, 1786 "OS '%s' has multiple entries (first one shadows the rest): %s", 1787 os_name, utils.CommaJoin([v[0] for v in os_data])) 1788 # this will catched in backend too 1789 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api) 1790 and not f_var, self.ENODEOS, node, 1791 "OS %s with API at least %d does not declare any variant", 1792 os_name, constants.OS_API_V15) 1793 # comparisons with the 'base' image 1794 test = os_name not in base.oslist 1795 _ErrorIf(test, self.ENODEOS, node, 1796 "Extra OS %s not present on reference node (%s)", 1797 os_name, base.name) 1798 if test: 1799 continue 1800 assert base.oslist[os_name], "Base node has empty OS status?" 1801 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0] 1802 if not b_status: 1803 # base OS is invalid, skipping 1804 continue 1805 for kind, a, b in [("API version", f_api, b_api), 1806 ("variants list", f_var, b_var), 1807 ("parameters", f_param, b_param)]: 1808 _ErrorIf(a != b, self.ENODEOS, node, 1809 "OS %s %s differs from reference node %s: %s vs. %s", 1810 kind, os_name, base.name, 1811 utils.CommaJoin(a), utils.CommaJoin(b)) 1812 1813 # check any missing OSes 1814 missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) 1815 _ErrorIf(missing, self.ENODEOS, node, 1816 "OSes present on reference node %s but missing on this node: %s", 1817 base.name, utils.CommaJoin(missing))
1818
1819 - def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1820 """Verifies and updates the node volume data. 1821 1822 This function will update a L{NodeImage}'s internal structures 1823 with data from the remote call. 1824 1825 @type ninfo: L{objects.Node} 1826 @param ninfo: the node to check 1827 @param nresult: the remote results for the node 1828 @param nimg: the node image object 1829 @param vg_name: the configured VG name 1830 1831 """ 1832 node = ninfo.name 1833 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1834 1835 nimg.lvm_fail = True 1836 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") 1837 if vg_name is None: 1838 pass 1839 elif isinstance(lvdata, basestring): 1840 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s", 1841 utils.SafeEncode(lvdata)) 1842 elif not isinstance(lvdata, dict): 1843 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)") 1844 else: 1845 nimg.volumes = lvdata 1846 nimg.lvm_fail = False
1847
1848 - def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1849 """Verifies and updates the node instance list. 1850 1851 If the listing was successful, then updates this node's instance 1852 list. Otherwise, it marks the RPC call as failed for the instance 1853 list key. 1854 1855 @type ninfo: L{objects.Node} 1856 @param ninfo: the node to check 1857 @param nresult: the remote results for the node 1858 @param nimg: the node image object 1859 1860 """ 1861 idata = nresult.get(constants.NV_INSTANCELIST, None) 1862 test = not isinstance(idata, list) 1863 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed" 1864 " (instancelist): %s", utils.SafeEncode(str(idata))) 1865 if test: 1866 nimg.hyp_fail = True 1867 else: 1868 nimg.instances = idata
1869
1870 - def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1871 """Verifies and computes a node information map 1872 1873 @type ninfo: L{objects.Node} 1874 @param ninfo: the node to check 1875 @param nresult: the remote results for the node 1876 @param nimg: the node image object 1877 @param vg_name: the configured VG name 1878 1879 """ 1880 node = ninfo.name 1881 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1882 1883 # try to read free memory (from the hypervisor) 1884 hv_info = nresult.get(constants.NV_HVINFO, None) 1885 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info 1886 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)") 1887 if not test: 1888 try: 1889 nimg.mfree = int(hv_info["memory_free"]) 1890 except (ValueError, TypeError): 1891 _ErrorIf(True, self.ENODERPC, node, 1892 "node returned invalid nodeinfo, check hypervisor") 1893 1894 # FIXME: devise a free space model for file based instances as well 1895 if vg_name is not None: 1896 test = (constants.NV_VGLIST not in nresult or 1897 vg_name not in nresult[constants.NV_VGLIST]) 1898 _ErrorIf(test, self.ENODELVM, node, 1899 "node didn't return data for the volume group '%s'" 1900 " - it is either missing or broken", vg_name) 1901 if not test: 1902 try: 1903 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) 1904 except (ValueError, TypeError): 1905 _ErrorIf(True, self.ENODERPC, node, 1906 "node returned invalid LVM info, check LVM status")
1907
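# Illustrative sketch (standalone, not part of the LU above): the remote
# results are untrusted, so _UpdateNodeInfo parses both the hypervisor's free
# memory and the volume group's free space defensively. A standalone version
# of the memory part (the helper name is hypothetical):
def _SketchParseFreeMemory(hv_info):
  """Return the reported free memory in MiB, or None if the data is unusable."""
  if not isinstance(hv_info, dict) or "memory_free" not in hv_info:
    return None
  try:
    return int(hv_info["memory_free"])
  except (ValueError, TypeError):
    return None

assert _SketchParseFreeMemory({"memory_free": "2048"}) == 2048
assert _SketchParseFreeMemory("bogus rpc answer") is None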
1908 - def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1909 """Gets per-disk status information for all instances. 1910 1911 @type nodelist: list of strings 1912 @param nodelist: Node names 1913 @type node_image: dict of (name, L{objects.Node}) 1914 @param node_image: Node objects 1915 @type instanceinfo: dict of (name, L{objects.Instance}) 1916 @param instanceinfo: Instance objects 1917 @rtype: {instance: {node: [(succes, payload)]}} 1918 @return: a dictionary of per-instance dictionaries with nodes as 1919 keys and disk information as values; the disk information is a 1920 list of tuples (success, payload) 1921 1922 """ 1923 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 1924 1925 node_disks = {} 1926 node_disks_devonly = {} 1927 1928 for nname in nodelist: 1929 disks = [(inst, disk) 1930 for instlist in [node_image[nname].pinst, 1931 node_image[nname].sinst] 1932 for inst in instlist 1933 for disk in instanceinfo[inst].disks] 1934 1935 if not disks: 1936 # No need to collect data 1937 continue 1938 1939 node_disks[nname] = disks 1940 1941 # Creating copies as SetDiskID below will modify the objects and that can 1942 # lead to incorrect data returned from nodes 1943 devonly = [dev.Copy() for (_, dev) in disks] 1944 1945 for dev in devonly: 1946 self.cfg.SetDiskID(dev, nname) 1947 1948 node_disks_devonly[nname] = devonly 1949 1950 assert len(node_disks) == len(node_disks_devonly) 1951 1952 # Collect data from all nodes with disks 1953 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(), 1954 node_disks_devonly) 1955 1956 assert len(result) == len(node_disks) 1957 1958 instdisk = {} 1959 1960 for (nname, nres) in result.items(): 1961 disks = node_disks[nname] 1962 1963 if nres.offline: 1964 # No data from this node 1965 data = len(disks) * [(False, "node offline")] 1966 else: 1967 msg = nres.fail_msg 1968 _ErrorIf(msg, self.ENODERPC, nname, 1969 "while getting disk information: %s", msg) 1970 if msg: 1971 # No data from this node 1972 data = len(disks) * [(False, msg)] 1973 else: 1974 data = [] 1975 for idx, i in enumerate(nres.payload): 1976 if isinstance(i, (tuple, list)) and len(i) == 2: 1977 data.append(i) 1978 else: 1979 logging.warning("Invalid result from node %s, entry %d: %s", 1980 nname, idx, i) 1981 data.append((False, "Invalid result from the remote node")) 1982 1983 for ((inst, _), status) in zip(disks, data): 1984 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status) 1985 1986 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and 1987 len(nnames) <= len(instanceinfo[inst].all_nodes) and 1988 compat.all(isinstance(s, (tuple, list)) and 1989 len(s) == 2 for s in statuses) 1990 for inst, nnames in instdisk.items() 1991 for nname, statuses in nnames.items()) 1992 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure" 1993 1994 return instdisk
1995
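# Illustrative sketch (standalone, not part of the LU above): the structure
# returned by _CollectDiskInfo maps instance -> node -> list of
# (success, payload) pairs, one per disk, so consuming it looks roughly like
# this (the sample data below is made up):
sample_instdisk = {
  "inst1": {
    "node1": [(True, {"is_degraded": False}), (False, "can't find device")],
  },
}
problems = []
for inst, per_node in sample_instdisk.items():
  for nname, statuses in per_node.items():
    for idx, (success, payload) in enumerate(statuses):
      if not success:
        problems.append("instance %s, disk %d on %s: %s" %
                        (inst, idx, nname, payload))
assert problems == ["instance inst1, disk 1 on node1: can't find device"]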
1996 - def BuildHooksEnv(self):
1997 """Build hooks env. 1998 1999 Cluster-Verify hooks just ran in the post phase and their failure makes 2000 the output be logged in the verify output and the verification to fail. 2001 2002 """ 2003 all_nodes = self.cfg.GetNodeList() 2004 env = { 2005 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()) 2006 } 2007 for node in self.cfg.GetAllNodesInfo().values(): 2008 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags()) 2009 2010 return env, [], all_nodes
2011
2012 - def Exec(self, feedback_fn):
2013 """Verify integrity of cluster, performing various test on nodes. 2014 2015 """ 2016 self.bad = False 2017 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 2018 verbose = self.op.verbose 2019 self._feedback_fn = feedback_fn 2020 feedback_fn("* Verifying global settings") 2021 for msg in self.cfg.VerifyConfig(): 2022 _ErrorIf(True, self.ECLUSTERCFG, None, msg) 2023 2024 # Check the cluster certificates 2025 for cert_filename in constants.ALL_CERT_FILES: 2026 (errcode, msg) = _VerifyCertificate(cert_filename) 2027 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode) 2028 2029 vg_name = self.cfg.GetVGName() 2030 drbd_helper = self.cfg.GetDRBDHelper() 2031 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors 2032 cluster = self.cfg.GetClusterInfo() 2033 nodelist = utils.NiceSort(self.cfg.GetNodeList()) 2034 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist] 2035 instancelist = utils.NiceSort(self.cfg.GetInstanceList()) 2036 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname)) 2037 for iname in instancelist) 2038 i_non_redundant = [] # Non redundant instances 2039 i_non_a_balanced = [] # Non auto-balanced instances 2040 n_offline = 0 # Count of offline nodes 2041 n_drained = 0 # Count of nodes being drained 2042 node_vol_should = {} 2043 2044 # FIXME: verify OS list 2045 # do local checksums 2046 master_files = [constants.CLUSTER_CONF_FILE] 2047 master_node = self.master_node = self.cfg.GetMasterNode() 2048 master_ip = self.cfg.GetMasterIP() 2049 2050 file_names = ssconf.SimpleStore().GetFileList() 2051 file_names.extend(constants.ALL_CERT_FILES) 2052 file_names.extend(master_files) 2053 if cluster.modify_etc_hosts: 2054 file_names.append(constants.ETC_HOSTS) 2055 2056 local_checksums = utils.FingerprintFiles(file_names) 2057 2058 feedback_fn("* Gathering data (%d nodes)" % len(nodelist)) 2059 node_verify_param = { 2060 constants.NV_FILELIST: file_names, 2061 constants.NV_NODELIST: [node.name for node in nodeinfo 2062 if not node.offline], 2063 constants.NV_HYPERVISOR: hypervisors, 2064 constants.NV_NODENETTEST: [(node.name, node.primary_ip, 2065 node.secondary_ip) for node in nodeinfo 2066 if not node.offline], 2067 constants.NV_INSTANCELIST: hypervisors, 2068 constants.NV_VERSION: None, 2069 constants.NV_HVINFO: self.cfg.GetHypervisorType(), 2070 constants.NV_NODESETUP: None, 2071 constants.NV_TIME: None, 2072 constants.NV_MASTERIP: (master_node, master_ip), 2073 constants.NV_OSLIST: None, 2074 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), 2075 } 2076 2077 if vg_name is not None: 2078 node_verify_param[constants.NV_VGLIST] = None 2079 node_verify_param[constants.NV_LVLIST] = vg_name 2080 node_verify_param[constants.NV_PVLIST] = [vg_name] 2081 node_verify_param[constants.NV_DRBDLIST] = None 2082 2083 if drbd_helper: 2084 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper 2085 2086 # Build our expected cluster state 2087 node_image = dict((node.name, self.NodeImage(offline=node.offline, 2088 name=node.name, 2089 vm_capable=node.vm_capable)) 2090 for node in nodeinfo) 2091 2092 for instance in instancelist: 2093 inst_config = instanceinfo[instance] 2094 2095 for nname in inst_config.all_nodes: 2096 if nname not in node_image: 2097 # ghost node 2098 gnode = self.NodeImage(name=nname) 2099 gnode.ghost = True 2100 node_image[nname] = gnode 2101 2102 inst_config.MapLVsByNode(node_vol_should) 2103 2104 pnode = inst_config.primary_node 2105 node_image[pnode].pinst.append(instance) 2106 2107 for snode in inst_config.secondary_nodes: 2108 
nimg = node_image[snode] 2109 nimg.sinst.append(instance) 2110 if pnode not in nimg.sbp: 2111 nimg.sbp[pnode] = [] 2112 nimg.sbp[pnode].append(instance) 2113 2114 # At this point, we have the in-memory data structures complete, 2115 # except for the runtime information, which we'll gather next 2116 2117 # Due to the way our RPC system works, exact response times cannot be 2118 # guaranteed (e.g. a broken node could run into a timeout). By keeping the 2119 # time before and after executing the request, we can at least have a time 2120 # window. 2121 nvinfo_starttime = time.time() 2122 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param, 2123 self.cfg.GetClusterName()) 2124 nvinfo_endtime = time.time() 2125 2126 all_drbd_map = self.cfg.ComputeDRBDMap() 2127 2128 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist)) 2129 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo) 2130 2131 feedback_fn("* Verifying node status") 2132 2133 refos_img = None 2134 2135 for node_i in nodeinfo: 2136 node = node_i.name 2137 nimg = node_image[node] 2138 2139 if node_i.offline: 2140 if verbose: 2141 feedback_fn("* Skipping offline node %s" % (node,)) 2142 n_offline += 1 2143 continue 2144 2145 if node == master_node: 2146 ntype = "master" 2147 elif node_i.master_candidate: 2148 ntype = "master candidate" 2149 elif node_i.drained: 2150 ntype = "drained" 2151 n_drained += 1 2152 else: 2153 ntype = "regular" 2154 if verbose: 2155 feedback_fn("* Verifying node %s (%s)" % (node, ntype)) 2156 2157 msg = all_nvinfo[node].fail_msg 2158 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg) 2159 if msg: 2160 nimg.rpc_fail = True 2161 continue 2162 2163 nresult = all_nvinfo[node].payload 2164 2165 nimg.call_ok = self._VerifyNode(node_i, nresult) 2166 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) 2167 self._VerifyNodeNetwork(node_i, nresult) 2168 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums, 2169 master_files) 2170 2171 if nimg.vm_capable: 2172 self._VerifyNodeLVM(node_i, nresult, vg_name) 2173 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper, 2174 all_drbd_map) 2175 2176 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) 2177 self._UpdateNodeInstances(node_i, nresult, nimg) 2178 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) 2179 self._UpdateNodeOS(node_i, nresult, nimg) 2180 if not nimg.os_fail: 2181 if refos_img is None: 2182 refos_img = nimg 2183 self._VerifyNodeOS(node_i, nimg, refos_img) 2184 2185 feedback_fn("* Verifying instance status") 2186 for instance in instancelist: 2187 if verbose: 2188 feedback_fn("* Verifying instance %s" % instance) 2189 inst_config = instanceinfo[instance] 2190 self._VerifyInstance(instance, inst_config, node_image, 2191 instdisk[instance]) 2192 inst_nodes_offline = [] 2193 2194 pnode = inst_config.primary_node 2195 pnode_img = node_image[pnode] 2196 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, 2197 self.ENODERPC, pnode, "instance %s, connection to" 2198 " primary node failed", instance) 2199 2200 if pnode_img.offline: 2201 inst_nodes_offline.append(pnode) 2202 2203 # If the instance is non-redundant we cannot survive losing its primary 2204 # node, so we are not N+1 compliant. On the other hand we have no disk 2205 # templates with more than one secondary so that situation is not well 2206 # supported either. 
2207 # FIXME: does not support file-backed instances 2208 if not inst_config.secondary_nodes: 2209 i_non_redundant.append(instance) 2210 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT, 2211 instance, "instance has multiple secondary nodes: %s", 2212 utils.CommaJoin(inst_config.secondary_nodes), 2213 code=self.ETYPE_WARNING) 2214 2215 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]: 2216 i_non_a_balanced.append(instance) 2217 2218 for snode in inst_config.secondary_nodes: 2219 s_img = node_image[snode] 2220 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode, 2221 "instance %s, connection to secondary node failed", instance) 2222 2223 if s_img.offline: 2224 inst_nodes_offline.append(snode) 2225 2226 # warn that the instance lives on offline nodes 2227 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance, 2228 "instance lives on offline node(s) %s", 2229 utils.CommaJoin(inst_nodes_offline)) 2230 # ... or ghost/non-vm_capable nodes 2231 for node in inst_config.all_nodes: 2232 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance, 2233 "instance lives on ghost node %s", node) 2234 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE, 2235 instance, "instance lives on non-vm_capable node %s", node) 2236 2237 feedback_fn("* Verifying orphan volumes") 2238 reserved = utils.FieldSet(*cluster.reserved_lvs) 2239 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) 2240 2241 feedback_fn("* Verifying orphan instances") 2242 self._VerifyOrphanInstances(instancelist, node_image) 2243 2244 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks: 2245 feedback_fn("* Verifying N+1 Memory redundancy") 2246 self._VerifyNPlusOneMemory(node_image, instanceinfo) 2247 2248 feedback_fn("* Other Notes") 2249 if i_non_redundant: 2250 feedback_fn(" - NOTICE: %d non-redundant instance(s) found." 2251 % len(i_non_redundant)) 2252 2253 if i_non_a_balanced: 2254 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found." 2255 % len(i_non_a_balanced)) 2256 2257 if n_offline: 2258 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline) 2259 2260 if n_drained: 2261 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained) 2262 2263 return not self.bad
2264
2265 - def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2266 """Analyze the post-hooks' result 2267 2268 This method analyses the hook result, handles it, and sends some 2269 nicely-formatted feedback back to the user. 2270 2271 @param phase: one of L{constants.HOOKS_PHASE_POST} or 2272 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase 2273 @param hooks_results: the results of the multi-node hooks rpc call 2274 @param feedback_fn: function used send feedback back to the caller 2275 @param lu_result: previous Exec result 2276 @return: the new Exec result, based on the previous result 2277 and hook results 2278 2279 """ 2280 # We only really run POST phase hooks, and are only interested in 2281 # their results 2282 if phase == constants.HOOKS_PHASE_POST: 2283 # Used to change hooks' output to proper indentation 2284 indent_re = re.compile('^', re.M) 2285 feedback_fn("* Hooks Results") 2286 assert hooks_results, "invalid result from hooks" 2287 2288 for node_name in hooks_results: 2289 res = hooks_results[node_name] 2290 msg = res.fail_msg 2291 test = msg and not res.offline 2292 self._ErrorIf(test, self.ENODEHOOKS, node_name, 2293 "Communication failure in hooks execution: %s", msg) 2294 if res.offline or msg: 2295 # No need to investigate payload if node is offline or gave an error. 2296 # override manually lu_result here as _ErrorIf only 2297 # overrides self.bad 2298 lu_result = 1 2299 continue 2300 for script, hkr, output in res.payload: 2301 test = hkr == constants.HKR_FAIL 2302 self._ErrorIf(test, self.ENODEHOOKS, node_name, 2303 "Script %s failed, output:", script) 2304 if test: 2305 output = indent_re.sub(' ', output) 2306 feedback_fn("%s" % output) 2307 lu_result = 0 2308 2309 return lu_result
2310
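# Illustrative sketch (standalone, not part of the LU above): the indent_re
# used in HooksCallBack prefixes every line of a hook script's output with a
# space, so multi-line output stays visually nested under the per-script
# error message.
import re

sketch_indent_re = re.compile('^', re.M)
assert (sketch_indent_re.sub(' ', "first line\nsecond line") ==
        " first line\n second line")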
2311 2312 -class LUVerifyDisks(NoHooksLU):
2313 """Verifies the cluster disks status. 2314 2315 """ 2316 REQ_BGL = False 2317
2318 - def ExpandNames(self):
2319 self.needed_locks = { 2320 locking.LEVEL_NODE: locking.ALL_SET, 2321 locking.LEVEL_INSTANCE: locking.ALL_SET, 2322 } 2323 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2324
2325 - def Exec(self, feedback_fn):
2326 """Verify integrity of cluster disks. 2327 2328 @rtype: tuple of three items 2329 @return: a tuple of (dict of node-to-node_error, list of instances 2330 which need activate-disks, dict of instance: (node, volume) for 2331 missing volumes 2332 2333 """ 2334 result = res_nodes, res_instances, res_missing = {}, [], {} 2335 2336 vg_name = self.cfg.GetVGName() 2337 nodes = utils.NiceSort(self.cfg.GetNodeList()) 2338 instances = [self.cfg.GetInstanceInfo(name) 2339 for name in self.cfg.GetInstanceList()] 2340 2341 nv_dict = {} 2342 for inst in instances: 2343 inst_lvs = {} 2344 if (not inst.admin_up or 2345 inst.disk_template not in constants.DTS_NET_MIRROR): 2346 continue 2347 inst.MapLVsByNode(inst_lvs) 2348 # transform { iname: {node: [vol,],},} to {(node, vol): iname} 2349 for node, vol_list in inst_lvs.iteritems(): 2350 for vol in vol_list: 2351 nv_dict[(node, vol)] = inst 2352 2353 if not nv_dict: 2354 return result 2355 2356 node_lvs = self.rpc.call_lv_list(nodes, vg_name) 2357 2358 for node in nodes: 2359 # node_volume 2360 node_res = node_lvs[node] 2361 if node_res.offline: 2362 continue 2363 msg = node_res.fail_msg 2364 if msg: 2365 logging.warning("Error enumerating LVs on node %s: %s", node, msg) 2366 res_nodes[node] = msg 2367 continue 2368 2369 lvs = node_res.payload 2370 for lv_name, (_, _, lv_online) in lvs.items(): 2371 inst = nv_dict.pop((node, lv_name), None) 2372 if (not lv_online and inst is not None 2373 and inst.name not in res_instances): 2374 res_instances.append(inst.name) 2375 2376 # any leftover items in nv_dict are missing LVs, let's arrange the 2377 # data better 2378 for key, inst in nv_dict.iteritems(): 2379 if inst.name not in res_missing: 2380 res_missing[inst.name] = [] 2381 res_missing[inst.name].append(key) 2382 2383 return result
2384
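# Illustrative sketch (standalone, not part of the LU above): LUVerifyDisks
# inverts the per-instance LV mapping into a {(node, volume): instance} dict,
# pops off every LV a node reports, and treats whatever is left as missing.
# A standalone version of the inversion (the helper name is hypothetical):
def _SketchInvertLvMap(lvs_by_instance):
  """Turn {instance: {node: [vol, ...]}} into {(node, vol): instance}."""
  nv_dict = {}
  for iname, node_map in lvs_by_instance.items():
    for node, vol_list in node_map.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict

assert _SketchInvertLvMap({"inst1": {"node1": ["xenvg/lv1"]}}) == \
    {("node1", "xenvg/lv1"): "inst1"}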
2385 2386 -class LURepairDiskSizes(NoHooksLU):
2387 """Verifies the cluster disks sizes. 2388 2389 """ 2390 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))] 2391 REQ_BGL = False 2392
2393 - def ExpandNames(self):
2394 if self.op.instances: 2395 self.wanted_names = [] 2396 for name in self.op.instances: 2397 full_name = _ExpandInstanceName(self.cfg, name) 2398 self.wanted_names.append(full_name) 2399 self.needed_locks = { 2400 locking.LEVEL_NODE: [], 2401 locking.LEVEL_INSTANCE: self.wanted_names, 2402 } 2403 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE 2404 else: 2405 self.wanted_names = None 2406 self.needed_locks = { 2407 locking.LEVEL_NODE: locking.ALL_SET, 2408 locking.LEVEL_INSTANCE: locking.ALL_SET, 2409 } 2410 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2411
2412 - def DeclareLocks(self, level):
2413 if level == locking.LEVEL_NODE and self.wanted_names is not None: 2414 self._LockInstancesNodes(primary_only=True)
2415
2416 - def CheckPrereq(self):
2417 """Check prerequisites. 2418 2419 This only checks the optional instance list against the existing names. 2420 2421 """ 2422 if self.wanted_names is None: 2423 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE] 2424 2425 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name 2426 in self.wanted_names]
2427
2428 - def _EnsureChildSizes(self, disk):
2429 """Ensure children of the disk have the needed disk size. 2430 2431 This is valid mainly for DRBD8 and fixes an issue where the 2432 children have smaller disk size. 2433 2434 @param disk: an L{ganeti.objects.Disk} object 2435 2436 """ 2437 if disk.dev_type == constants.LD_DRBD8: 2438 assert disk.children, "Empty children for DRBD8?" 2439 fchild = disk.children[0] 2440 mismatch = fchild.size < disk.size 2441 if mismatch: 2442 self.LogInfo("Child disk has size %d, parent %d, fixing", 2443 fchild.size, disk.size) 2444 fchild.size = disk.size 2445 2446 # and we recurse on this child only, not on the metadev 2447 return self._EnsureChildSizes(fchild) or mismatch 2448 else: 2449 return False
2450
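# Illustrative sketch (standalone, not part of the LU above): _EnsureChildSizes
# grows the data child of a DRBD8 disk to the parent's size and recurses only
# along that data child, leaving the metadata device alone. The minimal
# stand-in Disk class below (size/children only) is hypothetical.
class _SketchDisk(object):
  def __init__(self, size, children=None):
    self.size = size
    self.children = children or []

def _SketchFixChildSizes(disk):
  """Return True if any child size had to be corrected."""
  if not disk.children:
    return False
  fchild = disk.children[0]
  mismatch = fchild.size < disk.size
  if mismatch:
    fchild.size = disk.size
  return _SketchFixChildSizes(fchild) or mismatch

sketch_parent = _SketchDisk(1024, [_SketchDisk(1000)])
assert _SketchFixChildSizes(sketch_parent)
assert sketch_parent.children[0].size == 1024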
2451 - def Exec(self, feedback_fn):
2452 """Verify the size of cluster disks. 2453 2454 """ 2455 # TODO: check child disks too 2456 # TODO: check differences in size between primary/secondary nodes 2457 per_node_disks = {} 2458 for instance in self.wanted_instances: 2459 pnode = instance.primary_node 2460 if pnode not in per_node_disks: 2461 per_node_disks[pnode] = [] 2462 for idx, disk in enumerate(instance.disks): 2463 per_node_disks[pnode].append((instance, idx, disk)) 2464 2465 changed = [] 2466 for node, dskl in per_node_disks.items(): 2467 newl = [v[2].Copy() for v in dskl] 2468 for dsk in newl: 2469 self.cfg.SetDiskID(dsk, node) 2470 result = self.rpc.call_blockdev_getsizes(node, newl) 2471 if result.fail_msg: 2472 self.LogWarning("Failure in blockdev_getsizes call to node" 2473 " %s, ignoring", node) 2474 continue 2475 if len(result.data) != len(dskl): 2476 self.LogWarning("Invalid result from node %s, ignoring node results", 2477 node) 2478 continue 2479 for ((instance, idx, disk), size) in zip(dskl, result.data): 2480 if size is None: 2481 self.LogWarning("Disk %d of instance %s did not return size" 2482 " information, ignoring", idx, instance.name) 2483 continue 2484 if not isinstance(size, (int, long)): 2485 self.LogWarning("Disk %d of instance %s did not return valid" 2486 " size information, ignoring", idx, instance.name) 2487 continue 2488 size = size >> 20 2489 if size != disk.size: 2490 self.LogInfo("Disk %d of instance %s has mismatched size," 2491 " correcting: recorded %d, actual %d", idx, 2492 instance.name, disk.size, size) 2493 disk.size = size 2494 self.cfg.Update(instance, feedback_fn) 2495 changed.append((instance.name, idx, size)) 2496 if self._EnsureChildSizes(disk): 2497 self.cfg.Update(instance, feedback_fn) 2498 changed.append((instance.name, idx, disk.size)) 2499 return changed
2500
2501 2502 -class LURenameCluster(LogicalUnit):
2503 """Rename the cluster. 2504 2505 """ 2506 HPATH = "cluster-rename" 2507 HTYPE = constants.HTYPE_CLUSTER 2508 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)] 2509
2510 - def BuildHooksEnv(self):
2511 """Build hooks env. 2512 2513 """ 2514 env = { 2515 "OP_TARGET": self.cfg.GetClusterName(), 2516 "NEW_NAME": self.op.name, 2517 } 2518 mn = self.cfg.GetMasterNode() 2519 all_nodes = self.cfg.GetNodeList() 2520 return env, [mn], all_nodes
2521
2522 - def CheckPrereq(self):
2523 """Verify that the passed name is a valid one. 2524 2525 """ 2526 hostname = netutils.GetHostname(name=self.op.name, 2527 family=self.cfg.GetPrimaryIPFamily()) 2528 2529 new_name = hostname.name 2530 self.ip = new_ip = hostname.ip 2531 old_name = self.cfg.GetClusterName() 2532 old_ip = self.cfg.GetMasterIP() 2533 if new_name == old_name and new_ip == old_ip: 2534 raise errors.OpPrereqError("Neither the name nor the IP address of the" 2535 " cluster has changed", 2536 errors.ECODE_INVAL) 2537 if new_ip != old_ip: 2538 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): 2539 raise errors.OpPrereqError("The given cluster IP address (%s) is" 2540 " reachable on the network" % 2541 new_ip, errors.ECODE_NOTUNIQUE) 2542 2543 self.op.name = new_name
2544
2545 - def Exec(self, feedback_fn):
2546 """Rename the cluster. 2547 2548 """ 2549 clustername = self.op.name 2550 ip = self.ip 2551 2552 # shutdown the master IP 2553 master = self.cfg.GetMasterNode() 2554 result = self.rpc.call_node_stop_master(master, False) 2555 result.Raise("Could not disable the master role") 2556 2557 try: 2558 cluster = self.cfg.GetClusterInfo() 2559 cluster.cluster_name = clustername 2560 cluster.master_ip = ip 2561 self.cfg.Update(cluster, feedback_fn) 2562 2563 # update the known hosts file 2564 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE) 2565 node_list = self.cfg.GetNodeList() 2566 try: 2567 node_list.remove(master) 2568 except ValueError: 2569 pass 2570 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE) 2571 finally: 2572 result = self.rpc.call_node_start_master(master, False, False) 2573 msg = result.fail_msg 2574 if msg: 2575 self.LogWarning("Could not re-enable the master role on" 2576 " the master, please restart manually: %s", msg) 2577 2578 return clustername
2579
2580 2581 -class LUSetClusterParams(LogicalUnit):
2582 """Change the parameters of the cluster. 2583 2584 """ 2585 HPATH = "cluster-modify" 2586 HTYPE = constants.HTYPE_CLUSTER 2587 _OP_PARAMS = [ 2588 ("vg_name", None, ht.TMaybeString), 2589 ("enabled_hypervisors", None, 2590 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue), 2591 ht.TNone)), 2592 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2593 ht.TNone)), 2594 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)), 2595 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2596 ht.TNone)), 2597 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), 2598 ht.TNone)), 2599 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)), 2600 ("uid_pool", None, ht.NoType), 2601 ("add_uids", None, ht.NoType), 2602 ("remove_uids", None, ht.NoType), 2603 ("maintain_node_health", None, ht.TMaybeBool), 2604 ("prealloc_wipe_disks", None, ht.TMaybeBool), 2605 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)), 2606 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)), 2607 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)), 2608 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)), 2609 ("hidden_os", None, ht.TOr(ht.TListOf(\ 2610 ht.TAnd(ht.TList, 2611 ht.TIsLength(2), 2612 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), 2613 ht.TNone)), 2614 ("blacklisted_os", None, ht.TOr(ht.TListOf(\ 2615 ht.TAnd(ht.TList, 2616 ht.TIsLength(2), 2617 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), 2618 ht.TNone)), 2619 ] 2620 REQ_BGL = False 2621
2622 - def CheckArguments(self):
2623 """Check parameters 2624 2625 """ 2626 if self.op.uid_pool: 2627 uidpool.CheckUidPool(self.op.uid_pool) 2628 2629 if self.op.add_uids: 2630 uidpool.CheckUidPool(self.op.add_uids) 2631 2632 if self.op.remove_uids: 2633 uidpool.CheckUidPool(self.op.remove_uids)
2634
2635 - def ExpandNames(self):
2636 # FIXME: in the future maybe other cluster params won't require checking on 2637 # all nodes to be modified. 2638 self.needed_locks = { 2639 locking.LEVEL_NODE: locking.ALL_SET, 2640 } 2641 self.share_locks[locking.LEVEL_NODE] = 1
2642
2643 - def BuildHooksEnv(self):
2644 """Build hooks env. 2645 2646 """ 2647 env = { 2648 "OP_TARGET": self.cfg.GetClusterName(), 2649 "NEW_VG_NAME": self.op.vg_name, 2650 } 2651 mn = self.cfg.GetMasterNode() 2652 return env, [mn], [mn]
2653
2654 - def CheckPrereq(self):
2655 """Check prerequisites. 2656 2657 This checks whether the given params don't conflict and 2658 if the given volume group is valid. 2659 2660 """ 2661 if self.op.vg_name is not None and not self.op.vg_name: 2662 if self.cfg.HasAnyDiskOfType(constants.LD_LV): 2663 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based" 2664 " instances exist", errors.ECODE_INVAL) 2665 2666 if self.op.drbd_helper is not None and not self.op.drbd_helper: 2667 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8): 2668 raise errors.OpPrereqError("Cannot disable drbd helper while" 2669 " drbd-based instances exist", 2670 errors.ECODE_INVAL) 2671 2672 node_list = self.acquired_locks[locking.LEVEL_NODE] 2673 2674 # if vg_name not None, checks given volume group on all nodes 2675 if self.op.vg_name: 2676 vglist = self.rpc.call_vg_list(node_list) 2677 for node in node_list: 2678 msg = vglist[node].fail_msg 2679 if msg: 2680 # ignoring down node 2681 self.LogWarning("Error while gathering data on node %s" 2682 " (ignoring node): %s", node, msg) 2683 continue 2684 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, 2685 self.op.vg_name, 2686 constants.MIN_VG_SIZE) 2687 if vgstatus: 2688 raise errors.OpPrereqError("Error on node '%s': %s" % 2689 (node, vgstatus), errors.ECODE_ENVIRON) 2690 2691 if self.op.drbd_helper: 2692 # checks given drbd helper on all nodes 2693 helpers = self.rpc.call_drbd_helper(node_list) 2694 for node in node_list: 2695 ninfo = self.cfg.GetNodeInfo(node) 2696 if ninfo.offline: 2697 self.LogInfo("Not checking drbd helper on offline node %s", node) 2698 continue 2699 msg = helpers[node].fail_msg 2700 if msg: 2701 raise errors.OpPrereqError("Error checking drbd helper on node" 2702 " '%s': %s" % (node, msg), 2703 errors.ECODE_ENVIRON) 2704 node_helper = helpers[node].payload 2705 if node_helper != self.op.drbd_helper: 2706 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % 2707 (node, node_helper), errors.ECODE_ENVIRON) 2708 2709 self.cluster = cluster = self.cfg.GetClusterInfo() 2710 # validate params changes 2711 if self.op.beparams: 2712 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES) 2713 self.new_beparams = cluster.SimpleFillBE(self.op.beparams) 2714 2715 if self.op.nicparams: 2716 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES) 2717 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams) 2718 objects.NIC.CheckParameterSyntax(self.new_nicparams) 2719 nic_errors = [] 2720 2721 # check all instances for consistency 2722 for instance in self.cfg.GetAllInstancesInfo().values(): 2723 for nic_idx, nic in enumerate(instance.nics): 2724 params_copy = copy.deepcopy(nic.nicparams) 2725 params_filled = objects.FillDict(self.new_nicparams, params_copy) 2726 2727 # check parameter syntax 2728 try: 2729 objects.NIC.CheckParameterSyntax(params_filled) 2730 except errors.ConfigurationError, err: 2731 nic_errors.append("Instance %s, nic/%d: %s" % 2732 (instance.name, nic_idx, err)) 2733 2734 # if we're moving instances to routed, check that they have an ip 2735 target_mode = params_filled[constants.NIC_MODE] 2736 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip: 2737 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" % 2738 (instance.name, nic_idx)) 2739 if nic_errors: 2740 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" % 2741 "\n".join(nic_errors)) 2742 2743 # hypervisor list/parameters 2744 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {}) 2745 if 
self.op.hvparams: 2746 for hv_name, hv_dict in self.op.hvparams.items(): 2747 if hv_name not in self.new_hvparams: 2748 self.new_hvparams[hv_name] = hv_dict 2749 else: 2750 self.new_hvparams[hv_name].update(hv_dict) 2751 2752 # os hypervisor parameters 2753 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {}) 2754 if self.op.os_hvp: 2755 for os_name, hvs in self.op.os_hvp.items(): 2756 if os_name not in self.new_os_hvp: 2757 self.new_os_hvp[os_name] = hvs 2758 else: 2759 for hv_name, hv_dict in hvs.items(): 2760 if hv_name not in self.new_os_hvp[os_name]: 2761 self.new_os_hvp[os_name][hv_name] = hv_dict 2762 else: 2763 self.new_os_hvp[os_name][hv_name].update(hv_dict) 2764 2765 # os parameters 2766 self.new_osp = objects.FillDict(cluster.osparams, {}) 2767 if self.op.osparams: 2768 for os_name, osp in self.op.osparams.items(): 2769 if os_name not in self.new_osp: 2770 self.new_osp[os_name] = {} 2771 2772 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp, 2773 use_none=True) 2774 2775 if not self.new_osp[os_name]: 2776 # we removed all parameters 2777 del self.new_osp[os_name] 2778 else: 2779 # check the parameter validity (remote check) 2780 _CheckOSParams(self, False, [self.cfg.GetMasterNode()], 2781 os_name, self.new_osp[os_name]) 2782 2783 # changes to the hypervisor list 2784 if self.op.enabled_hypervisors is not None: 2785 self.hv_list = self.op.enabled_hypervisors 2786 for hv in self.hv_list: 2787 # if the hypervisor doesn't already exist in the cluster 2788 # hvparams, we initialize it to empty, and then (in both 2789 # cases) we make sure to fill the defaults, as we might not 2790 # have a complete defaults list if the hypervisor wasn't 2791 # enabled before 2792 if hv not in new_hvp: 2793 new_hvp[hv] = {} 2794 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv]) 2795 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES) 2796 else: 2797 self.hv_list = cluster.enabled_hypervisors 2798 2799 if self.op.hvparams or self.op.enabled_hypervisors is not None: 2800 # either the enabled list has changed, or the parameters have, validate 2801 for hv_name, hv_params in self.new_hvparams.items(): 2802 if ((self.op.hvparams and hv_name in self.op.hvparams) or 2803 (self.op.enabled_hypervisors and 2804 hv_name in self.op.enabled_hypervisors)): 2805 # either this is a new hypervisor, or its parameters have changed 2806 hv_class = hypervisor.GetHypervisor(hv_name) 2807 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2808 hv_class.CheckParameterSyntax(hv_params) 2809 _CheckHVParams(self, node_list, hv_name, hv_params) 2810 2811 if self.op.os_hvp: 2812 # no need to check any newly-enabled hypervisors, since the 2813 # defaults have already been checked in the above code-block 2814 for os_name, os_hvp in self.new_os_hvp.items(): 2815 for hv_name, hv_params in os_hvp.items(): 2816 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 2817 # we need to fill in the new os_hvp on top of the actual hv_p 2818 cluster_defaults = self.new_hvparams.get(hv_name, {}) 2819 new_osp = objects.FillDict(cluster_defaults, hv_params) 2820 hv_class = hypervisor.GetHypervisor(hv_name) 2821 hv_class.CheckParameterSyntax(new_osp) 2822 _CheckHVParams(self, node_list, hv_name, new_osp) 2823 2824 if self.op.default_iallocator: 2825 alloc_script = utils.FindFile(self.op.default_iallocator, 2826 constants.IALLOCATOR_SEARCH_PATH, 2827 os.path.isfile) 2828 if alloc_script is None: 2829 raise errors.OpPrereqError("Invalid default iallocator script '%s'" 2830 " 
specified" % self.op.default_iallocator, 2831 errors.ECODE_INVAL)
2832
2833 - def Exec(self, feedback_fn):
2834 """Change the parameters of the cluster. 2835 2836 """ 2837 if self.op.vg_name is not None: 2838 new_volume = self.op.vg_name 2839 if not new_volume: 2840 new_volume = None 2841 if new_volume != self.cfg.GetVGName(): 2842 self.cfg.SetVGName(new_volume) 2843 else: 2844 feedback_fn("Cluster LVM configuration already in desired" 2845 " state, not changing") 2846 if self.op.drbd_helper is not None: 2847 new_helper = self.op.drbd_helper 2848 if not new_helper: 2849 new_helper = None 2850 if new_helper != self.cfg.GetDRBDHelper(): 2851 self.cfg.SetDRBDHelper(new_helper) 2852 else: 2853 feedback_fn("Cluster DRBD helper already in desired state," 2854 " not changing") 2855 if self.op.hvparams: 2856 self.cluster.hvparams = self.new_hvparams 2857 if self.op.os_hvp: 2858 self.cluster.os_hvp = self.new_os_hvp 2859 if self.op.enabled_hypervisors is not None: 2860 self.cluster.hvparams = self.new_hvparams 2861 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors 2862 if self.op.beparams: 2863 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams 2864 if self.op.nicparams: 2865 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams 2866 if self.op.osparams: 2867 self.cluster.osparams = self.new_osp 2868 2869 if self.op.candidate_pool_size is not None: 2870 self.cluster.candidate_pool_size = self.op.candidate_pool_size 2871 # we need to update the pool size here, otherwise the save will fail 2872 _AdjustCandidatePool(self, []) 2873 2874 if self.op.maintain_node_health is not None: 2875 self.cluster.maintain_node_health = self.op.maintain_node_health 2876 2877 if self.op.prealloc_wipe_disks is not None: 2878 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks 2879 2880 if self.op.add_uids is not None: 2881 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) 2882 2883 if self.op.remove_uids is not None: 2884 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids) 2885 2886 if self.op.uid_pool is not None: 2887 self.cluster.uid_pool = self.op.uid_pool 2888 2889 if self.op.default_iallocator is not None: 2890 self.cluster.default_iallocator = self.op.default_iallocator 2891 2892 if self.op.reserved_lvs is not None: 2893 self.cluster.reserved_lvs = self.op.reserved_lvs 2894 2895 def helper_os(aname, mods, desc): 2896 desc += " OS list" 2897 lst = getattr(self.cluster, aname) 2898 for key, val in mods: 2899 if key == constants.DDM_ADD: 2900 if val in lst: 2901 feedback_fn("OS %s already in %s, ignoring" % (val, desc)) 2902 else: 2903 lst.append(val) 2904 elif key == constants.DDM_REMOVE: 2905 if val in lst: 2906 lst.remove(val) 2907 else: 2908 feedback_fn("OS %s not found in %s, ignoring" % (val, desc)) 2909 else: 2910 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2911 2912 if self.op.hidden_os: 2913 helper_os("hidden_os", self.op.hidden_os, "hidden") 2914 2915 if self.op.blacklisted_os: 2916 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") 2917 2918 self.cfg.Update(self.cluster, feedback_fn)
2919
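# Illustrative sketch (standalone, not part of the LU above): the hidden_os
# and blacklisted_os parameters are lists of (action, os_name) pairs, with the
# action drawn from constants.DDMS_VALUES; helper_os applies them in order.
# The strings "add"/"remove" below stand in for the real DDM_* constants.
def _SketchApplyOsMods(current, mods):
  lst = list(current)
  for action, os_name in mods:
    if action == "add" and os_name not in lst:
      lst.append(os_name)
    elif action == "remove" and os_name in lst:
      lst.remove(os_name)
  return lst

assert _SketchApplyOsMods([], [("add", "debootstrap")]) == ["debootstrap"]
assert _SketchApplyOsMods(["debootstrap"],
                          [("remove", "debootstrap")]) == []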
2920 2921 -def _UploadHelper(lu, nodes, fname):
2922 """Helper for uploading a file and showing warnings. 2923 2924 """ 2925 if os.path.exists(fname): 2926 result = lu.rpc.call_upload_file(nodes, fname) 2927 for to_node, to_result in result.items(): 2928 msg = to_result.fail_msg 2929 if msg: 2930 msg = ("Copy of file %s to node %s failed: %s" % 2931 (fname, to_node, msg)) 2932 lu.proc.LogWarning(msg)
2933
2934 2935 -def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2936 """Distribute additional files which are part of the cluster configuration. 2937 2938 ConfigWriter takes care of distributing the config and ssconf files, but 2939 there are more files which should be distributed to all nodes. This function 2940 makes sure those are copied. 2941 2942 @param lu: calling logical unit 2943 @param additional_nodes: list of nodes not in the config to distribute to 2944 @type additional_vm: boolean 2945 @param additional_vm: whether the additional nodes are vm-capable or not 2946 2947 """ 2948 # 1. Gather target nodes 2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()) 2950 dist_nodes = lu.cfg.GetOnlineNodeList() 2951 nvm_nodes = lu.cfg.GetNonVmCapableNodeList() 2952 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes] 2953 if additional_nodes is not None: 2954 dist_nodes.extend(additional_nodes) 2955 if additional_vm: 2956 vm_nodes.extend(additional_nodes) 2957 if myself.name in dist_nodes: 2958 dist_nodes.remove(myself.name) 2959 if myself.name in vm_nodes: 2960 vm_nodes.remove(myself.name) 2961 2962 # 2. Gather files to distribute 2963 dist_files = set([constants.ETC_HOSTS, 2964 constants.SSH_KNOWN_HOSTS_FILE, 2965 constants.RAPI_CERT_FILE, 2966 constants.RAPI_USERS_FILE, 2967 constants.CONFD_HMAC_KEY, 2968 constants.CLUSTER_DOMAIN_SECRET_FILE, 2969 ]) 2970 2971 vm_files = set() 2972 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors 2973 for hv_name in enabled_hypervisors: 2974 hv_class = hypervisor.GetHypervisor(hv_name) 2975 vm_files.update(hv_class.GetAncillaryFiles()) 2976 2977 # 3. Perform the files upload 2978 for fname in dist_files: 2979 _UploadHelper(lu, dist_nodes, fname) 2980 for fname in vm_files: 2981 _UploadHelper(lu, vm_nodes, fname)
2982
2983 2984 -class LURedistributeConfig(NoHooksLU):
2985 """Force the redistribution of cluster configuration. 2986 2987 This is a very simple LU. 2988 2989 """ 2990 REQ_BGL = False 2991
2992 - def ExpandNames(self):
2993 self.needed_locks = { 2994 locking.LEVEL_NODE: locking.ALL_SET, 2995 } 2996 self.share_locks[locking.LEVEL_NODE] = 1
2997
2998 - def Exec(self, feedback_fn):
2999 """Redistribute the configuration. 3000 3001 """ 3002 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn) 3003 _RedistributeAncillaryFiles(self)
3004
3005 3006 -