Package ganeti :: Module backend

Source Code for Module ganeti.backend

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30   
  31  """Functions used by the node daemon 
  32   
  33  @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in 
  34       the L{UploadFile} function 
  35  @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted 
  36       in the L{_CleanDirectory} function 
  37   
  38  """ 
  39   
  40  # pylint: disable=E1103,C0302 
  41   
  42  # E1103: %s %r has no %r member (but some types could not be 
  43  # inferred), because the _TryOSFromDisk returns either (True, os_obj) 
  44  # or (False, "string") which confuses pylint 
  45   
  46  # C0302: This module has become too big and should be split up 
  47   
  48   
  49  import os 
  50  import os.path 
  51  import shutil 
  52  import time 
  53  import stat 
  54  import errno 
  55  import re 
  56  import random 
  57  import logging 
  58  import tempfile 
  59  import zlib 
  60  import base64 
  61  import signal 
  62   
  63  from ganeti import errors 
  64  from ganeti import utils 
  65  from ganeti import ssh 
  66  from ganeti import hypervisor 
  67  from ganeti import constants 
  68  from ganeti.storage import bdev 
  69  from ganeti.storage import drbd 
  70  from ganeti.storage import filestorage 
  71  from ganeti import objects 
  72  from ganeti import ssconf 
  73  from ganeti import serializer 
  74  from ganeti import netutils 
  75  from ganeti import runtime 
  76  from ganeti import compat 
  77  from ganeti import pathutils 
  78  from ganeti import vcluster 
  79  from ganeti import ht 
  80  from ganeti.storage.base import BlockDev 
  81  from ganeti.storage.drbd import DRBD8 
  82  from ganeti import hooksmaster 
  83   
  84   
  85  _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" 
  86  _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ 
  87    pathutils.DATA_DIR, 
  88    pathutils.JOB_QUEUE_ARCHIVE_DIR, 
  89    pathutils.QUEUE_DIR, 
  90    pathutils.CRYPTO_KEYS_DIR, 
  91    ]) 
  92  _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 
  93  _X509_KEY_FILE = "key" 
  94  _X509_CERT_FILE = "cert" 
  95  _IES_STATUS_FILE = "status" 
  96  _IES_PID_FILE = "pid" 
  97  _IES_CA_FILE = "ca" 
  98   
  99  #: Valid LVS output line regex 
 100  _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") 
 101   
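# Illustrative example (editor's addition, not part of the original module):
# a sample "lvs --noheadings --separator=|" output line and the groups that
# _LVSLINE_REGEX extracts from it; the volume names and sizes are made up.
def _ExampleParseLvsLine():
  """Parses one sample lvs output line with L{_LVSLINE_REGEX}."""
  sample = "  xenvg|test1|20.06|-wi-ao----"
  match = _LVSLINE_REGEX.match(sample)
  if not match:
    return None
  # groups are (vg_name, lv_name, size in MiB, lv_attr)
  return match.groups()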
 102  # Actions for the master setup script 
 103  _MASTER_START = "start" 
 104  _MASTER_STOP = "stop" 
 105   
 106  #: Maximum file permissions for restricted command directory and executables 
 107  _RCMD_MAX_MODE = (stat.S_IRWXU | 
 108                    stat.S_IRGRP | stat.S_IXGRP | 
 109                    stat.S_IROTH | stat.S_IXOTH) 
 110   
 111  #: Delay before returning an error for restricted commands 
 112  _RCMD_INVALID_DELAY = 10 
 113   
 114  #: How long to wait to acquire lock for restricted commands (shorter than 
 115  #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many 
 116  #: command requests arrive 
 117  _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8 
118 119 120 -class RPCFail(Exception):
121 """Class denoting RPC failure. 122 123 Its argument is the error message. 124 125 """
126
127 128 -def _GetInstReasonFilename(instance_name):
129 """Path of the file containing the reason of the instance status change. 130 131 @type instance_name: string 132 @param instance_name: The name of the instance 133 @rtype: string 134 @return: The path of the file 135 136 """ 137 return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
138
139 140 -def _StoreInstReasonTrail(instance_name, trail):
141 """Serialize a reason trail related to an instance change of state to file. 142 143 The exact location of the file depends on the name of the instance and on 144 the configuration of the Ganeti cluster defined at deploy time. 145 146 @type instance_name: string 147 @param instance_name: The name of the instance 148 @rtype: None 149 150 """ 151 json = serializer.DumpJson(trail) 152 filename = _GetInstReasonFilename(instance_name) 153 utils.WriteFile(filename, data=json)
154
155 156 -def _Fail(msg, *args, **kwargs):
157 """Log an error and the raise an RPCFail exception. 158 159 This exception is then handled specially in the ganeti daemon and 160 turned into a 'failed' return type. As such, this function is a 161 useful shortcut for logging the error and returning it to the master 162 daemon. 163 164 @type msg: string 165 @param msg: the text of the exception 166 @raise RPCFail 167 168 """ 169 if args: 170 msg = msg % args 171 if "log" not in kwargs or kwargs["log"]: # if we should log this error 172 if "exc" in kwargs and kwargs["exc"]: 173 logging.exception(msg) 174 else: 175 logging.error(msg) 176 raise RPCFail(msg)
177
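# Illustrative example (editor's addition, not part of the original module):
# the usual calling convention for _Fail, combining printf-style arguments
# with the "exc" keyword to also log the current traceback; the message text
# is made up.
def _ExampleFailUsage(err):
  """Logs a sample error with its traceback and raises L{RPCFail}."""
  _Fail("Cluster configuration incomplete: %s", err, exc=True)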
178 179 -def _GetConfig():
180 """Simple wrapper to return a SimpleStore. 181 182 @rtype: L{ssconf.SimpleStore} 183 @return: a SimpleStore instance 184 185 """ 186 return ssconf.SimpleStore()
187
188 189 -def _GetSshRunner(cluster_name):
190 """Simple wrapper to return an SshRunner. 191 192 @type cluster_name: str 193 @param cluster_name: the cluster name, which is needed 194 by the SshRunner constructor 195 @rtype: L{ssh.SshRunner} 196 @return: an SshRunner instance 197 198 """ 199 return ssh.SshRunner(cluster_name)
200
201 202 -def _Decompress(data):
203 """Unpacks data compressed by the RPC client. 204 205 @type data: list or tuple 206 @param data: Data sent by RPC client 207 @rtype: str 208 @return: Decompressed data 209 210 """ 211 assert isinstance(data, (list, tuple)) 212 assert len(data) == 2 213 (encoding, content) = data 214 if encoding == constants.RPC_ENCODING_NONE: 215 return content 216 elif encoding == constants.RPC_ENCODING_ZLIB_BASE64: 217 return zlib.decompress(base64.b64decode(content)) 218 else: 219 raise AssertionError("Unknown data encoding")
220
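# Illustrative example (editor's addition, not part of the original module):
# the (encoding, content) pairs _Decompress expects are normally produced by
# the RPC client; this sketch only shows the inverse of the unpacking above.
def _ExampleCompressForRpc(data, use_zlib=True):
  """Builds a payload that L{_Decompress} can unpack again."""
  if use_zlib:
    return (constants.RPC_ENCODING_ZLIB_BASE64,
            base64.b64encode(zlib.compress(data)))
  return (constants.RPC_ENCODING_NONE, data)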
221 222 -def _CleanDirectory(path, exclude=None):
223 """Removes all regular files in a directory. 224 225 @type path: str 226 @param path: the directory to clean 227 @type exclude: list 228 @param exclude: list of files to be excluded, defaults 229 to the empty list 230 231 """ 232 if path not in _ALLOWED_CLEAN_DIRS: 233 _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'", 234 path) 235 236 if not os.path.isdir(path): 237 return 238 if exclude is None: 239 exclude = [] 240 else: 241 # Normalize excluded paths 242 exclude = [os.path.normpath(i) for i in exclude] 243 244 for rel_name in utils.ListVisibleFiles(path): 245 full_name = utils.PathJoin(path, rel_name) 246 if full_name in exclude: 247 continue 248 if os.path.isfile(full_name) and not os.path.islink(full_name): 249 utils.RemoveFile(full_name)
250
251 252 -def _BuildUploadFileList():
253 """Build the list of allowed upload files. 254 255 This is abstracted so that it's built only once at module import time. 256 257 """ 258 allowed_files = set([ 259 pathutils.CLUSTER_CONF_FILE, 260 pathutils.ETC_HOSTS, 261 pathutils.SSH_KNOWN_HOSTS_FILE, 262 pathutils.VNC_PASSWORD_FILE, 263 pathutils.RAPI_CERT_FILE, 264 pathutils.SPICE_CERT_FILE, 265 pathutils.SPICE_CACERT_FILE, 266 pathutils.RAPI_USERS_FILE, 267 pathutils.CONFD_HMAC_KEY, 268 pathutils.CLUSTER_DOMAIN_SECRET_FILE, 269 ]) 270 271 for hv_name in constants.HYPER_TYPES: 272 hv_class = hypervisor.GetHypervisorClass(hv_name) 273 allowed_files.update(hv_class.GetAncillaryFiles()[0]) 274 275 assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ 276 "Allowed file storage paths should never be uploaded via RPC" 277 278 return frozenset(allowed_files)
279 280 281 _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
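# Illustrative example (editor's addition, not part of the original module):
# UploadFile, defined later in this module, is expected to reject any path
# outside _ALLOWED_UPLOAD_FILES; this helper only sketches that membership
# check.
def _ExampleIsUploadAllowed(file_name):
  """Returns whether C{file_name} may be written via the upload RPC."""
  return file_name in _ALLOWED_UPLOAD_FILES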
282 283 284 -def JobQueuePurge():
285 """Removes job queue files and archived jobs. 286 287 @rtype: tuple 288 @return: True, None 289 290 """ 291 _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) 292 _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)
293
294 295 -def GetMasterInfo():
296 """Returns master information. 297 298 This is an utility function to compute master information, either 299 for consumption here or from the node daemon. 300 301 @rtype: tuple 302 @return: master_netdev, master_ip, master_name, primary_ip_family, 303 master_netmask 304 @raise RPCFail: in case of errors 305 306 """ 307 try: 308 cfg = _GetConfig() 309 master_netdev = cfg.GetMasterNetdev() 310 master_ip = cfg.GetMasterIP() 311 master_netmask = cfg.GetMasterNetmask() 312 master_node = cfg.GetMasterNode() 313 primary_ip_family = cfg.GetPrimaryIPFamily() 314 except errors.ConfigurationError, err: 315 _Fail("Cluster configuration incomplete: %s", err, exc=True) 316 return (master_netdev, master_ip, master_node, primary_ip_family, 317 master_netmask)
318
319 320 -def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
321 """Decorator that runs hooks before and after the decorated function. 322 323 @type hook_opcode: string 324 @param hook_opcode: opcode of the hook 325 @type hooks_path: string 326 @param hooks_path: path of the hooks 327 @type env_builder_fn: function 328 @param env_builder_fn: function that returns a dictionary containing the 329 environment variables for the hooks. Will get all the parameters of the 330 decorated function. 331 @raise RPCFail: in case of pre-hook failure 332 333 """ 334 def decorator(fn): 335 def wrapper(*args, **kwargs): 336 _, myself = ssconf.GetMasterAndMyself() 337 nodes = ([myself], [myself]) # these hooks run locally 338 339 env_fn = compat.partial(env_builder_fn, *args, **kwargs) 340 341 cfg = _GetConfig() 342 hr = HooksRunner() 343 hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, 344 hr.RunLocalHooks, None, env_fn, None, 345 logging.warning, cfg.GetClusterName(), 346 cfg.GetMasterNode()) 347 hm.RunPhase(constants.HOOKS_PHASE_PRE) 348 result = fn(*args, **kwargs) 349 hm.RunPhase(constants.HOOKS_PHASE_POST) 350 351 return result
352 return wrapper 353 return decorator 354
355 356 -def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
357 """Builds environment variables for master IP hooks. 358 359 @type master_params: L{objects.MasterNetworkParameters} 360 @param master_params: network parameters of the master 361 @type use_external_mip_script: boolean 362 @param use_external_mip_script: whether to use an external master IP 363 address setup script (unused, but necessary per the implementation of the 364 _RunLocalHooks decorator) 365 366 """ 367 # pylint: disable=W0613 368 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) 369 env = { 370 "MASTER_NETDEV": master_params.netdev, 371 "MASTER_IP": master_params.ip, 372 "MASTER_NETMASK": str(master_params.netmask), 373 "CLUSTER_IP_VERSION": str(ver), 374 } 375 376 return env
377
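# Illustrative example (editor's addition, not part of the original module):
# the environment produced by _BuildMasterIpEnv for a sample master using
# 192.0.2.1/24 on eth0; all values here are made up.
def _ExampleMasterIpEnv():
  """Returns the hook environment for a sample master configuration."""
  return {
    "MASTER_NETDEV": "eth0",
    "MASTER_IP": "192.0.2.1",
    "MASTER_NETMASK": "24",
    "CLUSTER_IP_VERSION": "4",
    }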
378 379 -def _RunMasterSetupScript(master_params, action, use_external_mip_script):
380 """Execute the master IP address setup script. 381 382 @type master_params: L{objects.MasterNetworkParameters} 383 @param master_params: network parameters of the master 384 @type action: string 385 @param action: action to pass to the script. Must be one of 386 L{backend._MASTER_START} or L{backend._MASTER_STOP} 387 @type use_external_mip_script: boolean 388 @param use_external_mip_script: whether to use an external master IP 389 address setup script 390 @raise backend.RPCFail: if there are errors during the execution of the 391 script 392 393 """ 394 env = _BuildMasterIpEnv(master_params) 395 396 if use_external_mip_script: 397 setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT 398 else: 399 setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT 400 401 result = utils.RunCmd([setup_script, action], env=env, reset_env=True) 402 403 if result.failed: 404 _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % 405 (action, result.exit_code, result.output), log=True)
406 407 408 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", 409 _BuildMasterIpEnv)
410 -def ActivateMasterIp(master_params, use_external_mip_script):
411 """Activate the IP address of the master daemon. 412 413 @type master_params: L{objects.MasterNetworkParameters} 414 @param master_params: network parameters of the master 415 @type use_external_mip_script: boolean 416 @param use_external_mip_script: whether to use an external master IP 417 address setup script 418 @raise RPCFail: in case of errors during the IP startup 419 420 """ 421 _RunMasterSetupScript(master_params, _MASTER_START, 422 use_external_mip_script)
423
424 425 -def StartMasterDaemons(no_voting):
426 """Activate local node as master node. 427 428 The function will start the master daemons (ganeti-masterd and ganeti-rapi). 429 430 @type no_voting: boolean 431 @param no_voting: whether to start ganeti-masterd without a node vote 432 but still non-interactively 433 @rtype: None 434 435 """ 436 437 if no_voting: 438 masterd_args = "--no-voting --yes-do-it" 439 else: 440 masterd_args = "" 441 442 env = { 443 "EXTRA_MASTERD_ARGS": masterd_args, 444 } 445 446 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) 447 if result.failed: 448 msg = "Can't start Ganeti master: %s" % result.output 449 logging.error(msg) 450 _Fail(msg)
451 452 453 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", 454 _BuildMasterIpEnv)
455 -def DeactivateMasterIp(master_params, use_external_mip_script):
456 """Deactivate the master IP on this node. 457 458 @type master_params: L{objects.MasterNetworkParameters} 459 @param master_params: network parameters of the master 460 @type use_external_mip_script: boolean 461 @param use_external_mip_script: whether to use an external master IP 462 address setup script 463 @raise RPCFail: in case of errors during the IP turndown 464 465 """ 466 _RunMasterSetupScript(master_params, _MASTER_STOP, 467 use_external_mip_script)
468
469 470 -def StopMasterDaemons():
471 """Stop the master daemons on this node. 472 473 Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node. 474 475 @rtype: None 476 477 """ 478 # TODO: log and report back to the caller the error failures; we 479 # need to decide in which case we fail the RPC for this 480 481 result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) 482 if result.failed: 483 logging.error("Could not stop Ganeti master, command %s had exitcode %s" 484 " and error %s", 485 result.cmd, result.exit_code, result.output)
486
487 488 -def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
489 """Change the netmask of the master IP. 490 491 @param old_netmask: the old value of the netmask 492 @param netmask: the new value of the netmask 493 @param master_ip: the master IP 494 @param master_netdev: the master network device 495 496 """ 497 if old_netmask == netmask: 498 return 499 500 if not netutils.IPAddress.Own(master_ip): 501 _Fail("The master IP address is not up, not attempting to change its" 502 " netmask") 503 504 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", 505 "%s/%s" % (master_ip, netmask), 506 "dev", master_netdev, "label", 507 "%s:0" % master_netdev]) 508 if result.failed: 509 _Fail("Could not set the new netmask on the master IP address") 510 511 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", 512 "%s/%s" % (master_ip, old_netmask), 513 "dev", master_netdev, "label", 514 "%s:0" % master_netdev]) 515 if result.failed: 516 _Fail("Could not bring down the master IP address with the old netmask")
517
518 519 -def EtcHostsModify(mode, host, ip):
520 """Modify a host entry in /etc/hosts. 521 522 @param mode: The mode to operate. Either add or remove entry 523 @param host: The host to operate on 524 @param ip: The ip associated with the entry 525 526 """ 527 if mode == constants.ETC_HOSTS_ADD: 528 if not ip: 529 RPCFail("Mode 'add' needs 'ip' parameter, but parameter not" 530 " present") 531 utils.AddHostToEtcHosts(host, ip) 532 elif mode == constants.ETC_HOSTS_REMOVE: 533 if ip: 534 RPCFail("Mode 'remove' does not allow 'ip' parameter, but" 535 " parameter is present") 536 utils.RemoveHostFromEtcHosts(host) 537 else: 538 RPCFail("Mode not supported")
539
540 541 -def LeaveCluster(modify_ssh_setup):
542 """Cleans up and remove the current node. 543 544 This function cleans up and prepares the current node to be removed 545 from the cluster. 546 547 If processing is successful, then it raises an 548 L{errors.QuitGanetiException} which is used as a special case to 549 shutdown the node daemon. 550 551 @param modify_ssh_setup: boolean 552 553 """ 554 _CleanDirectory(pathutils.DATA_DIR) 555 _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) 556 JobQueuePurge() 557 558 if modify_ssh_setup: 559 try: 560 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) 561 562 utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) 563 564 utils.RemoveFile(priv_key) 565 utils.RemoveFile(pub_key) 566 except errors.OpExecError: 567 logging.exception("Error while processing ssh files") 568 569 try: 570 utils.RemoveFile(pathutils.CONFD_HMAC_KEY) 571 utils.RemoveFile(pathutils.RAPI_CERT_FILE) 572 utils.RemoveFile(pathutils.SPICE_CERT_FILE) 573 utils.RemoveFile(pathutils.SPICE_CACERT_FILE) 574 utils.RemoveFile(pathutils.NODED_CERT_FILE) 575 except: # pylint: disable=W0702 576 logging.exception("Error while removing cluster secrets") 577 578 utils.StopDaemon(constants.CONFD) 579 utils.StopDaemon(constants.MOND) 580 581 # Raise a custom exception (handled in ganeti-noded) 582 raise errors.QuitGanetiException(True, "Shutdown scheduled")
583
584 585 -def _CheckStorageParams(params, num_params):
586 """Performs sanity checks for storage parameters. 587 588 @type params: list 589 @param params: list of storage parameters 590 @type num_params: int 591 @param num_params: expected number of parameters 592 593 """ 594 if params is None: 595 raise errors.ProgrammerError("No storage parameters for storage" 596 " reporting provided.") 597 if not isinstance(params, list): 598 raise errors.ProgrammerError("The storage parameters are not of type" 599 " list: '%s'" % params) 600 if not len(params) == num_params: 601 raise errors.ProgrammerError("Did not receive the expected number of" 602 " storage parameters: expected %s," 603 " received '%s'" % (num_params, len(params)))
604
605 606 -def _CheckLvmStorageParams(params):
607 """Performs sanity check for the 'exclusive storage' flag. 608 609 @see: C{_CheckStorageParams} 610 611 """ 612 _CheckStorageParams(params, 1) 613 excl_stor = params[0] 614 if not isinstance(params[0], bool): 615 raise errors.ProgrammerError("Exclusive storage parameter is not" 616 " boolean: '%s'." % excl_stor) 617 return excl_stor
618
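# Illustrative example (editor's addition, not part of the original module):
# LVM storage parameters arrive as a one-element list holding the
# exclusive-storage flag, as checked by _CheckLvmStorageParams above.
def _ExampleLvmStorageParams():
  """Shows the parameter shape accepted by L{_CheckLvmStorageParams}."""
  params = [True]  # exclusive storage enabled
  return _CheckLvmStorageParams(params)  # returns True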
619 620 -def _GetLvmVgSpaceInfo(name, params):
621 """Wrapper around C{_GetVgInfo} which checks the storage parameters. 622 623 @type name: string 624 @param name: name of the volume group 625 @type params: list 626 @param params: list of storage parameters, which in this case should be 627 containing only one for exclusive storage 628 629 """ 630 excl_stor = _CheckLvmStorageParams(params) 631 return _GetVgInfo(name, excl_stor)
632
633 634 -def _GetVgInfo( 635 name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
636 """Retrieves information about a LVM volume group. 637 638 """ 639 # TODO: GetVGInfo supports returning information for multiple VGs at once 640 vginfo = info_fn([name], excl_stor) 641 if vginfo: 642 vg_free = int(round(vginfo[0][0], 0)) 643 vg_size = int(round(vginfo[0][1], 0)) 644 else: 645 vg_free = None 646 vg_size = None 647 648 return { 649 "type": constants.ST_LVM_VG, 650 "name": name, 651 "storage_free": vg_free, 652 "storage_size": vg_size, 653 }
654
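# Illustrative example (editor's addition, not part of the original module):
# the C{info_fn} parameter of _GetVgInfo exists for testability; a stub like
# the one below (with made-up numbers) avoids calling the LVM tools.
def _ExampleGetVgInfoWithStub():
  """Calls L{_GetVgInfo} with a fake volume group info function."""
  def fake_vg_info(vg_names, _excl_stor):
    # one (free, size, name) entry per requested volume group
    return [(1024.0, 4096.0, name) for name in vg_names]
  return _GetVgInfo("xenvg", False, info_fn=fake_vg_info)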
655 656 -def _GetLvmPvSpaceInfo(name, params):
657 """Wrapper around C{_GetVgSpindlesInfo} with sanity checks. 658 659 @see: C{_GetLvmVgSpaceInfo} 660 661 """ 662 excl_stor = _CheckLvmStorageParams(params) 663 return _GetVgSpindlesInfo(name, excl_stor)
664
665 666 -def _GetVgSpindlesInfo( 667 name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
668 """Retrieves information about spindles in an LVM volume group. 669 670 @type name: string 671 @param name: VG name 672 @type excl_stor: bool 673 @param excl_stor: exclusive storage 674 @rtype: dict 675 @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name, 676 free spindles, total spindles respectively 677 678 """ 679 if excl_stor: 680 (vg_free, vg_size) = info_fn(name) 681 else: 682 vg_free = 0 683 vg_size = 0 684 return { 685 "type": constants.ST_LVM_PV, 686 "name": name, 687 "storage_free": vg_free, 688 "storage_size": vg_size, 689 }
690
691 692 -def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
693 """Retrieves node information from a hypervisor. 694 695 The information returned depends on the hypervisor. Common items: 696 697 - vg_size is the size of the configured volume group in MiB 698 - vg_free is the free size of the volume group in MiB 699 - memory_dom0 is the memory allocated for domain0 in MiB 700 - memory_free is the currently available (free) ram in MiB 701 - memory_total is the total number of ram in MiB 702 - hv_version: the hypervisor version, if available 703 704 @type hvparams: dict of string 705 @param hvparams: the hypervisor's hvparams 706 707 """ 708 return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)
709
710 711 -def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
712 """Retrieves node information for all hypervisors. 713 714 See C{_GetHvInfo} for information on the output. 715 716 @type hv_specs: list of pairs (string, dict of strings) 717 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 718 719 """ 720 if hv_specs is None: 721 return None 722 723 result = [] 724 for hvname, hvparams in hv_specs: 725 result.append(_GetHvInfo(hvname, hvparams, get_hv_fn)) 726 return result
727
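# Illustrative example (editor's addition, not part of the original module):
# the C{get_hv_fn} parameter lets callers substitute the hypervisor lookup;
# the fake class and the numbers it reports are made up.
def _ExampleGetHvInfoAllWithStub():
  """Calls L{_GetHvInfoAll} with a fake hypervisor implementation."""
  class _FakeHypervisor(object):
    def GetNodeInfo(self, hvparams=None):
      return {"memory_total": 4096, "memory_free": 2048, "memory_dom0": 512}
  hv_specs = [(constants.HT_FAKE, {})]
  return _GetHvInfoAll(hv_specs, get_hv_fn=lambda _name: _FakeHypervisor())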
728 729 -def _GetNamedNodeInfo(names, fn):
730 """Calls C{fn} for all names in C{names} and returns the results as a list. 731 732 @rtype: None or list 733 734 """ 735 if names is None: 736 return None 737 else: 738 return map(fn, names)
739
740 741 -def GetNodeInfo(storage_units, hv_specs):
742 """Gives back a hash with different information about the node. 743 744 @type storage_units: list of tuples (string, string, list) 745 @param storage_units: List of tuples (storage unit, identifier, parameters) to 746 ask for disk space information. In case of lvm-vg, the identifier is 747 the VG name. The parameters can contain additional, storage-type-specific 748 parameters, for example exclusive storage for lvm storage. 749 @type hv_specs: list of pairs (string, dict of strings) 750 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 751 @rtype: tuple; (string, None/dict, None/dict) 752 @return: Tuple containing boot ID, volume group information and hypervisor 753 information 754 755 """ 756 bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") 757 storage_info = _GetNamedNodeInfo( 758 storage_units, 759 (lambda (storage_type, storage_key, storage_params): 760 _ApplyStorageInfoFunction(storage_type, storage_key, storage_params))) 761 hv_info = _GetHvInfoAll(hv_specs) 762 return (bootid, storage_info, hv_info)
763
764 765 -def _GetFileStorageSpaceInfo(path, params):
766 """Wrapper around filestorage.GetSpaceInfo. 767 768 The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo 769 and ignore the *args parameter to not leak it into the filestorage 770 module's code. 771 772 @see: C{filestorage.GetFileStorageSpaceInfo} for description of the 773 parameters. 774 775 """ 776 _CheckStorageParams(params, 0) 777 return filestorage.GetFileStorageSpaceInfo(path)
778 779 780 # FIXME: implement storage reporting for all missing storage types. 781 _STORAGE_TYPE_INFO_FN = { 782 constants.ST_BLOCK: None, 783 constants.ST_DISKLESS: None, 784 constants.ST_EXT: None, 785 constants.ST_FILE: _GetFileStorageSpaceInfo, 786 constants.ST_LVM_PV: _GetLvmPvSpaceInfo, 787 constants.ST_LVM_VG: _GetLvmVgSpaceInfo, 788 constants.ST_RADOS: None, 789 }
790 791 792 -def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
793 """Looks up and applies the correct function to calculate free and total 794 storage for the given storage type. 795 796 @type storage_type: string 797 @param storage_type: the storage type for which the storage shall be reported. 798 @type storage_key: string 799 @param storage_key: identifier of a storage unit, e.g. the volume group name 800 of an LVM storage unit 801 @type args: any 802 @param args: various parameters that can be used for storage reporting. These 803 parameters and their semantics vary from storage type to storage type and 804 are just propagated in this function. 805 @return: the results of the application of the storage space function (see 806 _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that 807 storage type 808 @raises NotImplementedError: for storage types who don't support space 809 reporting yet 810 """ 811 fn = _STORAGE_TYPE_INFO_FN[storage_type] 812 if fn is not None: 813 return fn(storage_key, *args) 814 else: 815 raise NotImplementedError
816
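# Illustrative example (editor's addition, not part of the original module):
# how a (storage type, key, parameters) unit, as passed in by GetNodeInfo,
# is dispatched through _STORAGE_TYPE_INFO_FN; the volume group name is a
# sample value.
def _ExampleStorageDispatch():
  """Reports space information for one sample LVM volume group unit."""
  storage_type = constants.ST_LVM_VG
  storage_key = "xenvg"  # VG name
  params = [False]       # exclusive storage disabled
  return _ApplyStorageInfoFunction(storage_type, storage_key, params)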
817 818 -def _CheckExclusivePvs(pvi_list):
819 """Check that PVs are not shared among LVs 820 821 @type pvi_list: list of L{objects.LvmPvInfo} objects 822 @param pvi_list: information about the PVs 823 824 @rtype: list of tuples (string, list of strings) 825 @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) 826 827 """ 828 res = [] 829 for pvi in pvi_list: 830 if len(pvi.lv_list) > 1: 831 res.append((pvi.name, pvi.lv_list)) 832 return res
833
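# Illustrative example (editor's addition, not part of the original module):
# any object exposing C{name} and C{lv_list} works with _CheckExclusivePvs;
# a namedtuple stands in for L{objects.LvmPvInfo} and the data is made up.
def _ExampleCheckExclusivePvs():
  """Flags the sample PV that hosts more than one LV."""
  import collections
  fake_pv = collections.namedtuple("FakePv", ["name", "lv_list"])
  pvs = [fake_pv("/dev/sda1", ["xenvg/lv1"]),
         fake_pv("/dev/sdb1", ["xenvg/lv2", "xenvg/lv3"])]
  return _CheckExclusivePvs(pvs)  # [("/dev/sdb1", ["xenvg/lv2", "xenvg/lv3"])]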
834 835 -def _VerifyHypervisors(what, vm_capable, result, all_hvparams, 836 get_hv_fn=hypervisor.GetHypervisor):
837 """Verifies the hypervisor. Appends the results to the 'results' list. 838 839 @type what: C{dict} 840 @param what: a dictionary of things to check 841 @type vm_capable: boolean 842 @param vm_capable: whether or not this node is vm capable 843 @type result: dict 844 @param result: dictionary of verification results; results of the 845 verifications in this function will be added here 846 @type all_hvparams: dict of dict of string 847 @param all_hvparams: dictionary mapping hypervisor names to hvparams 848 @type get_hv_fn: function 849 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 850 851 """ 852 if not vm_capable: 853 return 854 855 if constants.NV_HYPERVISOR in what: 856 result[constants.NV_HYPERVISOR] = {} 857 for hv_name in what[constants.NV_HYPERVISOR]: 858 hvparams = all_hvparams[hv_name] 859 try: 860 val = get_hv_fn(hv_name).Verify(hvparams=hvparams) 861 except errors.HypervisorError, err: 862 val = "Error while checking hypervisor: %s" % str(err) 863 result[constants.NV_HYPERVISOR][hv_name] = val
864
865 866 -def _VerifyHvparams(what, vm_capable, result, 867 get_hv_fn=hypervisor.GetHypervisor):
868 """Verifies the hvparams. Appends the results to the 'results' list. 869 870 @type what: C{dict} 871 @param what: a dictionary of things to check 872 @type vm_capable: boolean 873 @param vm_capable: whether or not this node is vm capable 874 @type result: dict 875 @param result: dictionary of verification results; results of the 876 verifications in this function will be added here 877 @type get_hv_fn: function 878 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 879 880 """ 881 if not vm_capable: 882 return 883 884 if constants.NV_HVPARAMS in what: 885 result[constants.NV_HVPARAMS] = [] 886 for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: 887 try: 888 logging.info("Validating hv %s, %s", hv_name, hvparms) 889 get_hv_fn(hv_name).ValidateParameters(hvparms) 890 except errors.HypervisorError, err: 891 result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))
892
893 894 -def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
895 """Verifies the instance list. 896 897 @type what: C{dict} 898 @param what: a dictionary of things to check 899 @type vm_capable: boolean 900 @param vm_capable: whether or not this node is vm capable 901 @type result: dict 902 @param result: dictionary of verification results; results of the 903 verifications in this function will be added here 904 @type all_hvparams: dict of dict of string 905 @param all_hvparams: dictionary mapping hypervisor names to hvparams 906 907 """ 908 if constants.NV_INSTANCELIST in what and vm_capable: 909 # GetInstanceList can fail 910 try: 911 val = GetInstanceList(what[constants.NV_INSTANCELIST], 912 all_hvparams=all_hvparams) 913 except RPCFail, err: 914 val = str(err) 915 result[constants.NV_INSTANCELIST] = val
916
917 918 -def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
919 """Verifies the node info. 920 921 @type what: C{dict} 922 @param what: a dictionary of things to check 923 @type vm_capable: boolean 924 @param vm_capable: whether or not this node is vm capable 925 @type result: dict 926 @param result: dictionary of verification results; results of the 927 verifications in this function will be added here 928 @type all_hvparams: dict of dict of string 929 @param all_hvparams: dictionary mapping hypervisor names to hvparams 930 931 """ 932 if constants.NV_HVINFO in what and vm_capable: 933 hvname = what[constants.NV_HVINFO] 934 hyper = hypervisor.GetHypervisor(hvname) 935 hvparams = all_hvparams[hvname] 936 result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)
937
938 939 -def VerifyNode(what, cluster_name, all_hvparams):
940 """Verify the status of the local node. 941 942 Based on the input L{what} parameter, various checks are done on the 943 local node. 944 945 If the I{filelist} key is present, this list of 946 files is checksummed and the file/checksum pairs are returned. 947 948 If the I{nodelist} key is present, we check that we have 949 connectivity via ssh with the target nodes (and check the hostname 950 report). 951 952 If the I{node-net-test} key is present, we check that we have 953 connectivity to the given nodes via both primary IP and, if 954 applicable, secondary IPs. 955 956 @type what: C{dict} 957 @param what: a dictionary of things to check: 958 - filelist: list of files for which to compute checksums 959 - nodelist: list of nodes we should check ssh communication with 960 - node-net-test: list of nodes we should check node daemon port 961 connectivity with 962 - hypervisor: list with hypervisors to run the verify for 963 @type cluster_name: string 964 @param cluster_name: the cluster's name 965 @type all_hvparams: dict of dict of strings 966 @param all_hvparams: a dictionary mapping hypervisor names to hvparams 967 @rtype: dict 968 @return: a dictionary with the same keys as the input dict, and 969 values representing the result of the checks 970 971 """ 972 result = {} 973 my_name = netutils.Hostname.GetSysName() 974 port = netutils.GetDaemonPort(constants.NODED) 975 vm_capable = my_name not in what.get(constants.NV_NONVMNODES, []) 976 977 _VerifyHypervisors(what, vm_capable, result, all_hvparams) 978 _VerifyHvparams(what, vm_capable, result) 979 980 if constants.NV_FILELIST in what: 981 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, 982 what[constants.NV_FILELIST])) 983 result[constants.NV_FILELIST] = \ 984 dict((vcluster.MakeVirtualPath(key), value) 985 for (key, value) in fingerprints.items()) 986 987 if constants.NV_NODELIST in what: 988 (nodes, bynode) = what[constants.NV_NODELIST] 989 990 # Add nodes from other groups (different for each node) 991 try: 992 nodes.extend(bynode[my_name]) 993 except KeyError: 994 pass 995 996 # Use a random order 997 random.shuffle(nodes) 998 999 # Try to contact all nodes 1000 val = {} 1001 for node in nodes: 1002 success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node) 1003 if not success: 1004 val[node] = message 1005 1006 result[constants.NV_NODELIST] = val 1007 1008 if constants.NV_NODENETTEST in what: 1009 result[constants.NV_NODENETTEST] = tmp = {} 1010 my_pip = my_sip = None 1011 for name, pip, sip in what[constants.NV_NODENETTEST]: 1012 if name == my_name: 1013 my_pip = pip 1014 my_sip = sip 1015 break 1016 if not my_pip: 1017 tmp[my_name] = ("Can't find my own primary/secondary IP" 1018 " in the node list") 1019 else: 1020 for name, pip, sip in what[constants.NV_NODENETTEST]: 1021 fail = [] 1022 if not netutils.TcpPing(pip, port, source=my_pip): 1023 fail.append("primary") 1024 if sip != pip: 1025 if not netutils.TcpPing(sip, port, source=my_sip): 1026 fail.append("secondary") 1027 if fail: 1028 tmp[name] = ("failure using the %s interface(s)" % 1029 " and ".join(fail)) 1030 1031 if constants.NV_MASTERIP in what: 1032 # FIXME: add checks on incoming data structures (here and in the 1033 # rest of the function) 1034 master_name, master_ip = what[constants.NV_MASTERIP] 1035 if master_name == my_name: 1036 source = constants.IP4_ADDRESS_LOCALHOST 1037 else: 1038 source = None 1039 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, 1040 source=source) 1041 1042 if constants.NV_USERSCRIPTS 
in what: 1043 result[constants.NV_USERSCRIPTS] = \ 1044 [script for script in what[constants.NV_USERSCRIPTS] 1045 if not utils.IsExecutable(script)] 1046 1047 if constants.NV_OOB_PATHS in what: 1048 result[constants.NV_OOB_PATHS] = tmp = [] 1049 for path in what[constants.NV_OOB_PATHS]: 1050 try: 1051 st = os.stat(path) 1052 except OSError, err: 1053 tmp.append("error stating out of band helper: %s" % err) 1054 else: 1055 if stat.S_ISREG(st.st_mode): 1056 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR: 1057 tmp.append(None) 1058 else: 1059 tmp.append("out of band helper %s is not executable" % path) 1060 else: 1061 tmp.append("out of band helper %s is not a file" % path) 1062 1063 if constants.NV_LVLIST in what and vm_capable: 1064 try: 1065 val = GetVolumeList([what[constants.NV_LVLIST]]) 1066 except RPCFail, err: 1067 val = str(err) 1068 result[constants.NV_LVLIST] = val 1069 1070 _VerifyInstanceList(what, vm_capable, result, all_hvparams) 1071 1072 if constants.NV_VGLIST in what and vm_capable: 1073 result[constants.NV_VGLIST] = utils.ListVolumeGroups() 1074 1075 if constants.NV_PVLIST in what and vm_capable: 1076 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what 1077 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], 1078 filter_allocatable=False, 1079 include_lvs=check_exclusive_pvs) 1080 if check_exclusive_pvs: 1081 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) 1082 for pvi in val: 1083 # Avoid sending useless data on the wire 1084 pvi.lv_list = [] 1085 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) 1086 1087 if constants.NV_VERSION in what: 1088 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, 1089 constants.RELEASE_VERSION) 1090 1091 _VerifyNodeInfo(what, vm_capable, result, all_hvparams) 1092 1093 if constants.NV_DRBDVERSION in what and vm_capable: 1094 try: 1095 drbd_version = DRBD8.GetProcInfo().GetVersionString() 1096 except errors.BlockDeviceError, err: 1097 logging.warning("Can't get DRBD version", exc_info=True) 1098 drbd_version = str(err) 1099 result[constants.NV_DRBDVERSION] = drbd_version 1100 1101 if constants.NV_DRBDLIST in what and vm_capable: 1102 try: 1103 used_minors = drbd.DRBD8.GetUsedDevs() 1104 except errors.BlockDeviceError, err: 1105 logging.warning("Can't get used minors list", exc_info=True) 1106 used_minors = str(err) 1107 result[constants.NV_DRBDLIST] = used_minors 1108 1109 if constants.NV_DRBDHELPER in what and vm_capable: 1110 status = True 1111 try: 1112 payload = drbd.DRBD8.GetUsermodeHelper() 1113 except errors.BlockDeviceError, err: 1114 logging.error("Can't get DRBD usermode helper: %s", str(err)) 1115 status = False 1116 payload = str(err) 1117 result[constants.NV_DRBDHELPER] = (status, payload) 1118 1119 if constants.NV_NODESETUP in what: 1120 result[constants.NV_NODESETUP] = tmpr = [] 1121 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"): 1122 tmpr.append("The sysfs filesytem doesn't seem to be mounted" 1123 " under /sys, missing required directories /sys/block" 1124 " and /sys/class/net") 1125 if (not os.path.isdir("/proc/sys") or 1126 not os.path.isfile("/proc/sysrq-trigger")): 1127 tmpr.append("The procfs filesystem doesn't seem to be mounted" 1128 " under /proc, missing required directory /proc/sys and" 1129 " the file /proc/sysrq-trigger") 1130 1131 if constants.NV_TIME in what: 1132 result[constants.NV_TIME] = utils.SplitTime(time.time()) 1133 1134 if constants.NV_OSLIST in what and vm_capable: 1135 result[constants.NV_OSLIST] = DiagnoseOS() 1136 1137 if 
constants.NV_BRIDGES in what and vm_capable: 1138 result[constants.NV_BRIDGES] = [bridge 1139 for bridge in what[constants.NV_BRIDGES] 1140 if not utils.BridgeExists(bridge)] 1141 1142 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name: 1143 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 1144 filestorage.ComputeWrongFileStoragePaths() 1145 1146 if what.get(constants.NV_FILE_STORAGE_PATH): 1147 pathresult = filestorage.CheckFileStoragePath( 1148 what[constants.NV_FILE_STORAGE_PATH]) 1149 if pathresult: 1150 result[constants.NV_FILE_STORAGE_PATH] = pathresult 1151 1152 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH): 1153 pathresult = filestorage.CheckFileStoragePath( 1154 what[constants.NV_SHARED_FILE_STORAGE_PATH]) 1155 if pathresult: 1156 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult 1157 1158 return result
1159
1160 1161 -def GetBlockDevSizes(devices):
1162 """Return the size of the given block devices 1163 1164 @type devices: list 1165 @param devices: list of block device nodes to query 1166 @rtype: dict 1167 @return: 1168 dictionary of all block devices under /dev (key). The value is their 1169 size in MiB. 1170 1171 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 1172 1173 """ 1174 DEV_PREFIX = "/dev/" 1175 blockdevs = {} 1176 1177 for devpath in devices: 1178 if not utils.IsBelowDir(DEV_PREFIX, devpath): 1179 continue 1180 1181 try: 1182 st = os.stat(devpath) 1183 except EnvironmentError, err: 1184 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 1185 continue 1186 1187 if stat.S_ISBLK(st.st_mode): 1188 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 1189 if result.failed: 1190 # We don't want to fail, just do not list this device as available 1191 logging.warning("Cannot get size for block device %s", devpath) 1192 continue 1193 1194 size = int(result.stdout) / (1024 * 1024) 1195 blockdevs[devpath] = size 1196 return blockdevs
1197
1198 1199 -def GetVolumeList(vg_names):
1200 """Compute list of logical volumes and their size. 1201 1202 @type vg_names: list 1203 @param vg_names: the volume groups whose LVs we should list, or 1204 empty for all volume groups 1205 @rtype: dict 1206 @return: 1207 dictionary of all partitions (key) with value being a tuple of 1208 their size (in MiB), inactive and online status:: 1209 1210 {'xenvg/test1': ('20.06', True, True)} 1211 1212 in case of errors, a string is returned with the error 1213 details. 1214 1215 """ 1216 lvs = {} 1217 sep = "|" 1218 if not vg_names: 1219 vg_names = [] 1220 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 1221 "--separator=%s" % sep, 1222 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 1223 if result.failed: 1224 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 1225 1226 for line in result.stdout.splitlines(): 1227 line = line.strip() 1228 match = _LVSLINE_REGEX.match(line) 1229 if not match: 1230 logging.error("Invalid line returned from lvs output: '%s'", line) 1231 continue 1232 vg_name, name, size, attr = match.groups() 1233 inactive = attr[4] == "-" 1234 online = attr[5] == "o" 1235 virtual = attr[0] == "v" 1236 if virtual: 1237 # we don't want to report such volumes as existing, since they 1238 # don't really hold data 1239 continue 1240 lvs[vg_name + "/" + name] = (size, inactive, online) 1241 1242 return lvs
1243
1244 1245 -def ListVolumeGroups():
1246 """List the volume groups and their size. 1247 1248 @rtype: dict 1249 @return: dictionary with keys volume name and values the 1250 size of the volume 1251 1252 """ 1253 return utils.ListVolumeGroups()
1254
1255 1256 -def NodeVolumes():
1257 """List all volumes on this node. 1258 1259 @rtype: list 1260 @return: 1261 A list of dictionaries, each having four keys: 1262 - name: the logical volume name, 1263 - size: the size of the logical volume 1264 - dev: the physical device on which the LV lives 1265 - vg: the volume group to which it belongs 1266 1267 In case of errors, we return an empty list and log the 1268 error. 1269 1270 Note that since a logical volume can live on multiple physical 1271 volumes, the resulting list might include a logical volume 1272 multiple times. 1273 1274 """ 1275 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 1276 "--separator=|", 1277 "--options=lv_name,lv_size,devices,vg_name"]) 1278 if result.failed: 1279 _Fail("Failed to list logical volumes, lvs output: %s", 1280 result.output) 1281 1282 def parse_dev(dev): 1283 return dev.split("(")[0]
1284 1285 def handle_dev(dev): 1286 return [parse_dev(x) for x in dev.split(",")] 1287 1288 def map_line(line): 1289 line = [v.strip() for v in line] 1290 return [{"name": line[0], "size": line[1], 1291 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 1292 1293 all_devs = [] 1294 for line in result.stdout.splitlines(): 1295 if line.count("|") >= 3: 1296 all_devs.extend(map_line(line.split("|"))) 1297 else: 1298 logging.warning("Strange line in the output from lvs: '%s'", line) 1299 return all_devs 1300
1301 1302 -def BridgesExist(bridges_list):
1303 """Check if a list of bridges exist on the current node. 1304 1305 @rtype: boolean 1306 @return: C{True} if all of them exist, C{False} otherwise 1307 1308 """ 1309 missing = [] 1310 for bridge in bridges_list: 1311 if not utils.BridgeExists(bridge): 1312 missing.append(bridge) 1313 1314 if missing: 1315 _Fail("Missing bridges %s", utils.CommaJoin(missing))
1316
1317 1318 -def GetInstanceListForHypervisor(hname, hvparams=None, 1319 get_hv_fn=hypervisor.GetHypervisor):
1320 """Provides a list of instances of the given hypervisor. 1321 1322 @type hname: string 1323 @param hname: name of the hypervisor 1324 @type hvparams: dict of strings 1325 @param hvparams: hypervisor parameters for the given hypervisor 1326 @type get_hv_fn: function 1327 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 1328 name; optional parameter to increase testability 1329 1330 @rtype: list 1331 @return: a list of all running instances on the current node 1332 - instance1.example.com 1333 - instance2.example.com 1334 1335 """ 1336 results = [] 1337 try: 1338 hv = get_hv_fn(hname) 1339 names = hv.ListInstances(hvparams=hvparams) 1340 results.extend(names) 1341 except errors.HypervisorError, err: 1342 _Fail("Error enumerating instances (hypervisor %s): %s", 1343 hname, err, exc=True) 1344 return results
1345
1346 1347 -def GetInstanceList(hypervisor_list, all_hvparams=None, 1348 get_hv_fn=hypervisor.GetHypervisor):
1349 """Provides a list of instances. 1350 1351 @type hypervisor_list: list 1352 @param hypervisor_list: the list of hypervisors to query information 1353 @type all_hvparams: dict of dict of strings 1354 @param all_hvparams: a dictionary mapping hypervisor types to respective 1355 cluster-wide hypervisor parameters 1356 @type get_hv_fn: function 1357 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 1358 name; optional parameter to increase testability 1359 1360 @rtype: list 1361 @return: a list of all running instances on the current node 1362 - instance1.example.com 1363 - instance2.example.com 1364 1365 """ 1366 results = [] 1367 for hname in hypervisor_list: 1368 hvparams = all_hvparams[hname] 1369 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 1370 get_hv_fn=get_hv_fn)) 1371 return results
1372
1373 1374 -def GetInstanceInfo(instance, hname, hvparams=None):
1375 """Gives back the information about an instance as a dictionary. 1376 1377 @type instance: string 1378 @param instance: the instance name 1379 @type hname: string 1380 @param hname: the hypervisor type of the instance 1381 @type hvparams: dict of strings 1382 @param hvparams: the instance's hvparams 1383 1384 @rtype: dict 1385 @return: dictionary with the following keys: 1386 - memory: memory size of instance (int) 1387 - state: xen state of instance (string) 1388 - time: cpu time of instance (float) 1389 - vcpus: the number of vcpus (int) 1390 1391 """ 1392 output = {} 1393 1394 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 1395 hvparams=hvparams) 1396 if iinfo is not None: 1397 output["memory"] = iinfo[2] 1398 output["vcpus"] = iinfo[3] 1399 output["state"] = iinfo[4] 1400 output["time"] = iinfo[5] 1401 1402 return output
1403
1404 1405 -def GetInstanceMigratable(instance):
1406 """Computes whether an instance can be migrated. 1407 1408 @type instance: L{objects.Instance} 1409 @param instance: object representing the instance to be checked. 1410 1411 @rtype: tuple 1412 @return: tuple of (result, description) where: 1413 - result: whether the instance can be migrated or not 1414 - description: a description of the issue, if relevant 1415 1416 """ 1417 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1418 iname = instance.name 1419 if iname not in hyper.ListInstances(instance.hvparams): 1420 _Fail("Instance %s is not running", iname) 1421 1422 for idx in range(len(instance.disks)): 1423 link_name = _GetBlockDevSymlinkPath(iname, idx) 1424 if not os.path.islink(link_name): 1425 logging.warning("Instance %s is missing symlink %s for disk %d", 1426 iname, link_name, idx)
1427
1428 1429 -def GetAllInstancesInfo(hypervisor_list, all_hvparams):
1430 """Gather data about all instances. 1431 1432 This is the equivalent of L{GetInstanceInfo}, except that it 1433 computes data for all instances at once, thus being faster if one 1434 needs data about more than one instance. 1435 1436 @type hypervisor_list: list 1437 @param hypervisor_list: list of hypervisors to query for instance data 1438 @type all_hvparams: dict of dict of strings 1439 @param all_hvparams: mapping of hypervisor names to hvparams 1440 1441 @rtype: dict 1442 @return: dictionary of instance: data, with data having the following keys: 1443 - memory: memory size of instance (int) 1444 - state: xen state of instance (string) 1445 - time: cpu time of instance (float) 1446 - vcpus: the number of vcpus 1447 1448 """ 1449 output = {} 1450 1451 for hname in hypervisor_list: 1452 hvparams = all_hvparams[hname] 1453 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 1454 if iinfo: 1455 for name, _, memory, vcpus, state, times in iinfo: 1456 value = { 1457 "memory": memory, 1458 "vcpus": vcpus, 1459 "state": state, 1460 "time": times, 1461 } 1462 if name in output: 1463 # we only check static parameters, like memory and vcpus, 1464 # and not state and time which can change between the 1465 # invocations of the different hypervisors 1466 for key in "memory", "vcpus": 1467 if value[key] != output[name][key]: 1468 _Fail("Instance %s is running twice" 1469 " with different parameters", name) 1470 output[name] = value 1471 1472 return output
1473
1474 1475 -def _InstanceLogName(kind, os_name, instance, component):
1476 """Compute the OS log filename for a given instance and operation. 1477 1478 The instance name and os name are passed in as strings since not all 1479 operations have these as part of an instance object. 1480 1481 @type kind: string 1482 @param kind: the operation type (e.g. add, import, etc.) 1483 @type os_name: string 1484 @param os_name: the os name 1485 @type instance: string 1486 @param instance: the name of the instance being imported/added/etc. 1487 @type component: string or None 1488 @param component: the name of the component of the instance being 1489 transferred 1490 1491 """ 1492 # TODO: Use tempfile.mkstemp to create unique filename 1493 if component: 1494 assert "/" not in component 1495 c_msg = "-%s" % component 1496 else: 1497 c_msg = "" 1498 base = ("%s-%s-%s%s-%s.log" % 1499 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 1500 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
1501
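# Illustrative example (editor's addition, not part of the original module):
# for an "add" operation on a hypothetical instance the result looks like
# <LOG_OS_DIR>/add-debootstrap-instance1.example.com-<timestamp>.log, with
# the timestamp format coming from utils.TimestampForFilename.
def _ExampleInstanceLogName():
  """Computes a sample OS add log file name."""
  return _InstanceLogName("add", "debootstrap", "instance1.example.com", None)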
1502 1503 -def InstanceOsAdd(instance, reinstall, debug):
1504 """Add an OS to an instance. 1505 1506 @type instance: L{objects.Instance} 1507 @param instance: Instance whose OS is to be installed 1508 @type reinstall: boolean 1509 @param reinstall: whether this is an instance reinstall 1510 @type debug: integer 1511 @param debug: debug level, passed to the OS scripts 1512 @rtype: None 1513 1514 """ 1515 inst_os = OSFromDisk(instance.os) 1516 1517 create_env = OSEnvironment(instance, inst_os, debug) 1518 if reinstall: 1519 create_env["INSTANCE_REINSTALL"] = "1" 1520 1521 logfile = _InstanceLogName("add", instance.os, instance.name, None) 1522 1523 result = utils.RunCmd([inst_os.create_script], env=create_env, 1524 cwd=inst_os.path, output=logfile, reset_env=True) 1525 if result.failed: 1526 logging.error("os create command '%s' returned error: %s, logfile: %s," 1527 " output: %s", result.cmd, result.fail_reason, logfile, 1528 result.output) 1529 lines = [utils.SafeEncode(val) 1530 for val in utils.TailFile(logfile, lines=20)] 1531 _Fail("OS create script failed (%s), last lines in the" 1532 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
1533
1534 1535 -def RunRenameInstance(instance, old_name, debug):
1536 """Run the OS rename script for an instance. 1537 1538 @type instance: L{objects.Instance} 1539 @param instance: Instance whose OS is to be installed 1540 @type old_name: string 1541 @param old_name: previous instance name 1542 @type debug: integer 1543 @param debug: debug level, passed to the OS scripts 1544 @rtype: boolean 1545 @return: the success of the operation 1546 1547 """ 1548 inst_os = OSFromDisk(instance.os) 1549 1550 rename_env = OSEnvironment(instance, inst_os, debug) 1551 rename_env["OLD_INSTANCE_NAME"] = old_name 1552 1553 logfile = _InstanceLogName("rename", instance.os, 1554 "%s-%s" % (old_name, instance.name), None) 1555 1556 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 1557 cwd=inst_os.path, output=logfile, reset_env=True) 1558 1559 if result.failed: 1560 logging.error("os create command '%s' returned error: %s output: %s", 1561 result.cmd, result.fail_reason, result.output) 1562 lines = [utils.SafeEncode(val) 1563 for val in utils.TailFile(logfile, lines=20)] 1564 _Fail("OS rename script failed (%s), last lines in the" 1565 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
1566
1567 1568 -def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
1569 """Returns symlink path for block device. 1570 1571 """ 1572 if _dir is None: 1573 _dir = pathutils.DISK_LINKS_DIR 1574 1575 return utils.PathJoin(_dir, 1576 ("%s%s%s" % 1577 (instance_name, constants.DISK_SEPARATOR, idx)))
1578
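# Illustrative example (editor's addition, not part of the original module):
# with an explicit directory instead of the default DISK_LINKS_DIR, the link
# for disk 0 of a hypothetical instance is
# "/tmp/instance1.example.com<DISK_SEPARATOR>0".
def _ExampleBlockDevSymlinkPath():
  """Computes a sample block device symlink path."""
  return _GetBlockDevSymlinkPath("instance1.example.com", 0, _dir="/tmp")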
1579 1580 -def _SymlinkBlockDev(instance_name, device_path, idx):
1581 """Set up symlinks to an instance's block device. 1582 1583 This is an auxiliary function run when an instance is started (on the primary 1584 node) or when an instance is migrated (on the target node). 1585 1586 1587 @param instance_name: the name of the target instance 1588 @param device_path: path of the physical block device, on the node 1589 @param idx: the disk index 1590 @return: absolute path to the disk's symlink 1591 1592 """ 1593 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 1594 try: 1595 os.symlink(device_path, link_name) 1596 except OSError, err: 1597 if err.errno == errno.EEXIST: 1598 if (not os.path.islink(link_name) or 1599 os.readlink(link_name) != device_path): 1600 os.remove(link_name) 1601 os.symlink(device_path, link_name) 1602 else: 1603 raise 1604 1605 return link_name
1606 1619
1620 1621 -def _CalculateDeviceURI(instance, disk, device):
1622 """Get the URI for the device. 1623 1624 @type instance: L{objects.Instance} 1625 @param instance: the instance which disk belongs to 1626 @type disk: L{objects.Disk} 1627 @param disk: the target disk object 1628 @type device: L{bdev.BlockDev} 1629 @param device: the corresponding BlockDevice 1630 @rtype: string 1631 @return: the device uri if any else None 1632 1633 """ 1634 access_mode = disk.params.get(constants.LDP_ACCESS, 1635 constants.DISK_KERNELSPACE) 1636 if access_mode == constants.DISK_USERSPACE: 1637 # This can raise errors.BlockDeviceError 1638 return device.GetUserspaceAccessUri(instance.hypervisor) 1639 else: 1640 return None
1641
1642 1643 -def _GatherAndLinkBlockDevs(instance):
1644 """Set up an instance's block device(s). 1645 1646 This is run on the primary node at instance startup. The block 1647 devices must be already assembled. 1648 1649 @type instance: L{objects.Instance} 1650 @param instance: the instance whose disks we should assemble 1651 @rtype: list 1652 @return: list of (disk_object, link_name, drive_uri) 1653 1654 """ 1655 block_devices = [] 1656 for idx, disk in enumerate(instance.disks): 1657 device = _RecursiveFindBD(disk) 1658 if device is None: 1659 raise errors.BlockDeviceError("Block device '%s' is not set up." % 1660 str(disk)) 1661 device.Open() 1662 try: 1663 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx) 1664 except OSError, e: 1665 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 1666 e.strerror) 1667 uri = _CalculateDeviceURI(instance, disk, device) 1668 1669 block_devices.append((disk, link_name, uri)) 1670 1671 return block_devices
1672
1673 1674 -def StartInstance(instance, startup_paused, reason, store_reason=True):
1675 """Start an instance. 1676 1677 @type instance: L{objects.Instance} 1678 @param instance: the instance object 1679 @type startup_paused: bool 1680 @param startup_paused: whether to pause the instance at startup 1681 @type reason: list of reasons 1682 @param reason: the reason trail for this startup 1683 @type store_reason: boolean 1684 @param store_reason: whether to store the startup reason trail on file 1685 @rtype: None 1686 1687 """ 1688 running_instances = GetInstanceListForHypervisor(instance.hypervisor, 1689 instance.hvparams) 1690 1691 if instance.name in running_instances: 1692 logging.info("Instance %s already running, not starting", instance.name) 1693 return 1694 1695 try: 1696 block_devices = _GatherAndLinkBlockDevs(instance) 1697 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1698 hyper.StartInstance(instance, block_devices, startup_paused) 1699 if store_reason: 1700 _StoreInstReasonTrail(instance.name, reason) 1701 except errors.BlockDeviceError, err: 1702 _Fail("Block device error: %s", err, exc=True) 1703 except errors.HypervisorError, err: 1704 _RemoveBlockDevLinks(instance.name, instance.disks) 1705 _Fail("Hypervisor error: %s", err, exc=True)
1706
1707 1708 -def InstanceShutdown(instance, timeout, reason, store_reason=True):
1709 """Shut an instance down. 1710 1711 @note: this function uses polling with a hardcoded timeout. 1712 1713 @type instance: L{objects.Instance} 1714 @param instance: the instance object 1715 @type timeout: integer 1716 @param timeout: maximum timeout for soft shutdown 1717 @type reason: list of reasons 1718 @param reason: the reason trail for this shutdown 1719 @type store_reason: boolean 1720 @param store_reason: whether to store the shutdown reason trail on file 1721 @rtype: None 1722 1723 """ 1724 hv_name = instance.hypervisor 1725 hyper = hypervisor.GetHypervisor(hv_name) 1726 iname = instance.name 1727 1728 if instance.name not in hyper.ListInstances(instance.hvparams): 1729 logging.info("Instance %s not running, doing nothing", iname) 1730 return 1731 1732 class _TryShutdown(object): 1733 def __init__(self): 1734 self.tried_once = False
1735 1736 def __call__(self): 1737 if iname not in hyper.ListInstances(instance.hvparams): 1738 return 1739 1740 try: 1741 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 1742 if store_reason: 1743 _StoreInstReasonTrail(instance.name, reason) 1744 except errors.HypervisorError, err: 1745 if iname not in hyper.ListInstances(instance.hvparams): 1746 # if the instance is no longer existing, consider this a 1747 # success and go to cleanup 1748 return 1749 1750 _Fail("Failed to stop instance %s: %s", iname, err) 1751 1752 self.tried_once = True 1753 1754 raise utils.RetryAgain() 1755 1756 try: 1757 utils.Retry(_TryShutdown(), 5, timeout) 1758 except utils.RetryTimeout: 1759 # the shutdown did not succeed 1760 logging.error("Shutdown of '%s' unsuccessful, forcing", iname) 1761 1762 try: 1763 hyper.StopInstance(instance, force=True) 1764 except errors.HypervisorError, err: 1765 if iname in hyper.ListInstances(instance.hvparams): 1766 # only raise an error if the instance still exists, otherwise 1767 # the error could simply be "instance ... unknown"! 1768 _Fail("Failed to force stop instance %s: %s", iname, err) 1769 1770 time.sleep(1) 1771 1772 if iname in hyper.ListInstances(instance.hvparams): 1773 _Fail("Could not shutdown instance %s even by destroy", iname) 1774 1775 try: 1776 hyper.CleanupInstance(instance.name) 1777 except errors.HypervisorError, err: 1778 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 1779 1780 _RemoveBlockDevLinks(iname, instance.disks) 1781
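# The polling above follows the utils.Retry contract visible in this
# function: the callable returns normally to finish, raises utils.RetryAgain
# to be invoked again, and utils.Retry raises utils.RetryTimeout when the
# overall timeout expires. A stripped-down sketch of the same pattern (the
# helper name is hypothetical):
def _ExampleWaitForInstanceStop(hyper, instance, timeout):
  """Hypothetical helper: wait until the hypervisor no longer lists the
  instance, returning True on success and False on timeout.

  """
  def _Check():
    if instance.name in hyper.ListInstances(instance.hvparams):
      raise utils.RetryAgain()
  try:
    utils.Retry(_Check, 5, timeout)
    return True
  except utils.RetryTimeout:
    return False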
1782 1783 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
1784 """Reboot an instance. 1785 1786 @type instance: L{objects.Instance} 1787 @param instance: the instance object to reboot 1788 @type reboot_type: str 1789 @param reboot_type: the type of reboot, one the following 1790 constants: 1791 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 1792 instance OS, do not recreate the VM 1793 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 1794 restart the VM (at the hypervisor level) 1795 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 1796 not accepted here, since that mode is handled differently, in 1797 cmdlib, and translates into full stop and start of the 1798 instance (instead of a call_instance_reboot RPC) 1799 @type shutdown_timeout: integer 1800 @param shutdown_timeout: maximum timeout for soft shutdown 1801 @type reason: list of reasons 1802 @param reason: the reason trail for this reboot 1803 @rtype: None 1804 1805 """ 1806 running_instances = GetInstanceListForHypervisor(instance.hypervisor, 1807 instance.hvparams) 1808 1809 if instance.name not in running_instances: 1810 _Fail("Cannot reboot instance %s that is not running", instance.name) 1811 1812 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1813 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 1814 try: 1815 hyper.RebootInstance(instance) 1816 except errors.HypervisorError, err: 1817 _Fail("Failed to soft reboot instance %s: %s", instance.name, err) 1818 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 1819 try: 1820 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 1821 result = StartInstance(instance, False, reason, store_reason=False) 1822 _StoreInstReasonTrail(instance.name, reason) 1823 return result 1824 except errors.HypervisorError, err: 1825 _Fail("Failed to hard reboot instance %s: %s", instance.name, err) 1826 else: 1827 _Fail("Invalid reboot_type received: %s", reboot_type)
1828
1829 1830 -def InstanceBalloonMemory(instance, memory):
1831 """Resize an instance's memory. 1832 1833 @type instance: L{objects.Instance} 1834 @param instance: the instance object 1835 @type memory: int 1836 @param memory: new memory amount in MB 1837 @rtype: None 1838 1839 """ 1840 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1841 running = hyper.ListInstances(instance.hvparams) 1842 if instance.name not in running: 1843 logging.info("Instance %s is not running, cannot balloon", instance.name) 1844 return 1845 try: 1846 hyper.BalloonInstanceMemory(instance, memory) 1847 except errors.HypervisorError, err: 1848 _Fail("Failed to balloon instance memory: %s", err, exc=True)
1849
1850 1851 -def MigrationInfo(instance):
1852 """Gather information about an instance to be migrated. 1853 1854 @type instance: L{objects.Instance} 1855 @param instance: the instance definition 1856 1857 """ 1858 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1859 try: 1860 info = hyper.MigrationInfo(instance) 1861 except errors.HypervisorError, err: 1862 _Fail("Failed to fetch migration information: %s", err, exc=True) 1863 return info
1864
1865 1866 -def AcceptInstance(instance, info, target):
1867 """Prepare the node to accept an instance. 1868 1869 @type instance: L{objects.Instance} 1870 @param instance: the instance definition 1871 @type info: string/data (opaque) 1872 @param info: migration information, from the source node 1873 @type target: string 1874 @param target: target host (usually ip), on this node 1875 1876 """ 1877 # TODO: why is this required only for DTS_EXT_MIRROR? 1878 if instance.disk_template in constants.DTS_EXT_MIRROR: 1879 # Create the symlinks, as the disks are not active 1880 # in any way 1881 try: 1882 _GatherAndLinkBlockDevs(instance) 1883 except errors.BlockDeviceError, err: 1884 _Fail("Block device error: %s", err, exc=True) 1885 1886 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1887 try: 1888 hyper.AcceptInstance(instance, info, target) 1889 except errors.HypervisorError, err: 1890 if instance.disk_template in constants.DTS_EXT_MIRROR: 1891 _RemoveBlockDevLinks(instance.name, instance.disks) 1892 _Fail("Failed to accept instance: %s", err, exc=True)
1893
1894 1895 -def FinalizeMigrationDst(instance, info, success):
1896 """Finalize any preparation to accept an instance. 1897 1898 @type instance: L{objects.Instance} 1899 @param instance: the instance definition 1900 @type info: string/data (opaque) 1901 @param info: migration information, from the source node 1902 @type success: boolean 1903 @param success: whether the migration was a success or a failure 1904 1905 """ 1906 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1907 try: 1908 hyper.FinalizeMigrationDst(instance, info, success) 1909 except errors.HypervisorError, err: 1910 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
1911
1912 1913 -def MigrateInstance(cluster_name, instance, target, live):
1914 """Migrates an instance to another node. 1915 1916 @type cluster_name: string 1917 @param cluster_name: name of the cluster 1918 @type instance: L{objects.Instance} 1919 @param instance: the instance definition 1920 @type target: string 1921 @param target: the target node name 1922 @type live: boolean 1923 @param live: whether the migration should be done live or not (the 1924 interpretation of this parameter is left to the hypervisor) 1925 @raise RPCFail: if migration fails for some reason 1926 1927 """ 1928 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1929 1930 try: 1931 hyper.MigrateInstance(cluster_name, instance, target, live) 1932 except errors.HypervisorError, err: 1933 _Fail("Failed to migrate instance: %s", err, exc=True)
1934
1935 1936 -def FinalizeMigrationSource(instance, success, live):
1937 """Finalize the instance migration on the source node. 1938 1939 @type instance: L{objects.Instance} 1940 @param instance: the instance definition of the migrated instance 1941 @type success: bool 1942 @param success: whether the migration succeeded or not 1943 @type live: bool 1944 @param live: whether the user requested a live migration or not 1945 @raise RPCFail: If the execution fails for some reason 1946 1947 """ 1948 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1949 1950 try: 1951 hyper.FinalizeMigrationSource(instance, success, live) 1952 except Exception, err: # pylint: disable=W0703 1953 _Fail("Failed to finalize the migration on the source node: %s", err, 1954 exc=True)
1955
1956 1957 -def GetMigrationStatus(instance):
1958 """Get the migration status 1959 1960 @type instance: L{objects.Instance} 1961 @param instance: the instance that is being migrated 1962 @rtype: L{objects.MigrationStatus} 1963 @return: the status of the current migration (one of 1964 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 1965 progress info that can be retrieved from the hypervisor 1966 @raise RPCFail: If the migration status cannot be retrieved 1967 1968 """ 1969 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1970 try: 1971 return hyper.GetMigrationStatus(instance) 1972 except Exception, err: # pylint: disable=W0703 1973 _Fail("Failed to get migration status: %s", err, exc=True)
1974
1975 1976 -def HotplugDevice(instance, action, dev_type, device, extra, seq):
1977 """Hotplug a device 1978 1979 Hotplug is currently supported only for KVM Hypervisor. 1980 @type instance: L{objects.Instance} 1981 @param instance: the instance to which we hotplug a device 1982 @type action: string 1983 @param action: the hotplug action to perform 1984 @type dev_type: string 1985 @param dev_type: the device type to hotplug 1986 @type device: either L{objects.NIC} or L{objects.Disk} 1987 @param device: the device object to hotplug 1988 @type extra: tuple 1989 @param extra: extra info used for disk hotplug (disk link, drive uri) 1990 @type seq: int 1991 @param seq: the index of the device from master perspective 1992 @raise RPCFail: in case instance does not have KVM hypervisor 1993 1994 """ 1995 hyper = hypervisor.GetHypervisor(instance.hypervisor) 1996 try: 1997 hyper.VerifyHotplugSupport(instance, action, dev_type) 1998 except errors.HotplugError, err: 1999 _Fail("Hotplug is not supported: %s", err) 2000 2001 if action == constants.HOTPLUG_ACTION_ADD: 2002 fn = hyper.HotAddDevice 2003 elif action == constants.HOTPLUG_ACTION_REMOVE: 2004 fn = hyper.HotDelDevice 2005 elif action == constants.HOTPLUG_ACTION_MODIFY: 2006 fn = hyper.HotModDevice 2007 else: 2008 assert action in constants.HOTPLUG_ALL_ACTIONS 2009 2010 return fn(instance, dev_type, device, extra, seq)
2011
2012 2013 -def HotplugSupported(instance):
2014 """Checks if hotplug is generally supported. 2015 2016 """ 2017 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2018 try: 2019 hyper.HotplugSupported(instance) 2020 except errors.HotplugError, err: 2021 _Fail("Hotplug is not supported: %s", err)
2022
2023 2024 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
2025 """Creates a block device for an instance. 2026 2027 @type disk: L{objects.Disk} 2028 @param disk: the object describing the disk we should create 2029 @type size: int 2030 @param size: the size of the physical underlying device, in MiB 2031 @type owner: str 2032 @param owner: the name of the instance for which disk is created, 2033 used for device cache data 2034 @type on_primary: boolean 2035 @param on_primary: indicates if it is the primary node or not 2036 @type info: string 2037 @param info: string that will be sent to the physical device 2038 creation, used for example to set (LVM) tags on LVs 2039 @type excl_stor: boolean 2040 @param excl_stor: Whether exclusive_storage is active 2041 2042 @return: the new unique_id of the device (this can sometime be 2043 computed only after creation), or None. On secondary nodes, 2044 it's not required to return anything. 2045 2046 """ 2047 # TODO: remove the obsolete "size" argument 2048 # pylint: disable=W0613 2049 clist = [] 2050 if disk.children: 2051 for child in disk.children: 2052 try: 2053 crdev = _RecursiveAssembleBD(child, owner, on_primary) 2054 except errors.BlockDeviceError, err: 2055 _Fail("Can't assemble device %s: %s", child, err) 2056 if on_primary or disk.AssembleOnSecondary(): 2057 # we need the children open in case the device itself has to 2058 # be assembled 2059 try: 2060 # pylint: disable=E1103 2061 crdev.Open() 2062 except errors.BlockDeviceError, err: 2063 _Fail("Can't make child '%s' read-write: %s", child, err) 2064 clist.append(crdev) 2065 2066 try: 2067 device = bdev.Create(disk, clist, excl_stor) 2068 except errors.BlockDeviceError, err: 2069 _Fail("Can't create block device: %s", err) 2070 2071 if on_primary or disk.AssembleOnSecondary(): 2072 try: 2073 device.Assemble() 2074 except errors.BlockDeviceError, err: 2075 _Fail("Can't assemble device after creation, unusual event: %s", err) 2076 if on_primary or disk.OpenOnSecondary(): 2077 try: 2078 device.Open(force=True) 2079 except errors.BlockDeviceError, err: 2080 _Fail("Can't make device r/w after creation, unusual event: %s", err) 2081 DevCacheManager.UpdateCache(device.dev_path, owner, 2082 on_primary, disk.iv_name) 2083 2084 device.SetInfo(info) 2085 2086 return device.unique_id
2087
2088 2089 -def _WipeDevice(path, offset, size):
2090 """This function actually wipes the device. 2091 2092 @param path: The path to the device to wipe 2093 @param offset: The offset in MiB in the file 2094 @param size: The size in MiB to write 2095 2096 """ 2097 # Internal sizes are always in Mebibytes; if the following "dd" command 2098 # should use a different block size the offset and size given to this 2099 # function must be adjusted accordingly before being passed to "dd". 2100 block_size = 1024 * 1024 2101 2102 cmd = [constants.DD_CMD, "if=/dev/zero", "seek=%d" % offset, 2103 "bs=%s" % block_size, "oflag=direct", "of=%s" % path, 2104 "count=%d" % size] 2105 result = utils.RunCmd(cmd) 2106 2107 if result.failed: 2108 _Fail("Wipe command '%s' exited with error: %s; output: %s", result.cmd, 2109 result.fail_reason, result.output)
2110
2111 2112 -def BlockdevWipe(disk, offset, size):
2113 """Wipes a block device. 2114 2115 @type disk: L{objects.Disk} 2116 @param disk: the disk object we want to wipe 2117 @type offset: int 2118 @param offset: The offset in MiB in the file 2119 @type size: int 2120 @param size: The size in MiB to write 2121 2122 """ 2123 try: 2124 rdev = _RecursiveFindBD(disk) 2125 except errors.BlockDeviceError: 2126 rdev = None 2127 2128 if not rdev: 2129 _Fail("Cannot execute wipe for device %s: device not found", disk.iv_name) 2130 2131 # Do cross verify some of the parameters 2132 if offset < 0: 2133 _Fail("Negative offset") 2134 if size < 0: 2135 _Fail("Negative size") 2136 if offset > rdev.size: 2137 _Fail("Offset is bigger than device size") 2138 if (offset + size) > rdev.size: 2139 _Fail("The provided offset and size to wipe is bigger than device size") 2140 2141 _WipeDevice(rdev.dev_path, offset, size)
2142
2143 2144 -def BlockdevPauseResumeSync(disks, pause):
2145 """Pause or resume the sync of the block device. 2146 2147 @type disks: list of L{objects.Disk} 2148 @param disks: the disks object we want to pause/resume 2149 @type pause: bool 2150 @param pause: Wheater to pause or resume 2151 2152 """ 2153 success = [] 2154 for disk in disks: 2155 try: 2156 rdev = _RecursiveFindBD(disk) 2157 except errors.BlockDeviceError: 2158 rdev = None 2159 2160 if not rdev: 2161 success.append((False, ("Cannot change sync for device %s:" 2162 " device not found" % disk.iv_name))) 2163 continue 2164 2165 result = rdev.PauseResumeSync(pause) 2166 2167 if result: 2168 success.append((result, None)) 2169 else: 2170 if pause: 2171 msg = "Pause" 2172 else: 2173 msg = "Resume" 2174 success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) 2175 2176 return success
2177
2178 2179 -def BlockdevRemove(disk):
2180 """Remove a block device. 2181 2182 @note: This is intended to be called recursively. 2183 2184 @type disk: L{objects.Disk} 2185 @param disk: the disk object we should remove 2186 @rtype: boolean 2187 @return: the success of the operation 2188 2189 """ 2190 msgs = [] 2191 try: 2192 rdev = _RecursiveFindBD(disk) 2193 except errors.BlockDeviceError, err: 2194 # probably can't attach 2195 logging.info("Can't attach to device %s in remove", disk) 2196 rdev = None 2197 if rdev is not None: 2198 r_path = rdev.dev_path 2199 2200 def _TryRemove(): 2201 try: 2202 rdev.Remove() 2203 return [] 2204 except errors.BlockDeviceError, err: 2205 return [str(err)]
2206 2207 msgs.extend(utils.SimpleRetry([], _TryRemove, 2208 constants.DISK_REMOVE_RETRY_INTERVAL, 2209 constants.DISK_REMOVE_RETRY_TIMEOUT)) 2210 2211 if not msgs: 2212 DevCacheManager.RemoveCache(r_path) 2213 2214 if disk.children: 2215 for child in disk.children: 2216 try: 2217 BlockdevRemove(child) 2218 except RPCFail, err: 2219 msgs.append(str(err)) 2220 2221 if msgs: 2222 _Fail("; ".join(msgs)) 2223
2224 2225 -def _RecursiveAssembleBD(disk, owner, as_primary):
2226 """Activate a block device for an instance. 2227 2228 This is run on the primary and secondary nodes for an instance. 2229 2230 @note: this function is called recursively. 2231 2232 @type disk: L{objects.Disk} 2233 @param disk: the disk we try to assemble 2234 @type owner: str 2235 @param owner: the name of the instance which owns the disk 2236 @type as_primary: boolean 2237 @param as_primary: if we should make the block device 2238 read/write 2239 2240 @return: the assembled device or None (in case no device 2241 was assembled) 2242 @raise errors.BlockDeviceError: in case there is an error 2243 during the activation of the children or the device 2244 itself 2245 2246 """ 2247 children = [] 2248 if disk.children: 2249 mcn = disk.ChildrenNeeded() 2250 if mcn == -1: 2251 mcn = 0 # max number of Nones allowed 2252 else: 2253 mcn = len(disk.children) - mcn # max number of Nones 2254 for chld_disk in disk.children: 2255 try: 2256 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) 2257 except errors.BlockDeviceError, err: 2258 if children.count(None) >= mcn: 2259 raise 2260 cdev = None 2261 logging.error("Error in child activation (but continuing): %s", 2262 str(err)) 2263 children.append(cdev) 2264 2265 if as_primary or disk.AssembleOnSecondary(): 2266 r_dev = bdev.Assemble(disk, children) 2267 result = r_dev 2268 if as_primary or disk.OpenOnSecondary(): 2269 r_dev.Open() 2270 DevCacheManager.UpdateCache(r_dev.dev_path, owner, 2271 as_primary, disk.iv_name) 2272 2273 else: 2274 result = True 2275 return result
2276
2277 2278 -def BlockdevAssemble(disk, instance, as_primary, idx):
2279 """Activate a block device for an instance. 2280 2281 This is a wrapper over _RecursiveAssembleBD. 2282 2283 @rtype: str or boolean 2284 @return: a tuple with the C{/dev/...} path and the created symlink 2285 for primary nodes, and (C{True}, C{True}) for secondary nodes 2286 2287 """ 2288 try: 2289 result = _RecursiveAssembleBD(disk, instance.name, as_primary) 2290 if isinstance(result, BlockDev): 2291 # pylint: disable=E1103 2292 dev_path = result.dev_path 2293 link_name = None 2294 uri = None 2295 if as_primary: 2296 link_name = _SymlinkBlockDev(instance.name, dev_path, idx) 2297 uri = _CalculateDeviceURI(instance, disk, result) 2298 elif result: 2299 return result, result 2300 else: 2301 _Fail("Unexpected result from _RecursiveAssembleBD") 2302 except errors.BlockDeviceError, err: 2303 _Fail("Error while assembling disk: %s", err, exc=True) 2304 except OSError, err: 2305 _Fail("Error while symlinking disk: %s", err, exc=True) 2306 2307 return dev_path, link_name, uri
2308
2309 2310 -def BlockdevShutdown(disk):
2311 """Shut down a block device. 2312 2313 First, if the device is assembled (Attach() is successful), then 2314 the device is shutdown. Then the children of the device are 2315 shutdown. 2316 2317 This function is called recursively. Note that we don't cache the 2318 children or such, as oppossed to assemble, shutdown of different 2319 devices doesn't require that the upper device was active. 2320 2321 @type disk: L{objects.Disk} 2322 @param disk: the description of the disk we should 2323 shutdown 2324 @rtype: None 2325 2326 """ 2327 msgs = [] 2328 r_dev = _RecursiveFindBD(disk) 2329 if r_dev is not None: 2330 r_path = r_dev.dev_path 2331 try: 2332 r_dev.Shutdown() 2333 DevCacheManager.RemoveCache(r_path) 2334 except errors.BlockDeviceError, err: 2335 msgs.append(str(err)) 2336 2337 if disk.children: 2338 for child in disk.children: 2339 try: 2340 BlockdevShutdown(child) 2341 except RPCFail, err: 2342 msgs.append(str(err)) 2343 2344 if msgs: 2345 _Fail("; ".join(msgs))
2346
2347 2348 -def BlockdevAddchildren(parent_cdev, new_cdevs):
2349 """Extend a mirrored block device. 2350 2351 @type parent_cdev: L{objects.Disk} 2352 @param parent_cdev: the disk to which we should add children 2353 @type new_cdevs: list of L{objects.Disk} 2354 @param new_cdevs: the list of children which we should add 2355 @rtype: None 2356 2357 """ 2358 parent_bdev = _RecursiveFindBD(parent_cdev) 2359 if parent_bdev is None: 2360 _Fail("Can't find parent device '%s' in add children", parent_cdev) 2361 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] 2362 if new_bdevs.count(None) > 0: 2363 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs) 2364 parent_bdev.AddChildren(new_bdevs)
2365
2366 2367 -def BlockdevRemovechildren(parent_cdev, new_cdevs):
2368 """Shrink a mirrored block device. 2369 2370 @type parent_cdev: L{objects.Disk} 2371 @param parent_cdev: the disk from which we should remove children 2372 @type new_cdevs: list of L{objects.Disk} 2373 @param new_cdevs: the list of children which we should remove 2374 @rtype: None 2375 2376 """ 2377 parent_bdev = _RecursiveFindBD(parent_cdev) 2378 if parent_bdev is None: 2379 _Fail("Can't find parent device '%s' in remove children", parent_cdev) 2380 devs = [] 2381 for disk in new_cdevs: 2382 rpath = disk.StaticDevPath() 2383 if rpath is None: 2384 bd = _RecursiveFindBD(disk) 2385 if bd is None: 2386 _Fail("Can't find device %s while removing children", disk) 2387 else: 2388 devs.append(bd.dev_path) 2389 else: 2390 if not utils.IsNormAbsPath(rpath): 2391 _Fail("Strange path returned from StaticDevPath: '%s'", rpath) 2392 devs.append(rpath) 2393 parent_bdev.RemoveChildren(devs)
2394
2395 2396 -def BlockdevGetmirrorstatus(disks):
2397 """Get the mirroring status of a list of devices. 2398 2399 @type disks: list of L{objects.Disk} 2400 @param disks: the list of disks which we should query 2401 @rtype: disk 2402 @return: List of L{objects.BlockDevStatus}, one for each disk 2403 @raise errors.BlockDeviceError: if any of the disks cannot be 2404 found 2405 2406 """ 2407 stats = [] 2408 for dsk in disks: 2409 rbd = _RecursiveFindBD(dsk) 2410 if rbd is None: 2411 _Fail("Can't find device %s", dsk) 2412 2413 stats.append(rbd.CombinedSyncStatus()) 2414 2415 return stats
2416
2417 2418 -def BlockdevGetmirrorstatusMulti(disks):
2419 """Get the mirroring status of a list of devices. 2420 2421 @type disks: list of L{objects.Disk} 2422 @param disks: the list of disks which we should query 2423 @rtype: disk 2424 @return: List of tuples, (bool, status), one for each disk; bool denotes 2425 success/failure, status is L{objects.BlockDevStatus} on success, string 2426 otherwise 2427 2428 """ 2429 result = [] 2430 for disk in disks: 2431 try: 2432 rbd = _RecursiveFindBD(disk) 2433 if rbd is None: 2434 result.append((False, "Can't find device %s" % disk)) 2435 continue 2436 2437 status = rbd.CombinedSyncStatus() 2438 except errors.BlockDeviceError, err: 2439 logging.exception("Error while getting disk status") 2440 result.append((False, str(err))) 2441 else: 2442 result.append((True, status)) 2443 2444 assert len(disks) == len(result) 2445 2446 return result
2447
2448 2449 -def _RecursiveFindBD(disk):
2450 """Check if a device is activated. 2451 2452 If so, return information about the real device. 2453 2454 @type disk: L{objects.Disk} 2455 @param disk: the disk object we need to find 2456 2457 @return: None if the device can't be found, 2458 otherwise the device instance 2459 2460 """ 2461 children = [] 2462 if disk.children: 2463 for chdisk in disk.children: 2464 children.append(_RecursiveFindBD(chdisk)) 2465 2466 return bdev.FindDevice(disk, children)
2467
2468 2469 -def _OpenRealBD(disk):
2470 """Opens the underlying block device of a disk. 2471 2472 @type disk: L{objects.Disk} 2473 @param disk: the disk object we want to open 2474 2475 """ 2476 real_disk = _RecursiveFindBD(disk) 2477 if real_disk is None: 2478 _Fail("Block device '%s' is not set up", disk) 2479 2480 real_disk.Open() 2481 2482 return real_disk
2483
2484 2485 -def BlockdevFind(disk):
2486 """Check if a device is activated. 2487 2488 If it is, return information about the real device. 2489 2490 @type disk: L{objects.Disk} 2491 @param disk: the disk to find 2492 @rtype: None or objects.BlockDevStatus 2493 @return: None if the disk cannot be found, otherwise a the current 2494 information 2495 2496 """ 2497 try: 2498 rbd = _RecursiveFindBD(disk) 2499 except errors.BlockDeviceError, err: 2500 _Fail("Failed to find device: %s", err, exc=True) 2501 2502 if rbd is None: 2503 return None 2504 2505 return rbd.GetSyncStatus()
2506
2507 2508 -def BlockdevGetdimensions(disks):
2509 """Computes the size of the given disks. 2510 2511 If a disk is not found, returns None instead. 2512 2513 @type disks: list of L{objects.Disk} 2514 @param disks: the list of disk to compute the size for 2515 @rtype: list 2516 @return: list with elements None if the disk cannot be found, 2517 otherwise the pair (size, spindles), where spindles is None if the 2518 device doesn't support that 2519 2520 """ 2521 result = [] 2522 for cf in disks: 2523 try: 2524 rbd = _RecursiveFindBD(cf) 2525 except errors.BlockDeviceError: 2526 result.append(None) 2527 continue 2528 if rbd is None: 2529 result.append(None) 2530 else: 2531 result.append(rbd.GetActualDimensions()) 2532 return result
2533
2534 2535 -def BlockdevExport(disk, dest_node_ip, dest_path, cluster_name):
2536 """Export a block device to a remote node. 2537 2538 @type disk: L{objects.Disk} 2539 @param disk: the description of the disk to export 2540 @type dest_node_ip: str 2541 @param dest_node_ip: the destination node IP to export to 2542 @type dest_path: str 2543 @param dest_path: the destination path on the target node 2544 @type cluster_name: str 2545 @param cluster_name: the cluster name, needed for SSH hostalias 2546 @rtype: None 2547 2548 """ 2549 real_disk = _OpenRealBD(disk) 2550 2551 # the block size on the read dd is 1MiB to match our units 2552 expcmd = utils.BuildShellCmd("set -e; set -o pipefail; " 2553 "dd if=%s bs=1048576 count=%s", 2554 real_disk.dev_path, str(disk.size)) 2555 2556 # we set here a smaller block size as, due to ssh buffering, more 2557 # than 64-128k will mostly ignored; we use nocreat to fail if the 2558 # device is not already there or we pass a wrong path; we use 2559 # notrunc to no attempt truncate on an LV device; we use oflag=dsync 2560 # to not buffer too much memory; this means that at best, we flush 2561 # every 64k, which will not be very fast 2562 destcmd = utils.BuildShellCmd("dd of=%s conv=nocreat,notrunc bs=65536" 2563 " oflag=dsync", dest_path) 2564 2565 remotecmd = _GetSshRunner(cluster_name).BuildCmd(dest_node_ip, 2566 constants.SSH_LOGIN_USER, 2567 destcmd) 2568 2569 # all commands have been checked, so we're safe to combine them 2570 command = "|".join([expcmd, utils.ShellQuoteArgs(remotecmd)]) 2571 2572 result = utils.RunCmd(["bash", "-c", command]) 2573 2574 if result.failed: 2575 _Fail("Disk copy command '%s' returned error: %s" 2576 " output: %s", command, result.fail_reason, result.output)
2577
2578 2579 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
2580 """Write a file to the filesystem. 2581 2582 This allows the master to overwrite(!) a file. It will only perform 2583 the operation if the file belongs to a list of configuration files. 2584 2585 @type file_name: str 2586 @param file_name: the target file name 2587 @type data: str 2588 @param data: the new contents of the file 2589 @type mode: int 2590 @param mode: the mode to give the file (can be None) 2591 @type uid: string 2592 @param uid: the owner of the file 2593 @type gid: string 2594 @param gid: the group of the file 2595 @type atime: float 2596 @param atime: the atime to set on the file (can be None) 2597 @type mtime: float 2598 @param mtime: the mtime to set on the file (can be None) 2599 @rtype: None 2600 2601 """ 2602 file_name = vcluster.LocalizeVirtualPath(file_name) 2603 2604 if not os.path.isabs(file_name): 2605 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) 2606 2607 if file_name not in _ALLOWED_UPLOAD_FILES: 2608 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'", 2609 file_name) 2610 2611 raw_data = _Decompress(data) 2612 2613 if not (isinstance(uid, basestring) and isinstance(gid, basestring)): 2614 _Fail("Invalid username/groupname type") 2615 2616 getents = runtime.GetEnts() 2617 uid = getents.LookupUser(uid) 2618 gid = getents.LookupGroup(gid) 2619 2620 utils.SafeWriteFile(file_name, None, 2621 data=raw_data, mode=mode, uid=uid, gid=gid, 2622 atime=atime, mtime=mtime)
2623
2624 2625 -def RunOob(oob_program, command, node, timeout):
2626 """Executes oob_program with given command on given node. 2627 2628 @param oob_program: The path to the executable oob_program 2629 @param command: The command to invoke on oob_program 2630 @param node: The node given as an argument to the program 2631 @param timeout: Timeout after which we kill the oob program 2632 2633 @return: stdout 2634 @raise RPCFail: If execution fails for some reason 2635 2636 """ 2637 result = utils.RunCmd([oob_program, command, node], timeout=timeout) 2638 2639 if result.failed: 2640 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, 2641 result.fail_reason, result.output) 2642 2643 return result.stdout
2644
2645 2646 -def _OSOndiskAPIVersion(os_dir):
2647 """Compute and return the API version of a given OS. 2648 2649 This function will try to read the API version of the OS residing in 2650 the 'os_dir' directory. 2651 2652 @type os_dir: str 2653 @param os_dir: the directory in which we should look for the OS 2654 @rtype: tuple 2655 @return: tuple (status, data) with status denoting the validity and 2656 data holding either the vaid versions or an error message 2657 2658 """ 2659 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE) 2660 2661 try: 2662 st = os.stat(api_file) 2663 except EnvironmentError, err: 2664 return False, ("Required file '%s' not found under path %s: %s" % 2665 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) 2666 2667 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 2668 return False, ("File '%s' in %s is not a regular file" % 2669 (constants.OS_API_FILE, os_dir)) 2670 2671 try: 2672 api_versions = utils.ReadFile(api_file).splitlines() 2673 except EnvironmentError, err: 2674 return False, ("Error while reading the API version file at %s: %s" % 2675 (api_file, utils.ErrnoOrStr(err))) 2676 2677 try: 2678 api_versions = [int(version.strip()) for version in api_versions] 2679 except (TypeError, ValueError), err: 2680 return False, ("API version(s) can't be converted to integer: %s" % 2681 str(err)) 2682 2683 return True, api_versions
2684
2685 2686 -def DiagnoseOS(top_dirs=None):
2687 """Compute the validity for all OSes. 2688 2689 @type top_dirs: list 2690 @param top_dirs: the list of directories in which to 2691 search (if not given defaults to 2692 L{pathutils.OS_SEARCH_PATH}) 2693 @rtype: list of L{objects.OS} 2694 @return: a list of tuples (name, path, status, diagnose, variants, 2695 parameters, api_version) for all (potential) OSes under all 2696 search paths, where: 2697 - name is the (potential) OS name 2698 - path is the full path to the OS 2699 - status True/False is the validity of the OS 2700 - diagnose is the error message for an invalid OS, otherwise empty 2701 - variants is a list of supported OS variants, if any 2702 - parameters is a list of (name, help) parameters, if any 2703 - api_version is a list of support OS API versions 2704 2705 """ 2706 if top_dirs is None: 2707 top_dirs = pathutils.OS_SEARCH_PATH 2708 2709 result = [] 2710 for dir_name in top_dirs: 2711 if os.path.isdir(dir_name): 2712 try: 2713 f_names = utils.ListVisibleFiles(dir_name) 2714 except EnvironmentError, err: 2715 logging.exception("Can't list the OS directory %s: %s", dir_name, err) 2716 break 2717 for name in f_names: 2718 os_path = utils.PathJoin(dir_name, name) 2719 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name) 2720 if status: 2721 diagnose = "" 2722 variants = os_inst.supported_variants 2723 parameters = os_inst.supported_parameters 2724 api_versions = os_inst.api_versions 2725 else: 2726 diagnose = os_inst 2727 variants = parameters = api_versions = [] 2728 result.append((name, os_path, status, diagnose, variants, 2729 parameters, api_versions)) 2730 2731 return result
2732
2733 2734 -def _TryOSFromDisk(name, base_dir=None):
2735 """Create an OS instance from disk. 2736 2737 This function will return an OS instance if the given name is a 2738 valid OS name. 2739 2740 @type base_dir: string 2741 @keyword base_dir: Base directory containing OS installations. 2742 Defaults to a search in all the OS_SEARCH_PATH dirs. 2743 @rtype: tuple 2744 @return: success and either the OS instance if we find a valid one, 2745 or error message 2746 2747 """ 2748 if base_dir is None: 2749 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) 2750 else: 2751 os_dir = utils.FindFile(name, [base_dir], os.path.isdir) 2752 2753 if os_dir is None: 2754 return False, "Directory for OS %s not found in search path" % name 2755 2756 status, api_versions = _OSOndiskAPIVersion(os_dir) 2757 if not status: 2758 # push the error up 2759 return status, api_versions 2760 2761 if not constants.OS_API_VERSIONS.intersection(api_versions): 2762 return False, ("API version mismatch for path '%s': found %s, want %s." % 2763 (os_dir, api_versions, constants.OS_API_VERSIONS)) 2764 2765 # OS Files dictionary, we will populate it with the absolute path 2766 # names; if the value is True, then it is a required file, otherwise 2767 # an optional one 2768 os_files = dict.fromkeys(constants.OS_SCRIPTS, True) 2769 2770 if max(api_versions) >= constants.OS_API_V15: 2771 os_files[constants.OS_VARIANTS_FILE] = False 2772 2773 if max(api_versions) >= constants.OS_API_V20: 2774 os_files[constants.OS_PARAMETERS_FILE] = True 2775 else: 2776 del os_files[constants.OS_SCRIPT_VERIFY] 2777 2778 for (filename, required) in os_files.items(): 2779 os_files[filename] = utils.PathJoin(os_dir, filename) 2780 2781 try: 2782 st = os.stat(os_files[filename]) 2783 except EnvironmentError, err: 2784 if err.errno == errno.ENOENT and not required: 2785 del os_files[filename] 2786 continue 2787 return False, ("File '%s' under path '%s' is missing (%s)" % 2788 (filename, os_dir, utils.ErrnoOrStr(err))) 2789 2790 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 2791 return False, ("File '%s' under path '%s' is not a regular file" % 2792 (filename, os_dir)) 2793 2794 if filename in constants.OS_SCRIPTS: 2795 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: 2796 return False, ("File '%s' under path '%s' is not executable" % 2797 (filename, os_dir)) 2798 2799 variants = [] 2800 if constants.OS_VARIANTS_FILE in os_files: 2801 variants_file = os_files[constants.OS_VARIANTS_FILE] 2802 try: 2803 variants = \ 2804 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) 2805 except EnvironmentError, err: 2806 # we accept missing files, but not other errors 2807 if err.errno != errno.ENOENT: 2808 return False, ("Error while reading the OS variants file at %s: %s" % 2809 (variants_file, utils.ErrnoOrStr(err))) 2810 2811 parameters = [] 2812 if constants.OS_PARAMETERS_FILE in os_files: 2813 parameters_file = os_files[constants.OS_PARAMETERS_FILE] 2814 try: 2815 parameters = utils.ReadFile(parameters_file).splitlines() 2816 except EnvironmentError, err: 2817 return False, ("Error while reading the OS parameters file at %s: %s" % 2818 (parameters_file, utils.ErrnoOrStr(err))) 2819 parameters = [v.split(None, 1) for v in parameters] 2820 2821 os_obj = objects.OS(name=name, path=os_dir, 2822 create_script=os_files[constants.OS_SCRIPT_CREATE], 2823 export_script=os_files[constants.OS_SCRIPT_EXPORT], 2824 import_script=os_files[constants.OS_SCRIPT_IMPORT], 2825 rename_script=os_files[constants.OS_SCRIPT_RENAME], 2826 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, 
2827 None), 2828 supported_variants=variants, 2829 supported_parameters=parameters, 2830 api_versions=api_versions) 2831 return True, os_obj
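# An illustrative layout accepted by the checks above; the directory location
# is an example and the entries refer to the constants used in this function
# rather than asserting concrete file names:
#
#   <search-path>/<os-name>/
#     constants.OS_API_FILE          one integer API version per line
#     constants.OS_SCRIPTS           executable create/import/export/rename
#                                    scripts (plus verify for API >= 20)
#     constants.OS_VARIANTS_FILE     optional list of supported variants
#     constants.OS_PARAMETERS_FILE   "name help" pairs, required for API >= 20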
2832
2833 2834 -def OSFromDisk(name, base_dir=None):
2835 """Create an OS instance from disk. 2836 2837 This function will return an OS instance if the given name is a 2838 valid OS name. Otherwise, it will raise an appropriate 2839 L{RPCFail} exception, detailing why this is not a valid OS. 2840 2841 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise 2842 an exception but returns true/false status data. 2843 2844 @type base_dir: string 2845 @keyword base_dir: Base directory containing OS installations. 2846 Defaults to a search in all the OS_SEARCH_PATH dirs. 2847 @rtype: L{objects.OS} 2848 @return: the OS instance if we find a valid one 2849 @raise RPCFail: if we don't find a valid OS 2850 2851 """ 2852 name_only = objects.OS.GetName(name) 2853 status, payload = _TryOSFromDisk(name_only, base_dir) 2854 2855 if not status: 2856 _Fail(payload) 2857 2858 return payload
2859
2860 2861 -def OSCoreEnv(os_name, inst_os, os_params, debug=0):
2862 """Calculate the basic environment for an os script. 2863 2864 @type os_name: str 2865 @param os_name: full operating system name (including variant) 2866 @type inst_os: L{objects.OS} 2867 @param inst_os: operating system for which the environment is being built 2868 @type os_params: dict 2869 @param os_params: the OS parameters 2870 @type debug: integer 2871 @param debug: debug level (0 or 1, for OS Api 10) 2872 @rtype: dict 2873 @return: dict of environment variables 2874 @raise errors.BlockDeviceError: if the block device 2875 cannot be found 2876 2877 """ 2878 result = {} 2879 api_version = \ 2880 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) 2881 result["OS_API_VERSION"] = "%d" % api_version 2882 result["OS_NAME"] = inst_os.name 2883 result["DEBUG_LEVEL"] = "%d" % debug 2884 2885 # OS variants 2886 if api_version >= constants.OS_API_V15 and inst_os.supported_variants: 2887 variant = objects.OS.GetVariant(os_name) 2888 if not variant: 2889 variant = inst_os.supported_variants[0] 2890 else: 2891 variant = "" 2892 result["OS_VARIANT"] = variant 2893 2894 # OS params 2895 for pname, pvalue in os_params.items(): 2896 result["OSP_%s" % pname.upper()] = pvalue 2897 2898 # Set a default path otherwise programs called by OS scripts (or 2899 # even hooks called from OS scripts) might break, and we don't want 2900 # to have each script require setting a PATH variable 2901 result["PATH"] = constants.HOOKS_PATH 2902 2903 return result
2904
2905 2906 -def OSEnvironment(instance, inst_os, debug=0):
2907 """Calculate the environment for an os script. 2908 2909 @type instance: L{objects.Instance} 2910 @param instance: target instance for the os script run 2911 @type inst_os: L{objects.OS} 2912 @param inst_os: operating system for which the environment is being built 2913 @type debug: integer 2914 @param debug: debug level (0 or 1, for OS Api 10) 2915 @rtype: dict 2916 @return: dict of environment variables 2917 @raise errors.BlockDeviceError: if the block device 2918 cannot be found 2919 2920 """ 2921 result = OSCoreEnv(instance.os, inst_os, instance.osparams, debug=debug) 2922 2923 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: 2924 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) 2925 2926 result["HYPERVISOR"] = instance.hypervisor 2927 result["DISK_COUNT"] = "%d" % len(instance.disks) 2928 result["NIC_COUNT"] = "%d" % len(instance.nics) 2929 result["INSTANCE_SECONDARY_NODES"] = \ 2930 ("%s" % " ".join(instance.secondary_nodes)) 2931 2932 # Disks 2933 for idx, disk in enumerate(instance.disks): 2934 real_disk = _OpenRealBD(disk) 2935 result["DISK_%d_PATH" % idx] = real_disk.dev_path 2936 result["DISK_%d_ACCESS" % idx] = disk.mode 2937 result["DISK_%d_UUID" % idx] = disk.uuid 2938 if disk.name: 2939 result["DISK_%d_NAME" % idx] = disk.name 2940 if constants.HV_DISK_TYPE in instance.hvparams: 2941 result["DISK_%d_FRONTEND_TYPE" % idx] = \ 2942 instance.hvparams[constants.HV_DISK_TYPE] 2943 if disk.dev_type in constants.DTS_BLOCK: 2944 result["DISK_%d_BACKEND_TYPE" % idx] = "block" 2945 elif disk.dev_type in [constants.DT_FILE, constants.DT_SHARED_FILE]: 2946 result["DISK_%d_BACKEND_TYPE" % idx] = \ 2947 "file:%s" % disk.logical_id[0] 2948 2949 # NICs 2950 for idx, nic in enumerate(instance.nics): 2951 result["NIC_%d_MAC" % idx] = nic.mac 2952 result["NIC_%d_UUID" % idx] = nic.uuid 2953 if nic.name: 2954 result["NIC_%d_NAME" % idx] = nic.name 2955 if nic.ip: 2956 result["NIC_%d_IP" % idx] = nic.ip 2957 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] 2958 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 2959 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] 2960 if nic.nicparams[constants.NIC_LINK]: 2961 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] 2962 if nic.netinfo: 2963 nobj = objects.Network.FromDict(nic.netinfo) 2964 result.update(nobj.HooksDict("NIC_%d_" % idx)) 2965 if constants.HV_NIC_TYPE in instance.hvparams: 2966 result["NIC_%d_FRONTEND_TYPE" % idx] = \ 2967 instance.hvparams[constants.HV_NIC_TYPE] 2968 2969 # HV/BE params 2970 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: 2971 for key, value in source.items(): 2972 result["INSTANCE_%s_%s" % (kind, key)] = str(value) 2973 2974 return result
2975
2976 2977 -def DiagnoseExtStorage(top_dirs=None):
2978 """Compute the validity for all ExtStorage Providers. 2979 2980 @type top_dirs: list 2981 @param top_dirs: the list of directories in which to 2982 search (if not given defaults to 2983 L{pathutils.ES_SEARCH_PATH}) 2984 @rtype: list of L{objects.ExtStorage} 2985 @return: a list of tuples (name, path, status, diagnose, parameters) 2986 for all (potential) ExtStorage Providers under all 2987 search paths, where: 2988 - name is the (potential) ExtStorage Provider 2989 - path is the full path to the ExtStorage Provider 2990 - status True/False is the validity of the ExtStorage Provider 2991 - diagnose is the error message for an invalid ExtStorage Provider, 2992 otherwise empty 2993 - parameters is a list of (name, help) parameters, if any 2994 2995 """ 2996 if top_dirs is None: 2997 top_dirs = pathutils.ES_SEARCH_PATH 2998 2999 result = [] 3000 for dir_name in top_dirs: 3001 if os.path.isdir(dir_name): 3002 try: 3003 f_names = utils.ListVisibleFiles(dir_name) 3004 except EnvironmentError, err: 3005 logging.exception("Can't list the ExtStorage directory %s: %s", 3006 dir_name, err) 3007 break 3008 for name in f_names: 3009 es_path = utils.PathJoin(dir_name, name) 3010 status, es_inst = bdev.ExtStorageFromDisk(name, base_dir=dir_name) 3011 if status: 3012 diagnose = "" 3013 parameters = es_inst.supported_parameters 3014 else: 3015 diagnose = es_inst 3016 parameters = [] 3017 result.append((name, es_path, status, diagnose, parameters)) 3018 3019 return result
3020
3021 3022 -def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
3023 """Grow a stack of block devices. 3024 3025 This function is called recursively, with the childrens being the 3026 first ones to resize. 3027 3028 @type disk: L{objects.Disk} 3029 @param disk: the disk to be grown 3030 @type amount: integer 3031 @param amount: the amount (in mebibytes) to grow with 3032 @type dryrun: boolean 3033 @param dryrun: whether to execute the operation in simulation mode 3034 only, without actually increasing the size 3035 @param backingstore: whether to execute the operation on backing storage 3036 only, or on "logical" storage only; e.g. DRBD is logical storage, 3037 whereas LVM, file, RBD are backing storage 3038 @rtype: (status, result) 3039 @type excl_stor: boolean 3040 @param excl_stor: Whether exclusive_storage is active 3041 @return: a tuple with the status of the operation (True/False), and 3042 the errors message if status is False 3043 3044 """ 3045 r_dev = _RecursiveFindBD(disk) 3046 if r_dev is None: 3047 _Fail("Cannot find block device %s", disk) 3048 3049 try: 3050 r_dev.Grow(amount, dryrun, backingstore, excl_stor) 3051 except errors.BlockDeviceError, err: 3052 _Fail("Failed to grow block device: %s", err, exc=True)
3053
3054 3055 -def BlockdevSnapshot(disk):
3056 """Create a snapshot copy of a block device. 3057 3058 This function is called recursively, and the snapshot is actually created 3059 just for the leaf lvm backend device. 3060 3061 @type disk: L{objects.Disk} 3062 @param disk: the disk to be snapshotted 3063 @rtype: string 3064 @return: snapshot disk ID as (vg, lv) 3065 3066 """ 3067 if disk.dev_type == constants.DT_DRBD8: 3068 if not disk.children: 3069 _Fail("DRBD device '%s' without backing storage cannot be snapshotted", 3070 disk.unique_id) 3071 return BlockdevSnapshot(disk.children[0]) 3072 elif disk.dev_type == constants.DT_PLAIN: 3073 r_dev = _RecursiveFindBD(disk) 3074 if r_dev is not None: 3075 # FIXME: choose a saner value for the snapshot size 3076 # let's stay on the safe side and ask for the full size, for now 3077 return r_dev.Snapshot(disk.size) 3078 else: 3079 _Fail("Cannot find block device %s", disk) 3080 else: 3081 _Fail("Cannot snapshot non-lvm block device '%s' of type '%s'", 3082 disk.logical_id, disk.dev_type)
3083
3084 3085 -def BlockdevSetInfo(disk, info):
3086 """Sets 'metadata' information on block devices. 3087 3088 This function sets 'info' metadata on block devices. Initial 3089 information is set at device creation; this function should be used 3090 for example after renames. 3091 3092 @type disk: L{objects.Disk} 3093 @param disk: the disk to be grown 3094 @type info: string 3095 @param info: new 'info' metadata 3096 @rtype: (status, result) 3097 @return: a tuple with the status of the operation (True/False), and 3098 the errors message if status is False 3099 3100 """ 3101 r_dev = _RecursiveFindBD(disk) 3102 if r_dev is None: 3103 _Fail("Cannot find block device %s", disk) 3104 3105 try: 3106 r_dev.SetInfo(info) 3107 except errors.BlockDeviceError, err: 3108 _Fail("Failed to set information on block device: %s", err, exc=True)
3109
3110 3111 -def FinalizeExport(instance, snap_disks):
3112 """Write out the export configuration information. 3113 3114 @type instance: L{objects.Instance} 3115 @param instance: the instance which we export, used for 3116 saving configuration 3117 @type snap_disks: list of L{objects.Disk} 3118 @param snap_disks: list of snapshot block devices, which 3119 will be used to get the actual name of the dump file 3120 3121 @rtype: None 3122 3123 """ 3124 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") 3125 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) 3126 3127 config = objects.SerializableConfigParser() 3128 3129 config.add_section(constants.INISECT_EXP) 3130 config.set(constants.INISECT_EXP, "version", "0") 3131 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) 3132 config.set(constants.INISECT_EXP, "source", instance.primary_node) 3133 config.set(constants.INISECT_EXP, "os", instance.os) 3134 config.set(constants.INISECT_EXP, "compression", "none") 3135 3136 config.add_section(constants.INISECT_INS) 3137 config.set(constants.INISECT_INS, "name", instance.name) 3138 config.set(constants.INISECT_INS, "maxmem", "%d" % 3139 instance.beparams[constants.BE_MAXMEM]) 3140 config.set(constants.INISECT_INS, "minmem", "%d" % 3141 instance.beparams[constants.BE_MINMEM]) 3142 # "memory" is deprecated, but useful for exporting to old ganeti versions 3143 config.set(constants.INISECT_INS, "memory", "%d" % 3144 instance.beparams[constants.BE_MAXMEM]) 3145 config.set(constants.INISECT_INS, "vcpus", "%d" % 3146 instance.beparams[constants.BE_VCPUS]) 3147 config.set(constants.INISECT_INS, "disk_template", instance.disk_template) 3148 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) 3149 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) 3150 3151 nic_total = 0 3152 for nic_count, nic in enumerate(instance.nics): 3153 nic_total += 1 3154 config.set(constants.INISECT_INS, "nic%d_mac" % 3155 nic_count, "%s" % nic.mac) 3156 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) 3157 config.set(constants.INISECT_INS, "nic%d_network" % nic_count, 3158 "%s" % nic.network) 3159 config.set(constants.INISECT_INS, "nic%d_name" % nic_count, 3160 "%s" % nic.name) 3161 for param in constants.NICS_PARAMETER_TYPES: 3162 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), 3163 "%s" % nic.nicparams.get(param, None)) 3164 # TODO: redundant: on load can read nics until it doesn't exist 3165 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) 3166 3167 disk_total = 0 3168 for disk_count, disk in enumerate(snap_disks): 3169 if disk: 3170 disk_total += 1 3171 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, 3172 ("%s" % disk.iv_name)) 3173 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, 3174 ("%s" % disk.logical_id[1])) 3175 config.set(constants.INISECT_INS, "disk%d_size" % disk_count, 3176 ("%d" % disk.size)) 3177 config.set(constants.INISECT_INS, "disk%d_name" % disk_count, 3178 "%s" % disk.name) 3179 3180 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) 3181 3182 # New-style hypervisor/backend parameters 3183 3184 config.add_section(constants.INISECT_HYP) 3185 for name, value in instance.hvparams.items(): 3186 if name not in constants.HVC_GLOBALS: 3187 config.set(constants.INISECT_HYP, name, str(value)) 3188 3189 config.add_section(constants.INISECT_BEP) 3190 for name, value in instance.beparams.items(): 3191 config.set(constants.INISECT_BEP, name, str(value)) 3192 3193 
config.add_section(constants.INISECT_OSP) 3194 for name, value in instance.osparams.items(): 3195 config.set(constants.INISECT_OSP, name, str(value)) 3196 3197 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), 3198 data=config.Dumps()) 3199 shutil.rmtree(finaldestdir, ignore_errors=True) 3200 shutil.move(destdir, finaldestdir)
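# An illustrative excerpt of the export configuration written above; the
# section names are the constants.INISECT_* values used in this function, and
# the concrete option values are example assumptions only:
#
#   [<constants.INISECT_EXP>]
#   version = 0
#   timestamp = <int(time.time())>
#   source = <primary node>
#   os = <instance OS>
#   compression = none
#
#   [<constants.INISECT_INS>]
#   name = inst1.example.com
#   nic_count = 1
#   disk_count = 1
#   ...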
3201
3202 3203 -def ExportInfo(dest):
3204 """Get export configuration information. 3205 3206 @type dest: str 3207 @param dest: directory containing the export 3208 3209 @rtype: L{objects.SerializableConfigParser} 3210 @return: a serializable config file containing the 3211 export info 3212 3213 """ 3214 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) 3215 3216 config = objects.SerializableConfigParser() 3217 config.read(cff) 3218 3219 if (not config.has_section(constants.INISECT_EXP) or 3220 not config.has_section(constants.INISECT_INS)): 3221 _Fail("Export info file doesn't have the required fields") 3222 3223 return config.Dumps()
3224
3225 3226 -def ListExports():
3227 """Return a list of exports currently available on this machine. 3228 3229 @rtype: list 3230 @return: list of the exports 3231 3232 """ 3233 if os.path.isdir(pathutils.EXPORT_DIR): 3234 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) 3235 else: 3236 _Fail("No exports directory")
3237
3238 3239 -def RemoveExport(export):
3240 """Remove an existing export from the node. 3241 3242 @type export: str 3243 @param export: the name of the export to remove 3244 @rtype: None 3245 3246 """ 3247 target = utils.PathJoin(pathutils.EXPORT_DIR, export) 3248 3249 try: 3250 shutil.rmtree(target) 3251 except EnvironmentError, err: 3252 _Fail("Error while removing the export: %s", err, exc=True)
3253
3254 3255 -def BlockdevRename(devlist):
3256 """Rename a list of block devices. 3257 3258 @type devlist: list of tuples 3259 @param devlist: list of tuples of the form (disk, new_unique_id); disk is 3260 an L{objects.Disk} object describing the current disk, and new 3261 unique_id is the name we rename it to 3262 @rtype: boolean 3263 @return: True if all renames succeeded, False otherwise 3264 3265 """ 3266 msgs = [] 3267 result = True 3268 for disk, unique_id in devlist: 3269 dev = _RecursiveFindBD(disk) 3270 if dev is None: 3271 msgs.append("Can't find device %s in rename" % str(disk)) 3272 result = False 3273 continue 3274 try: 3275 old_rpath = dev.dev_path 3276 dev.Rename(unique_id) 3277 new_rpath = dev.dev_path 3278 if old_rpath != new_rpath: 3279 DevCacheManager.RemoveCache(old_rpath) 3280 # FIXME: we should add the new cache information here, like: 3281 # DevCacheManager.UpdateCache(new_rpath, owner, ...) 3282 # but we don't have the owner here - maybe parse from existing 3283 # cache? for now, we only lose lvm data when we rename, which 3284 # is less critical than DRBD or MD 3285 except errors.BlockDeviceError, err: 3286 msgs.append("Can't rename device '%s' to '%s': %s" % 3287 (dev, unique_id, err)) 3288 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) 3289 result = False 3290 if not result: 3291 _Fail("; ".join(msgs))
3292
3293 3294 -def _TransformFileStorageDir(fs_dir):
3295 """Checks whether given file_storage_dir is valid. 3296 3297 Checks wheter the given fs_dir is within the cluster-wide default 3298 file_storage_dir or the shared_file_storage_dir, which are stored in 3299 SimpleStore. Only paths under those directories are allowed. 3300 3301 @type fs_dir: str 3302 @param fs_dir: the path to check 3303 3304 @return: the normalized path if valid, None otherwise 3305 3306 """ 3307 filestorage.CheckFileStoragePath(fs_dir) 3308 3309 return os.path.normpath(fs_dir)
3310
3311 3312 -def CreateFileStorageDir(file_storage_dir):
3313 """Create file storage directory. 3314 3315 @type file_storage_dir: str 3316 @param file_storage_dir: directory to create 3317 3318 @rtype: tuple 3319 @return: tuple with first element a boolean indicating wheter dir 3320 creation was successful or not 3321 3322 """ 3323 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 3324 if os.path.exists(file_storage_dir): 3325 if not os.path.isdir(file_storage_dir): 3326 _Fail("Specified storage dir '%s' is not a directory", 3327 file_storage_dir) 3328 else: 3329 try: 3330 os.makedirs(file_storage_dir, 0750) 3331 except OSError, err: 3332 _Fail("Cannot create file storage directory '%s': %s", 3333 file_storage_dir, err, exc=True)
3334
3335 3336 -def RemoveFileStorageDir(file_storage_dir):
3337 """Remove file storage directory. 3338 3339 Remove it only if it's empty. If not log an error and return. 3340 3341 @type file_storage_dir: str 3342 @param file_storage_dir: the directory we should cleanup 3343 @rtype: tuple (success,) 3344 @return: tuple of one element, C{success}, denoting 3345 whether the operation was successful 3346 3347 """ 3348 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 3349 if os.path.exists(file_storage_dir): 3350 if not os.path.isdir(file_storage_dir): 3351 _Fail("Specified Storage directory '%s' is not a directory", 3352 file_storage_dir) 3353 # deletes dir only if empty, otherwise we want to fail the rpc call 3354 try: 3355 os.rmdir(file_storage_dir) 3356 except OSError, err: 3357 _Fail("Cannot remove file storage directory '%s': %s", 3358 file_storage_dir, err)
3359
3360 3361 -def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
3362 """Rename the file storage directory. 3363 3364 @type old_file_storage_dir: str 3365 @param old_file_storage_dir: the current path 3366 @type new_file_storage_dir: str 3367 @param new_file_storage_dir: the name we should rename to 3368 @rtype: tuple (success,) 3369 @return: tuple of one element, C{success}, denoting 3370 whether the operation was successful 3371 3372 """ 3373 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) 3374 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) 3375 if not os.path.exists(new_file_storage_dir): 3376 if os.path.isdir(old_file_storage_dir): 3377 try: 3378 os.rename(old_file_storage_dir, new_file_storage_dir) 3379 except OSError, err: 3380 _Fail("Cannot rename '%s' to '%s': %s", 3381 old_file_storage_dir, new_file_storage_dir, err) 3382 else: 3383 _Fail("Specified storage dir '%s' is not a directory", 3384 old_file_storage_dir) 3385 else: 3386 if os.path.exists(old_file_storage_dir): 3387 _Fail("Cannot rename '%s' to '%s': both locations exist", 3388 old_file_storage_dir, new_file_storage_dir)
3389
3390 3391 -def _EnsureJobQueueFile(file_name):
3392 """Checks whether the given filename is in the queue directory. 3393 3394 @type file_name: str 3395 @param file_name: the file name we should check 3396 @rtype: None 3397 @raises RPCFail: if the file is not valid 3398 3399 """ 3400 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): 3401 _Fail("Passed job queue file '%s' does not belong to" 3402 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
3403
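A small sketch of the containment test used above, assuming the module's helpers are importable; the job file names are made-up examples:

  from ganeti import pathutils, utils

  good = utils.PathJoin(pathutils.QUEUE_DIR, "job-1")   # accepted by _EnsureJobQueueFile
  bad = "/tmp/job-1"                                    # rejected: not below QUEUE_DIR
  assert utils.IsBelowDir(pathutils.QUEUE_DIR, good)
  assert not utils.IsBelowDir(pathutils.QUEUE_DIR, bad)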
3404 3405 -def JobQueueUpdate(file_name, content):
3406 """Updates a file in the queue directory. 3407 3408 This is just a wrapper over L{utils.io.WriteFile}, with proper 3409 checking. 3410 3411 @type file_name: str 3412 @param file_name: the job file name 3413 @type content: str 3414 @param content: the new job contents 3415 @rtype: boolean 3416 @return: the success of the operation 3417 3418 """ 3419 file_name = vcluster.LocalizeVirtualPath(file_name) 3420 3421 _EnsureJobQueueFile(file_name) 3422 getents = runtime.GetEnts() 3423 3424 # Write and replace the file atomically 3425 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, 3426 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
3427
3428 3429 -def JobQueueRename(old, new):
3430 """Renames a job queue file. 3431 3432 This is just a wrapper over os.rename with proper checking. 3433 3434 @type old: str 3435 @param old: the old (actual) file name 3436 @type new: str 3437 @param new: the desired file name 3438 @rtype: tuple 3439 @return: the success of the operation and payload 3440 3441 """ 3442 old = vcluster.LocalizeVirtualPath(old) 3443 new = vcluster.LocalizeVirtualPath(new) 3444 3445 _EnsureJobQueueFile(old) 3446 _EnsureJobQueueFile(new) 3447 3448 getents = runtime.GetEnts() 3449 3450 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, 3451 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
3452
3453 3454 -def BlockdevClose(instance_name, disks):
3455 """Closes the given block devices. 3456 3457 This means they will be switched to secondary mode (in case of 3458 DRBD). 3459 3460 @param instance_name: if the argument is not empty, the symlinks 3461 of this instance will be removed 3462 @type disks: list of L{objects.Disk} 3463 @param disks: the list of disks to be closed 3464 @rtype: tuple (success, message) 3465 @return: a tuple of success and message, where success 3466 indicates the succes of the operation, and message 3467 which will contain the error details in case we 3468 failed 3469 3470 """ 3471 bdevs = [] 3472 for cf in disks: 3473 rd = _RecursiveFindBD(cf) 3474 if rd is None: 3475 _Fail("Can't find device %s", cf) 3476 bdevs.append(rd) 3477 3478 msg = [] 3479 for rd in bdevs: 3480 try: 3481 rd.Close() 3482 except errors.BlockDeviceError, err: 3483 msg.append(str(err)) 3484 if msg: 3485 _Fail("Can't make devices secondary: %s", ",".join(msg)) 3486 else: 3487 if instance_name: 3488 _RemoveBlockDevLinks(instance_name, disks)
3489
3490 3491 -def ValidateHVParams(hvname, hvparams):
3492 """Validates the given hypervisor parameters. 3493 3494 @type hvname: string 3495 @param hvname: the hypervisor name 3496 @type hvparams: dict 3497 @param hvparams: the hypervisor parameters to be validated 3498 @rtype: None 3499 3500 """ 3501 try: 3502 hv_type = hypervisor.GetHypervisor(hvname) 3503 hv_type.ValidateParameters(hvparams) 3504 except errors.HypervisorError, err: 3505 _Fail(str(err), log=False)
3506
3507 3508 -def _CheckOSPList(os_obj, parameters):
3509 """Check whether a list of parameters is supported by the OS. 3510 3511 @type os_obj: L{objects.OS} 3512 @param os_obj: OS object to check 3513 @type parameters: list 3514 @param parameters: the list of parameters to check 3515 3516 """ 3517 supported = [v[0] for v in os_obj.supported_parameters] 3518 delta = frozenset(parameters).difference(supported) 3519 if delta: 3520 _Fail("The following parameters are not supported" 3521 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
3522
3523 3524 -def ValidateOS(required, osname, checks, osparams):
3525 """Validate the given OS' parameters. 3526 3527 @type required: boolean 3528 @param required: whether absence of the OS should translate into 3529 failure or not 3530 @type osname: string 3531 @param osname: the OS to be validated 3532 @type checks: list 3533 @param checks: list of the checks to run (currently only 'parameters') 3534 @type osparams: dict 3535 @param osparams: dictionary with OS parameters 3536 @rtype: boolean 3537 @return: True if the validation passed, or False if the OS was not 3538 found and L{required} was false 3539 3540 """ 3541 if not constants.OS_VALIDATE_CALLS.issuperset(checks): 3542 _Fail("Unknown checks required for OS %s: %s", osname, 3543 set(checks).difference(constants.OS_VALIDATE_CALLS)) 3544 3545 name_only = objects.OS.GetName(osname) 3546 status, tbv = _TryOSFromDisk(name_only, None) 3547 3548 if not status: 3549 if required: 3550 _Fail(tbv) 3551 else: 3552 return False 3553 3554 if max(tbv.api_versions) < constants.OS_API_V20: 3555 return True 3556 3557 if constants.OS_VALIDATE_PARAMETERS in checks: 3558 _CheckOSPList(tbv, osparams.keys()) 3559 3560 validate_env = OSCoreEnv(osname, tbv, osparams) 3561 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, 3562 cwd=tbv.path, reset_env=True) 3563 if result.failed: 3564 logging.error("os validate command '%s' returned error: %s output: %s", 3565 result.cmd, result.fail_reason, result.output) 3566 _Fail("OS validation script failed (%s), output: %s", 3567 result.fail_reason, result.output, log=False) 3568 3569 return True
3570
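A hedged usage sketch for the validator above; the OS name and parameter are made-up examples, and the call fails the RPC instead of returning when validation does not pass:

  from ganeti import constants
  from ganeti import backend

  # "dummy+default" and the "greeting" parameter are hypothetical
  ok = backend.ValidateOS(True, "dummy+default",
                          [constants.OS_VALIDATE_PARAMETERS],
                          {"greeting": "hello"})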
3571 3572 -def DemoteFromMC():
3573 """Demotes the current node from master candidate role. 3574 3575 """ 3576 # try to ensure we're not the master by mistake 3577 master, myself = ssconf.GetMasterAndMyself() 3578 if master == myself: 3579 _Fail("ssconf status shows I'm the master node, will not demote") 3580 3581 result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) 3582 if not result.failed: 3583 _Fail("The master daemon is running, will not demote") 3584 3585 try: 3586 if os.path.isfile(pathutils.CLUSTER_CONF_FILE): 3587 utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) 3588 except EnvironmentError, err: 3589 if err.errno != errno.ENOENT: 3590 _Fail("Error while backing up cluster file: %s", err, exc=True) 3591 3592 utils.RemoveFile(pathutils.CLUSTER_CONF_FILE)
3593
3594 3595 -def _GetX509Filenames(cryptodir, name):
3596 """Returns the full paths for the private key and certificate. 3597 3598 """ 3599 return (utils.PathJoin(cryptodir, name), 3600 utils.PathJoin(cryptodir, name, _X509_KEY_FILE), 3601 utils.PathJoin(cryptodir, name, _X509_CERT_FILE))
3602
3603 3604 -def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):
3605 """Creates a new X509 certificate for SSL/TLS. 3606 3607 @type validity: int 3608 @param validity: Validity in seconds 3609 @rtype: tuple; (string, string) 3610 @return: Certificate name and public part 3611 3612 """ 3613 (key_pem, cert_pem) = \ 3614 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), 3615 min(validity, _MAX_SSL_CERT_VALIDITY)) 3616 3617 cert_dir = tempfile.mkdtemp(dir=cryptodir, 3618 prefix="x509-%s-" % utils.TimestampForFilename()) 3619 try: 3620 name = os.path.basename(cert_dir) 3621 assert len(name) > 5 3622 3623 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 3624 3625 utils.WriteFile(key_file, mode=0400, data=key_pem) 3626 utils.WriteFile(cert_file, mode=0400, data=cert_pem) 3627 3628 # Never return private key as it shouldn't leave the node 3629 return (name, cert_pem) 3630 except Exception: 3631 shutil.rmtree(cert_dir, ignore_errors=True) 3632 raise
3633
3634 3635 -def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):
3636 """Removes a X509 certificate. 3637 3638 @type name: string 3639 @param name: Certificate name 3640 3641 """ 3642 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 3643 3644 utils.RemoveFile(key_file) 3645 utils.RemoveFile(cert_file) 3646 3647 try: 3648 os.rmdir(cert_dir) 3649 except EnvironmentError, err: 3650 _Fail("Cannot remove certificate directory '%s': %s", 3651 cert_dir, err)
3652
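A short lifecycle sketch for the two certificate helpers above, assuming the caller may write under pathutils.CRYPTO_KEYS_DIR; the validity period is an example value:

  from ganeti import backend

  (name, cert_pem) = backend.CreateX509Certificate(24 * 3600)  # valid for one day
  # hand cert_pem to the peer; the private key never leaves the node
  backend.RemoveX509Certificate(name)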
3653 3654 -def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
3655 """Returns the command for the requested input/output. 3656 3657 @type instance: L{objects.Instance} 3658 @param instance: The instance object 3659 @param mode: Import/export mode 3660 @param ieio: Input/output type 3661 @param ieargs: Input/output arguments 3662 3663 """ 3664 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) 3665 3666 env = None 3667 prefix = None 3668 suffix = None 3669 exp_size = None 3670 3671 if ieio == constants.IEIO_FILE: 3672 (filename, ) = ieargs 3673 3674 if not utils.IsNormAbsPath(filename): 3675 _Fail("Path '%s' is not normalized or absolute", filename) 3676 3677 real_filename = os.path.realpath(filename) 3678 directory = os.path.dirname(real_filename) 3679 3680 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): 3681 _Fail("File '%s' is not under exports directory '%s': %s", 3682 filename, pathutils.EXPORT_DIR, real_filename) 3683 3684 # Create directory 3685 utils.Makedirs(directory, mode=0750) 3686 3687 quoted_filename = utils.ShellQuote(filename) 3688 3689 if mode == constants.IEM_IMPORT: 3690 suffix = "> %s" % quoted_filename 3691 elif mode == constants.IEM_EXPORT: 3692 suffix = "< %s" % quoted_filename 3693 3694 # Retrieve file size 3695 try: 3696 st = os.stat(filename) 3697 except EnvironmentError, err: 3698 logging.error("Can't stat(2) %s: %s", filename, err) 3699 else: 3700 exp_size = utils.BytesToMebibyte(st.st_size) 3701 3702 elif ieio == constants.IEIO_RAW_DISK: 3703 (disk, ) = ieargs 3704 3705 real_disk = _OpenRealBD(disk) 3706 3707 if mode == constants.IEM_IMPORT: 3708 # we set here a smaller block size as, due to transport buffering, more 3709 # than 64-128k will mostly ignored; we use nocreat to fail if the device 3710 # is not already there or we pass a wrong path; we use notrunc to no 3711 # attempt truncate on an LV device; we use oflag=dsync to not buffer too 3712 # much memory; this means that at best, we flush every 64k, which will 3713 # not be very fast 3714 suffix = utils.BuildShellCmd(("| dd of=%s conv=nocreat,notrunc" 3715 " bs=%s oflag=dsync"), 3716 real_disk.dev_path, 3717 str(64 * 1024)) 3718 3719 elif mode == constants.IEM_EXPORT: 3720 # the block size on the read dd is 1MiB to match our units 3721 prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |", 3722 real_disk.dev_path, 3723 str(1024 * 1024), # 1 MB 3724 str(disk.size)) 3725 exp_size = disk.size 3726 3727 elif ieio == constants.IEIO_SCRIPT: 3728 (disk, disk_index, ) = ieargs 3729 3730 assert isinstance(disk_index, (int, long)) 3731 3732 inst_os = OSFromDisk(instance.os) 3733 env = OSEnvironment(instance, inst_os) 3734 3735 if mode == constants.IEM_IMPORT: 3736 env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index] 3737 env["IMPORT_INDEX"] = str(disk_index) 3738 script = inst_os.import_script 3739 3740 elif mode == constants.IEM_EXPORT: 3741 real_disk = _OpenRealBD(disk) 3742 env["EXPORT_DEVICE"] = real_disk.dev_path 3743 env["EXPORT_INDEX"] = str(disk_index) 3744 script = inst_os.export_script 3745 3746 # TODO: Pass special environment only to script 3747 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) 3748 3749 if mode == constants.IEM_IMPORT: 3750 suffix = "| %s" % script_cmd 3751 3752 elif mode == constants.IEM_EXPORT: 3753 prefix = "%s |" % script_cmd 3754 3755 # Let script predict size 3756 exp_size = constants.IE_CUSTOM_SIZE 3757 3758 else: 3759 _Fail("Invalid %s I/O mode %r", mode, ieio) 3760 3761 return (env, prefix, suffix, exp_size)
3762
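As an illustration of the tuple built above, a sketch for a plain file export; "inst" is assumed to be an objects.Instance loaded elsewhere, and the file name is a hypothetical path below pathutils.EXPORT_DIR:

  from ganeti import constants, pathutils, utils, backend

  filename = utils.PathJoin(pathutils.EXPORT_DIR, "inst.example.com", "disk0")
  (env, prefix, suffix, exp_size) = backend._GetImportExportIoCommand(
      inst, constants.IEM_EXPORT, constants.IEIO_FILE, (filename,))
  # For IEIO_FILE exports: env and prefix are None, suffix is "< <filename>" and
  # exp_size is the file size in mebibytes (or None if stat(2) failed)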
3763 3764 -def _CreateImportExportStatusDir(prefix):
3765 """Creates status directory for import/export. 3766 3767 """ 3768 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, 3769 prefix=("%s-%s-" % 3770 (prefix, utils.TimestampForFilename())))
3771
3772 3773 -def StartImportExportDaemon(mode, opts, host, port, instance, component, 3774 ieio, ieioargs):
3775 """Starts an import or export daemon. 3776 3777 @param mode: Import/output mode 3778 @type opts: L{objects.ImportExportOptions} 3779 @param opts: Daemon options 3780 @type host: string 3781 @param host: Remote host for export (None for import) 3782 @type port: int 3783 @param port: Remote port for export (None for import) 3784 @type instance: L{objects.Instance} 3785 @param instance: Instance object 3786 @type component: string 3787 @param component: which part of the instance is transferred now, 3788 e.g. 'disk/0' 3789 @param ieio: Input/output type 3790 @param ieioargs: Input/output arguments 3791 3792 """ 3793 if mode == constants.IEM_IMPORT: 3794 prefix = "import" 3795 3796 if not (host is None and port is None): 3797 _Fail("Can not specify host or port on import") 3798 3799 elif mode == constants.IEM_EXPORT: 3800 prefix = "export" 3801 3802 if host is None or port is None: 3803 _Fail("Host and port must be specified for an export") 3804 3805 else: 3806 _Fail("Invalid mode %r", mode) 3807 3808 if (opts.key_name is None) ^ (opts.ca_pem is None): 3809 _Fail("Cluster certificate can only be used for both key and CA") 3810 3811 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ 3812 _GetImportExportIoCommand(instance, mode, ieio, ieioargs) 3813 3814 if opts.key_name is None: 3815 # Use server.pem 3816 key_path = pathutils.NODED_CERT_FILE 3817 cert_path = pathutils.NODED_CERT_FILE 3818 assert opts.ca_pem is None 3819 else: 3820 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, 3821 opts.key_name) 3822 assert opts.ca_pem is not None 3823 3824 for i in [key_path, cert_path]: 3825 if not os.path.exists(i): 3826 _Fail("File '%s' does not exist" % i) 3827 3828 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) 3829 try: 3830 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) 3831 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) 3832 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) 3833 3834 if opts.ca_pem is None: 3835 # Use server.pem 3836 ca = utils.ReadFile(pathutils.NODED_CERT_FILE) 3837 else: 3838 ca = opts.ca_pem 3839 3840 # Write CA file 3841 utils.WriteFile(ca_file, data=ca, mode=0400) 3842 3843 cmd = [ 3844 pathutils.IMPORT_EXPORT_DAEMON, 3845 status_file, mode, 3846 "--key=%s" % key_path, 3847 "--cert=%s" % cert_path, 3848 "--ca=%s" % ca_file, 3849 ] 3850 3851 if host: 3852 cmd.append("--host=%s" % host) 3853 3854 if port: 3855 cmd.append("--port=%s" % port) 3856 3857 if opts.ipv6: 3858 cmd.append("--ipv6") 3859 else: 3860 cmd.append("--ipv4") 3861 3862 if opts.compress: 3863 cmd.append("--compress=%s" % opts.compress) 3864 3865 if opts.magic: 3866 cmd.append("--magic=%s" % opts.magic) 3867 3868 if exp_size is not None: 3869 cmd.append("--expected-size=%s" % exp_size) 3870 3871 if cmd_prefix: 3872 cmd.append("--cmd-prefix=%s" % cmd_prefix) 3873 3874 if cmd_suffix: 3875 cmd.append("--cmd-suffix=%s" % cmd_suffix) 3876 3877 if mode == constants.IEM_EXPORT: 3878 # Retry connection a few times when connecting to remote peer 3879 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) 3880 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) 3881 elif opts.connect_timeout is not None: 3882 assert mode == constants.IEM_IMPORT 3883 # Overall timeout for establishing connection while listening 3884 cmd.append("--connect-timeout=%s" % opts.connect_timeout) 3885 3886 logfile = _InstanceLogName(prefix, instance.os, instance.name, component) 3887 3888 # TODO: Once _InstanceLogName uses tempfile.mkstemp, 
StartDaemon has 3889 # support for receiving a file descriptor for output 3890 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, 3891 output=logfile) 3892 3893 # The import/export name is simply the status directory name 3894 return os.path.basename(status_dir) 3895 3896 except Exception: 3897 shutil.rmtree(status_dir, ignore_errors=True) 3898 raise
3899
3900 3901 -def GetImportExportStatus(names):
3902 """Returns import/export daemon status. 3903 3904 @type names: sequence 3905 @param names: List of names 3906 @rtype: List of dicts 3907 @return: Returns a list of the state of each named import/export or None if a 3908 status couldn't be read 3909 3910 """ 3911 result = [] 3912 3913 for name in names: 3914 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, 3915 _IES_STATUS_FILE) 3916 3917 try: 3918 data = utils.ReadFile(status_file) 3919 except EnvironmentError, err: 3920 if err.errno != errno.ENOENT: 3921 raise 3922 data = None 3923 3924 if not data: 3925 result.append(None) 3926 continue 3927 3928 result.append(serializer.LoadJson(data)) 3929 3930 return result
3931
3932 3933 -def AbortImportExport(name):
3934 """Sends SIGTERM to a running import/export daemon. 3935 3936 """ 3937 logging.info("Abort import/export %s", name) 3938 3939 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 3940 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 3941 3942 if pid: 3943 logging.info("Import/export %s is running with PID %s, sending SIGTERM", 3944 name, pid) 3945 utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)
3946
3947 3948 -def CleanupImportExport(name):
3949 """Cleanup after an import or export. 3950 3951 If the import/export daemon is still running it's killed. Afterwards the 3952 whole status directory is removed. 3953 3954 """ 3955 logging.info("Finalizing import/export %s", name) 3956 3957 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 3958 3959 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 3960 3961 if pid: 3962 logging.info("Import/export %s is still running with PID %s", 3963 name, pid) 3964 utils.KillProcess(pid, waitpid=False) 3965 3966 shutil.rmtree(status_dir, ignore_errors=True)
3967
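A sketch of how the three helpers above are typically chained once a daemon name has been returned by StartImportExportDaemon; the name below is hypothetical:

  from ganeti import backend

  name = "export-disk0-example"                     # hypothetical status directory name
  [status] = backend.GetImportExportStatus([name])  # None if no status was written yet
  backend.AbortImportExport(name)     # sends SIGTERM if the daemon is still running
  backend.CleanupImportExport(name)   # kills any leftover process, removes status dir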
3968 3969 -def _FindDisks(disks):
3970 """Finds attached L{BlockDev}s for the given disks. 3971 3972 @type disks: list of L{objects.Disk} 3973 @param disks: the disk objects we need to find 3974 3975 @return: list of L{BlockDev} objects or C{None} if a given disk 3976 was not found or was no attached. 3977 3978 """ 3979 bdevs = [] 3980 3981 for disk in disks: 3982 rd = _RecursiveFindBD(disk) 3983 if rd is None: 3984 _Fail("Can't find device %s", disk) 3985 bdevs.append(rd) 3986 return bdevs
3987
3988 3989 -def DrbdDisconnectNet(disks):
3990 """Disconnects the network on a list of drbd devices. 3991 3992 """ 3993 bdevs = _FindDisks(disks) 3994 3995 # disconnect disks 3996 for rd in bdevs: 3997 try: 3998 rd.DisconnectNet() 3999 except errors.BlockDeviceError, err: 4000 _Fail("Can't change network configuration to standalone mode: %s", 4001 err, exc=True)
4002
4003 4004 -def DrbdAttachNet(disks, instance_name, multimaster):
4005 """Attaches the network on a list of drbd devices. 4006 4007 """ 4008 bdevs = _FindDisks(disks) 4009 4010 if multimaster: 4011 for idx, rd in enumerate(bdevs): 4012 try: 4013 _SymlinkBlockDev(instance_name, rd.dev_path, idx) 4014 except EnvironmentError, err: 4015 _Fail("Can't create symlink: %s", err) 4016 # reconnect disks, switch to new master configuration and if 4017 # needed primary mode 4018 for rd in bdevs: 4019 try: 4020 rd.AttachNet(multimaster) 4021 except errors.BlockDeviceError, err: 4022 _Fail("Can't change network configuration: %s", err) 4023 4024 # wait until the disks are connected; we need to retry the re-attach 4025 # if the device becomes standalone, as this might happen if the one 4026 # node disconnects and reconnects in a different mode before the 4027 # other node reconnects; in this case, one or both of the nodes will 4028 # decide it has wrong configuration and switch to standalone 4029 4030 def _Attach(): 4031 all_connected = True 4032 4033 for rd in bdevs: 4034 stats = rd.GetProcStatus() 4035 4036 if multimaster: 4037 # In the multimaster case we have to wait explicitly until 4038 # the resource is Connected and UpToDate/UpToDate, because 4039 # we promote *both nodes* to primary directly afterwards. 4040 # Being in resync is not enough, since there is a race during which we 4041 # may promote a node with an Outdated disk to primary, effectively 4042 # tearing down the connection. 4043 all_connected = (all_connected and 4044 stats.is_connected and 4045 stats.is_disk_uptodate and 4046 stats.peer_disk_uptodate) 4047 else: 4048 all_connected = (all_connected and 4049 (stats.is_connected or stats.is_in_resync)) 4050 4051 if stats.is_standalone: 4052 # peer had different config info and this node became 4053 # standalone, even though this should not happen with the 4054 # new staged way of changing disk configs 4055 try: 4056 rd.AttachNet(multimaster) 4057 except errors.BlockDeviceError, err: 4058 _Fail("Can't change network configuration: %s", err) 4059 4060 if not all_connected: 4061 raise utils.RetryAgain()
4062 4063 try: 4064 # Start with a delay of 100 milliseconds and go up to 5 seconds 4065 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60) 4066 except utils.RetryTimeout: 4067 _Fail("Timeout in disk reconnecting") 4068 4069 if multimaster: 4070 # change to primary mode 4071 for rd in bdevs: 4072 try: 4073 rd.Open() 4074 except errors.BlockDeviceError, err: 4075 _Fail("Can't change to primary mode: %s", err) 4076
4077 4078 -def DrbdWaitSync(disks):
4079 """Wait until DRBDs have synchronized. 4080 4081 """ 4082 def _helper(rd): 4083 stats = rd.GetProcStatus() 4084 if not (stats.is_connected or stats.is_in_resync): 4085 raise utils.RetryAgain() 4086 return stats
4087 4088 bdevs = _FindDisks(disks) 4089 4090 min_resync = 100 4091 alldone = True 4092 for rd in bdevs: 4093 try: 4094 # poll each second for 15 seconds 4095 stats = utils.Retry(_helper, 1, 15, args=[rd]) 4096 except utils.RetryTimeout: 4097 stats = rd.GetProcStatus() 4098 # last check 4099 if not (stats.is_connected or stats.is_in_resync): 4100 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats) 4101 alldone = alldone and (not stats.is_in_resync) 4102 if stats.sync_percent is not None: 4103 min_resync = min(min_resync, stats.sync_percent) 4104 4105 return (alldone, min_resync) 4106
4107 4108 -def DrbdNeedsActivation(disks):
4109 """Checks which of the passed disks needs activation and returns their UUIDs. 4110 4111 """ 4112 faulty_disks = [] 4113 4114 for disk in disks: 4115 rd = _RecursiveFindBD(disk) 4116 if rd is None: 4117 faulty_disks.append(disk) 4118 continue 4119 4120 stats = rd.GetProcStatus() 4121 if stats.is_standalone or stats.is_diskless: 4122 faulty_disks.append(disk) 4123 4124 return [disk.uuid for disk in faulty_disks]
4125
4126 4127 -def GetDrbdUsermodeHelper():
4128 """Returns DRBD usermode helper currently configured. 4129 4130 """ 4131 try: 4132 return drbd.DRBD8.GetUsermodeHelper() 4133 except errors.BlockDeviceError, err: 4134 _Fail(str(err))
4135
4136 4137 -def PowercycleNode(hypervisor_type, hvparams=None):
4138 """Hard-powercycle the node. 4139 4140 Because we need to return first, and schedule the powercycle in the 4141 background, we won't be able to report failures nicely. 4142 4143 """ 4144 hyper = hypervisor.GetHypervisor(hypervisor_type) 4145 try: 4146 pid = os.fork() 4147 except OSError: 4148 # if we can't fork, we'll pretend that we're in the child process 4149 pid = 0 4150 if pid > 0: 4151 return "Reboot scheduled in 5 seconds" 4152 # ensure the child is running on ram 4153 try: 4154 utils.Mlockall() 4155 except Exception: # pylint: disable=W0703 4156 pass 4157 time.sleep(5) 4158 hyper.PowercycleNode(hvparams=hvparams)
4159
4160 4161 -def _VerifyRestrictedCmdName(cmd):
4162 """Verifies a restricted command name. 4163 4164 @type cmd: string 4165 @param cmd: Command name 4166 @rtype: tuple; (boolean, string or None) 4167 @return: The tuple's first element is the status; if C{False}, the second 4168 element is an error message string, otherwise it's C{None} 4169 4170 """ 4171 if not cmd.strip(): 4172 return (False, "Missing command name") 4173 4174 if os.path.basename(cmd) != cmd: 4175 return (False, "Invalid command name") 4176 4177 if not constants.EXT_PLUGIN_MASK.match(cmd): 4178 return (False, "Command name contains forbidden characters") 4179 4180 return (True, None)
4181
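A few illustrative inputs for the name check above; whether a name also passes constants.EXT_PLUGIN_MASK depends on that regular expression, so the last outcome is only an assumption:

  from ganeti import backend

  backend._VerifyRestrictedCmdName("")           # (False, "Missing command name")
  backend._VerifyRestrictedCmdName("../backup")  # (False, "Invalid command name")
  backend._VerifyRestrictedCmdName("backup")     # (True, None), assuming "backup"
                                                 # matches EXT_PLUGIN_MASK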
4182 4183 -def _CommonRestrictedCmdCheck(path, owner):
4184 """Common checks for restricted command file system directories and files. 4185 4186 @type path: string 4187 @param path: Path to check 4188 @param owner: C{None} or tuple containing UID and GID 4189 @rtype: tuple; (boolean, string or C{os.stat} result) 4190 @return: The tuple's first element is the status; if C{False}, the second 4191 element is an error message string, otherwise it's the result of C{os.stat} 4192 4193 """ 4194 if owner is None: 4195 # Default to root as owner 4196 owner = (0, 0) 4197 4198 try: 4199 st = os.stat(path) 4200 except EnvironmentError, err: 4201 return (False, "Can't stat(2) '%s': %s" % (path, err)) 4202 4203 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): 4204 return (False, "Permissions on '%s' are too permissive" % path) 4205 4206 if (st.st_uid, st.st_gid) != owner: 4207 (owner_uid, owner_gid) = owner 4208 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) 4209 4210 return (True, st)
4211
4212 4213 -def _VerifyRestrictedCmdDirectory(path, _owner=None):
4214 """Verifies restricted command directory. 4215 4216 @type path: string 4217 @param path: Path to check 4218 @rtype: tuple; (boolean, string or None) 4219 @return: The tuple's first element is the status; if C{False}, the second 4220 element is an error message string, otherwise it's C{None} 4221 4222 """ 4223 (status, value) = _CommonRestrictedCmdCheck(path, _owner) 4224 4225 if not status: 4226 return (False, value) 4227 4228 if not stat.S_ISDIR(value.st_mode): 4229 return (False, "Path '%s' is not a directory" % path) 4230 4231 return (True, None)
4232
4233 4234 -def _VerifyRestrictedCmd(path, cmd, _owner=None):
4235 """Verifies a whole restricted command and returns its executable filename. 4236 4237 @type path: string 4238 @param path: Directory containing restricted commands 4239 @type cmd: string 4240 @param cmd: Command name 4241 @rtype: tuple; (boolean, string) 4242 @return: The tuple's first element is the status; if C{False}, the second 4243 element is an error message string, otherwise the second element is the 4244 absolute path to the executable 4245 4246 """ 4247 executable = utils.PathJoin(path, cmd) 4248 4249 (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) 4250 4251 if not status: 4252 return (False, msg) 4253 4254 if not utils.IsExecutable(executable): 4255 return (False, "access(2) thinks '%s' can't be executed" % executable) 4256 4257 return (True, executable)
4258
4259 4260 -def _PrepareRestrictedCmd(path, cmd, 4261 _verify_dir=_VerifyRestrictedCmdDirectory, 4262 _verify_name=_VerifyRestrictedCmdName, 4263 _verify_cmd=_VerifyRestrictedCmd):
4264 """Performs a number of tests on a restricted command. 4265 4266 @type path: string 4267 @param path: Directory containing restricted commands 4268 @type cmd: string 4269 @param cmd: Command name 4270 @return: Same as L{_VerifyRestrictedCmd} 4271 4272 """ 4273 # Verify the directory first 4274 (status, msg) = _verify_dir(path) 4275 if status: 4276 # Check command if everything was alright 4277 (status, msg) = _verify_name(cmd) 4278 4279 if not status: 4280 return (False, msg) 4281 4282 # Check actual executable 4283 return _verify_cmd(path, cmd)
4284
4285 4286 -def RunRestrictedCmd(cmd, 4287 _lock_timeout=_RCMD_LOCK_TIMEOUT, 4288 _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, 4289 _path=pathutils.RESTRICTED_COMMANDS_DIR, 4290 _sleep_fn=time.sleep, 4291 _prepare_fn=_PrepareRestrictedCmd, 4292 _runcmd_fn=utils.RunCmd, 4293 _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
4294 """Executes a restricted command after performing strict tests. 4295 4296 @type cmd: string 4297 @param cmd: Command name 4298 @rtype: string 4299 @return: Command output 4300 @raise RPCFail: In case of an error 4301 4302 """ 4303 logging.info("Preparing to run restricted command '%s'", cmd) 4304 4305 if not _enabled: 4306 _Fail("Restricted commands disabled at configure time") 4307 4308 lock = None 4309 try: 4310 cmdresult = None 4311 try: 4312 lock = utils.FileLock.Open(_lock_file) 4313 lock.Exclusive(blocking=True, timeout=_lock_timeout) 4314 4315 (status, value) = _prepare_fn(_path, cmd) 4316 4317 if status: 4318 cmdresult = _runcmd_fn([value], env={}, reset_env=True, 4319 postfork_fn=lambda _: lock.Unlock()) 4320 else: 4321 logging.error(value) 4322 except Exception: # pylint: disable=W0703 4323 # Keep original error in log 4324 logging.exception("Caught exception") 4325 4326 if cmdresult is None: 4327 logging.info("Sleeping for %0.1f seconds before returning", 4328 _RCMD_INVALID_DELAY) 4329 _sleep_fn(_RCMD_INVALID_DELAY) 4330 4331 # Do not include original error message in returned error 4332 _Fail("Executing command '%s' failed" % cmd) 4333 elif cmdresult.failed or cmdresult.fail_reason: 4334 _Fail("Restricted command '%s' failed: %s; output: %s", 4335 cmd, cmdresult.fail_reason, cmdresult.output) 4336 else: 4337 return cmdresult.output 4338 finally: 4339 if lock is not None: 4340 # Release lock at last 4341 lock.Close() 4342 lock = None
4343
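A minimal call sketch, assuming restricted commands were enabled at configure time and that an executable named "backup" (hypothetical) exists in pathutils.RESTRICTED_COMMANDS_DIR with the ownership and permissions checked above:

  from ganeti import backend

  output = backend.RunRestrictedCmd("backup")  # raises RPCFail on any verification error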
4344 4345 -def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):
4346 """Creates or removes the watcher pause file. 4347 4348 @type until: None or number 4349 @param until: Unix timestamp saying until when the watcher shouldn't run 4350 4351 """ 4352 if until is None: 4353 logging.info("Received request to no longer pause watcher") 4354 utils.RemoveFile(_filename) 4355 else: 4356 logging.info("Received request to pause watcher until %s", until) 4357 4358 if not ht.TNumber(until): 4359 _Fail("Duration must be numeric") 4360 4361 utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)
4362
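Usage sketch: pausing the watcher for 30 minutes and then lifting the pause again; both calls only create or remove pathutils.WATCHER_PAUSEFILE:

  import time
  from ganeti import backend

  backend.SetWatcherPause(time.time() + 1800)  # write the pause file with the timestamp
  backend.SetWatcherPause(None)                # remove the pause file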
4363 4364 -def ConfigureOVS(ovs_name, ovs_link):
4365 """Creates a OpenvSwitch on the node. 4366 4367 This function sets up a OpenvSwitch on the node with given name nad 4368 connects it via a given eth device. 4369 4370 @type ovs_name: string 4371 @param ovs_name: Name of the OpenvSwitch to create. 4372 @type ovs_link: None or string 4373 @param ovs_link: Ethernet device for outside connection (can be missing) 4374 4375 """ 4376 # Initialize the OpenvSwitch 4377 result = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name]) 4378 if result.failed: 4379 _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'" 4380 % (result.exit_code, result.output), log=True) 4381 4382 # And connect it to a physical interface, if given 4383 if ovs_link: 4384 result = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link]) 4385 if result.failed: 4386 _Fail("Failed to connect openvswitch to interface %s. Script return" 4387 " value: %s, output: '%s'" % (ovs_link, result.exit_code, 4388 result.output), log=True)
4389
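The call below is equivalent to running the two ovs-vsctl commands shown in the function body; the switch and interface names are hypothetical examples:

  from ganeti import backend

  backend.ConfigureOVS("switch1", "eth0")
  # roughly: ovs-vsctl add-br switch1 && ovs-vsctl add-port switch1 eth0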
4390 4391 -class HooksRunner(object):
4392 """Hook runner. 4393 4394 This class is instantiated on the node side (ganeti-noded) and not 4395 on the master side. 4396 4397 """
4398 - def __init__(self, hooks_base_dir=None):
4399 """Constructor for hooks runner. 4400 4401 @type hooks_base_dir: str or None 4402 @param hooks_base_dir: if not None, this overrides the 4403 L{pathutils.HOOKS_BASE_DIR} (useful for unittests) 4404 4405 """ 4406 if hooks_base_dir is None: 4407 hooks_base_dir = pathutils.HOOKS_BASE_DIR 4408 # yeah, _BASE_DIR is not valid for attributes, we use it like a 4409 # constant 4410 self._BASE_DIR = hooks_base_dir # pylint: disable=C0103
4411
4412 - def RunLocalHooks(self, node_list, hpath, phase, env):
4413 """Check that the hooks will be run only locally and then run them. 4414 4415 """ 4416 assert len(node_list) == 1 4417 node = node_list[0] 4418 _, myself = ssconf.GetMasterAndMyself() 4419 assert node == myself 4420 4421 results = self.RunHooks(hpath, phase, env) 4422 4423 # Return values in the form expected by HooksMaster 4424 return {node: (None, False, results)}
4425
4426 - def RunHooks(self, hpath, phase, env):
4427 """Run the scripts in the hooks directory. 4428 4429 @type hpath: str 4430 @param hpath: the path to the hooks directory which 4431 holds the scripts 4432 @type phase: str 4433 @param phase: either L{constants.HOOKS_PHASE_PRE} or 4434 L{constants.HOOKS_PHASE_POST} 4435 @type env: dict 4436 @param env: dictionary with the environment for the hook 4437 @rtype: list 4438 @return: list of 3-element tuples: 4439 - script path 4440 - script result, either L{constants.HKR_SUCCESS} or 4441 L{constants.HKR_FAIL} 4442 - output of the script 4443 4444 @raise errors.ProgrammerError: for invalid input 4445 parameters 4446 4447 """ 4448 if phase == constants.HOOKS_PHASE_PRE: 4449 suffix = "pre" 4450 elif phase == constants.HOOKS_PHASE_POST: 4451 suffix = "post" 4452 else: 4453 _Fail("Unknown hooks phase '%s'", phase) 4454 4455 subdir = "%s-%s.d" % (hpath, suffix) 4456 dir_name = utils.PathJoin(self._BASE_DIR, subdir) 4457 4458 results = [] 4459 4460 if not os.path.isdir(dir_name): 4461 # for non-existing/non-dirs, we simply exit instead of logging a 4462 # warning at every operation 4463 return results 4464 4465 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) 4466 4467 for (relname, relstatus, runresult) in runparts_results: 4468 if relstatus == constants.RUNPARTS_SKIP: 4469 rrval = constants.HKR_SKIP 4470 output = "" 4471 elif relstatus == constants.RUNPARTS_ERR: 4472 rrval = constants.HKR_FAIL 4473 output = "Hook script execution error: %s" % runresult 4474 elif relstatus == constants.RUNPARTS_RUN: 4475 if runresult.failed: 4476 rrval = constants.HKR_FAIL 4477 else: 4478 rrval = constants.HKR_SUCCESS 4479 output = utils.SafeEncode(runresult.output.strip()) 4480 results.append(("%s/%s" % (subdir, relname), rrval, output)) 4481 4482 return results
4483
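A sketch of how the runner above derives the hooks directory and what it returns; the hooks path and environment are example values:

  from ganeti import constants
  from ganeti import backend

  runner = backend.HooksRunner()
  # for hpath="instance-start" and the pre phase, scripts are looked up in
  # <hooks_base_dir>/instance-start-pre.d and run with only the given environment
  results = runner.RunHooks("instance-start", constants.HOOKS_PHASE_PRE,
                            {"GANETI_HOOKS_PHASE": "pre"})
  # each entry is (subdir/script, HKR_* status, output)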
4484 4485 -class IAllocatorRunner(object):
4486 """IAllocator runner. 4487 4488 This class is instantiated on the node side (ganeti-noded) and not on 4489 the master side. 4490 4491 """ 4492 @staticmethod
4493 - def Run(name, idata):
4494 """Run an iallocator script. 4495 4496 @type name: str 4497 @param name: the iallocator script name 4498 @type idata: str 4499 @param idata: the allocator input data 4500 4501 @rtype: tuple 4502 @return: two element tuple of: 4503 - status 4504 - either error message or stdout of allocator (for success) 4505 4506 """ 4507 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH, 4508 os.path.isfile) 4509 if alloc_script is None: 4510 _Fail("iallocator module '%s' not found in the search path", name) 4511 4512 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.") 4513 try: 4514 os.write(fd, idata) 4515 os.close(fd) 4516 result = utils.RunCmd([alloc_script, fin_name]) 4517 if result.failed: 4518 _Fail("iallocator module '%s' failed: %s, output '%s'", 4519 name, result.fail_reason, result.output) 4520 finally: 4521 os.unlink(fin_name) 4522 4523 return result.stdout
4524
4525 4526 -class DevCacheManager(object):
4527 """Simple class for managing a cache of block device information. 4528 4529 """ 4530 _DEV_PREFIX = "/dev/" 4531 _ROOT_DIR = pathutils.BDEV_CACHE_DIR 4532 4533 @classmethod
4534 - def _ConvertPath(cls, dev_path):
4535 """Converts a /dev/name path to the cache file name. 4536 4537 This replaces slashes with underscores and strips the /dev 4538 prefix. It then returns the full path to the cache file. 4539 4540 @type dev_path: str 4541 @param dev_path: the C{/dev/} path name 4542 @rtype: str 4543 @return: the converted path name 4544 4545 """ 4546 if dev_path.startswith(cls._DEV_PREFIX): 4547 dev_path = dev_path[len(cls._DEV_PREFIX):] 4548 dev_path = dev_path.replace("/", "_") 4549 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path) 4550 return fpath
4551 4552 @classmethod
4553 - def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
4554 """Updates the cache information for a given device. 4555 4556 @type dev_path: str 4557 @param dev_path: the pathname of the device 4558 @type owner: str 4559 @param owner: the owner (instance name) of the device 4560 @type on_primary: bool 4561 @param on_primary: whether this is the primary 4562 node nor not 4563 @type iv_name: str 4564 @param iv_name: the instance-visible name of the 4565 device, as in objects.Disk.iv_name 4566 4567 @rtype: None 4568 4569 """ 4570 if dev_path is None: 4571 logging.error("DevCacheManager.UpdateCache got a None dev_path") 4572 return 4573 fpath = cls._ConvertPath(dev_path) 4574 if on_primary: 4575 state = "primary" 4576 else: 4577 state = "secondary" 4578 if iv_name is None: 4579 iv_name = "not_visible" 4580 fdata = "%s %s %s\n" % (str(owner), state, iv_name) 4581 try: 4582 utils.WriteFile(fpath, data=fdata) 4583 except EnvironmentError, err: 4584 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
4585 4586 @classmethod
4587 - def RemoveCache(cls, dev_path):
4588 """Remove data for a dev_path. 4589 4590 This is just a wrapper over L{utils.io.RemoveFile} with a converted 4591 path name and logging. 4592 4593 @type dev_path: str 4594 @param dev_path: the pathname of the device 4595 4596 @rtype: None 4597 4598 """ 4599 if dev_path is None: 4600 logging.error("DevCacheManager.RemoveCache got a None dev_path") 4601 return 4602 fpath = cls._ConvertPath(dev_path) 4603 try: 4604 utils.RemoveFile(fpath) 4605 except EnvironmentError, err: 4606 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
4607
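A worked example of the path conversion and cache update performed by this class; the device path and owner are hypothetical:

  from ganeti import backend

  # "/dev/xenvg/disk0" is converted to "<BDEV_CACHE_DIR>/bdev_xenvg_disk0"
  backend.DevCacheManager.UpdateCache("/dev/xenvg/disk0",
                                      "instance1.example.com", True, "disk/0")
  # the cache file now contains the line "instance1.example.com primary disk/0"
  backend.DevCacheManager.RemoveCache("/dev/xenvg/disk0")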