ganeti.backend

Source Code for Module ganeti.backend

1 # 2 # 3 4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 5 # All rights reserved. 6 # 7 # Redistribution and use in source and binary forms, with or without 8 # modification, are permitted provided that the following conditions are 9 # met: 10 # 11 # 1. Redistributions of source code must retain the above copyright notice, 12 # this list of conditions and the following disclaimer. 13 # 14 # 2. Redistributions in binary form must reproduce the above copyright 15 # notice, this list of conditions and the following disclaimer in the 16 # documentation and/or other materials provided with the distribution. 17 # 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Functions used by the node daemon

@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
     the L{UploadFile} function
@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
     in the L{_CleanDirectory} function

"""

# pylint: disable=E1103,C0302

# E1103: %s %r has no %r member (but some types could not be
# inferred), because the _TryOSFromDisk returns either (True, os_obj)
# or (False, "string") which confuses pylint

# C0302: This module has become too big and should be split up


import base64
import errno
import logging
import os
import os.path
import pycurl
import random
import re
import shutil
import signal
import stat
import tempfile
import time
import zlib
import contextlib
import collections

from ganeti import errors
from ganeti import http
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti.hypervisor import hv_base
from ganeti import constants
from ganeti.storage import bdev
from ganeti.storage import drbd
from ganeti.storage import extstorage
from ganeti.storage import filestorage
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
from ganeti.storage.base import BlockDev
from ganeti.storage.drbd import DRBD8
from ganeti import hooksmaster
import ganeti.metad as metad


#: File from which L{GetNodeInfo} reads the boot ID of this node
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
#: The only directories L{_CleanDirectory} agrees to clean
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])
#: Seven days expressed in seconds (presumably an upper bound for the
#: validity of generated SSL certificates -- confirm against the users of
#: this constant)
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
# File names; judging by the prefixes these are used for X509 material and
# import/export status directories (not visible in this part of the module)
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

#: Valid LVS output line regex
_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")

# Actions for the master setup script
_MASTER_START = "start"
_MASTER_STOP = "stop"

#: Maximum file permissions for restricted command directory and executables
_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

#: Delay before returning an error for restricted commands
_RCMD_INVALID_DELAY = 10

#: How long to wait to acquire lock for restricted commands (shorter than
#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many
#: command requests arrive
_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8

class RPCFail(Exception):
  """Exception signalling that an RPC call has failed.

  The single constructor argument carries the error message.

  """

133

def _GetInstReasonFilename(instance_name):
  """Compute the path of the file holding an instance's state change reason.

  The file lives directly below C{pathutils.INSTANCE_REASON_DIR} and is
  named after the instance.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  reason_dir = pathutils.INSTANCE_REASON_DIR
  return utils.PathJoin(reason_dir, instance_name)

145

def _StoreInstReasonTrail(instance_name, trail):
  """Write the reason trail of an instance state change to its reason file.

  The trail is serialized to JSON and stored in the file returned by
  L{_GetInstReasonFilename}, whose location depends on the instance name
  and on the deploy-time configuration of the Ganeti cluster.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  # Serialize first so that a serialization error surfaces before the target
  # path is computed, exactly as it always did
  serialized = serializer.DumpJson(trail)
  utils.WriteFile(_GetInstReasonFilename(instance_name), data=serialized)

165

def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  The exception is handled specially in the ganeti daemon and turned
  into a 'failed' return type, which makes this function a convenient
  shortcut for logging an error and reporting it to the master daemon.

  @type msg: string
  @param msg: the text of the exception; if positional arguments are
      given it is used as a C{%}-format string for them
  @keyword log: if given and false, the message is not logged
  @keyword exc: if given and true, the message is logged via
      C{logging.exception} instead of C{logging.error}
  @raise RPCFail

  """
  if args:
    msg = msg % args

  # Logging is on unless the caller explicitly passed a false "log"
  if kwargs.get("log", True):
    if kwargs.get("exc"):
      log_fn = logging.exception
    else:
      log_fn = logging.error
    log_fn(msg)

  raise RPCFail(msg)

188

def _GetConfig():
  """Create and return a fresh SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  store = ssconf.SimpleStore()
  return store

198

def _GetSshRunner(cluster_name):
  """Create and return an SshRunner for the given cluster.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
                       by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  runner = ssh.SshRunner(cluster_name)
  return runner

211

def _Decompress(data):
  """Unpack data which was possibly compressed by the RPC client.

  @type data: list or tuple
  @param data: pair of (encoding, content) as sent by the RPC client
  @rtype: str
  @return: the content itself for C{constants.RPC_ENCODING_NONE}, the
      base64-decoded and zlib-decompressed content for
      C{constants.RPC_ENCODING_ZLIB_BASE64}
  @raise AssertionError: if the encoding is not one of the two above

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2

  (encoding, payload) = data

  if encoding == constants.RPC_ENCODING_NONE:
    return payload

  if encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(payload))

  raise AssertionError("Unknown data encoding")

231

def _CleanDirectory(path, exclude=None):
  """Delete every regular file found directly inside a directory.

  Only directories listed in L{_ALLOWED_CLEAN_DIRS} are accepted. Symbolic
  links and anything that is not a regular file are left alone, and so are
  the files named in C{exclude}. A non-existing directory is silently
  ignored.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list
  @raise RPCFail: if C{path} is not an allowed clean target

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return

  # Excluded paths are normalized before being compared to the candidates
  if exclude is None:
    skipped = []
  else:
    skipped = [os.path.normpath(entry) for entry in exclude]

  for entry in utils.ListVisibleFiles(path):
    candidate = utils.PathJoin(path, entry)
    if candidate in skipped:
      continue
    is_plain_file = (os.path.isfile(candidate) and
                     not os.path.islink(candidate))
    if is_plain_file:
      utils.RemoveFile(candidate)

261

def _BuildUploadFileList():
  """Compute the set of files which may be uploaded.

  The set consists of a fixed list of cluster files plus the first element
  of C{GetAncillaryFiles()} of every hypervisor class. It is kept in a
  function so that it is computed only once, at module import time.

  @rtype: frozenset
  @return: the paths of all files accepted for upload

  """
  allowed = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    ancillary = hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()
    allowed.update(ancillary[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed)


_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()

def JobQueuePurge():
  """Removes job queue files and archived jobs.

  Cleans the queue directory, sparing only the job queue lock file, and
  then the job queue archive directory.

  @rtype: None

  """
  lock_file = pathutils.JOB_QUEUE_LOCK_FILE
  _CleanDirectory(pathutils.QUEUE_DIR, exclude=[lock_file])
  _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)

304

def GetMasterNodeName():
  """Look up the name of the master node.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: if reading it fails with a configuration error

  """
  try:
    master_name = _GetConfig().GetMasterNode()
  except errors.ConfigurationError as err:
    # _Fail logs the message (with traceback) and raises RPCFail
    _Fail("Cluster configuration incomplete: %s", err, exc=True)
  else:
    return master_name

318

def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  The pre-phase hooks run first, then the decorated function, then the
  post-phase hooks; the value returned by the decorated function is
  passed through. If the decorated function raises, the post-phase hooks
  are not run. The wrapper keeps the name and docstring of the decorated
  function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
    environment variables for the hooks. Will get all the parameters of the
    decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  # Imported locally so that this function does not depend on a change of
  # the module-level import list
  import functools

  def decorator(fn):
    # Without functools.wraps every decorated function would be reported as
    # "wrapper" and lose its docstring
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
      _, myself = ssconf.GetMasterAndMyself()
      nodes = ([myself], [myself]) # these hooks run locally

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hr = HooksRunner()
      hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
                                   hr.RunLocalHooks, None, env_fn, None,
                                   logging.warning, cfg.GetClusterName(),
                                   cfg.GetMasterNode())
      hm.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hm.RunPhase(constants.HOOKS_PHASE_POST)

      return result
    return wrapper
  return decorator

def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Assemble the environment variables passed to the master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script (unused, but necessary because the
    L{RunLocalHooks} decorator hands all arguments of the decorated
    function to the environment builder)
  @rtype: dict
  @return: the variables MASTER_NETDEV, MASTER_IP, MASTER_NETMASK and
    CLUSTER_IP_VERSION

  """
  # pylint: disable=W0613
  ip_version = \
    netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)

  return {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ip_version),
    }

377

def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Run the script which sets up or tears down the master IP address.

  The script is called with C{action} as its only argument and with an
  environment consisting solely of the variables built by
  L{_BuildMasterIpEnv}.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
    L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
    script

  """
  env = _BuildMasterIpEnv(master_params)

  if use_external_mip_script:
    script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  result = utils.RunCmd([script, action], env=env, reset_env=True)
  if not result.failed:
    return

  # The message is interpolated by _Fail itself
  _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'",
        action, result.exit_code, result.output, log=True)

@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Bring up the IP address of the master daemon on this node.

  Runs the master IP setup script with the "start" action; the
  "master-ip-turnup" hooks are run around it by the decorator.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  action = _MASTER_START
  _RunMasterSetupScript(master_params, action, use_external_mip_script)

423

def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  Runs C{daemon-util start-master}. When C{no_voting} is set, the
  arguments "--no-voting --yes-do-it" are handed to the started daemons
  through the EXTRA_LUXID_ARGS and EXTRA_WCONFD_ARGS environment
  variables.

  @type no_voting: boolean
  @param no_voting: whether to start the master daemons without a node
      vote but still non-interactively
  @rtype: None
  @raise RPCFail: if the start command fails

  """
  if no_voting:
    daemon_args = "--no-voting --yes-do-it"
  else:
    daemon_args = ""

  env = {
    "EXTRA_LUXID_ARGS": daemon_args,
    "EXTRA_WCONFD_ARGS": daemon_args,
    }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    # _Fail logs the message before raising, so no separate logging.error
    # call is made here; it would only duplicate the log entry
    _Fail("Can't start Ganeti master: %s", result.output)

@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Take down the master IP address on this node.

  Runs the master IP setup script with the "stop" action; the
  "master-ip-turndown" hooks are run around it by the decorator.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  action = _MASTER_STOP
  _RunMasterSetupScript(master_params, action, use_external_mip_script)

469

def StopMasterDaemons():
  """Stop the master daemons on this node.

  Runs C{daemon-util stop-master}. A failure of the command is only
  logged; it is not reported back to the caller.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  stop_cmd = [pathutils.DAEMON_UTIL, "stop-master"]
  result = utils.RunCmd(stop_cmd)
  if not result.failed:
    return

  logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                " and error %s",
                result.cmd, result.exit_code, result.output)

487

def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  The address is first added with the new netmask and only afterwards
  removed with the old one. Nothing is done if both netmasks are equal.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device
  @raise RPCFail: if this node does not own the master IP or if one of
      the two C{ip address} commands fails

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  label = "%s:0" % master_netdev

  # (ip verb, netmask to use, message on failure), in execution order
  steps = [
    ("add", netmask,
     "Could not set the new netmask on the master IP address"),
    ("del", old_netmask,
     "Could not bring down the master IP address with the old netmask"),
    ]

  for (verb, mask, failure_msg) in steps:
    result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", verb,
                           "%s/%s" % (master_ip, mask),
                           "dev", master_netdev, "label", label])
    if result.failed:
      _Fail(failure_msg)

518

def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry
  @raise RPCFail: if C{mode} is neither C{constants.ETC_HOSTS_ADD} nor
      C{constants.ETC_HOSTS_REMOVE}, if an entry is to be added without
      C{ip}, or if an entry is to be removed with C{ip} given

  """
  # Invalid requests must really fail: merely instantiating RPCFail without
  # raising it would let them through and modify /etc/hosts anyway. _Fail
  # logs the message and raises RPCFail.
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
            " present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but"
            " parameter is present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")

540

def LeaveCluster(modify_ssh_setup):
  """Cleans up the current node and prepares it for leaving the cluster.

  The data, crypto key and job queue directories are cleaned, optionally
  the SSH keys of the login user are removed, the cluster secrets are
  deleted and the confd, mond and kvmd daemons are stopped. Problems with
  the SSH files or the secrets are logged but do not abort the cleanup.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  @type modify_ssh_setup: boolean
  @param modify_ssh_setup: whether to remove the SSH key pair of the login
      user and its entry in the authorized keys file

  """
  for directory in (pathutils.DATA_DIR, pathutils.CRYPTO_KEYS_DIR):
    _CleanDirectory(directory)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      (priv_key, pub_key, auth_keys) = \
        ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      pub_key_data = utils.ReadFile(pub_key)
      ssh.RemoveAuthorizedKey(auth_keys, pub_key_data)

      for key_file in (priv_key, pub_key):
        utils.RemoveFile(key_file)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")
    except IOError:
      logging.exception("At least one SSH file was not accessible.")

  # Best effort: the first failure is logged and ends the removal of the
  # secrets, but the node still goes on leaving the cluster
  try:
    for secret in (pathutils.CONFD_HMAC_KEY,
                   pathutils.RAPI_CERT_FILE,
                   pathutils.SPICE_CERT_FILE,
                   pathutils.SPICE_CACERT_FILE,
                   pathutils.NODED_CERT_FILE):
      utils.RemoveFile(secret)
  except: # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  for daemon in (constants.CONFD, constants.MOND, constants.KVMD):
    utils.StopDaemon(daemon)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")

587

def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters
  @raise errors.ProgrammerError: if C{params} is C{None}, is not a list or
      does not contain exactly C{num_params} elements

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting is provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if len(params) != num_params:
    # Note the leading space of the second fragment; without it the message
    # reads "number ofstorage parameters"
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))

608

def _CheckLvmStorageParams(params):
  """Validate the storage parameters of an LVM storage unit.

  Exactly one parameter is expected, the boolean 'exclusive storage' flag.

  @see: C{_CheckStorageParams}
  @rtype: bool
  @return: the exclusive storage flag
  @raise errors.ProgrammerError: if the flag is not a boolean

  """
  _CheckStorageParams(params, 1)

  excl_stor = params[0]
  if isinstance(excl_stor, bool):
    return excl_stor

  raise errors.ProgrammerError("Exclusive storage parameter is not"
                               " boolean: '%s'." % excl_stor)

622

def _GetLvmVgSpaceInfo(name, params):
  """Validate the storage parameters, then delegate to C{_GetVgInfo}.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should be
    containing only one for exclusive storage

  """
  return _GetVgInfo(name, _CheckLvmStorageParams(params))

636

def _GetVgInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about a LVM volume group.

  @type name: string
  @param name: name of the volume group
  @param excl_stor: exclusive storage flag, handed on to C{info_fn}
  @param info_fn: function called with a list of VG names and the
      exclusive storage flag; the first two fields of the first entry it
      returns are reported as free and total size
  @rtype: dict
  @return: dictionary with the keys "type", "name", "storage_free" and
      "storage_size"; both sizes are rounded to integers, or C{None} if
      C{info_fn} returned nothing

  """
  # TODO: GetVGInfo supports returning information for multiple VGs at once
  vginfo = info_fn([name], excl_stor)

  vg_free = None
  vg_size = None
  if vginfo:
    first_vg = vginfo[0]
    vg_free = int(round(first_vg[0], 0))
    vg_size = int(round(first_vg[1], 0))

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }

658

def _GetLvmPvSpaceInfo(name, params):
  """Validate the storage parameters, then delegate to C{_GetVgSpindlesInfo}.

  @see: C{_GetLvmVgSpaceInfo}

  """
  return _GetVgSpindlesInfo(name, _CheckLvmStorageParams(params))

668

def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @param info_fn: function called with the VG name which returns the pair
      (free spindles, total spindles); only used with exclusive storage
  @rtype: dict
  @return: dictionary with the keys "type", "name", "storage_free" and
      "storage_size" for the storage type, the VG name, the free spindles
      and the total spindles respectively; both counts are 0 without
      exclusive storage

  """
  if excl_stor:
    (free_spindles, total_spindles) = info_fn(name)
  else:
    (free_spindles, total_spindles) = (0, 0)

  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": free_spindles,
    "storage_size": total_spindles,
    }

694

def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The result is whatever C{GetNodeInfo} of the hypervisor returns and
  therefore depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) ram in MiB
    - memory_total is the total number of ram in MiB
    - hv_version: the hypervisor version, if available

  @type name: string
  @param name: name of the hypervisor
  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function returning the hypervisor object for a name

  """
  hyper = get_hv_fn(name)
  return hyper.GetNodeInfo(hvparams=hvparams)

713

def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function returning the hypervisor object for a name
  @rtype: None or list
  @return: one C{_GetHvInfo} result per entry of C{hv_specs}, in the same
      order; C{None} if C{hv_specs} is C{None}

  """
  if hv_specs is None:
    return None

  return [_GetHvInfo(hvname, hvparams, get_hv_fn)
          for (hvname, hvparams) in hv_specs]

731

def _GetNamedNodeInfo(names, fn):
  """Calls C{fn} for all names in C{names} and returns the results as a list.

  @type names: None or iterable
  @param names: the values to call C{fn} with
  @type fn: function
  @param fn: function taking a single element of C{names}
  @rtype: None or list
  @return: the results of C{fn} in the order of C{names}; C{None} if
      C{names} is C{None}

  """
  if names is None:
    return None

  # An explicit list is built instead of relying on map(), which yields a
  # lazy iterator rather than a list on Python 3
  return [fn(name) for name in names]

743

def GetNodeInfo(storage_units, hv_specs):
  """Collects the boot ID, storage and hypervisor information of the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters) to
    ask for disk space information. In case of lvm-vg, the identifier is
    the VG name. The parameters can contain additional, storage-type-specific
    parameters, for example exclusive storage for lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/list, None/list)
  @return: Tuple containing boot ID, storage information and hypervisor
    information

  """
  def _StorageInfoFor(unit):
    # Every storage unit is a (type, key, parameters) triple
    (storage_type, storage_key, storage_params) = unit
    return _ApplyStorageInfoFunction(storage_type, storage_key,
                                     storage_params)

  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  storage_info = _GetNamedNodeInfo(storage_units, _StorageInfoFor)
  hv_info = _GetHvInfoAll(hv_specs)

  return (bootid, storage_info, hv_info)

767

def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetFileStorageSpaceInfo.

  File storage takes no storage parameters: C{params} is only checked to
  be an empty list and is not passed on, so that it does not leak into
  the filestorage module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
    parameters.

  """
  expected_params = 0
  _CheckStorageParams(params, expected_params)
  return filestorage.GetFileStorageSpaceInfo(path)

# Maps every storage type to the function reporting its free and total space.
# The functions are called as fn(storage_key, *params) by
# _ApplyStorageInfoFunction; None marks storage types for which space
# reporting is not implemented (NotImplementedError is raised for those).
# FIXME: implement storage reporting for all missing storage types.
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
  }

def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Dispatches to the space reporting function of a storage type.

  The function is looked up in C{_STORAGE_TYPE_INFO_FN} and called with the
  storage key followed by all additional arguments.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be reported.
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group name
    of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting. These
    parameters and their semantics vary from storage type to storage type and
    are just propagated in this function.
  @return: the results of the application of the storage space function (see
    _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
    storage type
  @raise NotImplementedError: for storage types who don't support space
    reporting yet

  """
  info_fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if info_fn is None:
    raise NotImplementedError

  return info_fn(storage_key, *args)

822

def _CheckExclusivePvs(pvi_list):
  """Find the PVs which are shared among several LVs.

  @type pvi_list: list of L{objects.LvmPvInfo} objects
  @param pvi_list: information about the PVs

  @rtype: list of tuples (string, list of strings)
  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])

  """
  # A PV is offending as soon as more than one LV resides on it
  return [(pvi.name, pvi.lv_list)
          for pvi in pvi_list
          if len(pvi.lv_list) > 1]

839

def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisors and stores the outcome in C{result}.

  For every hypervisor requested under C{constants.NV_HYPERVISOR} the
  value returned by its C{Verify} method, or an error text if that raised
  a L{errors.HypervisorError}, is stored under the hypervisor's name.
  Nothing is done on nodes which are not vm capable.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable:
    return

  if constants.NV_HYPERVISOR not in what:
    return

  verdicts = {}
  result[constants.NV_HYPERVISOR] = verdicts

  for hv_name in what[constants.NV_HYPERVISOR]:
    hvparams = all_hvparams[hv_name]
    try:
      verdict = get_hv_fn(hv_name).Verify(hvparams=hvparams)
    except errors.HypervisorError as err:
      verdict = "Error while checking hypervisor: %s" % str(err)
    verdicts[hv_name] = verdict

870

def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams and stores the failures in C{result}.

  Every (source, hypervisor name, hvparams) triple requested under
  C{constants.NV_HVPARAMS} is validated; for each triple which is
  rejected with a L{errors.HypervisorError}, the tuple
  (source, hypervisor name, error text) is appended to the result list.
  Nothing is done on nodes which are not vm capable.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable:
    return

  if constants.NV_HVPARAMS not in what:
    return

  failures = []
  result[constants.NV_HVPARAMS] = failures

  for (source, hv_name, hvparms) in what[constants.NV_HVPARAMS]:
    try:
      logging.info("Validating hv %s, %s", hv_name, hvparms)
      get_hv_fn(hv_name).ValidateParameters(hvparms)
    except errors.HypervisorError as err:
      failures.append((source, hv_name, str(err)))

898

def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  Stores the instance list of the hypervisors requested under
  C{constants.NV_INSTANCELIST}, or the error text if retrieving the list
  failed, in C{result}. Nothing is done on nodes which are not vm capable.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST not in what or not vm_capable:
    return

  # GetInstanceList can fail
  try:
    instances = GetInstanceList(what[constants.NV_INSTANCELIST],
                                all_hvparams=all_hvparams)
  except RPCFail as err:
    instances = str(err)

  result[constants.NV_INSTANCELIST] = instances

922

def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  Stores the node information of the hypervisor named under
  C{constants.NV_HVINFO} in C{result}. Nothing is done on nodes which are
  not vm capable.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO not in what or not vm_capable:
    return

  hvname = what[constants.NV_HVINFO]
  hyper = hypervisor.GetHypervisor(hvname)
  node_info = hyper.GetNodeInfo(hvparams=all_hvparams[hvname])
  result[constants.NV_HVINFO] = node_info

943

def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  Also, verify that the client certificate is not self-signed. Self-
  signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  and should be replaced by client certificates signed by the server
  certificate. Hence we output a warning when we encounter a self-signed
  one.

  @type cert_file: string
  @param cert_file: path of the client certificate
  @rtype: tuple
  @return: (error code, message) of the first check which reported a
      problem; otherwise (None, digest of the certificate)

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  # The checks run in this order and the first finding is reported
  for check_fn in (utils.VerifyCertificate, utils.IsCertificateSelfSigned):
    (errcode, msg) = check_fn(cert_file)
    if errcode is not None:
      return (errcode, msg)

  # if everything is fine, we return the digest to be compared to the config
  digest = utils.GetCertificateDigest(cert_filename=cert_file)
  return (None, digest)

971

def _VerifySshSetup(node_status_list, my_name,
                    pub_key_file=pathutils.SSH_PUB_KEYS):
  """Verifies the state of the SSH key files.

  On a potential master candidate the public key file is compared with
  the list of online nodes and with this node's own DSA key; on any other
  node the public key file is expected to be empty. Afterwards the
  'authorized_keys' file of the login user is checked: keys of online
  master candidates must be present, keys of other online nodes must be
  absent, except for this node's own key.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
    couple of flags: (uuid, name, is_master_candidate,
    is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node
  @type pub_key_file: str
  @param pub_key_file: filename of the public key file
  @rtype: list of str
  @return: descriptions of all problems found; empty if there are none

  """
  if node_status_list is None:
    return ["No node list to check against the pub_key_file received."]

  my_status_list = [(my_uuid, name, mc, pot_mc, online) for
                    (my_uuid, name, mc, pot_mc, online)
                    in node_status_list if name == my_name]
  if not my_status_list:
    return ["Cannot find node information for node '%s'." % my_name]
  (my_uuid, _, _, potential_master_candidate, _) = my_status_list[0]

  result = []

  if not os.path.exists(pub_key_file):
    result.append("The public key file '%s' does not exist. Consider running"
                  " 'gnt-cluster renew-crypto --new-ssh-keys"
                  " [--no-ssh-key-check]' to fix this." % pub_key_file)
    return result

  # Despite its name this list holds the UUIDs of all nodes
  pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list]
  offline_nodes = set(uuid for (uuid, _, _, _, online) in node_status_list
                      if not online)
  # Read the very file whose existence was checked above instead of always
  # falling back to the default location
  pub_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)

  # Nodes without an entry in the public key file are skipped by the
  # authorized_keys check below. The set has to be bound on every path:
  # binding it only for potential master candidates makes that check fail
  # with a NameError on all other nodes.
  missing_uuids = set()

  if potential_master_candidate:
    # Check that the set of potential master candidates matches the
    # public key file
    pub_uuids_set = set(pub_keys) - offline_nodes
    pot_mc_uuids_set = set(pot_mc_uuids) - offline_nodes

    unknown_uuids = pub_uuids_set - pot_mc_uuids_set
    if unknown_uuids:
      result.append("The following node UUIDs are listed in the public key"
                    " file on node '%s', but are not potential master"
                    " candidates: %s."
                    % (my_name, ", ".join(unknown_uuids)))
    missing_uuids = pot_mc_uuids_set - pub_uuids_set
    if missing_uuids:
      result.append("The following node UUIDs of potential master candidates"
                    " are missing in the public key file on node %s: %s."
                    % (my_name, ", ".join(missing_uuids)))

    (_, key_files) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
    (_, dsa_pub_key_filename) = key_files[constants.SSHK_DSA]

    my_keys = pub_keys[my_uuid]

    dsa_pub_key = utils.ReadFile(dsa_pub_key_filename)
    if dsa_pub_key.strip() not in my_keys:
      result.append("The dsa key of node %s does not match this node's key"
                    " in the pub key file." % (my_name))
    if len(my_keys) != 1:
      result.append("There is more than one key for node %s in the public key"
                    " file." % my_name)
  else:
    if len(pub_keys) > 0:
      result.append("The public key file of node '%s' is not empty, although"
                    " the node is not a potential master candidate."
                    % my_name)

  # Check that all master candidate keys are in the authorized_keys file
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for (uuid, name, mc, _, online) in node_status_list:
    if not online:
      continue
    if uuid in missing_uuids:
      continue
    if mc:
      for key in pub_keys[uuid]:
        if not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
                        " not in the 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
    else:
      for key in pub_keys[uuid]:
        if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
                        " 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
        if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
                        " in the 'authorized_keys' file of itself."
                        % (my_name, uuid))

  return result

1075

def _VerifySshClutter(node_status_list, my_name):
  """Verifies that the 'authorized_keys' files are not cluttered up.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
    couple of flags: (uuid, name, is_master_candidate,
    is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node
  @rtype: list of str
  @return: a list with at most one message describing all hosts that have
    more than one key in the 'authorized_keys' file

  """
  result = []
  # The same list is handed to the SSH setup check, which accepts None;
  # without node names there is nothing to look up here
  if node_status_list is None:
    return result

  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  # The name is the second field of each status tuple; index access keeps
  # this independent of the number of trailing flags
  node_names = [status[1] for status in node_status_list]
  multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names)
  if multiple_occurrences:
    msg = "There are hosts which have more than one SSH key stored for the" \
          " same user in the 'authorized_keys' file of node %s. This can be" \
          " due to an unsuccessful operation which cluttered up the" \
          " 'authorized_keys' file. We recommend to clean this up manually. " \
          % my_name
    msg += "".join("Entry for '%s' in lines %s. " %
                   (host, utils.CommaJoin(occ))
                   for host, occ in multiple_occurrences.items())
    result.append(msg)

  return result

1104

def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node. Each check is only run if its key is present in C{what};
  checks guarded by C{vm_capable} are additionally skipped if this node is
  listed under the I{NV_NONVMNODES} key.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @type node_groups: a dict of strings
  @param node_groups: node _names_ mapped to their group uuids (it's enough to
      have only those nodes that are in `what["nodelist"]`)
  @type groups_cfg: a dict of dict of strings
  @param groups_cfg: a dictionary mapping group uuids to their configuration
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])

  # These helpers receive the result dict and fill in their own entries
  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    # Paths are localized before fingerprinting and mapped back to virtual
    # paths for the reply
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_CLIENT_CERT in what:
    result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()

  if constants.NV_SSH_SETUP in what:
    result[constants.NV_SSH_SETUP] = \
      _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name)
    # The clutter check reuses the node list shipped under NV_SSH_SETUP
    if constants.NV_SSH_CLUTTER in what:
      result[constants.NV_SSH_CLUTTER] = \
        _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name)

  if constants.NV_NODELIST in what:
    (nodes, bynode, mcs) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes; only failures are recorded in val
    val = {}
    for node in nodes:
      params = groups_cfg.get(node_groups.get(node))
      ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
      logging.debug("Ssh port %s (None = default) for node %s",
                    str(ssh_port), node)

      # We only test if master candidates can communicate to other nodes.
      # We cannot test if normal nodes cannot communicate with other nodes,
      # because the administrator might have installed additional SSH keys,
      # over which Ganeti has no power.
      if my_name in mcs:
        success, message = _GetSshRunner(cluster_name). \
                           VerifyNodeHostname(node, ssh_port)
        if not success:
          val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    # Look up this node's own addresses; they are used as ping sources
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        # The secondary address is only tested if it differs from the
        # primary one
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of the function)
    master_name, master_ip = what[constants.NV_MASTERIP]
    # On the master itself the ping is sent from the localhost address
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    # Report the scripts that are not executable
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    # One entry per path: None if the helper is usable, otherwise a message
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stating out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    # On failure the error text is returned in place of the volume list
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    # LV information is only requested if the exclusive-PV check needs it
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    # On failure the error text is returned in place of the version string
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    # On failure the error text is returned in place of the minors list
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    # Reply is a (status, payload) pair; payload is the helper on success
    # and the error text on failure
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    # List of problems found with the sysfs and procfs mounts
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesytem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    # Report the bridges that do not exist on this node
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  # This check only runs on the node named in the request
  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  # The storage path checks only add a result entry if the check returned
  # something
  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result

1352

def GetCryptoTokens(token_requests):
  """Collect the requested cryptographic tokens of this node.

  Every request is validated against the known token types and actions.
  For requests of the SSL digest type, the digest of the certificate is
  added to the reply; the options of a request are not evaluated here.

  @type token_requests: list of tuples of (string, string, dict), where the
    first string is in constants.CRYPTO_TYPES, the second in
    constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
    to string.
  @param token_requests: list of requests of cryptographic tokens and actions
    to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token
  @raise errors.ProgrammerError: if a request names an unsupported token
    type or action

  """
  collected = []
  for request in token_requests:
    (kind, operation, _) = request
    if kind not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   kind)
    if operation not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   operation)
    # Only SSL digest requests produce an entry in the reply
    if kind != constants.CRYPTO_TYPE_SSL_DIGEST:
      continue
    digest = utils.GetCertificateDigest()
    collected.append((kind, digest))
  return collected

1386

def EnsureDaemon(daemon_name, run):
  """Starts or stops one of the daemons this function may control.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: C{False} if the daemon is not in the list of allowed daemons,
           otherwise the result of the start or stop operation

  """
  controllable = [constants.KVMD]

  # Refuse to touch any daemon that is not explicitly allowed
  if daemon_name not in controllable:
    return False

  if run:
    return utils.EnsureDaemon(daemon_name)
  return utils.StopDaemon(daemon_name)

1412

def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
  """Adds the node daemon certificate and the cluster name to C{data}.

  @type data: dict
  @param data: dictionary that is updated in place
  @type noded_cert_file: str
  @param noded_cert_file: file the node daemon certificate is read from
  @param ssconf_store: store providing C{GetClusterName}

  """
  cert_file_content = utils.ReadFile(noded_cert_file)
  # Only the second element of the extraction result is used
  (_, certificate) = utils.ExtractX509Certificate(cert_file_content)
  data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = certificate

  data[constants.SSHS_CLUSTER_NAME] = ssconf_store.GetClusterName()

1421

def AddNodeSshKey(node_uuid, node_name,
                  potential_master_candidates,
                  to_authorized_keys=False,
                  to_public_keys=False,
                  get_public_keys=False,
                  pub_key_file=pathutils.SSH_PUB_KEYS,
                  ssconf_store=None,
                  noded_cert_file=pathutils.NODED_CERT_FILE,
                  run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes the public SSH key of a single node across the cluster.

  This is a convenience wrapper which wraps the node into a one-element
  list and delegates all work to L{AddNodeSshKeyBulk}.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_uuid: str
  @param node_uuid: the UUID of the node whose key is added
  @type node_name: str
  @param node_name: the name of the node whose key is added
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file
  @type to_authorized_keys: boolean
  @param to_authorized_keys: whether the key should be added to the
    C{authorized_keys} file of all nodes
  @type to_public_keys: boolean
  @param to_public_keys: whether the keys should be added to the public key file
  @type get_public_keys: boolean
  @param get_public_keys: whether the node should add the clusters' public keys
    to its {ganeti_pub_keys} file
  @return: whatever L{AddNodeSshKeyBulk} returns for the one-element list

  """
  new_node = SshAddNodeInfo(uuid=node_uuid,
                            name=node_name,
                            to_authorized_keys=to_authorized_keys,
                            to_public_keys=to_public_keys,
                            get_public_keys=get_public_keys)
  return AddNodeSshKeyBulk([new_node],
                           potential_master_candidates,
                           pub_key_file=pub_key_file,
                           ssconf_store=ssconf_store,
                           noded_cert_file=noded_cert_file,
                           run_cmd_fn=run_cmd_fn)

# Per-node request record for AddNodeSshKeyBulk: identifies the node by UUID
# and name and carries the three flags selecting what is done for it
SshAddNodeInfo = collections.namedtuple(
  "SshAddNodeInfo",
  ("uuid",
   "name",
   "to_authorized_keys",
   "to_public_keys",
   "get_public_keys"))

def AddNodeSshKeyBulk(node_list,
                      potential_master_candidates,
                      pub_key_file=pathutils.SSH_PUB_KEYS,
                      ssconf_store=None,
                      noded_cert_file=pathutils.NODED_CERT_FILE,
                      run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_list: list of SshAddNodeInfo tuples
  @param node_list: list of tuples containing the necessary node information for
    adding their keys
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file
  @type pub_key_file: str
  @param pub_key_file: filename of the public key file
  @param ssconf_store: store used to look up cluster information; a
    C{ssconf.SimpleStore} is created if none is given
  @type noded_cert_file: str
  @param noded_cert_file: filename of the node daemon certificate
  @param run_cmd_fn: function used to run the SSH update tool on a node
  @rtype: list of (str, str) tuples
  @return: pairs of node name and error message for all potential master
    candidates whose key files could not be updated
  @raise errors.SshUpdateError: if no key is found for a node whose key is to
    be added to the public key file, or if updating a target node fails

  """
  # whether there are any keys to be added or retrieved at all
  to_authorized_keys = any(node_info.to_authorized_keys
                           for node_info in node_list)
  to_public_keys = any(node_info.to_public_keys for node_info in node_list)

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  for node_info in node_list:
    # replacement not necessary for keys that are not supposed to be in the
    # list of public keys
    if not node_info.to_public_keys:
      continue
    # Check and fix sanity of key file
    keys_by_name = ssh.QueryPubKeyFile([node_info.name], key_file=pub_key_file)
    keys_by_uuid = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file)

    if (not keys_by_name or node_info.name not in keys_by_name) \
        and (not keys_by_uuid or node_info.uuid not in keys_by_uuid):
      raise errors.SshUpdateError(
        "No keys found for the new node '%s' (UUID %s) in the list of public"
        " SSH keys, neither for the name or the UUID" %
        (node_info.name, node_info.uuid))
    elif keys_by_name and node_info.name in keys_by_name:
      # Replace the name by UUID in the file as the name should only be used
      # temporarily
      ssh.ReplaceNameByUuid(node_info.uuid, node_info.name,
                            error_fn=errors.SshUpdateError,
                            key_file=pub_key_file)

  # Retrieve updated map of UUIDs to keys
  keys_by_uuid = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list], key_file=pub_key_file)

  # Update the master node's key files
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for node_info in node_list:
    if node_info.to_authorized_keys:
      ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_info.uuid])

  base_data = {}
  _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
  cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

  ssh_port_map = ssconf_store.GetSshPortMap()

  # Update the target nodes themselves
  for node_info in node_list:
    logging.debug("Updating SSH key files of target node '%s'.", node_info.name)
    if node_info.get_public_keys:
      node_data = {}
      _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
      all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
      node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_OVERRIDE, all_keys)

      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
          ssh_port_map.get(node_info.name), node_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError:
        # Clean up the master's public key file if adding key fails; this
        # has to target the same file all other operations work on
        if node_info.to_public_keys:
          ssh.RemovePublicKey(node_info.uuid, key_file=pub_key_file)
        raise

  # Update all nodes except master and the target nodes
  keys_by_uuid_auth = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list
     if node_info.to_authorized_keys],
    key_file=pub_key_file)
  if to_authorized_keys:
    base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
      (constants.SSHS_ADD, keys_by_uuid_auth)

  pot_mc_data = base_data.copy()
  keys_by_uuid_pub = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list
     if node_info.to_public_keys],
    key_file=pub_key_file)
  if to_public_keys:
    pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid_pub)

  all_nodes = ssconf_store.GetNodeList()
  master_node = ssconf_store.GetMasterNode()
  online_nodes = ssconf_store.GetOnlineNodeList()

  # Names of all nodes whose keys are being added, for error reporting
  added_node_names = ", ".join([node_info.name for node_info in node_list])

  node_errors = []
  for node in all_nodes:
    if node == master_node:
      logging.debug("Skipping master node '%s'.", master_node)
      continue
    if node not in online_nodes:
      logging.debug("Skipping offline node '%s'.", node)
      continue
    if node in potential_master_candidates:
      logging.debug("Updating SSH key files of node '%s'.", node)
      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
          ssh_port_map.get(node), pot_mc_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as last_exception:
        error_msg = ("When adding the key of node '%s', updating SSH key"
                     " files of node '%s' failed after %s retries."
                     " Not trying again. Last error was: %s." %
                     (added_node_names, node, constants.SSHS_MAX_RETRIES,
                      last_exception))
        node_errors.append((node, error_msg))
        # We only log the error and don't throw an exception, because
        # one unreachable node shall not abort the entire procedure.
        logging.error(error_msg)

    else:
      # Normal nodes only ever receive authorized_keys updates
      if to_authorized_keys:
        run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                   ssh_port_map.get(node), base_data,
                   debug=False, verbose=False, use_cluster_key=False,
                   ask_key=False, strict_host_check=False)

  return node_errors

1635

def RemoveNodeSshKey(node_uuid, node_name,
                     master_candidate_uuids,
                     potential_master_candidates,
                     master_uuid=None,
                     keys_to_remove=None,
                     from_authorized_keys=False,
                     from_public_keys=False,
                     clear_authorized_keys=False,
                     clear_public_keys=False,
                     pub_key_file=pathutils.SSH_PUB_KEYS,
                     ssconf_store=None,
                     noded_cert_file=pathutils.NODED_CERT_FILE,
                     readd=False,
                     run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Removes the SSH keys of a single node and distributes the change.

  This is a convenience wrapper which wraps the node into a one-element
  list and delegates all work to L{RemoveNodeSshKeyBulk}.

  Note that at least one of the flags C{from_authorized_keys},
  C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
  has to be set to C{True} for the function to perform any action at all.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is removed
  @type node_name: str
  @param node_name: name of the node whose key is removed
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of the current master candidates
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of names of potential master
    candidates
  @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys
    to be removed. This list is supposed to be used only if the keys are not
    in the public keys file. This is for example the case when removing a
    master node's key.
  @type from_authorized_keys: boolean
  @param from_authorized_keys: whether or not the key should be removed
    from the C{authorized_keys} file
  @type from_public_keys: boolean
  @param from_public_keys: whether or not the key should be removed from
    the C{ganeti_pub_keys} file
  @type clear_authorized_keys: boolean
  @param clear_authorized_keys: whether or not the C{authorized_keys} file
    should be cleared on the node whose keys are removed
  @type clear_public_keys: boolean
  @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file
  @type readd: boolean
  @param readd: whether this is called during a readd operation.
  @return: whatever L{RemoveNodeSshKeyBulk} returns for the one-element list

  """
  leaving_node = SshRemoveNodeInfo(
    uuid=node_uuid,
    name=node_name,
    from_authorized_keys=from_authorized_keys,
    from_public_keys=from_public_keys,
    clear_authorized_keys=clear_authorized_keys,
    clear_public_keys=clear_public_keys)
  return RemoveNodeSshKeyBulk([leaving_node],
                              master_candidate_uuids,
                              potential_master_candidates,
                              master_uuid=master_uuid,
                              keys_to_remove=keys_to_remove,
                              pub_key_file=pub_key_file,
                              ssconf_store=ssconf_store,
                              noded_cert_file=noded_cert_file,
                              readd=readd,
                              run_cmd_fn=run_cmd_fn)

# Per-node request record for RemoveNodeSshKeyBulk: identifies the node by
# UUID and name and carries the four flags selecting what is removed/cleared
SshRemoveNodeInfo = collections.namedtuple(
  "SshRemoveNodeInfo",
  ("uuid",
   "name",
   "from_authorized_keys",
   "from_public_keys",
   "clear_authorized_keys",
   "clear_public_keys"))

def RemoveNodeSshKeyBulk(node_list,
                         master_candidate_uuids,
                         potential_master_candidates,
                         master_uuid=None,
                         keys_to_remove=None,
                         pub_key_file=pathutils.SSH_PUB_KEYS,
                         ssconf_store=None,
                         noded_cert_file=pathutils.NODED_CERT_FILE,
                         readd=False,
                         run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Removes the node's SSH keys from the key files and distributes those.

  Note that at least one of the flags C{from_authorized_keys},
  C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
  of at least one node has to be set to C{True} for the function to perform any
  action at all. Not doing so raises an L{errors.SshUpdateError}.

  @type node_list: list of C{SshRemoveNodeInfo}.
  @param node_list: list of information about nodes whose keys are being removed
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of the current master candidates
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of names of potential master
    candidates
  @type master_uuid: str
  @param master_uuid: UUID of the master node; if given, the master's keys
    are never scheduled for removal
  @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys
    to be removed. This list is supposed to be used only if the keys are not
    in the public keys file. This is for example the case when removing a
    master node's key.
  @type pub_key_file: str
  @param pub_key_file: filename of the public key file
  @param ssconf_store: store used to look up cluster information; a
    C{ssconf.SimpleStore} is created if none is given
  @type noded_cert_file: str
  @param noded_cert_file: filename of the node daemon certificate
  @type readd: boolean
  @param readd: whether this is called during a readd operation.
  @param run_cmd_fn: function used to run the SSH update tool on a node
  @rtype: list of (str, str) tuples
  @return: pairs of node name and feedback message for all non-fatal errors
  @raise errors.SshUpdateError: if no removal was requested, if the master's
    keys are to be removed without C{keys_to_remove}, or if a node has no
    keys in the public key file (unless C{readd} is set)
  @raise errors.OpExecError: if the SSH port of a node is unknown

  """
  # Non-disruptive error messages, list of (node, msg) pairs
  result_msgs = []

  # whether there are any keys to be removed or cleared at all
  from_authorized_keys = any(node_info.from_authorized_keys
                             for node_info in node_list)
  from_public_keys = any(node_info.from_public_keys
                         for node_info in node_list)
  clear_authorized_keys = any(node_info.clear_authorized_keys
                              for node_info in node_list)
  clear_public_keys = any(node_info.clear_public_keys
                          for node_info in node_list)

  # Make sure at least one of these flags is true.
  if not (from_authorized_keys or from_public_keys or clear_authorized_keys
          or clear_public_keys):
    raise errors.SshUpdateError("No removal from any key file was requested.")

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  master_node = ssconf_store.GetMasterNode()
  ssh_port_map = ssconf_store.GetSshPortMap()

  all_keys_to_remove = {}
  if from_authorized_keys or from_public_keys:
    for node_info in node_list:
      # Skip nodes that don't actually need any keys to be removed.
      if not (node_info.from_authorized_keys or node_info.from_public_keys):
        continue
      if node_info.name == master_node and not keys_to_remove:
        raise errors.SshUpdateError("Cannot remove the master node's keys.")
      if keys_to_remove:
        keys = keys_to_remove
      else:
        keys = ssh.QueryPubKeyFile([node_info.uuid],
                                   key_file=pub_key_file) or {}
        if node_info.uuid not in keys and not readd:
          raise errors.SshUpdateError("Node '%s' not found in the list of"
                                      " public SSH keys. It seems someone"
                                      " tries to remove a key from outside"
                                      " the cluster!" % node_info.uuid)
        # During an upgrade all nodes have the master key. In this case we
        # should not remove it to avoid accidentally shutting down cluster
        # SSH communication
        if master_uuid and node_info.uuid in keys:
          master_keys = ssh.QueryPubKeyFile([master_uuid],
                                            key_file=pub_key_file) or {}
          # The query result maps UUIDs to lists of keys, so the actual key
          # strings have to be taken from the master's entry
          protected_keys = master_keys.get(master_uuid, [])
          keys[node_info.uuid] = [key for key in keys[node_info.uuid]
                                  if key not in protected_keys]

      all_keys_to_remove.update(keys)

  # Names of all nodes whose keys are being removed, for log/error messages
  all_nodes_to_remove = [node_info.name for node_info in node_list]
  removed_node_names = ", ".join(all_nodes_to_remove)

  if all_keys_to_remove:
    base_data = {}
    _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
    cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

    if from_authorized_keys:
      # UUIDs of nodes that are supposed to be removed from the
      # authorized_keys files.
      nodes_remove_from_authorized_keys = [
        node_info.uuid for node_info in node_list
        if node_info.from_authorized_keys]
      keys_to_remove_from_authorized_keys = dict([
        (uuid, keys) for (uuid, keys) in all_keys_to_remove.items()
        if uuid in nodes_remove_from_authorized_keys])
      base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
        (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys)
      (auth_key_file, _) = \
        ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                            dircheck=False)

      # Update the master's own authorized_keys file
      for uuid in nodes_remove_from_authorized_keys:
        # A node may have no known keys (e.g. during a readd)
        if uuid in keys_to_remove_from_authorized_keys:
          ssh.RemoveAuthorizedKeys(auth_key_file,
                                   keys_to_remove_from_authorized_keys[uuid])

    pot_mc_data = base_data.copy()

    if from_public_keys:
      nodes_remove_from_public_keys = [
        node_info.uuid for node_info in node_list
        if node_info.from_public_keys]
      keys_to_remove_from_public_keys = dict([
        (uuid, keys) for (uuid, keys) in all_keys_to_remove.items()
        if uuid in nodes_remove_from_public_keys])
      pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_REMOVE, keys_to_remove_from_public_keys)

    all_nodes = ssconf_store.GetNodeList()
    online_nodes = ssconf_store.GetOnlineNodeList()
    logging.debug("Removing keys of nodes '%s' from all nodes but itself and"
                  " master.", removed_node_names)
    for node in all_nodes:
      if node == master_node:
        logging.debug("Skipping master node '%s'.", master_node)
        continue
      if node not in online_nodes:
        logging.debug("Skipping offline node '%s'.", node)
        continue
      if node in all_nodes_to_remove:
        logging.debug("Skipping node whose key is removed itself '%s'.", node)
        continue
      ssh_port = ssh_port_map.get(node)
      if not ssh_port:
        raise errors.OpExecError("No SSH port information available for"
                                 " node '%s', map: %s." %
                                 (node, ssh_port_map))
      error_msg_final = ("When removing the key of node '%s', updating the"
                         " SSH key files of node '%s' failed. Last error"
                         " was: %s.")
      if node in potential_master_candidates:
        logging.debug("Updating key setup of potential master candidate node"
                      " %s.", node)
        try:
          utils.RetryByNumberOfTimes(
            constants.SSHS_MAX_RETRIES,
            errors.SshUpdateError,
            run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
            ssh_port, pot_mc_data,
            debug=False, verbose=False, use_cluster_key=False,
            ask_key=False, strict_host_check=False)
        except errors.SshUpdateError as last_exception:
          error_msg = error_msg_final % (
            removed_node_names, node, last_exception)
          result_msgs.append((node, error_msg))
          logging.error(error_msg)

      else:
        # Normal nodes only ever receive authorized_keys updates
        if from_authorized_keys:
          logging.debug("Updating key setup of normal node %s.", node)
          try:
            utils.RetryByNumberOfTimes(
              constants.SSHS_MAX_RETRIES,
              errors.SshUpdateError,
              run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
              ssh_port, base_data,
              debug=False, verbose=False, use_cluster_key=False,
              ask_key=False, strict_host_check=False)
          except errors.SshUpdateError as last_exception:
            error_msg = error_msg_final % (
              removed_node_names, node, last_exception)
            result_msgs.append((node, error_msg))
            logging.error(error_msg)

  # Update the key files of the nodes whose keys are removed
  for node_info in node_list:
    if node_info.clear_authorized_keys or node_info.from_public_keys or \
        node_info.clear_public_keys:
      data = {}
      _InitSshUpdateData(data, noded_cert_file, ssconf_store)
      cluster_name = data[constants.SSHS_CLUSTER_NAME]
      ssh_port = ssh_port_map.get(node_info.name)
      if not ssh_port:
        raise errors.OpExecError("No SSH port information available for"
                                 " node '%s', which is leaving the cluster."
                                 % node_info.name)

      if node_info.clear_authorized_keys:
        # The 'authorized_keys' file is not solely managed by Ganeti. Therefore,
        # we have to specify exactly which keys to clear to leave keys untouched
        # that were not added by Ganeti.
        other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids
                                        if uuid != node_info.uuid]
        candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids,
                                             key_file=pub_key_file)
        data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
          (constants.SSHS_REMOVE, candidate_keys)

      if node_info.clear_public_keys:
        data[constants.SSHS_SSH_PUBLIC_KEYS] = \
          (constants.SSHS_CLEAR, {})
      elif node_info.from_public_keys:
        # Since clearing the public keys subsumes removing just a single key,
        # we only do it if clear_public_keys is 'False'.

        if all_keys_to_remove:
          data[constants.SSHS_SSH_PUBLIC_KEYS] = \
            (constants.SSHS_REMOVE, all_keys_to_remove)

      # If we have no changes to any keyfile of this node, go on with the
      # next one; the remaining nodes and the cleanup of the master's public
      # key file must still be processed
      if not (constants.SSHS_SSH_PUBLIC_KEYS in data or
              constants.SSHS_SSH_AUTHORIZED_KEYS in data):
        continue

      logging.debug("Updating SSH key setup of target node '%s'.",
                    node_info.name)
      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
          ssh_port, data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as last_exception:
        result_msgs.append(
          (node_info.name,
           ("Removing SSH keys from node '%s' failed."
            " This can happen when the node is already unreachable."
            " Error: %s" % (node_info.name, last_exception))))

  # Finally drop the keys from the master's public key file
  if all_keys_to_remove and from_public_keys:
    for node_uuid in nodes_remove_from_public_keys:
      ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)

  return result_msgs

1960

def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
                        pub_key_file=pathutils.SSH_PUB_KEYS,
                        ssconf_store=None,
                        noded_cert_file=pathutils.NODED_CERT_FILE,
                        run_cmd_fn=ssh.RunSshCmdWithStdin,
                        suffix=""):
  """Asks a node to generate a new root SSH key pair.

  The node must already be registered in the public key file; the request
  itself is sent by running the SSH update tool on the node.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is generated
  @type node_name: str
  @param node_name: name of the node whose key is generated
  @type ssh_port_map: dict of str to int
  @param ssh_port_map: mapping of node names to their SSH port
  @type pub_key_file: str
  @param pub_key_file: path of the public key file that is queried for
    the node's UUID
  @param ssconf_store: ssconf store used to initialise the update data; a
    new C{ssconf.SimpleStore} is created if none is given
  @type noded_cert_file: str
  @param noded_cert_file: path of the noded SSL certificate file
  @type run_cmd_fn: function
  @param run_cmd_fn: function used to run the update tool on the node
  @type suffix: str
  @param suffix: suffix passed along with the key generation request
  @raise errors.SshUpdateError: if the node is not listed in the public
    key file

  """
  ssconf_store = ssconf_store or ssconf.SimpleStore()

  registered_keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
  if not (registered_keys and node_uuid in registered_keys):
    raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
                                " be regenerated is not registered in the"
                                " public keys file." % (node_name, node_uuid))

  request = {}
  _InitSshUpdateData(request, noded_cert_file, ssconf_store)
  cluster_name = request[constants.SSHS_CLUSTER_NAME]
  request[constants.SSHS_GENERATE] = {constants.SSHS_SUFFIX: suffix}

  ssh_port = ssh_port_map.get(node_name)
  run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
             ssh_port, request,
             debug=False, verbose=False, use_cluster_key=False,
             ask_key=False, strict_host_check=False)

1996

1997 1998 -def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):

1999 master_node_uuids = [node_uuid for (node_uuid, node_name) 2000 in node_uuid_name_map 2001 if node_name == master_node_name] 2002 if len(master_node_uuids) != 1: 2003 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 2004 " name: '%s', Master UUID: '%s'" % 2005 (master_node_name, master_node_uuids)) 2006 return master_node_uuids[0]

2007

def _GetOldMasterKeys(master_node_uuid, pub_key_file):
  """Reads the master node's current entries from the public key file.

  @type master_node_uuid: str
  @param master_node_uuid: UUID of the master node
  @type pub_key_file: str
  @param pub_key_file: path of the public key file to query
  @return: the (non-empty) result of querying the public key file for the
    master node's UUID
  @raise errors.SshUpdateError: if the query yields nothing

  """
  master_keys = ssh.QueryPubKeyFile([master_node_uuid],
                                    key_file=pub_key_file)
  if master_keys:
    return master_keys

  raise errors.SshUpdateError("No public key of the master node (UUID '%s')"
                              " found, not generating a new key."
                              % master_node_uuid)

2017

def _GetNewMasterKey(root_keyfiles, master_node_uuid):
  """Collects the freshly generated (temporary) public keys of the master.

  For every key type the temporary public key is expected next to the
  regular public key file, named like it but with
  C{constants.SSHS_MASTER_SUFFIX} inserted before the ".pub" extension.

  @type root_keyfiles: dict
  @param root_keyfiles: maps to (private key file, public key file) pairs;
    only the public key file paths are used
  @type master_node_uuid: str
  @param master_node_uuid: UUID of the master node
  @rtype: dict
  @return: dictionary mapping the master UUID to the list of key file
    contents that were found
  @raise errors.SshUpdateError: if no temporary public key exists at all

  """
  found_keys = []
  for (_, pub_file) in root_keyfiles.values():
    (stem, _) = os.path.splitext(os.path.basename(pub_file))
    tmp_name = stem + constants.SSHS_MASTER_SUFFIX + ".pub"
    tmp_path = os.path.join(os.path.dirname(pub_file), tmp_name)
    if not os.path.exists(tmp_path):
      # for some key types, there might not be any keys
      continue
    found_keys.append(utils.ReadFile(tmp_path))

  if found_keys:
    return {master_node_uuid: found_keys}

  raise errors.SshUpdateError("Cannot find any type of temporary SSH key.")

2035

def _ReplaceMasterKeyOnMaster(root_keyfiles):
  """Swaps the temporary master key files in for the regular ones.

  For each key type, the existing public and private key files are backed
  up and removed; afterwards the temporary key pair (file names carrying
  C{constants.SSHS_MASTER_SUFFIX}) is moved to the regular file names, if
  both temporary files exist.

  @type root_keyfiles: dict
  @param root_keyfiles: maps to (private key file, public key file) pairs
  @raise errors.SshUpdateError: if no key pair was moved for any key type

  """
  moved_pairs = 0
  for (priv_file, pub_file) in root_keyfiles.values():
    tmp_dir = os.path.dirname(pub_file)
    tmp_priv_name = os.path.basename(priv_file) + constants.SSHS_MASTER_SUFFIX
    tmp_priv_path = os.path.join(tmp_dir, tmp_priv_name)
    tmp_pub_path = os.path.join(tmp_dir, tmp_priv_name + ".pub")

    # Public key first, then private key: back up and remove what is there
    for current_file in (pub_file, priv_file):
      if os.path.exists(current_file):
        utils.CreateBackup(current_file)
        utils.RemoveFile(current_file)

    if not (os.path.exists(tmp_pub_path) and os.path.exists(tmp_priv_path)):
      # for some key types, there might not be any keys
      continue

    shutil.move(tmp_pub_path, pub_file)
    shutil.move(tmp_priv_path, priv_file)
    moved_pairs += 1

  if moved_pairs == 0:
    raise errors.SshUpdateError("Could not move at least one master SSH key.")

2062

def RenewSshKeys(node_uuids, node_names, master_candidate_uuids,
                 potential_master_candidates,
                 pub_key_file=pathutils.SSH_PUB_KEYS,
                 ssconf_store=None,
                 noded_cert_file=pathutils.NODED_CERT_FILE,
                 run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.

  The keys of all non-master nodes are renewed first; the master node's key
  is renewed last, using a temporary key file so that the old master key
  stays usable until the new one has been distributed.

  @type node_uuids: list of str
  @param node_uuids: list of node UUIDs whose keys should be renewed
  @type node_names: list of str
  @param node_names: list of node names whose keys should be removed. This list
    should match the C{node_uuids} parameter
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of master candidates or
    master node
  @type potential_master_candidates: list of str
  @param potential_master_candidates: names of the nodes that are potential
    master candidates
  @type pub_key_file: str
  @param pub_key_file: file path of the public key file
  @param ssconf_store: ssconf store to read cluster data from; a new
    C{ssconf.SimpleStore} is created if none is given
  @type noded_cert_file: str
  @param noded_cert_file: path of the noded SSL certificate file
  @type run_cmd_fn: function
  @param run_cmd_fn: function to run commands on remote nodes via SSH
  @rtype: list
  @return: the non-fatal node errors reported by the helper functions, in
    the order in which they occurred
  @raises ProgrammerError: if node_uuids and node_names don't match;
    SshUpdateError if a node's key is missing from the public key file,
    if a node's new SSH key could not be fetched from it, if there is
    none or more than one entry in the public key list for the master
    node.

  """
  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()
  cluster_name = ssconf_store.GetClusterName()

  if len(node_uuids) != len(node_names):
    raise errors.ProgrammerError("List of nodes UUIDs and node names"
                                 " does not match in length.")

  (_, root_keyfiles) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  (_, dsa_pub_keyfile) = root_keyfiles[constants.SSHK_DSA]
  old_master_key = utils.ReadFile(dsa_pub_keyfile)

  # Materialize the pairs: they are iterated more than once below, which
  # silently yields nothing the second time if zip() returns an iterator.
  node_uuid_name_map = list(zip(node_uuids, node_names))

  master_node_name = ssconf_store.GetMasterNode()
  master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name)
  ssh_port_map = ssconf_store.GetSshPortMap()
  # List of all node errors that happened, but which did not abort the
  # procedure as a whole. It is important that this is a list to have a
  # somewhat chronological history of events.
  all_node_errors = []

  # process non-master nodes

  # keys to add in bulk at the end
  node_keys_to_add = []

  # list of all nodes
  node_list = []

  # list of keys to be removed before generating new keys
  node_info_to_remove = []

  for node_uuid, node_name in node_uuid_name_map:
    if node_name == master_node_name:
      continue
    master_candidate = node_uuid in master_candidate_uuids
    potential_master_candidate = node_name in potential_master_candidates
    node_list.append((node_uuid, node_name, master_candidate,
                      potential_master_candidate))

    keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
    if not keys_by_uuid:
      raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
                                  " not generating a new key."
                                  % (node_name, node_uuid))

    if master_candidate:
      logging.debug("Fetching old SSH key from node '%s'.", node_name)
      old_pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
                                             node_name, cluster_name,
                                             ssh_port_map[node_name],
                                             False, # ask_key
                                             False) # key_check
      if old_pub_key != old_master_key:
        # If we are already in a multi-key setup (that is past Ganeti 2.12),
        # we can safely remove the old key of the node. Otherwise, we cannot
        # remove that node's key, because it is also the master node's key
        # and that would terminate all communication from the master to the
        # node.
        node_info_to_remove.append(SshRemoveNodeInfo(
          uuid=node_uuid,
          name=node_name,
          from_authorized_keys=master_candidate,
          from_public_keys=False,
          clear_authorized_keys=False,
          clear_public_keys=False))
      else:
        logging.debug("Old key of node '%s' is the same as the current master"
                      " key. Not deleting that key on the node.", node_name)

  logging.debug("Removing old SSH keys of all master candidates.")
  if node_info_to_remove:
    # Forward the caller-supplied files/store/runner, as done for every
    # other helper below; otherwise the removal would act on the defaults.
    node_errors = RemoveNodeSshKeyBulk(
        node_info_to_remove,
        master_candidate_uuids,
        potential_master_candidates,
        master_uuid=master_node_uuid,
        pub_key_file=pub_key_file,
        ssconf_store=ssconf_store,
        noded_cert_file=noded_cert_file,
        run_cmd_fn=run_cmd_fn)
    if node_errors:
      all_node_errors = all_node_errors + node_errors

  for (node_uuid, node_name, master_candidate, potential_master_candidate) \
      in node_list:

    logging.debug("Generating new SSH key for node '%s'.", node_name)
    _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
                        pub_key_file=pub_key_file,
                        ssconf_store=ssconf_store,
                        noded_cert_file=noded_cert_file,
                        run_cmd_fn=run_cmd_fn)

    logging.debug("Fetching newly created SSH key from node '%s'.", node_name)
    try:
      pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
                                         node_name, cluster_name,
                                         ssh_port_map[node_name],
                                         False, # ask_key
                                         False) # key_check
    except Exception:
      # Any failure to fetch the key is reported uniformly, but
      # KeyboardInterrupt/SystemExit must not be swallowed here.
      raise errors.SshUpdateError("Could not fetch key of node %s"
                                  " (UUID %s)" % (node_name, node_uuid))

    if potential_master_candidate:
      ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
      ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file)

    logging.debug("Add ssh key of node '%s'.", node_name)
    node_info = SshAddNodeInfo(name=node_name,
                               uuid=node_uuid,
                               to_authorized_keys=master_candidate,
                               to_public_keys=potential_master_candidate,
                               get_public_keys=True)
    node_keys_to_add.append(node_info)

  node_errors = AddNodeSshKeyBulk(
      node_keys_to_add, potential_master_candidates,
      pub_key_file=pub_key_file, ssconf_store=ssconf_store,
      noded_cert_file=noded_cert_file,
      run_cmd_fn=run_cmd_fn)
  if node_errors:
    all_node_errors = all_node_errors + node_errors

  # Renewing the master node's key

  # Preserve the old keys for now
  old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, pub_key_file)

  # Generate a new master key with a suffix, don't touch the old one for now
  logging.debug("Generate new ssh key of master.")
  _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map,
                      pub_key_file=pub_key_file,
                      ssconf_store=ssconf_store,
                      noded_cert_file=noded_cert_file,
                      run_cmd_fn=run_cmd_fn,
                      suffix=constants.SSHS_MASTER_SUFFIX)
  # Read newly created master key
  new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid)

  # Replace master key in the master nodes' public key file
  ssh.RemovePublicKey(master_node_uuid, key_file=pub_key_file)
  for pub_key in new_master_key_dict[master_node_uuid]:
    ssh.AddPublicKey(master_node_uuid, pub_key, key_file=pub_key_file)

  # Add new master key to all node's public and authorized keys
  logging.debug("Add new master key to all nodes.")
  node_errors = AddNodeSshKey(
      master_node_uuid, master_node_name, potential_master_candidates,
      to_authorized_keys=True, to_public_keys=True,
      get_public_keys=False, pub_key_file=pub_key_file,
      ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
      run_cmd_fn=run_cmd_fn)
  if node_errors:
    all_node_errors = all_node_errors + node_errors

  # Remove the old key file and rename the new key to the non-temporary filename
  _ReplaceMasterKeyOnMaster(root_keyfiles)

  # Remove old key from authorized keys
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  ssh.RemoveAuthorizedKeys(auth_key_file,
                           old_master_keys_by_uuid[master_node_uuid])

  # Remove the old key from all node's authorized keys file
  # NOTE(review): this call relies on RemoveNodeSshKey's defaults for the
  # public key file, ssconf store, certificate file and command runner;
  # confirm whether the caller-supplied values should be forwarded here too.
  logging.debug("Remove the old master key from all nodes.")
  node_errors = RemoveNodeSshKey(
      master_node_uuid, master_node_name, master_candidate_uuids,
      potential_master_candidates,
      keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
      from_public_keys=False, clear_authorized_keys=False,
      clear_public_keys=False)
  if node_errors:
    all_node_errors = all_node_errors + node_errors

  return all_node_errors

2269

def GetBlockDevSizes(devices):
  """Return the size of the given block devices

  Paths that are not below C{/dev/}, cannot be stat()'ed, are not block
  devices, or whose size cannot be queried are left out of the result.

  @type devices: list
  @param devices: list of block device nodes to query
  @rtype: dict
  @return:
    dictionary mapping each accepted device path (key) to its size in MiB
    (value), rounded down to a whole number::

      {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}

  """
  DEV_PREFIX = "/dev/"
  blockdevs = {}

  for devpath in devices:
    if not utils.IsBelowDir(DEV_PREFIX, devpath):
      continue

    try:
      st = os.stat(devpath)
    except EnvironmentError as err:
      logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
      continue

    if stat.S_ISBLK(st.st_mode):
      result = utils.RunCmd(["blockdev", "--getsize64", devpath])
      if result.failed:
        # We don't want to fail, just do not list this device as available
        logging.warning("Cannot get size for block device %s", devpath)
        continue

      # blockdev reports bytes; floor division keeps the size an integer
      size = int(result.stdout) // (1024 * 1024)
      blockdevs[devpath] = size
  return blockdevs

2307

def GetVolumeList(vg_names):
  """Compute list of logical volumes and their size.

  The data is taken from the output of the C{lvs} command. Lines that do
  not match the expected format are logged and skipped, and virtual
  volumes are not reported.

  @type vg_names: list
  @param vg_names: the volume groups whose LVs we should list, or
      empty for all volume groups
  @rtype: dict
  @return:
      dictionary of all volumes (key, in the form "vg_name/lv_name") with
      value being a tuple of their size (in MiB, as printed by C{lvs}),
      inactive and online status::

        {'xenvg/test1': ('20.06', True, True)}

      A failing C{lvs} call is reported through L{_Fail}.

  """
  separator = "|"
  lvs_cmd = ["lvs", "--noheadings", "--units=m", "--nosuffix",
             "--separator=%s" % separator,
             "-ovg_name,lv_name,lv_size,lv_attr"]
  result = utils.RunCmd(lvs_cmd + (vg_names or []))
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  volumes = {}
  for raw_line in result.stdout.splitlines():
    stripped = raw_line.strip()
    parsed = _LVSLINE_REGEX.match(stripped)
    if parsed is None:
      logging.error("Invalid line returned from lvs output: '%s'", stripped)
      continue

    (vg_name, lv_name, lv_size, lv_attr) = parsed.groups()
    is_inactive = lv_attr[4] == "-"
    is_online = lv_attr[5] == "o"
    if lv_attr[0] == "v":
      # we don't want to report such volumes as existing, since they
      # don't really hold data
      continue

    volumes["%s/%s" % (vg_name, lv_name)] = (lv_size, is_inactive, is_online)

  return volumes

2353

def ListVolumeGroups():
  """List the volume groups and their size.

  This simply forwards to C{utils.ListVolumeGroups}.

  @rtype: dict
  @return: dictionary with keys volume name and values the
      size of the volume

  """
  volume_groups = utils.ListVolumeGroups()
  return volume_groups

2364

def NodeVolumes():
  """List all volumes on this node.

  The data is taken from the output of the C{lvs} command; lines with
  fewer than four "|"-separated fields are logged and skipped.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    A failing C{lvs} call is reported through L{_Fail}.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  lvs_cmd = ["lvs", "--noheadings", "--units=m", "--nosuffix",
             "--separator=|",
             "--options=lv_name,lv_size,devices,vg_name"]
  result = utils.RunCmd(lvs_cmd)
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s",
          result.output)

  volumes = []
  for raw_line in result.stdout.splitlines():
    if raw_line.count("|") < 3:
      logging.warning("Strange line in the output from lvs: '%s'", raw_line)
      continue

    fields = [field.strip() for field in raw_line.split("|")]
    (lv_name, lv_size, devices, vg_name) = fields[:4]
    # One entry per physical device; the part starting at "(" is dropped
    for dev_spec in devices.split(","):
      volumes.append({
        "name": lv_name,
        "size": lv_size,
        "dev": dev_spec.split("(")[0],
        "vg": vg_name,
        })

  return volumes

def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @param bridges_list: names of the bridges to check
  @rtype: None
  @return: nothing; if any bridge is missing, the missing ones are
      reported through L{_Fail}

  """
  missing = [bridge for bridge in bridges_list
             if not utils.BridgeExists(bridge)]

  if missing:
    _Fail("Missing bridges %s", utils.CommaJoin(missing))

2426

def GetInstanceListForHypervisor(hname, hvparams=None,
                                 get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances of the given hypervisor.

  @type hname: string
  @param hname: name of the hypervisor
  @type hvparams: dict of strings
  @param hvparams: hypervisor parameters for the given hypervisor
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
    name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  A L{errors.HypervisorError} raised while listing is reported through
  L{_Fail}.

  """
  try:
    return get_hv_fn(hname).ListInstances(hvparams=hvparams)
  except errors.HypervisorError as err:
    _Fail("Error enumerating instances (hypervisor %s): %s",
          hname, err, exc=True)

2451

def GetInstanceList(hypervisor_list, all_hvparams=None,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor types to respective
    cluster-wide hypervisor parameters; if C{None} (the default), every
    hypervisor is queried with C{hvparams=None}
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
    name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
  results = []
  for hname in hypervisor_list:
    # The documented default must not crash on subscripting None
    if all_hvparams is None:
      hvparams = None
    else:
      hvparams = all_hvparams[hname]
    results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
                                                get_hv_fn=get_hv_fn))
  return results

2478

def GetInstanceInfo(instance, hname, hvparams=None):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance
  @type hvparams: dict of strings
  @param hvparams: the instance's hvparams

  @rtype: dict
  @return: an empty dictionary if the hypervisor returns C{None} for the
      instance, otherwise a dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: state of instance (HvInstanceState)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus (int)

  """
  hyper = hypervisor.GetHypervisor(hname)
  iinfo = hyper.GetInstanceInfo(instance, hvparams=hvparams)
  if iinfo is None:
    return {}

  return {
    "memory": iinfo[2],
    "vcpus": iinfo[3],
    "state": iinfo[4],
    "time": iinfo[5],
    }

2509

def GetInstanceMigratable(instance):
  """Checks whether an instance can be migrated.

  An instance that the hypervisor does not list is reported through
  L{_Fail}; a missing disk symlink only produces a warning in the log.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.

  @rtype: None

  """
  iname = instance.name
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  running = hyper.ListInstances(hvparams=instance.hvparams)
  if iname not in running:
    _Fail("Instance %s is not running", iname)

  for (idx, _) in enumerate(instance.disks_info):
    link_name = _GetBlockDevSymlinkPath(iname, idx)
    if os.path.islink(link_name):
      continue
    logging.warning("Instance %s is missing symlink %s for disk %d",
                    iname, link_name, idx)

2533

def GetAllInstancesInfo(hypervisor_list, all_hvparams):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  An instance reported by more than one hypervisor with differing memory
  or vcpus values is reported through L{_Fail}.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: mapping of hypervisor names to hvparams

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus

  """
  static_keys = ("memory", "vcpus")
  output = {}

  for hname in hypervisor_list:
    hvparams = all_hvparams[hname]
    hyper = hypervisor.GetHypervisor(hname)
    iinfo = hyper.GetAllInstancesInfo(hvparams)
    if not iinfo:
      continue

    for (name, _, memory, vcpus, state, times) in iinfo:
      value = dict(memory=memory, vcpus=vcpus, state=state, time=times)
      if name in output:
        # we only check static parameters, like memory and vcpus,
        # and not state and time which can change between the
        # invocations of the different hypervisors
        known = output[name]
        for key in static_keys:
          if value[key] != known[key]:
            _Fail("Instance %s is running twice"
                  " with different parameters", name)
      output[name] = value

  return output

2578

def GetInstanceConsoleInfo(instance_param_dict,
                           get_hv_fn=hypervisor.GetHypervisor):
  """Gather data about the console access of a set of instances of this node.

  This function assumes that the caller already knows which instances are on
  this node, by calling a function such as L{GetAllInstancesInfo} or
  L{GetInstanceList}.

  For every instance, a large amount of configuration data needs to be
  provided to the hypervisor interface in order to receive the console
  information. Whether this could or should be cut down can be discussed.
  The information is provided in a dictionary indexed by instance name,
  allowing any number of instance queries to be done.

  @type instance_param_dict: dict of string to dict
  @param instance_param_dict: mapping of instance name to the parameters
    necessary for console information retrieval; each value is read under
    the keys "instance", "node", "group" (dictionary forms of
    L{objects.Instance}, L{objects.Node} and L{objects.NodeGroup}),
    "hvParams" and "beParams"
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given
    hypervisor name

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
    - instance: instance name
    - kind: console kind
    - message: used with kind == CONS_MESSAGE, indicates console to be
      unavailable, supplies error message
    - host: host to connect to
    - port: port to use
    - user: user for login
    - command: the command, broken into parts as an array
    - display: unknown, potentially unused?

  """
  consoles = {}
  for (inst_name, params) in instance_param_dict.items():
    instance_dict = params["instance"]
    node_dict = params["node"]
    group_dict = params["group"]
    hvparams = params["hvParams"]
    beparams = params["beParams"]

    instance = objects.Instance.FromDict(instance_dict)
    pnode = objects.Node.FromDict(node_dict)
    group = objects.NodeGroup.FromDict(group_dict)

    hyper = get_hv_fn(instance.hypervisor)
    console = hyper.GetInstanceConsole(instance, pnode, group,
                                       hvparams, beparams)
    consoles[inst_name] = console.ToDict()

  return consoles

2631

def _InstanceLogName(kind, os_name, instance, component):
  """Compute the OS log filename for a given instance and operation.

  The instance name and os name are passed in as strings since not all
  operations have these as part of an instance object.

  @type kind: string
  @param kind: the operation type (e.g. add, import, etc.)
  @type os_name: string
  @param os_name: the os name
  @type instance: string
  @param instance: the name of the instance being imported/added/etc.
  @type component: string or None
  @param component: the name of the component of the instance being
      transferred
  @rtype: string
  @return: path of the log file below C{pathutils.LOG_OS_DIR}

  """
  # TODO: Use tempfile.mkstemp to create unique filename
  c_msg = ""
  if component:
    assert "/" not in component
    c_msg = "-%s" % component

  timestamp = utils.TimestampForFilename()
  base = "%s-%s-%s%s-%s.log" % (kind, os_name, instance, c_msg, timestamp)
  return utils.PathJoin(pathutils.LOG_OS_DIR, base)

2659

def InstanceOsAdd(instance, reinstall, debug):
  """Add an OS to an instance.

  Runs the OS create script with its output going to a log file; if the
  script fails, the failure is logged and reported through L{_Fail}
  together with the last lines of that log file.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  inst_os = OSFromDisk(instance.os)

  env = OSEnvironment(instance, inst_os, debug)
  if reinstall:
    env["INSTANCE_REINSTALL"] = "1"

  logfile = _InstanceLogName("add", instance.os, instance.name, None)

  result = utils.RunCmd([inst_os.create_script], env=env,
                        cwd=inst_os.path, output=logfile, reset_env=True)
  if not result.failed:
    return

  logging.error("os create command '%s' returned error: %s, logfile: %s,"
                " output: %s", result.cmd, result.fail_reason, logfile,
                result.output)
  log_tail = [utils.SafeEncode(val)
              for val in utils.TailFile(logfile, lines=20)]
  _Fail("OS create script failed (%s), last lines in the"
        " log file:\n%s", result.fail_reason, "\n".join(log_tail), log=False)

2691

def RunRenameInstance(instance, old_name, debug):
  """Run the OS rename script for an instance.

  The script's output goes to a log file; if the script fails, the failure
  is logged and reported through L{_Fail} together with the last lines of
  that log file.

  @type instance: L{objects.Instance}
  @param instance: Instance to be renamed (already carrying its new name)
  @type old_name: string
  @param old_name: previous instance name
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance, inst_os, debug)
  rename_env["OLD_INSTANCE_NAME"] = old_name

  logfile = _InstanceLogName("rename", instance.os,
                             "%s-%s" % (old_name, instance.name), None)

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)

  if result.failed:
    # This is the rename script; the message must not claim "create"
    logging.error("os rename command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    _Fail("OS rename script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)

2724

def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
  """Returns symlink path for block device.

  @param instance_name: name of the instance owning the disk
  @param idx: the disk index
  @param _dir: directory holding the symlinks; defaults to
      C{pathutils.DISK_LINKS_DIR} when C{None}
  @return: the path built from the directory and a file name consisting of
      the instance name, C{constants.DISK_SEPARATOR} and the disk index

  """
  base_dir = pathutils.DISK_LINKS_DIR if _dir is None else _dir
  link_basename = "%s%s%s" % (instance_name, constants.DISK_SEPARATOR, idx)
  return utils.PathJoin(base_dir, link_basename)

2736

def _SymlinkBlockDev(instance_name, device_path, idx):
  """Create (or refresh) the symlink to an instance's block device.

  This is an auxiliary function run when an instance is started (on the
  primary node) or when an instance is migrated (on the target node).

  @param instance_name: the name of the target instance
  @param device_path: path of the physical block device, on the node
  @param idx: the disk index
  @return: path to the disk's symlink, or None if C{device_path} is empty

  """
  # In case we have only a userspace access URI, device_path is None
  if not device_path:
    return None

  link_name = _GetBlockDevSymlinkPath(instance_name, idx)
  try:
    os.symlink(device_path, link_name)
  except OSError as err:
    if err.errno != errno.EEXIST:
      raise
    # Something already sits at link_name; replace it unless it is already
    # a symlink pointing at the right device
    stale = (not os.path.islink(link_name) or
             os.readlink(link_name) != device_path)
    if stale:
      os.remove(link_name)
      os.symlink(device_path, link_name)

  return link_name

2768

def _RemoveBlockDevLinks(instance_name, disks):
  """Delete the block device symlinks of the given instance.

  One link per entry in C{disks} is looked up by index; failures to remove
  a link are logged and otherwise ignored.

  """
  for idx, _ in enumerate(disks):
    link_name = _GetBlockDevSymlinkPath(instance_name, idx)
    if not os.path.islink(link_name):
      continue
    try:
      os.remove(link_name)
    except OSError:
      logging.exception("Can't remove symlink '%s'", link_name)

2781

def _CalculateDeviceURI(instance, disk, device):
  """Return the userspace access URI of a device, if it uses one.

  @type instance: L{objects.Instance}
  @param instance: the instance which disk belongs to
  @type disk: L{objects.Disk}
  @param disk: the target disk object
  @type device: L{bdev.BlockDev}
  @param device: the corresponding BlockDevice
  @rtype: string
  @return: the device uri if the disk's access mode is userspace, else None

  """
  access_mode = disk.params.get(constants.LDP_ACCESS,
                                constants.DISK_KERNELSPACE)
  if access_mode != constants.DISK_USERSPACE:
    return None

  # This can raise errors.BlockDeviceError
  return device.GetUserspaceAccessUri(instance.hypervisor)

2803

def _GatherAndLinkBlockDevs(instance):
  """Open and symlink all block devices of an instance.

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should assemble
  @rtype: list
  @return: list of (disk_object, link_name, drive_uri)
  @raise errors.BlockDeviceError: if a device is not set up or its symlink
      cannot be created

  """
  gathered = []
  for idx, disk in enumerate(instance.disks_info):
    device = _RecursiveFindBD(disk)
    if device is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    device.Open()
    try:
      link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
    except OSError as err:
      raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
                                    err.strerror)
    uri = _CalculateDeviceURI(instance, disk, device)

    gathered.append((disk, link_name, uri))

  return gathered

2834

def _IsInstanceUserDown(instance_info):
  """Tell whether the instance info reports a shutdown state.

  @param instance_info: instance information mapping, possibly empty or None
  @return: C{instance_info} itself when it is falsy, C{False} when it has no
      "state" key, otherwise the result of
      L{hv_base.HvInstanceState.IsShutdown} on the state

  """
  if not instance_info:
    # Preserve short-circuit semantics: hand back the falsy value as-is
    return instance_info
  if "state" not in instance_info:
    return False
  return hv_base.HvInstanceState.IsShutdown(instance_info["state"])

2840

def _GetInstanceInfo(instance):
  """Call L{GetInstanceInfo} with the fields of an instance object.

  @type instance: L{objects.Instance}
  @param instance: instance whose name, hypervisor and hvparams are used
  @return: whatever L{GetInstanceInfo} returns

  """
  name = instance.name
  hv_name = instance.hypervisor
  return GetInstanceInfo(name, hv_name, hvparams=instance.hvparams)

2846

def StartInstance(instance, startup_paused, reason, store_reason=True):
  """Start an instance.

  Nothing is done if the instance is reported as present and not in a
  shutdown state.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type startup_paused: bool
  @param startup_paused: pause instance at startup?
  @type reason: list of reasons
  @param reason: the reason trail for this startup
  @type store_reason: boolean
  @param store_reason: whether to store the reason trail on file
  @rtype: None
  @raise RPCFail: on block device or hypervisor errors

  """
  current_info = _GetInstanceInfo(instance)
  already_running = current_info and not _IsInstanceUserDown(current_info)

  if already_running:
    logging.info("Instance '%s' already running, not starting", instance.name)
    return

  try:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hv = hypervisor.GetHypervisor(instance.hypervisor)
    hv.StartInstance(instance, block_devices, startup_paused)
    if store_reason:
      _StoreInstReasonTrail(instance.name, reason)
  except errors.BlockDeviceError as err:
    _Fail("Block device error: %s", err, exc=True)
  except errors.HypervisorError as err:
    # The links were created for a start that did not happen; drop them
    _RemoveBlockDevLinks(instance.name, instance.disks_info)
    _Fail("Hypervisor error: %s", err, exc=True)

2879

def InstanceShutdown(instance, timeout, reason, store_reason=True):
  """Shut an instance down.

  A soft shutdown is retried until C{timeout} expires; after that the
  instance is stopped forcibly. Finally the hypervisor cleanup is run and
  the block device symlinks are removed.

  @note: this functions uses polling with a hardcoded timeout.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type timeout: integer
  @param timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this shutdown
  @type store_reason: boolean
  @param store_reason: whether to store the shutdown reason trail on file
  @rtype: None

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)

  if not _GetInstanceInfo(instance):
    logging.info("Instance '%s' not running, doing nothing", instance.name)
    return

  # Mutable cell shared with the retry callback; records whether a stop
  # request has already been issued once
  state = {"tried_once": False}

  def _TryShutdown():
    try:
      hyper.StopInstance(instance, retry=state["tried_once"], timeout=timeout)
      if store_reason:
        _StoreInstReasonTrail(instance.name, reason)
    except errors.HypervisorError as err:
      # if the instance does no longer exist, consider this success and go to
      # cleanup, otherwise fail without retrying
      if _GetInstanceInfo(instance):
        _Fail("Failed to stop instance '%s': %s", instance.name, err)
      return

    # TODO: Cleanup hypervisor implementations to prevent them from failing
    # silently. We could easily decide if we want to retry or not by using
    # HypervisorSoftError()/HypervisorHardError()
    state["tried_once"] = True
    if _GetInstanceInfo(instance):
      raise utils.RetryAgain()

  try:
    utils.Retry(_TryShutdown, 5, timeout)
  except utils.RetryTimeout:
    # the shutdown did not succeed
    logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError as err:
      # only raise an error if the instance still exists, otherwise
      # the error could simply be "instance ... unknown"!
      if _GetInstanceInfo(instance):
        _Fail("Failed to force stop instance '%s': %s", instance.name, err)

    time.sleep(1)

    if _GetInstanceInfo(instance):
      _Fail("Could not shutdown instance '%s' even by destroy", instance.name)

  try:
    hyper.CleanupInstance(instance.name)
  except errors.HypervisorError as err:
    logging.warning("Failed to execute post-shutdown cleanup step: %s", err)

  _RemoveBlockDevLinks(instance.name, instance.disks_info)

def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
  """Reboot an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object to reboot
  @type reboot_type: str
  @param reboot_type: the type of reboot, one the following
    constants:
      - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
        instance OS, do not recreate the VM
      - L{constants.INSTANCE_REBOOT_HARD}: tear down and
        restart the VM (at the hypervisor level)
      - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
        not accepted here, since that mode is handled differently, in
        cmdlib, and translates into full stop and start of the
        instance (instead of a call_instance_reboot RPC)
  @type shutdown_timeout: integer
  @param shutdown_timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this reboot
  @rtype: None

  """
  # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown'
  # because those functions simply 'return' on error whereas this one
  # raises an exception with '_Fail'
  if not _GetInstanceInfo(instance):
    _Fail("Cannot reboot instance '%s' that is not running", instance.name)

  hv = hypervisor.GetHypervisor(instance.hypervisor)
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hv.RebootInstance(instance)
    except errors.HypervisorError as err:
      _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      # Stop and start without storing the trail, then store it once
      InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
      start_result = StartInstance(instance, False, reason,
                                   store_reason=False)
      _StoreInstReasonTrail(instance.name, reason)
      return start_result
    except errors.HypervisorError as err:
      _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)
  else:
    _Fail("Invalid reboot_type received: '%s'", reboot_type)

2998

def InstanceBalloonMemory(instance, memory):
  """Change the amount of memory of a running instance.

  If the instance is not among the hypervisor's listed instances, this is
  logged and nothing else is done.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type memory: int
  @param memory: new memory amount in MB
  @rtype: None

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  running_instances = hv.ListInstances(hvparams=instance.hvparams)
  if instance.name not in running_instances:
    logging.info("Instance %s is not running, cannot balloon", instance.name)
    return

  try:
    hv.BalloonInstanceMemory(instance, memory)
  except errors.HypervisorError as err:
    _Fail("Failed to balloon instance memory: %s", err, exc=True)

3019

def MigrationInfo(instance):
  """Collect the hypervisor's migration information for an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @return: the value returned by the hypervisor's C{MigrationInfo}
  @raise RPCFail: if the hypervisor reports an error

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    return hv.MigrationInfo(instance)
  except errors.HypervisorError as err:
    _Fail("Failed to fetch migration information: %s", err, exc=True)

3034

def AcceptInstance(instance, info, target):
  """Ask the hypervisor to get ready for an incoming instance.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type target: string
  @param target: target host (usually ip), on this node
  @raise RPCFail: if the hypervisor reports an error

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hv.AcceptInstance(instance, info, target)
  except errors.HypervisorError as err:
    _Fail("Failed to accept instance: %s", err, exc=True)

3052

def FinalizeMigrationDst(instance, info, success):
  """Finish the migration on the node that accepted the instance.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type success: boolean
  @param success: whether the migration was a success or a failure
  @raise RPCFail: if the hypervisor reports an error

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hv.FinalizeMigrationDst(instance, info, success)
  except errors.HypervisorError as err:
    _Fail("Failed to finalize migration on the target node: %s", err, exc=True)

3070

def MigrateInstance(cluster_name, instance, target, live):
  """Hand an instance migration over to the hypervisor.

  @type cluster_name: string
  @param cluster_name: name of the cluster
  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type target: string
  @param target: the target node name
  @type live: boolean
  @param live: whether the migration should be done live or not (the
      interpretation of this parameter is left to the hypervisor)
  @raise RPCFail: if migration fails for some reason

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hv.MigrateInstance(cluster_name, instance, target, live)
  except errors.HypervisorError as err:
    _Fail("Failed to migrate instance: %s", err, exc=True)

3093

def FinalizeMigrationSource(instance, success, live):
  """Finish the migration on the node the instance came from.

  Any exception raised by the hypervisor, not only hypervisor errors, is
  converted into a failure.

  @type instance: L{objects.Instance}
  @param instance: the instance definition of the migrated instance
  @type success: bool
  @param success: whether the migration succeeded or not
  @type live: bool
  @param live: whether the user requested a live migration or not
  @raise RPCFail: If the execution fails for some reason

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hv.FinalizeMigrationSource(instance, success, live)
  except Exception as err:  # pylint: disable=W0703
    _Fail("Failed to finalize the migration on the source node: %s", err,
          exc=True)

3114

def GetMigrationStatus(instance):
  """Query the hypervisor for the status of an ongoing migration.

  @type instance: L{objects.Instance}
  @param instance: the instance that is being migrated
  @rtype: L{objects.MigrationStatus}
  @return: the status of the current migration (one of
           L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
           progress info that can be retrieved from the hypervisor
  @raise RPCFail: If the migration status cannot be retrieved

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    status = hv.GetMigrationStatus(instance)
  except Exception as err:  # pylint: disable=W0703
    _Fail("Failed to get migration status: %s", err, exc=True)
  else:
    return status

3133

def HotplugDevice(instance, action, dev_type, device, extra, seq):
  """Hotplug a device

  Hotplug is currently supported only for KVM Hypervisor.

  @type instance: L{objects.Instance}
  @param instance: the instance to which we hotplug a device
  @type action: string
  @param action: the hotplug action to perform
  @type dev_type: string
  @param dev_type: the device type to hotplug
  @type device: either L{objects.NIC} or L{objects.Disk}
  @param device: the device object to hotplug
  @type extra: tuple
  @param extra: extra info used for disk hotplug (disk link, drive uri)
  @type seq: int
  @param seq: the index of the device from master perspective
  @return: the result of the hypervisor's hot-add/-del/-mod method
  @raise RPCFail: in case instance does not have KVM hypervisor, or the
      action is not one of the handled hotplug actions

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hyper.VerifyHotplugSupport(instance, action, dev_type)
  except errors.HotplugError as err:
    _Fail("Hotplug is not supported: %s", err)

  if action == constants.HOTPLUG_ACTION_ADD:
    fn = hyper.HotAddDevice
  elif action == constants.HOTPLUG_ACTION_REMOVE:
    fn = hyper.HotDelDevice
  elif action == constants.HOTPLUG_ACTION_MODIFY:
    fn = hyper.HotModDevice
  else:
    # Fail explicitly: an assert here would be stripped under -O and would
    # in any case leave "fn" unbound for the call below
    _Fail("Unknown hotplug action '%s'", action)

  return fn(instance, dev_type, device, extra, seq)

3170

def HotplugSupported(instance):
  """Check with the hypervisor whether hotplug is generally supported.

  @type instance: L{objects.Instance}
  @param instance: the instance to check
  @raise RPCFail: if the hypervisor raises L{errors.HotplugError}

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hv.HotplugSupported(instance)
  except errors.HotplugError as err:
    _Fail("Hotplug is not supported: %s", err)

3181

def ModifyInstanceMetadata(metadata):
  """Sends instance data to the metadata daemon.

  Uses the Luxi transport layer to communicate with the metadata
  daemon configuration server. It starts the metadata daemon if it is
  not running.
  The daemon must be enabled at configuration time.

  @type metadata: dict
  @param metadata: instance metadata obtained by calling
                   L{objects.Instance.ToDict} on an instance object
  @raise errors.ProgrammerError: if the metadata daemon is disabled
  @raise errors.HypervisorError: if the metadata daemon cannot be started

  """
  if not constants.ENABLE_METAD:
    raise errors.ProgrammerError(
      "The metadata deamon is disabled, yet ModifyInstanceMetadata has been"
      " called")

  daemon_alive = utils.IsDaemonAlive(constants.METAD)
  if not daemon_alive:
    start_cmd = [pathutils.DAEMON_UTIL, "start", constants.METAD]
    if utils.RunCmd(start_cmd).failed:
      raise errors.HypervisorError("Failed to start metadata daemon")

  with contextlib.closing(metad.Client()) as client:
    client.UpdateConfig(metadata)

3207

def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
  """Creates a block device for an instance.

  The children of the disk are assembled first, then the device itself is
  created, optionally assembled and opened, and finally tagged with C{info}.

  @type disk: L{objects.Disk}
  @param disk: the object describing the disk we should create
  @type size: int
  @param size: the size of the physical underlying device, in MiB
  @type owner: str
  @param owner: the name of the instance for which disk is created,
      used for device cache data
  @type on_primary: boolean
  @param on_primary:  indicates if it is the primary node or not
  @type info: string
  @param info: string that will be sent to the physical device
      creation, used for example to set (LVM) tags on LVs
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active

  @return: the new unique_id of the device (this can sometime be
      computed only after creation), or None. On secondary nodes,
      it's not required to return anything.

  """
  # TODO: remove the obsolete "size" argument
  # pylint: disable=W0613
  child_devs = []
  for child in (disk.children or []):
    try:
      child_dev = _RecursiveAssembleBD(child, owner, on_primary)
    except errors.BlockDeviceError as err:
      _Fail("Can't assemble device %s: %s", child, err)
    if on_primary or disk.AssembleOnSecondary():
      # we need the children open in case the device itself has to
      # be assembled
      try:
        # pylint: disable=E1103
        child_dev.Open()
      except errors.BlockDeviceError as err:
        _Fail("Can't make child '%s' read-write: %s", child, err)
    child_devs.append(child_dev)

  try:
    new_dev = bdev.Create(disk, child_devs, excl_stor)
  except errors.BlockDeviceError as err:
    _Fail("Can't create block device: %s", err)

  if on_primary or disk.AssembleOnSecondary():
    try:
      new_dev.Assemble()
    except errors.BlockDeviceError as err:
      _Fail("Can't assemble device after creation, unusual event: %s", err)
    if on_primary or disk.OpenOnSecondary():
      try:
        new_dev.Open(force=True)
      except errors.BlockDeviceError as err:
        _Fail("Can't make device r/w after creation, unusual event: %s", err)
    DevCacheManager.UpdateCache(new_dev.dev_path, owner,
                                on_primary, disk.iv_name)

  new_dev.SetInfo(info)

  return new_dev.unique_id

3272

def _DumpDevice(source_path, target_path, offset, size, truncate):
  """Image or wipe a device from a local file using "dd".

  @type source_path: string
  @param source_path: path of the image or data source (e.g., "/dev/zero")

  @type target_path: string
  @param target_path: path of the device to image/wipe

  @type offset: int
  @param offset: offset in MiB in the output file

  @type size: int
  @param size: maximum size in MiB to write (data source might be smaller)

  @type truncate: bool
  @param truncate: whether the file should be truncated; when false,
      "conv=notrunc" is passed to dd

  @return: None
  @raise RPCFail: in case of failure

  """
  # Internal sizes are always in Mebibytes; if the following "dd" command
  # should use a different block size the offset and size given to this
  # function must be adjusted accordingly before being passed to "dd".
  block_size = constants.DD_BLOCK_SIZE

  dd_args = [
    constants.DD_CMD,
    "if=%s" % source_path,
    "seek=%d" % offset,
    "bs=%s" % block_size,
    "oflag=direct",
    "of=%s" % target_path,
    "count=%d" % size,
    ]
  if not truncate:
    dd_args.append("conv=notrunc")

  result = utils.RunCmd(dd_args)
  if not result.failed:
    return

  _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
        result.fail_reason, result.output)

3313

def _DownloadAndDumpDevice(source_url, target_path, size):
  """This function images a device using a downloaded image file.

  The image is streamed with pycurl and written to the target as it
  arrives. Both the target file and the curl handle are released even when
  the transfer fails.

  @type source_url: string
  @param source_url: URL of image to dump to disk

  @type target_path: string
  @param target_path: path of the device to image

  @type size: int
  @param size: maximum size in MiB to write (data source might be smaller)

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of download or write failures

  """
  class DDParams(object):
    def __init__(self, current_size, total_size):
      self.current_size = current_size
      self.total_size = total_size
      self.image_size_error = False

  with open(target_path, "r+") as target_file:
    ddparams = DDParams(0, 1024 * 1024 * size)

    def dd_write(ddparams, out):
      # The limit is checked before each chunk is written; once it has been
      # reached, flag the error and return -1 so that curl aborts
      if ddparams.current_size < ddparams.total_size:
        ddparams.current_size += len(out)
        target_file.write(out)
      else:
        ddparams.image_size_error = True
        return -1

    curl = pycurl.Curl()
    try:
      curl.setopt(pycurl.VERBOSE, True)
      curl.setopt(pycurl.NOSIGNAL, True)
      curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
      curl.setopt(pycurl.URL, source_url)
      curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))

      try:
        curl.perform()
      except pycurl.error:
        # An abort requested by dd_write surfaces as a pycurl error too
        if ddparams.image_size_error:
          _Fail("Disk image larger than the disk")
        else:
          raise
    finally:
      curl.close()

def BlockdevConvert(src_disk, target_disk):
  """Copies data from source block device to target.

  This function gets the export and import commands from the source and
  target devices respectively, and then concatenates them to a single
  command using a pipe ("|"). Finally, executes the unified command that
  will transfer the data between the devices during the disk template
  conversion operation.

  @type src_disk: L{objects.Disk}
  @param src_disk: the disk object we want to copy from
  @type target_disk: L{objects.Disk}
  @param target_disk: the disk object we want to copy to

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of failure

  """
  src_dev = _RecursiveFindBD(src_disk)
  if src_dev is None:
    _Fail("Cannot copy from device '%s': device not found", src_disk.uuid)

  dest_dev = _RecursiveFindBD(target_disk)
  if dest_dev is None:
    _Fail("Cannot copy to device '%s': device not found", target_disk.uuid)

  export_cmd = src_dev.Export()
  import_cmd = dest_dev.Import()
  quoted_export = utils.ShellQuoteArgs(export_cmd)
  quoted_import = utils.ShellQuoteArgs(import_cmd)
  pipeline = "%s | %s" % (quoted_export, quoted_import)

  result = utils.RunCmd(pipeline)
  if result.failed:
    _Fail("Disk conversion command '%s' exited with error: %s; output: %s",
          result.cmd, result.fail_reason, result.output)

3403

def BlockdevWipe(disk, offset, size):
  """Overwrite a region of a block device with data from /dev/zero.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to wipe
  @type offset: int
  @param offset: The offset in MiB in the file
  @type size: int
  @param size: The size in MiB to write
  @raise RPCFail: if the device is not found or the region is invalid

  """
  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    device = None

  if not device:
    _Fail("Cannot wipe device %s: device not found", disk.iv_name)

  # Validate the requested region against the device before writing
  if offset < 0:
    _Fail("Negative offset")
  if size < 0:
    _Fail("Negative size")
  if offset > device.size:
    _Fail("Wipe offset is bigger than device size")
  if (offset + size) > device.size:
    _Fail("Wipe offset and size are bigger than device size")

  _DumpDevice("/dev/zero", device.dev_path, offset, size, True)

3433

def BlockdevImage(disk, image, size):
  """Images a block device either by dumping a local file or
  downloading a URL.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to image

  @type image: string
  @param image: file path or URL of the disk image to be dumped

  @type size: int
  @param size: The size in MiB to write

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of failure

  """
  if not utils.IsUrl(image) and not os.path.exists(image):
    _Fail("Image '%s' not found", image)

  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    device = None

  if not device:
    _Fail("Cannot image device %s: device not found", disk.iv_name)
  if size < 0:
    _Fail("Negative size")
  if size > device.size:
    _Fail("Image size is bigger than device size")

  if not utils.IsUrl(image):
    _DumpDevice(image, device.dev_path, 0, size, False)
  else:
    _DownloadAndDumpDevice(image, device.dev_path, size)

3472

def BlockdevPauseResumeSync(disks, pause):
  """Pause or resume the sync of the given block devices.

  @type disks: list of L{objects.Disk}
  @param disks: the disks object we want to pause/resume
  @type pause: bool
  @param pause: Whether to pause or resume
  @rtype: list
  @return: one (result, message) tuple per disk; message is None on success

  """
  outcomes = []
  for disk in disks:
    try:
      device = _RecursiveFindBD(disk)
    except errors.BlockDeviceError:
      device = None

    if not device:
      not_found = ("Cannot change sync for device %s:"
                   " device not found" % disk.iv_name)
      outcomes.append((False, not_found))
      continue

    result = device.PauseResumeSync(pause)
    if result:
      outcomes.append((result, None))
      continue

    verb = "Pause" if pause else "Resume"
    outcomes.append((result, "%s for device %s failed" % (verb, disk.iv_name)))

  return outcomes

3507

def BlockdevRemove(disk):
  """Remove a block device and, recursively, its children.

  Errors from the device and from all children are collected and reported
  together at the end.

  @note: This is intended to be called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk object we should remove
  @rtype: None
  @raise RPCFail: if removing the device or any child failed

  """
  errs = []
  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    # probably can't attach
    logging.info("Can't attach to device %s in remove", disk)
    device = None

  if device is not None:
    # Remember the path now; it is needed for the cache after removal
    dev_path = device.dev_path

    def _TryRemove():
      try:
        device.Remove()
      except errors.BlockDeviceError as err:
        return [str(err)]
      return []

    errs.extend(utils.SimpleRetry([], _TryRemove,
                                  constants.DISK_REMOVE_RETRY_INTERVAL,
                                  constants.DISK_REMOVE_RETRY_TIMEOUT))

    if not errs:
      DevCacheManager.RemoveCache(dev_path)

  for child in (disk.children or []):
    try:
      BlockdevRemove(child)
    except RPCFail as err:
      errs.append(str(err))

  if errs:
    _Fail("; ".join(errs))

def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  @note: this function is called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk we try to assemble
  @type owner: str
  @param owner: the name of the instance which owns the disk
  @type as_primary: boolean
  @param as_primary: if we should make the block device
      read/write

  @return: the assembled device, or True when the device is not assembled
      on this node
  @raise errors.BlockDeviceError: in case there is an error
      during the activation of the children or the device
      itself

  """
  child_devs = []
  if disk.children:
    needed = disk.ChildrenNeeded()
    if needed == -1:
      max_missing = 0 # max number of Nones allowed
    else:
      max_missing = len(disk.children) - needed # max number of Nones
    for child_disk in disk.children:
      try:
        child_dev = _RecursiveAssembleBD(child_disk, owner, as_primary)
      except errors.BlockDeviceError as err:
        # Tolerate a failed child only while the quota of missing
        # children has not been used up
        if child_devs.count(None) >= max_missing:
          raise
        child_dev = None
        logging.error("Error in child activation (but continuing): %s",
                      str(err))
      child_devs.append(child_dev)

  if not (as_primary or disk.AssembleOnSecondary()):
    return True

  assembled = bdev.Assemble(disk, child_devs)
  if as_primary or disk.OpenOnSecondary():
    assembled.Open()
  DevCacheManager.UpdateCache(assembled.dev_path, owner,
                              as_primary, disk.iv_name)
  return assembled

3606

def BlockdevAssemble(disk, instance, as_primary, idx):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  @type disk: L{objects.Disk}
  @param disk: the disk to assemble
  @type instance: L{objects.Instance}
  @param instance: the instance owning the disk
  @type as_primary: boolean
  @param as_primary: whether the device is assembled as primary; only then
      the symlink and the access URI are computed
  @param idx: the disk index, used for the symlink name
  @rtype: tuple
  @return: C{(dev_path, link_name, uri)} when a block device was assembled
      (the last two are None unless C{as_primary}), or
      C{(result, result)} when _RecursiveAssembleBD returned another true
      value
  @raise RPCFail: on assembly or symlinking errors

  """
  try:
    assembled = _RecursiveAssembleBD(disk, instance.name, as_primary)
    if isinstance(assembled, BlockDev):
      # pylint: disable=E1103
      dev_path = assembled.dev_path
      link_name = uri = None
      if as_primary:
        link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
        uri = _CalculateDeviceURI(instance, disk, assembled)
    elif assembled:
      return assembled, assembled
    else:
      _Fail("Unexpected result from _RecursiveAssembleBD")
  except errors.BlockDeviceError as err:
    _Fail("Error while assembling disk: %s", err, exc=True)
  except OSError as err:
    _Fail("Error while symlinking disk: %s", err, exc=True)

  return dev_path, link_name, uri

3638

def BlockdevShutdown(disk):
  """Shut down a block device.

  First, if the device is found, it is shut down. Then the children of the
  device are shut down.

  This function is called recursively. Note that we don't cache the
  children or such, as opposed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk we should
      shutdown
  @rtype: None
  @raise RPCFail: if shutting down the device or any child failed

  """
  errs = []
  device = _RecursiveFindBD(disk)
  if device is not None:
    dev_path = device.dev_path
    try:
      device.Shutdown()
      DevCacheManager.RemoveCache(dev_path)
    except errors.BlockDeviceError as err:
      errs.append(str(err))

  for child in (disk.children or []):
    try:
      BlockdevShutdown(child)
    except RPCFail as err:
      errs.append(str(err))

  if errs:
    _Fail("; ".join(errs))

3676

def BlockdevAddchildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk to which we should add children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should add
  @rtype: None
  @raise RPCFail: if the parent or any of the new devices cannot be found

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in add children", parent_cdev)

  new_bdevs = []
  for child_disk in new_cdevs:
    new_bdevs.append(_RecursiveFindBD(child_disk))

  if None in new_bdevs:
    _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
  parent_bdev.AddChildren(new_bdevs)

3695

def BlockdevRemovechildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  Each child is identified by its static device path when it has one, and
  by the path of the device found on the node otherwise.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk from which we should remove children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should remove
  @rtype: None
  @raise RPCFail: if the parent or a child cannot be found, or a static
      path is not a normalized absolute path

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in remove children", parent_cdev)

  child_paths = []
  for disk in new_cdevs:
    static_path = disk.StaticDevPath()
    if static_path is not None:
      if not utils.IsNormAbsPath(static_path):
        _Fail("Strange path returned from StaticDevPath: '%s'", static_path)
      child_paths.append(static_path)
      continue

    found = _RecursiveFindBD(disk)
    if found is None:
      _Fail("Can't find device %s while removing children", disk)
    else:
      child_paths.append(found.dev_path)

  parent_bdev.RemoveChildren(child_paths)

3724

def BlockdevGetmirrorstatus(disks):
  """Return the combined sync status of each of the given disks.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to query
  @rtype: list
  @return: the result of C{CombinedSyncStatus()} for each disk, in the
      same order as C{disks}
  @raise RPCFail: if any of the disks cannot be found

  """
  def _StatusOf(dsk):
    """Look up a single disk and return its combined sync status.

    """
    device = _RecursiveFindBD(dsk)
    if device is None:
      _Fail("Can't find device %s", dsk)
    return device.CombinedSyncStatus()

  return [_StatusOf(dsk) for dsk in disks]

3746

def BlockdevGetmirrorstatusMulti(disks):
  """Return the combined sync status of each disk without failing.

  Unlike L{BlockdevGetmirrorstatus}, a missing device or a
  L{errors.BlockDeviceError} is reported in the per-disk result
  instead of aborting the whole call.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to query
  @rtype: list
  @return: one tuple (success, payload) per disk; on success the payload
      is the value returned by C{CombinedSyncStatus()}, otherwise it is
      an error message

  """
  outcomes = []

  for disk in disks:
    try:
      device = _RecursiveFindBD(disk)
      if device is None:
        entry = (False, "Can't find device %s" % disk)
      else:
        entry = (True, device.CombinedSyncStatus())
    except errors.BlockDeviceError as err:
      logging.exception("Error while getting disk status")
      entry = (False, str(err))

    outcomes.append(entry)

  # Exactly one entry must have been produced per input disk
  assert len(disks) == len(outcomes)

  return outcomes

3777

def _RecursiveFindBD(disk):
  """Look up the device for a disk, resolving its children first.

  The children of the disk (if any) are resolved recursively and the
  results are passed, together with the disk itself, to
  L{bdev.FindDevice}.

  @type disk: L{objects.Disk}
  @param disk: the disk object to look up

  @return: whatever L{bdev.FindDevice} returns; callers in this module
      treat C{None} as "device not found"

  """
  if disk.children:
    found_children = [_RecursiveFindBD(child) for child in disk.children]
  else:
    found_children = []

  return bdev.FindDevice(disk, found_children)

3797

def _OpenRealBD(disk):
  """Find the device backing a disk and open it.

  @type disk: L{objects.Disk}
  @param disk: the disk object whose device should be opened
  @return: the device found for the disk, after C{Open()} was called
      on it
  @raise RPCFail: if no device can be found for the disk

  """
  device = _RecursiveFindBD(disk)

  if device is None:
    _Fail("Block device '%s' is not set up", disk)

  device.Open()
  return device

3813

def BlockdevFind(disk):
  """Return the sync status of a disk's device, if it can be found.

  @type disk: L{objects.Disk}
  @param disk: the disk to look up
  @return: C{None} if no device is found for the disk, otherwise the
      result of the device's C{GetSyncStatus()}
  @raise RPCFail: if looking up the device raises
      L{errors.BlockDeviceError}

  """
  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError as err:
    _Fail("Failed to find device: %s", err, exc=True)

  if device is not None:
    return device.GetSyncStatus()

  return None

3836

def BlockdevGetdimensions(disks):
  """Return the actual dimensions of each of the given disks.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to query
  @rtype: list
  @return: one element per disk, in order: C{None} if the device is not
      found or the lookup raised L{errors.BlockDeviceError}, otherwise
      the value returned by the device's C{GetActualDimensions()}

  """
  dimensions = []

  for disk in disks:
    try:
      device = _RecursiveFindBD(disk)
    except errors.BlockDeviceError:
      # A failed lookup is reported the same way as a missing device
      device = None

    if device is None:
      dims = None
    else:
      dims = device.GetActualDimensions()

    dimensions.append(dims)

  return dimensions

3863

def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write (overwrite) one of the allowed files on this node.

  The file name is localized through L{vcluster.LocalizeVirtualPath}
  and must then be absolute and listed in C{_ALLOWED_UPLOAD_FILES}.
  The payload is passed through C{_Decompress} before being written
  with L{utils.SafeWriteFile}.

  @type file_name: str
  @param file_name: the target file name
  @type data: str
  @param data: the new contents of the file, as accepted by
      C{_Decompress}
  @type mode: int
  @param mode: the mode to give the file (can be None)
  @type uid: string
  @param uid: the name of the user owning the file
  @type gid: string
  @param gid: the name of the group owning the file
  @type atime: float
  @param atime: the atime to set on the file (can be None)
  @type mtime: float
  @param mtime: the mtime to set on the file (can be None)
  @rtype: None
  @raise RPCFail: if the file name is not absolute or not an allowed
      upload target, or if C{uid}/C{gid} are not strings

  """
  target = vcluster.LocalizeVirtualPath(file_name)

  if not os.path.isabs(target):
    _Fail("Filename passed to UploadFile is not absolute: '%s'", target)

  if target not in _ALLOWED_UPLOAD_FILES:
    _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
          target)

  contents = _Decompress(data)

  if not isinstance(uid, basestring) or not isinstance(gid, basestring):
    _Fail("Invalid username/groupname type")

  # Translate the user and group names into numeric IDs
  ents = runtime.GetEnts()
  numeric_uid = ents.LookupUser(uid)
  numeric_gid = ents.LookupGroup(gid)

  utils.SafeWriteFile(target, None,
                      data=contents, mode=mode,
                      uid=numeric_uid, gid=numeric_gid,
                      atime=atime, mtime=mtime)

3909

def RunOob(oob_program, command, node, timeout):
  """Run the out-of-band program for a node and return its stdout.

  @param oob_program: the path to the executable oob_program
  @param command: the command to pass to oob_program
  @param node: the node passed as an argument to the program
  @param timeout: timeout handed to L{utils.RunCmd}

  @return: the standard output of the program
  @raise RPCFail: if the run is reported as failed

  """
  argv = [oob_program, command, node]
  outcome = utils.RunCmd(argv, timeout=timeout)

  if outcome.failed:
    _Fail("'%s' failed with reason '%s'; output: %s", outcome.cmd,
          outcome.fail_reason, outcome.output)

  return outcome.stdout

3930

def _OSOndiskAPIVersion(os_dir):
  """Read the API versions declared by the OS in a directory.

  The file named by C{constants.OS_API_FILE} inside C{os_dir} must be a
  regular file containing one integer per line.

  @type os_dir: str
  @param os_dir: the directory holding the OS definition
  @rtype: tuple
  @return: tuple (status, data); on success C{status} is True and
      C{data} is the list of API versions as integers, otherwise
      C{status} is False and C{data} is an error message

  """
  api_fname = constants.OS_API_FILE
  api_path = utils.PathJoin(os_dir, api_fname)

  try:
    st = os.stat(api_path)
  except EnvironmentError as err:
    msg = ("Required file '%s' not found under path %s: %s" %
           (api_fname, os_dir, utils.ErrnoOrStr(err)))
    return False, msg

  if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
    msg = ("File '%s' in %s is not a regular file" %
           (api_fname, os_dir))
    return False, msg

  try:
    raw_versions = utils.ReadFile(api_path).splitlines()
  except EnvironmentError as err:
    msg = ("Error while reading the API version file at %s: %s" %
           (api_path, utils.ErrnoOrStr(err)))
    return False, msg

  versions = []
  try:
    for line in raw_versions:
      versions.append(int(line.strip()))
  except (TypeError, ValueError) as err:
    msg = ("API version(s) can't be converted to integer: %s" %
           str(err))
    return False, msg

  return True, versions

3970

def DiagnoseOS(top_dirs=None):
  """Check every (potential) OS found under the search directories.

  @type top_dirs: list
  @param top_dirs: the directories to search (defaults to
      L{pathutils.OS_SEARCH_PATH} if not given)
  @rtype: list
  @return: a list of tuples (name, path, status, diagnose, variants,
      parameters, api_versions, trusted), one for every visible entry
      of every search directory, where:
          - name is the (potential) OS name
          - path is the full path to the OS
          - status True/False is the validity of the OS
          - diagnose is the error message for an invalid OS, otherwise
            empty
          - variants is the list of supported OS variants of a valid OS
          - parameters is the list of supported parameters of a valid OS
          - api_versions is the list of API versions of a valid OS
          - trusted is False if a valid OS has an untrusted create
            script, True otherwise

  """
  if top_dirs is None:
    top_dirs = pathutils.OS_SEARCH_PATH

  found = []

  for dir_name in top_dirs:
    if not os.path.isdir(dir_name):
      continue

    try:
      entries = utils.ListVisibleFiles(dir_name)
    except EnvironmentError as err:
      logging.exception("Can't list the OS directory %s: %s", dir_name, err)
      # NOTE(review): this stops scanning the remaining directories as
      # well; confirm that this is intended
      break

    for name in entries:
      os_path = utils.PathJoin(dir_name, name)
      (status, os_inst) = _TryOSFromDisk(name, base_dir=dir_name)

      if not status:
        # On failure the second element is the error message
        diagnose = os_inst
        variants = parameters = api_versions = []
        trusted = True
      else:
        diagnose = ""
        variants = os_inst.supported_variants
        parameters = os_inst.supported_parameters
        api_versions = os_inst.api_versions
        trusted = not os_inst.create_script_untrusted

      found.append((name, os_path, status, diagnose, variants,
                    parameters, api_versions, trusted))

  return found

4020

def _TryOSFromDisk(name, base_dir=None):
  """Try to build an OS object from an on-disk OS definition.

  The OS directory is located, its API versions are read and compared
  with C{constants.OS_API_VERSIONS}, and the files making up the
  definition are checked before the L{objects.OS} instance is created.

  @type name: string
  @param name: the name of the OS directory to look for
  @type base_dir: string
  @keyword base_dir: Base directory containing OS installations.
      Defaults to a search in all the OS_SEARCH_PATH dirs.
  @rtype: tuple
  @return: (True, OS object) if a valid OS is found, otherwise
      (False, error message)

  """
  if base_dir is None:
    search_path = pathutils.OS_SEARCH_PATH
  else:
    search_path = [base_dir]

  os_dir = utils.FindFile(name, search_path, os.path.isdir)
  if os_dir is None:
    return False, "Directory for OS %s not found in search path" % name

  (status, api_versions) = _OSOndiskAPIVersion(os_dir)
  if not status:
    # On failure the second element is the error message; pass it on
    return status, api_versions

  if not constants.OS_API_VERSIONS.intersection(api_versions):
    return False, ("API version mismatch for path '%s': found %s, want %s." %
                   (os_dir, api_versions, constants.OS_API_VERSIONS))

  # Maps file name to a "required" flag at first; while the files are
  # checked below, each value is replaced by the absolute path and
  # optional files that do not exist are dropped
  os_files = dict.fromkeys(constants.OS_SCRIPTS, True)

  # Either create script may be missing, see the check after the loop
  os_files[constants.OS_SCRIPT_CREATE] = False
  os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False

  newest_api = max(api_versions)

  if newest_api >= constants.OS_API_V15:
    os_files[constants.OS_VARIANTS_FILE] = False

  if newest_api >= constants.OS_API_V20:
    os_files[constants.OS_PARAMETERS_FILE] = True
  else:
    del os_files[constants.OS_SCRIPT_VERIFY]

  # Iterate over a snapshot, as entries are deleted inside the loop
  for (filename, required) in list(os_files.items()):
    file_path = utils.PathJoin(os_dir, filename)
    os_files[filename] = file_path

    try:
      st = os.stat(file_path)
    except EnvironmentError as err:
      if not required and err.errno == errno.ENOENT:
        del os_files[filename]
        continue
      return False, ("File '%s' under path '%s' is missing (%s)" %
                     (filename, os_dir, utils.ErrnoOrStr(err)))

    if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
      return False, ("File '%s' under path '%s' is not a regular file" %
                     (filename, os_dir))

    # Scripts must be executable by their owner
    if (filename in constants.OS_SCRIPTS and
        not stat.S_IMODE(st.st_mode) & stat.S_IXUSR):
      return False, ("File '%s' under path '%s' is not executable" %
                     (filename, os_dir))

  if not (constants.OS_SCRIPT_CREATE in os_files or
          constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files):
    return False, ("A create script (trusted or untrusted) under path '%s'"
                   " must exist" % os_dir)

  create_script = os_files.get(constants.OS_SCRIPT_CREATE)
  create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED)

  variants = []
  if constants.OS_VARIANTS_FILE in os_files:
    variants_file = os_files[constants.OS_VARIANTS_FILE]
    try:
      variants = \
        utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file))
    except EnvironmentError as err:
      # A variants file that vanished is tolerated, anything else is not
      if err.errno != errno.ENOENT:
        return False, ("Error while reading the OS variants file at %s: %s" %
                       (variants_file, utils.ErrnoOrStr(err)))

  parameters = []
  if constants.OS_PARAMETERS_FILE in os_files:
    parameters_file = os_files[constants.OS_PARAMETERS_FILE]
    try:
      param_lines = utils.ReadFile(parameters_file).splitlines()
    except EnvironmentError as err:
      return False, ("Error while reading the OS parameters file at %s: %s" %
                     (parameters_file, utils.ErrnoOrStr(err)))
    # Split each line at the first run of whitespace
    parameters = [line.split(None, 1) for line in param_lines]

  verify_script = os_files.get(constants.OS_SCRIPT_VERIFY)

  os_obj = objects.OS(name=name, path=os_dir,
                      create_script=create_script,
                      create_script_untrusted=create_script_untrusted,
                      export_script=os_files[constants.OS_SCRIPT_EXPORT],
                      import_script=os_files[constants.OS_SCRIPT_IMPORT],
                      rename_script=os_files[constants.OS_SCRIPT_RENAME],
                      verify_script=verify_script,
                      supported_variants=variants,
                      supported_parameters=parameters,
                      api_versions=api_versions)
  return True, os_obj

4133

def OSFromDisk(name, base_dir=None):
  """Build an OS object from disk or fail the RPC call.

  This wraps L{_TryOSFromDisk}: where that function returns a
  (status, data) pair, this one returns the OS object directly and
  turns a failure into an L{RPCFail} carrying the error message.

  @type name: string
  @param name: the OS name; it is passed through
      C{objects.OS.GetName} before the lookup
  @type base_dir: string
  @keyword base_dir: Base directory containing OS installations.
      Defaults to a search in all the OS_SEARCH_PATH dirs.
  @rtype: L{objects.OS}
  @return: the OS instance if a valid one is found
  @raise RPCFail: if no valid OS is found

  """
  (valid, os_or_error) = _TryOSFromDisk(objects.OS.GetName(name), base_dir)

  if valid:
    return os_or_error

  _Fail(os_or_error)
  return os_or_error

4160

def OSCoreEnv(os_name, inst_os, os_params, debug=0):
  """Build the basic environment for an OS script.

  @type os_name: str
  @param os_name: full operating system name (including variant)
  @type inst_os: L{objects.OS}
  @param inst_os: operating system for which the environment is being
      built
  @type os_params: dict
  @param os_params: the OS parameters; each one is exported as
      C{OSP_<NAME>}, with the name upper-cased and dashes replaced by
      underscores
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables

  """
  # Use the highest API version supported by both sides
  shared_versions = \
    constants.OS_API_VERSIONS.intersection(inst_os.api_versions)
  api_version = max(shared_versions)

  env = {
    "OS_API_VERSION": "%d" % api_version,
    "OS_NAME": inst_os.name,
    "DEBUG_LEVEL": "%d" % debug,
    }

  # OS variant: taken from the OS name, else the first supported one
  variant = ""
  if api_version >= constants.OS_API_V15 and inst_os.supported_variants:
    variant = (objects.OS.GetVariant(os_name) or
               inst_os.supported_variants[0])
  env["OS_VARIANT"] = variant

  # OS parameters
  for (pname, pvalue) in os_params.items():
    env_name = "OSP_%s" % pname.upper().replace("-", "_")
    env[env_name] = pvalue

  # Provide a default PATH so that programs called by OS scripts (or
  # hooks run from them) work without each script having to set one
  env["PATH"] = constants.HOOKS_PATH

  return env

4205

def OSEnvironment(instance, inst_os, debug=0):
  """Build the full environment for an OS script run on an instance.

  The result of L{OSCoreEnv} is extended with instance attributes and
  with per-disk, per-NIC, backend and hypervisor parameter variables.
  Every disk of the instance is opened via L{_OpenRealBD} in the
  process.

  @type instance: L{objects.Instance}
  @param instance: target instance for the os script run
  @type inst_os: L{objects.OS}
  @param inst_os: operating system for which the environment is being
      built
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise RPCFail: if the block device of a disk is not set up

  """
  # Private OS parameters are merged in on top of the public ones
  os_params = objects.FillDict(instance.osparams,
                               instance.osparams_private.Unprivate())
  env = OSCoreEnv(instance.os, inst_os, os_params, debug=debug)

  for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]:
    env["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))

  env["HYPERVISOR"] = instance.hypervisor
  env["DISK_COUNT"] = "%d" % len(instance.disks_info)
  env["NIC_COUNT"] = "%d" % len(instance.nics)
  env["INSTANCE_SECONDARY_NODES"] = " ".join(instance.secondary_nodes)

  # Disks
  for (idx, disk) in enumerate(instance.disks_info):
    prefix = "DISK_%d_" % idx
    real_disk = _OpenRealBD(disk)
    uri = _CalculateDeviceURI(instance, disk, real_disk)

    env[prefix + "ACCESS"] = disk.mode
    env[prefix + "UUID"] = disk.uuid
    if real_disk.dev_path:
      env[prefix + "PATH"] = real_disk.dev_path
    if uri:
      env[prefix + "URI"] = uri
    if disk.name:
      env[prefix + "NAME"] = disk.name
    if constants.HV_DISK_TYPE in instance.hvparams:
      env[prefix + "FRONTEND_TYPE"] = instance.hvparams[constants.HV_DISK_TYPE]

    if disk.dev_type in constants.DTS_BLOCK:
      env[prefix + "BACKEND_TYPE"] = "block"
    elif disk.dev_type in constants.DTS_FILEBASED:
      env[prefix + "BACKEND_TYPE"] = "file:%s" % disk.logical_id[0]

  # NICs
  for (idx, nic) in enumerate(instance.nics):
    prefix = "NIC_%d_" % idx

    env[prefix + "MAC"] = nic.mac
    env[prefix + "UUID"] = nic.uuid
    if nic.name:
      env[prefix + "NAME"] = nic.name
    if nic.ip:
      env[prefix + "IP"] = nic.ip

    nic_mode = nic.nicparams[constants.NIC_MODE]
    env[prefix + "MODE"] = nic_mode

    nic_link = nic.nicparams[constants.NIC_LINK]
    if nic_mode == constants.NIC_MODE_BRIDGED:
      env[prefix + "BRIDGE"] = nic_link
    if nic_link:
      env[prefix + "LINK"] = nic_link

    if nic.netinfo:
      network = objects.Network.FromDict(nic.netinfo)
      env.update(network.HooksDict(prefix))
    if constants.HV_NIC_TYPE in instance.hvparams:
      env[prefix + "FRONTEND_TYPE"] = instance.hvparams[constants.HV_NIC_TYPE]

  # Backend and hypervisor parameters
  for (kind, params) in [("BE", instance.beparams), ("HV", instance.hvparams)]:
    for (key, value) in params.items():
      env["INSTANCE_%s_%s" % (kind, key)] = str(value)

  return env

4281

def DiagnoseExtStorage(top_dirs=None):
  """Check every (potential) ExtStorage provider under the search dirs.

  @type top_dirs: list
  @param top_dirs: the directories to search (defaults to
      L{pathutils.ES_SEARCH_PATH} if not given)
  @rtype: list
  @return: a list of tuples (name, path, status, diagnose, parameters),
      one for every visible entry of every search directory, where:
          - name is the (potential) ExtStorage provider
          - path is the full path to the ExtStorage provider
          - status True/False is the validity of the provider
          - diagnose is the error message for an invalid provider,
            otherwise empty
          - parameters is the list of supported parameters of a valid
            provider

  """
  if top_dirs is None:
    top_dirs = pathutils.ES_SEARCH_PATH

  found = []

  for dir_name in top_dirs:
    if not os.path.isdir(dir_name):
      continue

    try:
      entries = utils.ListVisibleFiles(dir_name)
    except EnvironmentError as err:
      logging.exception("Can't list the ExtStorage directory %s: %s",
                        dir_name, err)
      # NOTE(review): this stops scanning the remaining directories as
      # well; confirm that this is intended
      break

    for name in entries:
      es_path = utils.PathJoin(dir_name, name)
      (status, es_inst) = extstorage.ExtStorageFromDisk(name,
                                                        base_dir=dir_name)
      if not status:
        # On failure the second element is the error message
        diagnose = es_inst
        parameters = []
      else:
        diagnose = ""
        parameters = es_inst.supported_parameters

      found.append((name, es_path, status, diagnose, parameters))

  return found

4326

def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
  """Grow a block device.

  The device is looked up and its C{Grow} method is called with the
  remaining arguments.

  @type disk: L{objects.Disk}
  @param disk: the disk to be grown
  @type amount: integer
  @param amount: the amount (in mebibytes) to grow with
  @type dryrun: boolean
  @param dryrun: whether to execute the operation in simulation mode
      only, without actually increasing the size
  @param backingstore: whether to execute the operation on backing
      storage only, or on "logical" storage only; e.g. DRBD is logical
      storage, whereas LVM, file, RBD are backing storage
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active
  @rtype: None
  @raise RPCFail: if the device cannot be found or growing it raises
      L{errors.BlockDeviceError}

  """
  device = _RecursiveFindBD(disk)
  if device is None:
    _Fail("Cannot find block device %s", disk)

  try:
    device.Grow(amount, dryrun, backingstore, excl_stor)
  except errors.BlockDeviceError as err:
    _Fail("Failed to grow block device: %s", err, exc=True)

4359

def BlockdevSnapshot(disk, snap_name, snap_size):
  """Create a snapshot of a block device.

  For a DRBD8 disk the function recurses into the first child; for any
  other disk supporting snapshots the device's C{Snapshot} method is
  called.

  @type disk: L{objects.Disk}
  @param disk: the disk to be snapshotted
  @type snap_name: string
  @param snap_name: the name of the snapshot
  @type snap_size: int
  @param snap_size: the size of the snapshot
  @return: the value returned by the device's C{Snapshot} method
  @raise RPCFail: if the disk does not support snapshots, is a DRBD
      device without children, or its device cannot be found

  """
  if not disk.SupportsSnapshots():
    _Fail("Cannot snapshot block device '%s' of type '%s'",
          disk.logical_id, disk.dev_type)

  if disk.dev_type == constants.DT_DRBD8:
    if not disk.children:
      _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
            disk.unique_id)
    # Snapshot the first child instead of the DRBD device itself
    return BlockdevSnapshot(disk.children[0], snap_name, snap_size)

  device = _RecursiveFindBD(disk)
  if device is None:
    _Fail("Cannot find block device %s", disk)

  return device.Snapshot(snap_name=snap_name, snap_size=snap_size)

def BlockdevSetInfo(disk, info):
  """Set the 'info' metadata on a block device.

  Initial information is set at device creation; this function should
  be used for example after renames.

  @type disk: L{objects.Disk}
  @param disk: the disk whose metadata should be updated
  @type info: string
  @param info: new 'info' metadata
  @rtype: None
  @raise RPCFail: if the device cannot be found or setting the
      information raises L{errors.BlockDeviceError}

  """
  device = _RecursiveFindBD(disk)
  if device is None:
    _Fail("Cannot find block device %s", disk)

  try:
    device.SetInfo(info)
  except errors.BlockDeviceError as err:
    _Fail("Failed to set information on block device: %s", err, exc=True)

4421

def FinalizeExport(instance, snap_disks):
  """Write the export configuration and move the export into place.

  The configuration file is written into the C{<instance name>.new}
  directory below L{pathutils.EXPORT_DIR}; afterwards any existing
  export directory of the instance is removed and the new one is moved
  to its final name.

  @type instance: L{objects.Instance}
  @param instance: the instance being exported, used as the source of
      the saved configuration
  @type snap_disks: list of L{objects.Disk}
  @param snap_disks: list of snapshot block devices; false entries are
      skipped

  @rtype: None

  """
  staging_dir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new")
  final_dir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name)
  disk_template = utils.GetDiskTemplate(snap_disks)

  config = objects.SerializableConfigParser()

  # Export section
  exp_sect = constants.INISECT_EXP
  config.add_section(exp_sect)
  config.set(exp_sect, "version", str(constants.EXPORT_VERSION))
  config.set(exp_sect, "timestamp", "%d" % int(time.time()))
  config.set(exp_sect, "source", instance.primary_node)
  config.set(exp_sect, "os", instance.os)
  config.set(exp_sect, "compression", "none")

  # Instance section
  ins_sect = constants.INISECT_INS
  beparams = instance.beparams
  config.add_section(ins_sect)
  config.set(ins_sect, "name", instance.name)
  config.set(ins_sect, "maxmem", "%d" % beparams[constants.BE_MAXMEM])
  config.set(ins_sect, "minmem", "%d" % beparams[constants.BE_MINMEM])
  # "memory" is deprecated, but useful for exporting to old ganeti versions
  config.set(ins_sect, "memory", "%d" % beparams[constants.BE_MAXMEM])
  config.set(ins_sect, "vcpus", "%d" % beparams[constants.BE_VCPUS])
  config.set(ins_sect, "disk_template", disk_template)
  config.set(ins_sect, "hypervisor", instance.hypervisor)
  config.set(ins_sect, "tags", " ".join(instance.GetTags()))

  nic_total = 0
  for (nic_idx, nic) in enumerate(instance.nics):
    nic_total += 1
    nic_prefix = "nic%d_" % nic_idx
    config.set(ins_sect, nic_prefix + "mac", "%s" % nic.mac)
    config.set(ins_sect, nic_prefix + "ip", "%s" % nic.ip)
    config.set(ins_sect, nic_prefix + "network", "%s" % nic.network)
    config.set(ins_sect, nic_prefix + "name", "%s" % nic.name)
    for param in constants.NICS_PARAMETER_TYPES:
      config.set(ins_sect, "%s%s" % (nic_prefix, param),
                 "%s" % nic.nicparams.get(param))
  # TODO: redundant: on load can read nics until it doesn't exist
  config.set(ins_sect, "nic_count", "%d" % nic_total)

  disk_total = 0
  for (disk_idx, disk) in enumerate(snap_disks):
    if not disk:
      # Skipped entries keep their position in the option names
      continue
    disk_total += 1
    disk_prefix = "disk%d_" % disk_idx
    config.set(ins_sect, disk_prefix + "ivname", "%s" % disk.iv_name)
    config.set(ins_sect, disk_prefix + "dump", "%s" % disk.uuid)
    config.set(ins_sect, disk_prefix + "size", "%d" % disk.size)
    config.set(ins_sect, disk_prefix + "name", "%s" % disk.name)

  config.set(ins_sect, "disk_count", "%d" % disk_total)

  # New-style hypervisor/backend parameters

  config.add_section(constants.INISECT_HYP)
  for (name, value) in instance.hvparams.items():
    if name in constants.HVC_GLOBALS:
      continue
    config.set(constants.INISECT_HYP, name, str(value))

  config.add_section(constants.INISECT_BEP)
  for (name, value) in instance.beparams.items():
    config.set(constants.INISECT_BEP, name, str(value))

  config.add_section(constants.INISECT_OSP)
  for (name, value) in instance.osparams.items():
    config.set(constants.INISECT_OSP, name, str(value))

  config.add_section(constants.INISECT_OSP_PRIVATE)
  for (name, value) in instance.osparams_private.items():
    config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get()))

  conf_path = utils.PathJoin(staging_dir, constants.EXPORT_CONF_FILE)
  utils.WriteFile(conf_path, data=config.Dumps())

  # Replace any previous export of this instance with the new one
  shutil.rmtree(final_dir, ignore_errors=True)
  shutil.move(staging_dir, final_dir)

4518

def ExportInfo(dest):
  """Read the configuration information of an export.

  @type dest: str
  @param dest: directory containing the export

  @return: the result of C{Dumps()} on the
      L{objects.SerializableConfigParser} loaded from the export's
      configuration file
  @raise RPCFail: if the export or instance section is missing from the
      configuration

  """
  conf_path = utils.PathJoin(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(conf_path)

  has_required = (config.has_section(constants.INISECT_EXP) and
                  config.has_section(constants.INISECT_INS))
  if not has_required:
    _Fail("Export info file doesn't have the required fields")

  return config.Dumps()

4541

def ListExports():
  """List the exports currently available on this machine.

  @rtype: list
  @return: the sorted names of the visible entries of
      L{pathutils.EXPORT_DIR}
  @raise RPCFail: if the exports directory does not exist

  """
  if not os.path.isdir(pathutils.EXPORT_DIR):
    _Fail("No exports directory")

  names = utils.ListVisibleFiles(pathutils.EXPORT_DIR)
  return sorted(names)

4554

def RemoveExport(export):
  """Delete an export directory from this node.

  @type export: str
  @param export: the name of the export to remove, relative to
      L{pathutils.EXPORT_DIR}
  @rtype: None
  @raise RPCFail: if removing the directory tree fails

  """
  export_dir = utils.PathJoin(pathutils.EXPORT_DIR, export)

  try:
    shutil.rmtree(export_dir)
  except EnvironmentError as err:
    _Fail("Error while removing the export: %s", err, exc=True)

4570

def BlockdevRename(devlist):
  """Rename a list of block devices.

  All renames are attempted; failures are collected and reported
  together at the end.

  @type devlist: list of tuples
  @param devlist: list of tuples of the form (disk, new_unique_id); disk
      is an L{objects.Disk} object describing the current disk, and
      new_unique_id is the name we rename it to
  @rtype: None
  @raise RPCFail: if any device could not be found or renamed

  """
  failures = []

  for (disk, unique_id) in devlist:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      failures.append("Can't find device %s in rename" % str(disk))
      continue

    try:
      path_before = dev.dev_path
      dev.Rename(unique_id)
      path_after = dev.dev_path
      if path_before != path_after:
        DevCacheManager.RemoveCache(path_before)
        # FIXME: we should add the new cache information here, like:
        # DevCacheManager.UpdateCache(new_rpath, owner, ...)
        # but we don't have the owner here - maybe parse from existing
        # cache? for now, we only lose lvm data when we rename, which
        # is less critical than DRBD or MD
    except errors.BlockDeviceError as err:
      failures.append("Can't rename device '%s' to '%s': %s" %
                      (dev, unique_id, err))
      logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)

  if failures:
    _Fail("; ".join(failures))

4609

def _TransformFileStorageDir(fs_dir):
  """Check a file storage directory and return it normalized.

  The path is passed to L{filestorage.CheckFileStoragePath} before it
  is normalized.

  @type fs_dir: str
  @param fs_dir: the path to check

  @return: the normalized path
  @note: presumably L{filestorage.CheckFileStoragePath} raises an
      exception for paths which are not allowed; this function itself
      never returns C{None}

  """
  filestorage.CheckFileStoragePath(fs_dir)
  normalized = os.path.normpath(fs_dir)
  return normalized

4627

def CreateFileStorageDir(file_storage_dir):
  """Create a file storage directory unless it already exists.

  @type file_storage_dir: str
  @param file_storage_dir: directory to create

  @rtype: None
  @raise RPCFail: if the path exists but is not a directory, or if the
      directory cannot be created

  """
  storage_dir = _TransformFileStorageDir(file_storage_dir)

  if not os.path.exists(storage_dir):
    try:
      os.makedirs(storage_dir, 0o750)
    except OSError as err:
      _Fail("Cannot create file storage directory '%s': %s",
            storage_dir, err, exc=True)
  elif not os.path.isdir(storage_dir):
    _Fail("Specified storage dir '%s' is not a directory",
          storage_dir)

4651

def RemoveFileStorageDir(file_storage_dir):
  """Remove an (empty) file storage directory.

  A path that does not exist is silently accepted. An existing path must
  be a directory and must be empty, since it is removed with
  C{os.rmdir}.

  @type file_storage_dir: str
  @param file_storage_dir: the directory we should cleanup
  @raise RPCFail: if the path is not a directory or cannot be removed

  """
  path = _TransformFileStorageDir(file_storage_dir)

  if not os.path.exists(path):
    # Nothing to clean up
    return

  if not os.path.isdir(path):
    _Fail("Specified Storage directory '%s' is not a directory",
          path)

  # os.rmdir refuses non-empty directories, which is exactly what we want:
  # in that case the RPC call has to fail
  try:
    os.rmdir(path)
  except OSError as err:
    _Fail("Cannot remove file storage directory '%s': %s",
          path, err)

4676

def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
  """Rename a file storage directory.

  Both paths are validated and normalized through
  L{_TransformFileStorageDir}. If the target already exists the call only
  succeeds when the source does not exist (anymore).

  @type old_file_storage_dir: str
  @param old_file_storage_dir: the current path
  @type new_file_storage_dir: str
  @param new_file_storage_dir: the name we should rename to
  @raise RPCFail: if both locations exist, if the source is not a
      directory, or if the rename itself fails

  """
  src = _TransformFileStorageDir(old_file_storage_dir)
  dst = _TransformFileStorageDir(new_file_storage_dir)

  if os.path.exists(dst):
    if os.path.exists(src):
      _Fail("Cannot rename '%s' to '%s': both locations exist",
            src, dst)
    return

  if not os.path.isdir(src):
    _Fail("Specified storage dir '%s' is not a directory",
          src)
  else:
    try:
      os.rename(src, dst)
    except OSError as err:
      _Fail("Cannot rename '%s' to '%s': %s",
            src, dst, err)

4706

def _EnsureJobQueueFile(file_name):
  """Verify that a file name lies below the job queue directory.

  @type file_name: str
  @param file_name: the file name we should check
  @rtype: None
  @raise RPCFail: if the file is not below L{pathutils.QUEUE_DIR}

  """
  if utils.IsBelowDir(pathutils.QUEUE_DIR, file_name):
    return

  _Fail("Passed job queue file '%s' does not belong to"
        " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)

4720

def JobQueueUpdate(file_name, content):
  """Write new contents to a file in the job queue directory.

  The (virtual) path is localized, checked with
  L{_EnsureJobQueueFile} and then written through L{utils.WriteFile}
  using the masterd user, the daemons group and the job queue file
  permissions.

  @type file_name: str
  @param file_name: the job file name
  @type content: str
  @param content: the new job contents, in the form accepted by
      L{_Decompress}

  """
  local_name = vcluster.LocalizeVirtualPath(file_name)
  _EnsureJobQueueFile(local_name)

  ents = runtime.GetEnts()
  payload = _Decompress(content)

  # Write and replace the file atomically
  utils.WriteFile(local_name,
                  data=payload,
                  uid=ents.masterd_uid,
                  gid=ents.daemons_gid,
                  mode=constants.JOB_QUEUE_FILES_PERMS)

4744

def JobQueueRename(old, new):
  """Rename a file inside the job queue directory.

  Both names are localized and checked with L{_EnsureJobQueueFile}
  before L{utils.RenameFile} is called; missing target directories are
  created with mode 0750 and owned by the masterd user and the daemons
  group.

  @type old: str
  @param old: the old (actual) file name
  @type new: str
  @param new: the desired file name

  """
  (src, dst) = (vcluster.LocalizeVirtualPath(old),
                vcluster.LocalizeVirtualPath(new))

  for path in (src, dst):
    _EnsureJobQueueFile(path)

  ents = runtime.GetEnts()

  utils.RenameFile(src, dst, mkdir=True, mkdir_mode=0o750,
                   dir_uid=ents.masterd_uid, dir_gid=ents.daemons_gid)

4769

def BlockdevClose(instance_name, disks):
  """Close the given block devices.

  According to the original documentation this switches DRBD devices to
  secondary mode. All devices are looked up first; C{Close} is then
  attempted on every one of them and the errors are collected, so one
  failing device does not prevent closing the others.

  @param instance_name: if the argument is not empty, the symlinks
      of this instance will be removed after all devices were closed
  @type disks: list of L{objects.Disk}
  @param disks: the list of disks to be closed
  @raise RPCFail: if a device cannot be found or at least one device
      could not be closed

  """
  # Same lookup (and same failure message) as the open-coded loop
  devices = _FindDisks(disks)

  errmsgs = []
  for dev in devices:
    try:
      dev.Close()
    except errors.BlockDeviceError as err:
      errmsgs.append(str(err))

  if errmsgs:
    _Fail("Can't close devices: %s", ",".join(errmsgs))
  elif instance_name:
    _RemoveBlockDevLinks(instance_name, disks)

4806

def BlockdevOpen(instance_name, disks, exclusive):
  """Open the given block devices and symlink them for the instance.

  All devices are looked up first. Each one is then opened and linked
  via L{_SymlinkBlockDev} using its position in C{disks} as index;
  block device errors are collected and reported together.

  @param instance_name: name of the instance the symlinks are created for
  @type disks: list of L{objects.Disk}
  @param disks: the list of disks to be opened
  @param exclusive: passed through to the C{Open} call of each device
  @raise RPCFail: if a device cannot be found or at least one device
      could not be opened

  """
  # Same lookup (and same failure message) as the open-coded loop
  devices = _FindDisks(disks)

  errmsgs = []
  for (idx, dev) in enumerate(devices):
    try:
      dev.Open(exclusive=exclusive)
      _SymlinkBlockDev(instance_name, dev.dev_path, idx)
    except errors.BlockDeviceError as err:
      errmsgs.append(str(err))

  if errmsgs:
    _Fail("Can't open devices: %s", ",".join(errmsgs))

4829

def ValidateHVParams(hvname, hvparams):
  """Let a hypervisor validate its parameters.

  @type hvname: string
  @param hvname: the hypervisor name
  @type hvparams: dict
  @param hvparams: the hypervisor parameters to be validated
  @rtype: None
  @raise RPCFail: if a L{errors.HypervisorError} is raised while looking
      up the hypervisor or validating the parameters

  """
  try:
    hypervisor.GetHypervisor(hvname).ValidateParameters(hvparams)
  except errors.HypervisorError as err:
    # The error text is passed on without being logged here
    _Fail(str(err), log=False)

4846

def _CheckOSPList(os_obj, parameters):
  """Make sure every given parameter is supported by the OS.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type parameters: list
  @param parameters: the list of parameters to check
  @raise RPCFail: if at least one parameter is not among the names found
      in C{os_obj.supported_parameters}

  """
  # Only the first element of each entry (the name) is relevant here
  known = [entry[0] for entry in os_obj.supported_parameters]
  unsupported = frozenset(parameters).difference(known)

  if not unsupported:
    return

  _Fail("The following parameters are not supported"
        " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(unsupported)))

4862

def _CheckOSVariant(os_obj, name):
  """Check an OS name against the variants declared by the OS.

  An OS without supported variants must be named without a variant; an
  OS with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check

  @type name: string
  @param name: OS name passed by the user, to check for validity

  @rtype: NoneType
  @return: None
  @raise RPCFail: if OS variant is not valid

  """
  variant = objects.OS.GetVariant(name)

  if not os_obj.supported_variants:
    if not variant:
      return
    _Fail("OS '%s' does not support variants ('%s' passed)" %
          (os_obj.name, variant))

  if not variant:
    _Fail("OS name '%s' must include a variant" % name)

  if variant not in os_obj.supported_variants:
    _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))

4892

def ValidateOS(required, osname, checks, osparams, force_variant):
  """Validate an OS definition and its parameters.

  The OS is loaded from disk by name (without variant). Unless
  C{force_variant} is set the variant part of C{osname} is checked as
  well. For OSes whose highest API version is below
  L{constants.OS_API_V20} no further validation is done; otherwise the
  parameter names are checked (if requested) and the OS verify script is
  run with the requested checks as arguments.

  @type required: boolean
  @param required: whether absence of the OS should translate into
      failure or not
  @type osname: string
  @param osname: the OS to be validated
  @type checks: list
  @param checks: list of the checks to run; must be a subset of
      L{constants.OS_VALIDATE_CALLS}
  @type osparams: dict
  @param osparams: dictionary with OS parameters, some of which may be
      private.
  @type force_variant: boolean
  @param force_variant: if true, the variant check is skipped
  @rtype: boolean
  @return: True if the validation passed, or False if the OS was not
      found and C{required} was false
  @raise RPCFail: on unknown checks, on a missing but required OS, on an
      invalid variant or parameter, or if the verify script fails

  """
  if not constants.OS_VALIDATE_CALLS.issuperset(checks):
    unknown = set(checks).difference(constants.OS_VALIDATE_CALLS)
    _Fail("Unknown checks required for OS %s: %s", osname,
          unknown)

  # On success the payload is the OS object, otherwise an error message
  (found, payload) = _TryOSFromDisk(objects.OS.GetName(osname), None)

  if not found:
    if required:
      _Fail(payload)
    else:
      return False

  os_obj = payload

  if not force_variant:
    _CheckOSVariant(os_obj, osname)

  if max(os_obj.api_versions) < constants.OS_API_V20:
    return True

  if constants.OS_VALIDATE_PARAMETERS in checks:
    _CheckOSPList(os_obj, osparams.keys())

  script_env = OSCoreEnv(osname, os_obj, osparams)
  outcome = utils.RunCmd([os_obj.verify_script] + checks, env=script_env,
                         cwd=os_obj.path, reset_env=True)
  if outcome.failed:
    logging.error("os validate command '%s' returned error: %s output: %s",
                  outcome.cmd, outcome.fail_reason, outcome.output)
    _Fail("OS validation script failed (%s), output: %s",
          outcome.fail_reason, outcome.output, log=False)

  return True

4944

def ExportOS(instance, override_env):
  """Creates a GZIPed tarball with an OS definition and environment.

  The archive is built from a temporary directory containing a symlink
  C{os} to the OS definition (dereferenced by tar) and a file
  C{environment} with one C{NAME=value} line per environment variable
  needed by the OS scripts.

  The temporary directory and a partially written archive are removed
  again if building the archive fails; previously they were leaked on
  every failure path.

  @type instance: L{objects.Instance}
  @param instance: instance for which the OS definition is exported

  @type override_env: dict of string to string
  @param override_env: if supplied, it overrides the environment on a
      key-by-key basis that is part of the archive; C{None} or an empty
      dict leaves the environment untouched

  @rtype: string
  @return: filepath of the archive
  @raise RPCFail: if linking the OS, creating the archive or removing the
      temporary directory fails

  """
  assert instance
  assert instance.os

  temp_dir = tempfile.mkdtemp()
  try:
    inst_os = OSFromDisk(instance.os)

    result = utils.RunCmd(["ln", "-s", inst_os.path,
                           utils.PathJoin(temp_dir, "os")])
    if result.failed:
      _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'",
            inst_os, temp_dir, result.fail_reason, result.output)

    env = OSEnvironment(instance, inst_os)
    if override_env:
      env.update(override_env)

    with open(utils.PathJoin(temp_dir, "environment"), "w") as f:
      for var in env:
        f.write(var + "=" + env[var] + "\n")

    (fd, os_package) = tempfile.mkstemp(suffix=".tgz")
    os.close(fd)

    try:
      result = utils.RunCmd(["tar", "--dereference", "-czv",
                             "-f", os_package,
                             "-C", temp_dir,
                             "."])
      if result.failed:
        _Fail("Failed to create OS archive '%s': %s, output '%s'",
              os_package, result.fail_reason, result.output)
    except Exception:
      # Do not leave an empty or truncated archive behind
      utils.RemoveFile(os_package)
      raise
  except Exception:
    # Best-effort cleanup; the original error is what the caller must see
    shutil.rmtree(temp_dir, ignore_errors=True)
    raise

  result = utils.RunCmd(["rm", "-rf", temp_dir])
  if result.failed:
    _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'",
          inst_os, temp_dir, result.fail_reason, result.output)

  return os_package

4999

def DemoteFromMC():
  """Demote this node from the master candidate role.

  After making sure that this node is not the master (according to
  ssconf) and that the master daemon is not running, the cluster
  configuration file is backed up (if it exists) and then removed.

  @raise RPCFail: if the node looks like the master, if the master
      daemon is running, or if the backup fails with anything but ENOENT

  """
  conf_file = pathutils.CLUSTER_CONF_FILE

  # try to ensure we're not the master by mistake
  (master, myself) = ssconf.GetMasterAndMyself()
  if myself == master:
    _Fail("ssconf status shows I'm the master node, will not demote")

  # A successful "check" means the daemon is alive
  check = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD])
  if not check.failed:
    _Fail("The master daemon is running, will not demote")

  try:
    if os.path.isfile(conf_file):
      utils.CreateBackup(conf_file)
  except EnvironmentError as err:
    # The file vanishing in the meantime is not an error
    if err.errno != errno.ENOENT:
      _Fail("Error while backing up cluster file: %s", err, exc=True)

  utils.RemoveFile(conf_file)

5022

def _GetX509Filenames(cryptodir, name):
  """Compute the paths belonging to a named X509 certificate.

  @param cryptodir: directory containing the per-certificate directories
  @param name: certificate name
  @rtype: tuple; (string, string, string)
  @return: the certificate's directory, the path of the private key file
      and the path of the certificate file

  """
  cert_dir = utils.PathJoin(cryptodir, name)
  key_file = utils.PathJoin(cryptodir, name, _X509_KEY_FILE)
  cert_file = utils.PathJoin(cryptodir, name, _X509_CERT_FILE)

  return (cert_dir, key_file, cert_file)

5031

def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):
  """Generate and store a new self-signed X509 certificate for SSL/TLS.

  The certificate is issued for this node's system name, uses the
  current time as serial number and is stored, together with its key, in
  a freshly created directory below C{cryptodir}. The directory is
  removed again if anything fails after its creation.

  @type validity: int
  @param validity: Validity in seconds; capped at
      L{_MAX_SSL_CERT_VALIDITY}
  @param cryptodir: directory in which the certificate directory is
      created
  @rtype: tuple; (string, string)
  @return: Certificate name and public part

  """
  lifetime = min(validity, _MAX_SSL_CERT_VALIDITY)
  serial_no = int(time.time())

  (key_pem, cert_pem) = \
    utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
                                     lifetime, serial_no)

  dir_prefix = "x509-%s-" % utils.TimestampForFilename()
  cert_dir = tempfile.mkdtemp(dir=cryptodir, prefix=dir_prefix)
  try:
    # The name of the new directory doubles as certificate name
    name = os.path.basename(cert_dir)
    assert len(name) > 5

    (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name)

    for (filename, pem) in ((key_file, key_pem), (cert_file, cert_pem)):
      utils.WriteFile(filename, mode=0o400, data=pem)

    # Never return private key as it shouldn't leave the node
    return (name, cert_pem)
  except Exception:
    shutil.rmtree(cert_dir, ignore_errors=True)
    raise

5064

def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):
  """Remove a X509 certificate together with its directory.

  Key and certificate file are removed first, then the (by then
  hopefully empty) certificate directory.

  @type name: string
  @param name: Certificate name
  @param cryptodir: directory containing the certificate directory
  @raise RPCFail: if the certificate directory cannot be removed

  """
  (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name)

  for filename in (key_file, cert_file):
    utils.RemoveFile(filename)

  try:
    os.rmdir(cert_dir)
  except EnvironmentError as err:
    _Fail("Cannot remove certificate directory '%s': %s",
          cert_dir, err)

5083

def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
  """Build the shell fragments for the requested input/output.

  Depending on the I/O type the data is redirected from/to a file below
  the exports directory, piped through the import/export command of a
  raw disk, or piped through the import/export script of the instance's
  OS.

  @type instance: L{objects.Instance}
  @param instance: The instance object
  @param mode: Import/export mode
  @param ieio: Input/output type
  @param ieargs: Input/output arguments; their number depends on C{ieio}
  @rtype: tuple
  @return: C{(env, prefix, suffix, exp_size)}: environment for the
      command (only set for script I/O), shell text to put before and
      after the transfer command, and the expected size; elements which
      do not apply are C{None}
  @raise RPCFail: on an invalid file name or an unknown I/O type

  """
  assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT)

  importing = (mode == constants.IEM_IMPORT)
  exporting = (mode == constants.IEM_EXPORT)

  env = None
  prefix = None
  suffix = None
  exp_size = None

  if ieio == constants.IEIO_FILE:
    (filename, ) = ieargs

    if not utils.IsNormAbsPath(filename):
      _Fail("Path '%s' is not normalized or absolute", filename)

    # Symlinks are resolved before checking the location
    real_filename = os.path.realpath(filename)
    directory = os.path.dirname(real_filename)

    if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename):
      _Fail("File '%s' is not under exports directory '%s': %s",
            filename, pathutils.EXPORT_DIR, real_filename)

    # Create directory
    utils.Makedirs(directory, mode=0o750)

    quoted_filename = utils.ShellQuote(filename)

    if importing:
      suffix = "> %s" % quoted_filename
    elif exporting:
      suffix = "< %s" % quoted_filename

      # Retrieve file size
      # NOTE(review): nesting of this lookup under the export branch was
      # reconstructed from a flattened listing - confirm against upstream
      try:
        st = os.stat(filename)
      except EnvironmentError as err:
        logging.error("Can't stat(2) %s: %s", filename, err)
      else:
        exp_size = utils.BytesToMebibyte(st.st_size)

  elif ieio == constants.IEIO_RAW_DISK:
    (disk, ) = ieargs
    real_disk = _OpenRealBD(disk)

    if importing:
      suffix = "| %s" % utils.ShellQuoteArgs(real_disk.Import())

    elif exporting:
      prefix = "%s |" % utils.ShellQuoteArgs(real_disk.Export())
      exp_size = disk.size

  elif ieio == constants.IEIO_SCRIPT:
    # The first element (the disk) is not used for script I/O
    (_, disk_index, ) = ieargs

    assert isinstance(disk_index, (int, long))

    inst_os = OSFromDisk(instance.os)
    env = OSEnvironment(instance, inst_os)

    # Names of the per-disk variables in the OS environment
    disk_path_var = "DISK_%d_PATH" % disk_index
    disk_uri_var = "DISK_%d_URI" % disk_index

    if importing:
      if disk_path_var in env:
        env["IMPORT_DEVICE"] = env[disk_path_var]
        env["IMPORT_DISK_PATH"] = env[disk_path_var]

      if disk_uri_var in env:
        env["IMPORT_DISK_URI"] = env[disk_uri_var]

      env["IMPORT_INDEX"] = str(disk_index)
      script = inst_os.import_script

    elif exporting:
      if disk_path_var in env:
        env["EXPORT_DEVICE"] = env[disk_path_var]
        env["EXPORT_DISK_PATH"] = env[disk_path_var]

      if disk_uri_var in env:
        env["EXPORT_DISK_URI"] = env[disk_uri_var]

      env["EXPORT_INDEX"] = str(disk_index)
      script = inst_os.export_script

    # TODO: Pass special environment only to script
    script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script)

    if importing:
      suffix = "| %s" % script_cmd

    elif exporting:
      prefix = "%s |" % script_cmd

    # Let script predict size
    # NOTE(review): assumed to apply to both modes (reconstructed
    # indentation) - confirm against upstream
    exp_size = constants.IE_CUSTOM_SIZE

  else:
    _Fail("Invalid %s I/O mode %r", mode, ieio)

  return (env, prefix, suffix, exp_size)

5194

def _CreateImportExportStatusDir(prefix):
  """Create a fresh status directory for an import/export.

  @param prefix: first part of the directory name; a timestamp and a
      random part are appended
  @rtype: string
  @return: path of the new directory below
      L{pathutils.IMPORT_EXPORT_DIR}

  """
  dir_prefix = "%s-%s-" % (prefix, utils.TimestampForFilename())

  return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR,
                          prefix=dir_prefix)

5203

def StartImportExportDaemon(mode, opts, host, port, instance, component,
                            ieio, ieioargs):
  """Start the import/export daemon for one transfer.

  A status directory is created for the transfer; it holds the status
  file, the PID file and a copy of the CA used by the daemon. If starting
  the daemon fails the status directory is removed again.

  @param mode: Import/output mode
  @type opts: L{objects.ImportExportOptions}
  @param opts: Daemon options
  @type host: string
  @param host: Remote host for export (None for import)
  @type port: int
  @param port: Remote port for export (None for import)
  @type instance: L{objects.Instance}
  @param instance: Instance object
  @type component: string
  @param component: which part of the instance is transferred now,
      e.g. 'disk/0'
  @param ieio: Input/output type
  @param ieioargs: Input/output arguments
  @rtype: string
  @return: name of the import/export, i.e. the base name of its status
      directory
  @raise RPCFail: on inconsistent arguments or missing key/certificate
      files

  """
  # Use Import/Export over socat.
  #
  # Export() gives a command that produces a flat stream.
  # Import() gives a command that reads a flat stream to a disk template.
  if mode == constants.IEM_IMPORT:
    prefix = "import"

    if host is not None or port is not None:
      _Fail("Can not specify host or port on import")

  elif mode == constants.IEM_EXPORT:
    prefix = "export"

    if host is None or port is None:
      _Fail("Host and port must be specified for an export")

  else:
    _Fail("Invalid mode %r", mode)

  # Key name and CA have to be given together or not at all
  if (opts.key_name is None) != (opts.ca_pem is None):
    _Fail("Cluster certificate can only be used for both key and CA")

  (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
    _GetImportExportIoCommand(instance, mode, ieio, ieioargs)

  if opts.key_name is None:
    # Use server.pem
    key_path = cert_path = pathutils.NODED_CERT_FILE
    assert opts.ca_pem is None
  else:
    (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR,
                                                 opts.key_name)
    assert opts.ca_pem is not None

  for required_file in (key_path, cert_path):
    if not os.path.exists(required_file):
      _Fail("File '%s' does not exist" % required_file)

  status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component))
  try:
    status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
    pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
    ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)

    if opts.ca_pem is not None:
      ca = opts.ca_pem
    else:
      # Use server.pem
      ca = utils.ReadFile(pathutils.NODED_CERT_FILE)

    # Write CA file
    utils.WriteFile(ca_file, data=ca, mode=0o400)

    cmd = [
      pathutils.IMPORT_EXPORT_DAEMON,
      status_file, mode,
      "--key=%s" % key_path,
      "--cert=%s" % cert_path,
      "--ca=%s" % ca_file,
      ]

    if host:
      cmd.append("--host=%s" % host)

    if port:
      cmd.append("--port=%s" % port)

    cmd.append("--ipv6" if opts.ipv6 else "--ipv4")

    if opts.compress:
      cmd.append("--compress=%s" % opts.compress)

    if opts.magic:
      cmd.append("--magic=%s" % opts.magic)

    if exp_size is not None:
      cmd.append("--expected-size=%s" % exp_size)

    if cmd_prefix:
      cmd.append("--cmd-prefix=%s" % cmd_prefix)

    if cmd_suffix:
      cmd.append("--cmd-suffix=%s" % cmd_suffix)

    if mode == constants.IEM_EXPORT:
      # Retry connection a few times when connecting to remote peer
      cmd.extend([
        "--connect-retries=%s" % constants.RIE_CONNECT_RETRIES,
        "--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT,
        ])
    elif opts.connect_timeout is not None:
      assert mode == constants.IEM_IMPORT
      # Overall timeout for establishing connection while listening
      cmd.append("--connect-timeout=%s" % opts.connect_timeout)

    logfile = _InstanceLogName(prefix, instance.os, instance.name, component)

    # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has
    # support for receiving a file descriptor for output
    utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file,
                      output=logfile)

    # The import/export name is simply the status directory name
    return os.path.basename(status_dir)

  except Exception:
    shutil.rmtree(status_dir, ignore_errors=True)
    raise

5336

def GetImportExportStatus(names):
  """Read the status of the named import/export daemons.

  @type names: sequence
  @param names: List of names
  @rtype: List of dicts
  @return: for each name, in order, the data loaded from its status
      file, or None if the status file does not exist or is empty

  """
  def _ReadStatus(name):
    """Returns the parsed status for one import/export or None.

    """
    status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name,
                                 _IES_STATUS_FILE)
    try:
      data = utils.ReadFile(status_file)
    except EnvironmentError as err:
      # Only a missing file is tolerated
      if err.errno != errno.ENOENT:
        raise
      data = None

    if data:
      return serializer.LoadJson(data)
    return None

  return [_ReadStatus(name) for name in names]

5368

def AbortImportExport(name):
  """Ask a running import/export daemon to terminate (SIGTERM).

  Nothing is sent if no PID can be read from the daemon's PID file.

  @param name: name of the import/export, i.e. of its status directory

  """
  logging.info("Abort import/export %s", name)

  pid_file = utils.PathJoin(utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name),
                            _IES_PID_FILE)
  pid = utils.ReadLockedPidFile(pid_file)

  if not pid:
    return

  logging.info("Import/export %s is running with PID %s, sending SIGTERM",
               name, pid)
  utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)

5383

def CleanupImportExport(name):
  """Clean up after an import or export.

  A daemon which is still running is killed (without waiting for it);
  afterwards the whole status directory is removed, ignoring errors.

  @param name: name of the import/export, i.e. of its status directory

  """
  logging.info("Finalizing import/export %s", name)

  status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name)
  pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)

  daemon_pid = utils.ReadLockedPidFile(pid_file)
  if daemon_pid:
    logging.info("Import/export %s is still running with PID %s",
                 name, daemon_pid)
    utils.KillProcess(daemon_pid, waitpid=False)

  shutil.rmtree(status_dir, ignore_errors=True)

5404

def _FindDisks(disks):
  """Look up the L{BlockDev} of every given disk.

  @type disks: list of L{objects.Disk}
  @param disks: the disk objects we need to find

  @rtype: list
  @return: the devices found by L{_RecursiveFindBD}, in the order of
      C{disks}
  @raise RPCFail: as soon as one of the disks cannot be found

  """
  found = []

  for disk in disks:
    dev = _RecursiveFindBD(disk)
    if dev is None:
      _Fail("Can't find device %s", disk)
    found.append(dev)

  return found

5424

def DrbdDisconnectNet(disks):
  """Disconnect the network of a list of drbd devices.

  @type disks: list of L{objects.Disk}
  @param disks: the disks whose devices are disconnected
  @raise RPCFail: if a device cannot be found or the first time a device
      fails to disconnect

  """
  # disconnect disks
  for dev in _FindDisks(disks):
    try:
      dev.DisconnectNet()
    except errors.BlockDeviceError as err:
      _Fail("Can't change network configuration to standalone mode: %s",
            err, exc=True)

5439

def DrbdAttachNet(disks, multimaster):
  """Attach the network of a list of drbd devices and wait for them.

  After attaching all devices the function polls their status for up to
  two minutes until all of them are connected (see the inline comments
  for the exact condition), re-attaching devices which fell back to
  standalone mode in the meantime.

  @type disks: list of L{objects.Disk}
  @param disks: the disks whose devices are attached
  @param multimaster: passed to C{AttachNet}; also selects the stricter
      wait condition
  @raise RPCFail: if a device cannot be found, cannot be attached, or if
      the devices do not connect in time

  """
  devices = _FindDisks(disks)

  # reconnect disks, switch to new master configuration and if
  # needed primary mode
  for dev in devices:
    try:
      dev.AttachNet(multimaster)
    except errors.BlockDeviceError as err:
      _Fail("Can't change network configuration: %s", err)

  # wait until the disks are connected; we need to retry the re-attach
  # if the device becomes standalone, as this might happen if the one
  # node disconnects and reconnects in a different mode before the
  # other node reconnects; in this case, one or both of the nodes will
  # decide it has wrong configuration and switch to standalone

  def _Attach():
    """Checks all devices once; raises RetryAgain unless all are ready.

    """
    all_connected = True

    for dev in devices:
      stats = dev.GetProcStatus()

      # Once a device was found not ready the result stays negative and
      # the status of the remaining devices is not evaluated for it
      if all_connected:
        if multimaster:
          # In the multimaster case we have to wait explicitly until
          # the resource is Connected and UpToDate/UpToDate, because
          # we promote *both nodes* to primary directly afterwards.
          # Being in resync is not enough, since there is a race during
          # which we may promote a node with an Outdated disk to primary,
          # effectively tearing down the connection.
          all_connected = (stats.is_connected and
                           stats.is_disk_uptodate and
                           stats.peer_disk_uptodate)
        else:
          all_connected = (stats.is_connected or stats.is_in_resync)

      if stats.is_standalone:
        # peer had different config info and this node became
        # standalone, even though this should not happen with the
        # new staged way of changing disk configs
        try:
          dev.AttachNet(multimaster)
        except errors.BlockDeviceError as err:
          _Fail("Can't change network configuration: %s", err)

    if not all_connected:
      raise utils.RetryAgain()

  try:
    # Start with a delay of 100 miliseconds and go up to 5 seconds
    utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
  except utils.RetryTimeout:
    _Fail("Timeout in disk reconnecting")

def DrbdWaitSync(disks):
  """Wait until the DRBD devices are connected or resyncing.

  Each device is polled for up to 15 seconds until it is either
  connected or in resync.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to wait for
  @rtype: tuple; (boolean, number)
  @return: whether no device is in resync anymore, and the lowest sync
      percentage reported by any device (100 if none reported one)
  @raise RPCFail: if a device cannot be found or is neither connected
      nor in resync after the timeout

  """
  def _IsUsable(stats):
    """Tells whether the status shows a connected or resyncing device.

    """
    return stats.is_connected or stats.is_in_resync

  def _helper(dev):
    """Returns the device status once it is usable, else asks for a retry.

    """
    stats = dev.GetProcStatus()
    if not _IsUsable(stats):
      raise utils.RetryAgain()
    return stats

  devices = _FindDisks(disks)

  alldone = True
  min_resync = 100

  for dev in devices:
    try:
      # poll each second for 15 seconds
      stats = utils.Retry(_helper, 1, 15, args=[dev])
    except utils.RetryTimeout:
      # last check
      stats = dev.GetProcStatus()
      if not _IsUsable(stats):
        _Fail("DRBD device %s is not in sync: stats=%s", dev, stats)

    alldone = alldone and (not stats.is_in_resync)

    percent = stats.sync_percent
    if percent is not None:
      min_resync = min(min_resync, percent)

  return (alldone, min_resync)

def DrbdNeedsActivation(disks):
  """Determine which of the passed disks need activation.

  A disk needs activation if its device cannot be found or if the device
  status reports it as standalone or diskless.

  @type disks: list of L{objects.Disk}
  @param disks: the disks to check
  @rtype: list
  @return: the UUIDs of the disks needing activation, in the order of
      C{disks}

  """
  def _IsFaulty(disk):
    """Checks a single disk.

    """
    dev = _RecursiveFindBD(disk)
    if dev is None:
      return True

    stats = dev.GetProcStatus()
    return bool(stats.is_standalone or stats.is_diskless)

  faulty_disks = [disk for disk in disks if _IsFaulty(disk)]

  return [disk.uuid for disk in faulty_disks]

5548

def GetDrbdUsermodeHelper():
  """Return the DRBD usermode helper currently configured.

  @return: whatever L{drbd.DRBD8.GetUsermodeHelper} returns
  @raise RPCFail: if the helper cannot be determined

  """
  try:
    helper = drbd.DRBD8.GetUsermodeHelper()
  except errors.BlockDeviceError as err:
    _Fail(str(err))
  else:
    return helper

5558

def PowercycleNode(hypervisor_type, hvparams=None):
  """Hard-powercycle the node.

  The powercycle is done by a forked child after a delay of five
  seconds, so that the parent can return to the caller first; for that
  reason failures can't be reported nicely. If forking is not possible
  the powercycle is done by the calling process itself.

  @param hypervisor_type: name of the hypervisor doing the powercycle
  @param hvparams: passed on to the hypervisor's C{PowercycleNode}
  @rtype: string
  @return: in the parent process, a message announcing the reboot

  """
  hv = hypervisor.GetHypervisor(hypervisor_type)

  try:
    child_pid = os.fork()
  except OSError:
    # if we can't fork, we'll pretend that we're in the child process
    child_pid = 0

  if child_pid > 0:
    # Parent: report back, the child does the actual work
    return "Reboot scheduled in 5 seconds"

  # ensure the child is running on ram; this is deliberately best-effort
  try:
    utils.Mlockall()
  except Exception: # pylint: disable=W0703
    pass

  time.sleep(5)
  hv.PowercycleNode(hvparams=hvparams)

5582

def _VerifyRestrictedCmdName(cmd):
  """Check the name of a restricted command.

  The name must not be blank, must not contain any directory part and
  must match L{constants.EXT_PLUGIN_MASK}.

  @type cmd: string
  @param cmd: Command name
  @rtype: tuple; (boolean, string or None)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's C{None}

  """
  error = None

  if not cmd.strip():
    error = "Missing command name"
  elif os.path.basename(cmd) != cmd:
    error = "Invalid command name"
  elif not constants.EXT_PLUGIN_MASK.match(cmd):
    error = "Command name contains forbidden characters"

  if error is not None:
    return (False, error)

  return (True, None)

5604

def _CommonRestrictedCmdCheck(path, owner):
  """Check permissions and ownership of a restricted command path.

  The path must exist, must not have any permission bit set which is not
  in L{_RCMD_MAX_MODE} and must belong to the expected user and group.

  @type path: string
  @param path: Path to check
  @param owner: C{None} (meaning root, i.e. C{(0, 0)}) or tuple containing
    UID and GID
  @rtype: tuple; (boolean, string or C{os.stat} result)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's the result of C{os.stat}

  """
  # Default to root as owner
  (want_uid, want_gid) = (0, 0) if owner is None else owner

  try:
    st = os.stat(path)
  except EnvironmentError as err:
    return (False, "Can't stat(2) '%s': %s" % (path, err))

  # Any bit outside of the allowed mask is one too many
  if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE):
    return (False, "Permissions on '%s' are too permissive" % path)

  if (st.st_uid, st.st_gid) != (want_uid, want_gid):
    return (False, "'%s' is not owned by %s:%s" % (path, want_uid, want_gid))

  return (True, st)

5634

def _VerifyRestrictedCmdDirectory(path, _owner=None):
  """Check the directory containing the restricted commands.

  Besides the checks of L{_CommonRestrictedCmdCheck} the path must be a
  directory.

  @type path: string
  @param path: Path to check
  @param _owner: expected owner, passed to L{_CommonRestrictedCmdCheck}
  @rtype: tuple; (boolean, string or None)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's C{None}

  """
  (ok, payload) = _CommonRestrictedCmdCheck(path, _owner)

  if not ok:
    # The payload is the error message
    return (False, payload)

  # The payload is the stat result
  if stat.S_ISDIR(payload.st_mode):
    return (True, None)

  return (False, "Path '%s' is not a directory" % path)

5655

def _VerifyRestrictedCmd(path, cmd, _owner=None):
  """Check a restricted command's file and return its filename.

  Besides the checks of L{_CommonRestrictedCmdCheck} the file must be
  executable according to L{utils.IsExecutable}.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @param _owner: expected owner, passed to L{_CommonRestrictedCmdCheck}
  @rtype: tuple; (boolean, string)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise the second element is the
    path of the executable as joined from C{path} and C{cmd}

  """
  executable = utils.PathJoin(path, cmd)

  (ok, errmsg) = _CommonRestrictedCmdCheck(executable, _owner)

  if not ok:
    return (False, errmsg)

  if utils.IsExecutable(executable):
    return (True, executable)

  return (False, "access(2) thinks '%s' can't be executed" % executable)

5681

def _PrepareRestrictedCmd(path, cmd,
                          _verify_dir=_VerifyRestrictedCmdDirectory,
                          _verify_name=_VerifyRestrictedCmdName,
                          _verify_cmd=_VerifyRestrictedCmd):
  """Run all checks on a restricted command.

  The directory is verified first, then the command name and finally the
  executable itself; the first failing check determines the result.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @param _verify_dir: function verifying the directory
  @param _verify_name: function verifying the command name
  @param _verify_cmd: function verifying the executable
  @return: Same as L{_VerifyRestrictedCmd}

  """
  # Verify the directory first
  (dir_ok, dir_msg) = _verify_dir(path)
  if not dir_ok:
    return (False, dir_msg)

  # Check command if everything was alright
  (name_ok, name_msg) = _verify_name(cmd)
  if not name_ok:
    return (False, name_msg)

  # Check actual executable
  return _verify_cmd(path, cmd)

5707

def RunRestrictedCmd(cmd,
                     _lock_timeout=_RCMD_LOCK_TIMEOUT,
                     _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE,
                     _path=pathutils.RESTRICTED_COMMANDS_DIR,
                     _sleep_fn=time.sleep,
                     _prepare_fn=_PrepareRestrictedCmd,
                     _runcmd_fn=utils.RunCmd,
                     _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
  """Executes a restricted command after performing strict tests.

  The command is prepared and started while holding an exclusive lock on
  C{_lock_file}. If preparation fails or any exception occurs, the details
  are only written to the log; the caller gets a generic failure after a
  delay of C{_RCMD_INVALID_DELAY} seconds.

  @type cmd: string
  @param cmd: Command name
  @param _lock_timeout: timeout passed when acquiring the exclusive lock
  @param _lock_file: path of the lock file to open
  @param _path: directory passed to C{_prepare_fn} as the location of
    restricted commands
  @param _sleep_fn: callable used for the delay after an invalid request
  @param _prepare_fn: callable returning C{(status, value)}; C{value} is the
    executable to run on success and a message to log otherwise
  @param _runcmd_fn: callable used to execute the command
  @param _enabled: whether restricted commands are enabled at all
  @rtype: string
  @return: Command output
  @raise RPCFail: In case of an error

  """
  logging.info("Preparing to run restricted command '%s'", cmd)

  if not _enabled:
    _Fail("Restricted commands disabled at configure time")

  lock = None
  try:
    # Stays None unless the command was actually started
    cmdresult = None
    try:
      lock = utils.FileLock.Open(_lock_file)
      lock.Exclusive(blocking=True, timeout=_lock_timeout)

      (status, value) = _prepare_fn(_path, cmd)

      if status:
        # Run with an empty, reset environment. The post-fork callback
        # unlocks the lock file; presumably it runs once the child has been
        # forked, so the lock only covers verification and startup (confirm
        # against the implementation of utils.RunCmd).
        cmdresult = _runcmd_fn([value], env={}, reset_env=True,
                               postfork_fn=lambda _: lock.Unlock())
      else:
        logging.error(value)
    except Exception: # pylint: disable=W0703
      # Keep original error in log
      logging.exception("Caught exception")

    if cmdresult is None:
      # Nothing was run: either the checks failed or an exception was caught
      logging.info("Sleeping for %0.1f seconds before returning",
                   _RCMD_INVALID_DELAY)
      _sleep_fn(_RCMD_INVALID_DELAY)

      # Do not include original error message in returned error
      _Fail("Executing command '%s' failed" % cmd)
    elif cmdresult.failed or cmdresult.fail_reason:
      _Fail("Restricted command '%s' failed: %s; output: %s",
            cmd, cmdresult.fail_reason, cmdresult.output)
    else:
      return cmdresult.output
  finally:
    if lock is not None:
      # Release lock at last
      lock.Close()
      lock = None

5766

def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):
  """Writes or deletes the file which pauses the watcher.

  @type until: None or number
  @param until: Unix timestamp until which the watcher shouldn't run;
    C{None} removes the pause file
  @param _filename: path of the pause file

  """
  if until is not None:
    logging.info("Received request to pause watcher until %s", until)

    # Refuse anything which can't be written as a number
    if not ht.TNumber(until):
      _Fail("Duration must be numeric")

    utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0o644)
    return

  logging.info("Received request to no longer pause watcher")
  utils.RemoveFile(_filename)

5785

def ConfigureOVS(ovs_name, ovs_link):
  """Creates an OpenvSwitch on the node.

  The switch is created through C{ovs-vsctl add-br}. If an ethernet device
  is given, it is added to the new switch as a port through
  C{ovs-vsctl add-port}. A failure of either command is reported via
  L{_Fail}.

  @type ovs_name: string
  @param ovs_name: Name of the OpenvSwitch to create.
  @type ovs_link: None or string
  @param ovs_link: Ethernet device for outside connection (can be missing)

  """
  # Step one: create the switch itself
  create = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name])
  if create.failed:
    _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'"
          % (create.exit_code, create.output), log=True)

  # Step two is optional: without a device there is nothing to connect
  if not ovs_link:
    return

  attach = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link])
  if attach.failed:
    _Fail("Failed to connect openvswitch to interface %s. Script return"
          " value: %s, output: '%s'" % (ovs_link, attach.exit_code,
          attach.output), log=True)

5812

def GetFileInfo(file_path):
  """Checks if a file exists and returns information related to it.

  Currently returned information:
    - file size: int, size in bytes

  @type file_path: string
  @param file_path: Name of file to examine.

  @rtype: tuple of bool, dict
  @return: Whether the file could be examined, and a dictionary of
    information about the file gathered by os.stat. If C{os.stat} fails,
    e.g. because the file does not exist, C{(False, {})} is returned.

  """
  try:
    stat_info = os.stat(file_path)
  except EnvironmentError:
    # os.stat reports failures as OSError, which is not an IOError on
    # Python 2; EnvironmentError is the common base class of both
    return False, {}

  values_dict = {
    constants.STAT_SIZE: stat_info.st_size,
    }
  return True, values_dict

5836

class HooksRunner(object):
  """Runner for hook scripts.

  Instances of this class live on the node side (ganeti-noded), not on the
  master side.

  """
  def __init__(self, hooks_base_dir=None):
    """Initializes the hooks runner.

    @type hooks_base_dir: str or None
    @param hooks_base_dir: if not None, this overrides the
        L{pathutils.HOOKS_BASE_DIR} (useful for unittests)

    """
    if hooks_base_dir is not None:
      base_dir = hooks_base_dir
    else:
      base_dir = pathutils.HOOKS_BASE_DIR

    # The attribute is named like a constant on purpose, as it is used like
    # one after construction
    self._BASE_DIR = base_dir # pylint: disable=C0103

  def RunLocalHooks(self, node_list, hpath, phase, env):
    """Runs hooks after making sure they are only requested for this node.

    @type node_list: list
    @param node_list: must contain exactly one entry, the local node as
        returned by L{ssconf.GetMasterAndMyself}
    @param hpath: passed on to L{RunHooks}
    @param phase: passed on to L{RunHooks}
    @param env: passed on to L{RunHooks}
    @rtype: dict
    @return: dictionary mapping the node to C{(None, False, results)},
        where C{results} is the return value of L{RunHooks}

    """
    assert len(node_list) == 1
    local_node = node_list[0]
    (_, my_name) = ssconf.GetMasterAndMyself()
    assert local_node == my_name

    hook_results = self.RunHooks(hpath, phase, env)

    # Return values in the form expected by HooksMaster
    return {
      local_node: (None, False, hook_results),
      }

  def RunHooks(self, hpath, phase, env):
    """Runs all scripts found in a hooks directory.

    The directory is C{<base dir>/<hpath>-pre.d} or
    C{<base dir>/<hpath>-post.d}, depending on the phase. A directory which
    doesn't exist, or isn't a directory, gives an empty result.

    @type hpath: str
    @param hpath: the path to the hooks directory which
        holds the scripts
    @type phase: str
    @param phase: either L{constants.HOOKS_PHASE_PRE} or
        L{constants.HOOKS_PHASE_POST}; any other value is reported
        via L{_Fail}
    @type env: dict
    @param env: dictionary with the environment for the hook
    @rtype: list
    @return: list of 3-element tuples:
      - script path, relative to the hooks base directory
      - script result, one of L{constants.HKR_SUCCESS},
        L{constants.HKR_FAIL} or L{constants.HKR_SKIP}
      - output of the script

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      _Fail("Unknown hooks phase '%s'", phase)

    hooks_subdir = "%s-%s.d" % (hpath, suffix)
    hooks_dir = utils.PathJoin(self._BASE_DIR, hooks_subdir)

    # A missing hooks directory is normal; return quietly instead of logging
    # a warning at every operation
    if not os.path.isdir(hooks_dir):
      return []

    collected = []

    parts = utils.RunParts(hooks_dir, env=env, reset_env=True)
    for (script, status, outcome) in parts:
      if status == constants.RUNPARTS_SKIP:
        rrval = constants.HKR_SKIP
        output = ""
      elif status == constants.RUNPARTS_ERR:
        rrval = constants.HKR_FAIL
        output = "Hook script execution error: %s" % outcome
      elif status == constants.RUNPARTS_RUN:
        rrval = (constants.HKR_FAIL if outcome.failed
                 else constants.HKR_SUCCESS)
        output = utils.SafeEncode(outcome.output.strip())
      collected.append(("%s/%s" % (hooks_subdir, script), rrval, output))

    return collected

5930

class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  @staticmethod
  def Run(name, idata, ial_params):
    """Run an iallocator script.

    The input data is written to a temporary file whose name is passed to
    the script as first argument, followed by C{ial_params}. The temporary
    file is removed again, no matter whether the script succeeded.

    @type name: str
    @param name: the iallocator script name
    @type idata: str
    @param idata: the allocator input data
    @type ial_params: list
    @param ial_params: the iallocator parameters

    @rtype: str
    @return: the stdout of the allocator script; a script which can't be
        found or which fails is reported via L{_Fail} instead

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      _Fail("iallocator module '%s' not found in the search path", name)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      try:
        os.write(fd, idata)
      finally:
        # Close the descriptor even if writing fails, otherwise it leaks
        os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name] + ial_params)
      if result.failed:
        _Fail("iallocator module '%s' failed: %s, output '%s'",
              name, result.fail_reason, result.output)
    finally:
      os.unlink(fin_name)

    return result.stdout

5973

class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  Every device gets one file below L{_ROOT_DIR}, named after the device
  path (see L{_ConvertPath}).

  """
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = pathutils.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    @type dev_path: str
    @param dev_path: the C{/dev/} path name
    @rtype: str
    @return: the converted path name

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    Failures to write the cache file are logged and otherwise ignored.

    @type dev_path: str
    @param dev_path: the pathname of the device
    @type owner: str
    @param owner: the owner (instance name) of the device
    @type on_primary: bool
    @param on_primary: whether this is the primary
        node or not
    @type iv_name: str
    @param iv_name: the instance-visible name of the
        device, as in objects.Disk.iv_name

    @rtype: None

    """
    if dev_path is None:
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
    # One line per file: owner, role of this node and visible name
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError as err:
      logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    This is just a wrapper over L{utils.io.RemoveFile} with a converted
    path name and logging. Failures to remove the cache file are logged
    and otherwise ignored.

    @type dev_path: str
    @param dev_path: the pathname of the device

    @rtype: None

    """
    if dev_path is None:
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError as err:
      # Name the operation that failed; the old text said "update"
      logging.exception("Can't remove bdev cache for %s: %s", dev_path, err)

6056