ganeti.backend

Source Code for Module ganeti.backend

1 # 2 # 3 4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 5 # All rights reserved. 6 # 7 # Redistribution and use in source and binary forms, with or without 8 # modification, are permitted provided that the following conditions are 9 # met: 10 # 11 # 1. Redistributions of source code must retain the above copyright notice, 12 # this list of conditions and the following disclaimer. 13 # 14 # 2. Redistributions in binary form must reproduce the above copyright 15 # notice, this list of conditions and the following disclaimer in the 16 # documentation and/or other materials provided with the distribution. 17 # 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 31 """Functions used by the node daemon 32 33 @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in 34 the L{UploadFile} function 35 @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted 36 in the L{_CleanDirectory} function 37 38 """ 39 40 # pylint: disable=E1103,C0302 41 42 # E1103: %s %r has no %r member (but some types could not be 43 # inferred), because the _TryOSFromDisk returns either (True, os_obj) 44 # or (False, "string") which confuses pylint 45 46 # C0302: This module has become too big and should be split up 47 48 49 import base64 50 import errno 51 import logging 52 import os 53 import os.path 54 import pycurl 55 import random 56 import re 57 import shutil 58 import signal 59 import stat 60 import tempfile 61 import time 62 import zlib 63 import contextlib 64 import collections 65 66 from ganeti import errors 67 from ganeti import http 68 from ganeti import utils 69 from ganeti import ssh 70 from ganeti import hypervisor 71 from ganeti.hypervisor import hv_base 72 from ganeti import constants 73 from ganeti.storage import bdev 74 from ganeti.storage import drbd 75 from ganeti.storage import extstorage 76 from ganeti.storage import filestorage 77 from ganeti import objects 78 from ganeti import ssconf 79 from ganeti import serializer 80 from ganeti import netutils 81 from ganeti import runtime 82 from ganeti import compat 83 from ganeti import pathutils 84 from ganeti import vcluster 85 from ganeti import ht 86 from ganeti.storage.base import BlockDev 87 from ganeti.storage.drbd import DRBD8 88 from ganeti import hooksmaster 89 import ganeti.metad as metad 90 91 92 _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" 93 _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ 94 pathutils.DATA_DIR, 95 pathutils.JOB_QUEUE_ARCHIVE_DIR, 96 pathutils.QUEUE_DIR, 97 pathutils.CRYPTO_KEYS_DIR, 98 ]) 99 _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 100 _X509_KEY_FILE = "key" 101 _X509_CERT_FILE = "cert" 102 _IES_STATUS_FILE = "status" 103 _IES_PID_FILE = "pid" 104 _IES_CA_FILE = "ca" 105 106 #: Valid LVS output line regex 107 _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") 108 109 # Actions for the master setup script 110 _MASTER_START = "start" 111 _MASTER_STOP = "stop" 112 113 #: Maximum file permissions for restricted command directory and executables 114 _RCMD_MAX_MODE = (stat.S_IRWXU | 115 stat.S_IRGRP | stat.S_IXGRP | 116 stat.S_IROTH | stat.S_IXOTH) 117 118 #: Delay before returning an error for restricted commands 119 _RCMD_INVALID_DELAY = 10 120 121 #: How long to wait to acquire lock for restricted commands (shorter than 122 #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many 123 #: command requests arrive 124 _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8

125 126 127 -class RPCFail(Exception):

128 """Class denoting RPC failure. 129 130 Its argument is the error message. 131 132 """

133

134 135 -def _GetInstReasonFilename(instance_name):

136 """Path of the file containing the reason of the instance status change. 137 138 @type instance_name: string 139 @param instance_name: The name of the instance 140 @rtype: string 141 @return: The path of the file 142 143 """ 144 return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)

145

146 147 -def _StoreInstReasonTrail(instance_name, trail):

148 """Serialize a reason trail related to an instance change of state to file. 149 150 The exact location of the file depends on the name of the instance and on 151 the configuration of the Ganeti cluster defined at deploy time. 152 153 @type instance_name: string 154 @param instance_name: The name of the instance 155 156 @type trail: list of reasons 157 @param trail: reason trail 158 159 @rtype: None 160 161 """ 162 json = serializer.DumpJson(trail) 163 filename = _GetInstReasonFilename(instance_name) 164 utils.WriteFile(filename, data=json)

165

166 167 -def _Fail(msg, *args, **kwargs):

168 """Log an error and the raise an RPCFail exception. 169 170 This exception is then handled specially in the ganeti daemon and 171 turned into a 'failed' return type. As such, this function is a 172 useful shortcut for logging the error and returning it to the master 173 daemon. 174 175 @type msg: string 176 @param msg: the text of the exception 177 @raise RPCFail 178 179 """ 180 if args: 181 msg = msg % args 182 if "log" not in kwargs or kwargs["log"]: # if we should log this error 183 if "exc" in kwargs and kwargs["exc"]: 184 logging.exception(msg) 185 else: 186 logging.error(msg) 187 raise RPCFail(msg)

188

189 190 -def _GetConfig():

191 """Simple wrapper to return a SimpleStore. 192 193 @rtype: L{ssconf.SimpleStore} 194 @return: a SimpleStore instance 195 196 """ 197 return ssconf.SimpleStore()

198

199 200 -def _GetSshRunner(cluster_name):

201 """Simple wrapper to return an SshRunner. 202 203 @type cluster_name: str 204 @param cluster_name: the cluster name, which is needed 205 by the SshRunner constructor 206 @rtype: L{ssh.SshRunner} 207 @return: an SshRunner instance 208 209 """ 210 return ssh.SshRunner(cluster_name)

211

212 213 -def _Decompress(data):

214 """Unpacks data compressed by the RPC client. 215 216 @type data: list or tuple 217 @param data: Data sent by RPC client 218 @rtype: str 219 @return: Decompressed data 220 221 """ 222 assert isinstance(data, (list, tuple)) 223 assert len(data) == 2 224 (encoding, content) = data 225 if encoding == constants.RPC_ENCODING_NONE: 226 return content 227 elif encoding == constants.RPC_ENCODING_ZLIB_BASE64: 228 return zlib.decompress(base64.b64decode(content)) 229 else: 230 raise AssertionError("Unknown data encoding")

231

232 233 -def _CleanDirectory(path, exclude=None):

234 """Removes all regular files in a directory. 235 236 @type path: str 237 @param path: the directory to clean 238 @type exclude: list 239 @param exclude: list of files to be excluded, defaults 240 to the empty list 241 242 """ 243 if path not in _ALLOWED_CLEAN_DIRS: 244 _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'", 245 path) 246 247 if not os.path.isdir(path): 248 return 249 if exclude is None: 250 exclude = [] 251 else: 252 # Normalize excluded paths 253 exclude = [os.path.normpath(i) for i in exclude] 254 255 for rel_name in utils.ListVisibleFiles(path): 256 full_name = utils.PathJoin(path, rel_name) 257 if full_name in exclude: 258 continue 259 if os.path.isfile(full_name) and not os.path.islink(full_name): 260 utils.RemoveFile(full_name)

261

262 263 -def _BuildUploadFileList():

264 """Build the list of allowed upload files. 265 266 This is abstracted so that it's built only once at module import time. 267 268 """ 269 allowed_files = set([ 270 pathutils.CLUSTER_CONF_FILE, 271 pathutils.ETC_HOSTS, 272 pathutils.SSH_KNOWN_HOSTS_FILE, 273 pathutils.VNC_PASSWORD_FILE, 274 pathutils.RAPI_CERT_FILE, 275 pathutils.SPICE_CERT_FILE, 276 pathutils.SPICE_CACERT_FILE, 277 pathutils.RAPI_USERS_FILE, 278 pathutils.CONFD_HMAC_KEY, 279 pathutils.CLUSTER_DOMAIN_SECRET_FILE, 280 ]) 281 282 for hv_name in constants.HYPER_TYPES: 283 hv_class = hypervisor.GetHypervisorClass(hv_name) 284 allowed_files.update(hv_class.GetAncillaryFiles()[0]) 285 286 assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ 287 "Allowed file storage paths should never be uploaded via RPC" 288 289 return frozenset(allowed_files)

290 291 292 _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()

293 294 295 -def JobQueuePurge():

296 """Removes job queue files and archived jobs. 297 298 @rtype: tuple 299 @return: True, None 300 301 """ 302 _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) 303 _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)

304

305 306 -def GetMasterNodeName():

307 """Returns the master node name. 308 309 @rtype: string 310 @return: name of the master node 311 @raise RPCFail: in case of errors 312 313 """ 314 try: 315 return _GetConfig().GetMasterNode() 316 except errors.ConfigurationError, err: 317 _Fail("Cluster configuration incomplete: %s", err, exc=True)

318

319 320 -def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):

321 """Decorator that runs hooks before and after the decorated function. 322 323 @type hook_opcode: string 324 @param hook_opcode: opcode of the hook 325 @type hooks_path: string 326 @param hooks_path: path of the hooks 327 @type env_builder_fn: function 328 @param env_builder_fn: function that returns a dictionary containing the 329 environment variables for the hooks. Will get all the parameters of the 330 decorated function. 331 @raise RPCFail: in case of pre-hook failure 332 333 """ 334 def decorator(fn): 335 def wrapper(*args, **kwargs): 336 _, myself = ssconf.GetMasterAndMyself() 337 nodes = ([myself], [myself]) # these hooks run locally 338 339 env_fn = compat.partial(env_builder_fn, *args, **kwargs) 340 341 cfg = _GetConfig() 342 hr = HooksRunner() 343 hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, 344 hr.RunLocalHooks, None, env_fn, None, 345 logging.warning, cfg.GetClusterName(), 346 cfg.GetMasterNode()) 347 hm.RunPhase(constants.HOOKS_PHASE_PRE) 348 result = fn(*args, **kwargs) 349 hm.RunPhase(constants.HOOKS_PHASE_POST) 350 351 return result

352 return wrapper 353 return decorator 354

355 356 -def _BuildMasterIpEnv(master_params, use_external_mip_script=None):

357 """Builds environment variables for master IP hooks. 358 359 @type master_params: L{objects.MasterNetworkParameters} 360 @param master_params: network parameters of the master 361 @type use_external_mip_script: boolean 362 @param use_external_mip_script: whether to use an external master IP 363 address setup script (unused, but necessary per the implementation of the 364 _RunLocalHooks decorator) 365 366 """ 367 # pylint: disable=W0613 368 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) 369 env = { 370 "MASTER_NETDEV": master_params.netdev, 371 "MASTER_IP": master_params.ip, 372 "MASTER_NETMASK": str(master_params.netmask), 373 "CLUSTER_IP_VERSION": str(ver), 374 } 375 376 return env

377

378 379 -def _RunMasterSetupScript(master_params, action, use_external_mip_script):

380 """Execute the master IP address setup script. 381 382 @type master_params: L{objects.MasterNetworkParameters} 383 @param master_params: network parameters of the master 384 @type action: string 385 @param action: action to pass to the script. Must be one of 386 L{backend._MASTER_START} or L{backend._MASTER_STOP} 387 @type use_external_mip_script: boolean 388 @param use_external_mip_script: whether to use an external master IP 389 address setup script 390 @raise backend.RPCFail: if there are errors during the execution of the 391 script 392 393 """ 394 env = _BuildMasterIpEnv(master_params) 395 396 if use_external_mip_script: 397 setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT 398 else: 399 setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT 400 401 result = utils.RunCmd([setup_script, action], env=env, reset_env=True) 402 403 if result.failed: 404 _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % 405 (action, result.exit_code, result.output), log=True)

406 407 408 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", 409 _BuildMasterIpEnv)

410 -def ActivateMasterIp(master_params, use_external_mip_script):

411 """Activate the IP address of the master daemon. 412 413 @type master_params: L{objects.MasterNetworkParameters} 414 @param master_params: network parameters of the master 415 @type use_external_mip_script: boolean 416 @param use_external_mip_script: whether to use an external master IP 417 address setup script 418 @raise RPCFail: in case of errors during the IP startup 419 420 """ 421 _RunMasterSetupScript(master_params, _MASTER_START, 422 use_external_mip_script)

423

424 425 -def StartMasterDaemons(no_voting):

426 """Activate local node as master node. 427 428 The function will start the master daemons (ganeti-masterd and ganeti-rapi). 429 430 @type no_voting: boolean 431 @param no_voting: whether to start ganeti-masterd without a node vote 432 but still non-interactively 433 @rtype: None 434 435 """ 436 437 if no_voting: 438 daemon_args = "--no-voting --yes-do-it" 439 else: 440 daemon_args = "" 441 442 env = { 443 "EXTRA_LUXID_ARGS": daemon_args, 444 "EXTRA_WCONFD_ARGS": daemon_args, 445 } 446 447 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) 448 if result.failed: 449 msg = "Can't start Ganeti master: %s" % result.output 450 logging.error(msg) 451 _Fail(msg)

452 453 454 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", 455 _BuildMasterIpEnv)

456 -def DeactivateMasterIp(master_params, use_external_mip_script):

457 """Deactivate the master IP on this node. 458 459 @type master_params: L{objects.MasterNetworkParameters} 460 @param master_params: network parameters of the master 461 @type use_external_mip_script: boolean 462 @param use_external_mip_script: whether to use an external master IP 463 address setup script 464 @raise RPCFail: in case of errors during the IP turndown 465 466 """ 467 _RunMasterSetupScript(master_params, _MASTER_STOP, 468 use_external_mip_script)

469

470 471 -def StopMasterDaemons():

472 """Stop the master daemons on this node. 473 474 Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node. 475 476 @rtype: None 477 478 """ 479 # TODO: log and report back to the caller the error failures; we 480 # need to decide in which case we fail the RPC for this 481 482 result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) 483 if result.failed: 484 logging.error("Could not stop Ganeti master, command %s had exitcode %s" 485 " and error %s", 486 result.cmd, result.exit_code, result.output)

487

488 489 -def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):

490 """Change the netmask of the master IP. 491 492 @param old_netmask: the old value of the netmask 493 @param netmask: the new value of the netmask 494 @param master_ip: the master IP 495 @param master_netdev: the master network device 496 497 """ 498 if old_netmask == netmask: 499 return 500 501 if not netutils.IPAddress.Own(master_ip): 502 _Fail("The master IP address is not up, not attempting to change its" 503 " netmask") 504 505 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", 506 "%s/%s" % (master_ip, netmask), 507 "dev", master_netdev, "label", 508 "%s:0" % master_netdev]) 509 if result.failed: 510 _Fail("Could not set the new netmask on the master IP address") 511 512 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", 513 "%s/%s" % (master_ip, old_netmask), 514 "dev", master_netdev, "label", 515 "%s:0" % master_netdev]) 516 if result.failed: 517 _Fail("Could not bring down the master IP address with the old netmask")

518

519 520 -def EtcHostsModify(mode, host, ip):

521 """Modify a host entry in /etc/hosts. 522 523 @param mode: The mode to operate. Either add or remove entry 524 @param host: The host to operate on 525 @param ip: The ip associated with the entry 526 527 """ 528 if mode == constants.ETC_HOSTS_ADD: 529 if not ip: 530 RPCFail("Mode 'add' needs 'ip' parameter, but parameter not" 531 " present") 532 utils.AddHostToEtcHosts(host, ip) 533 elif mode == constants.ETC_HOSTS_REMOVE: 534 if ip: 535 RPCFail("Mode 'remove' does not allow 'ip' parameter, but" 536 " parameter is present") 537 utils.RemoveHostFromEtcHosts(host) 538 else: 539 RPCFail("Mode not supported")

540

541 542 -def LeaveCluster(modify_ssh_setup):

543 """Cleans up and remove the current node. 544 545 This function cleans up and prepares the current node to be removed 546 from the cluster. 547 548 If processing is successful, then it raises an 549 L{errors.QuitGanetiException} which is used as a special case to 550 shutdown the node daemon. 551 552 @param modify_ssh_setup: boolean 553 554 """ 555 _CleanDirectory(pathutils.DATA_DIR) 556 _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) 557 JobQueuePurge() 558 559 if modify_ssh_setup: 560 try: 561 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) 562 563 ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) 564 565 utils.RemoveFile(priv_key) 566 utils.RemoveFile(pub_key) 567 except errors.OpExecError: 568 logging.exception("Error while processing ssh files") 569 except IOError: 570 logging.exception("At least one SSH file was not accessible.") 571 572 try: 573 utils.RemoveFile(pathutils.CONFD_HMAC_KEY) 574 utils.RemoveFile(pathutils.RAPI_CERT_FILE) 575 utils.RemoveFile(pathutils.SPICE_CERT_FILE) 576 utils.RemoveFile(pathutils.SPICE_CACERT_FILE) 577 utils.RemoveFile(pathutils.NODED_CERT_FILE) 578 except: # pylint: disable=W0702 579 logging.exception("Error while removing cluster secrets") 580 581 utils.StopDaemon(constants.CONFD) 582 utils.StopDaemon(constants.MOND) 583 utils.StopDaemon(constants.KVMD) 584 585 # Raise a custom exception (handled in ganeti-noded) 586 raise errors.QuitGanetiException(True, "Shutdown scheduled")

587

588 589 -def _CheckStorageParams(params, num_params):

590 """Performs sanity checks for storage parameters. 591 592 @type params: list 593 @param params: list of storage parameters 594 @type num_params: int 595 @param num_params: expected number of parameters 596 597 """ 598 if params is None: 599 raise errors.ProgrammerError("No storage parameters for storage" 600 " reporting is provided.") 601 if not isinstance(params, list): 602 raise errors.ProgrammerError("The storage parameters are not of type" 603 " list: '%s'" % params) 604 if not len(params) == num_params: 605 raise errors.ProgrammerError("Did not receive the expected number of" 606 "storage parameters: expected %s," 607 " received '%s'" % (num_params, len(params)))

608

609 610 -def _CheckLvmStorageParams(params):

611 """Performs sanity check for the 'exclusive storage' flag. 612 613 @see: C{_CheckStorageParams} 614 615 """ 616 _CheckStorageParams(params, 1) 617 excl_stor = params[0] 618 if not isinstance(params[0], bool): 619 raise errors.ProgrammerError("Exclusive storage parameter is not" 620 " boolean: '%s'." % excl_stor) 621 return excl_stor

622

623 624 -def _GetLvmVgSpaceInfo(name, params):

625 """Wrapper around C{_GetVgInfo} which checks the storage parameters. 626 627 @type name: string 628 @param name: name of the volume group 629 @type params: list 630 @param params: list of storage parameters, which in this case should be 631 containing only one for exclusive storage 632 633 """ 634 excl_stor = _CheckLvmStorageParams(params) 635 return _GetVgInfo(name, excl_stor)

636

637 638 -def _GetVgInfo( 639 name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):

640 """Retrieves information about a LVM volume group. 641 642 """ 643 # TODO: GetVGInfo supports returning information for multiple VGs at once 644 vginfo = info_fn([name], excl_stor) 645 if vginfo: 646 vg_free = int(round(vginfo[0][0], 0)) 647 vg_size = int(round(vginfo[0][1], 0)) 648 else: 649 vg_free = None 650 vg_size = None 651 652 return { 653 "type": constants.ST_LVM_VG, 654 "name": name, 655 "storage_free": vg_free, 656 "storage_size": vg_size, 657 }

658

659 660 -def _GetLvmPvSpaceInfo(name, params):

661 """Wrapper around C{_GetVgSpindlesInfo} with sanity checks. 662 663 @see: C{_GetLvmVgSpaceInfo} 664 665 """ 666 excl_stor = _CheckLvmStorageParams(params) 667 return _GetVgSpindlesInfo(name, excl_stor)

668

669 670 -def _GetVgSpindlesInfo( 671 name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):

672 """Retrieves information about spindles in an LVM volume group. 673 674 @type name: string 675 @param name: VG name 676 @type excl_stor: bool 677 @param excl_stor: exclusive storage 678 @rtype: dict 679 @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name, 680 free spindles, total spindles respectively 681 682 """ 683 if excl_stor: 684 (vg_free, vg_size) = info_fn(name) 685 else: 686 vg_free = 0 687 vg_size = 0 688 return { 689 "type": constants.ST_LVM_PV, 690 "name": name, 691 "storage_free": vg_free, 692 "storage_size": vg_size, 693 }

694

695 696 -def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):

697 """Retrieves node information from a hypervisor. 698 699 The information returned depends on the hypervisor. Common items: 700 701 - vg_size is the size of the configured volume group in MiB 702 - vg_free is the free size of the volume group in MiB 703 - memory_dom0 is the memory allocated for domain0 in MiB 704 - memory_free is the currently available (free) ram in MiB 705 - memory_total is the total number of ram in MiB 706 - hv_version: the hypervisor version, if available 707 708 @type hvparams: dict of string 709 @param hvparams: the hypervisor's hvparams 710 711 """ 712 return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)

713

714 715 -def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):

716 """Retrieves node information for all hypervisors. 717 718 See C{_GetHvInfo} for information on the output. 719 720 @type hv_specs: list of pairs (string, dict of strings) 721 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 722 723 """ 724 if hv_specs is None: 725 return None 726 727 result = [] 728 for hvname, hvparams in hv_specs: 729 result.append(_GetHvInfo(hvname, hvparams, get_hv_fn)) 730 return result

731

732 733 -def _GetNamedNodeInfo(names, fn):

734 """Calls C{fn} for all names in C{names} and returns a dictionary. 735 736 @rtype: None or dict 737 738 """ 739 if names is None: 740 return None 741 else: 742 return map(fn, names)

743

744 745 -def GetNodeInfo(storage_units, hv_specs):

746 """Gives back a hash with different information about the node. 747 748 @type storage_units: list of tuples (string, string, list) 749 @param storage_units: List of tuples (storage unit, identifier, parameters) to 750 ask for disk space information. In case of lvm-vg, the identifier is 751 the VG name. The parameters can contain additional, storage-type-specific 752 parameters, for example exclusive storage for lvm storage. 753 @type hv_specs: list of pairs (string, dict of strings) 754 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 755 @rtype: tuple; (string, None/dict, None/dict) 756 @return: Tuple containing boot ID, volume group information and hypervisor 757 information 758 759 """ 760 bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") 761 storage_info = _GetNamedNodeInfo( 762 storage_units, 763 (lambda (storage_type, storage_key, storage_params): 764 _ApplyStorageInfoFunction(storage_type, storage_key, storage_params))) 765 hv_info = _GetHvInfoAll(hv_specs) 766 return (bootid, storage_info, hv_info)

767

768 769 -def _GetFileStorageSpaceInfo(path, params):

770 """Wrapper around filestorage.GetSpaceInfo. 771 772 The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo 773 and ignore the *args parameter to not leak it into the filestorage 774 module's code. 775 776 @see: C{filestorage.GetFileStorageSpaceInfo} for description of the 777 parameters. 778 779 """ 780 _CheckStorageParams(params, 0) 781 return filestorage.GetFileStorageSpaceInfo(path)

782 783 784 # FIXME: implement storage reporting for all missing storage types. 785 _STORAGE_TYPE_INFO_FN = { 786 constants.ST_BLOCK: None, 787 constants.ST_DISKLESS: None, 788 constants.ST_EXT: None, 789 constants.ST_FILE: _GetFileStorageSpaceInfo, 790 constants.ST_LVM_PV: _GetLvmPvSpaceInfo, 791 constants.ST_LVM_VG: _GetLvmVgSpaceInfo, 792 constants.ST_SHARED_FILE: None, 793 constants.ST_GLUSTER: None, 794 constants.ST_RADOS: None, 795 }

796 797 798 -def _ApplyStorageInfoFunction(storage_type, storage_key, *args):

799 """Looks up and applies the correct function to calculate free and total 800 storage for the given storage type. 801 802 @type storage_type: string 803 @param storage_type: the storage type for which the storage shall be reported. 804 @type storage_key: string 805 @param storage_key: identifier of a storage unit, e.g. the volume group name 806 of an LVM storage unit 807 @type args: any 808 @param args: various parameters that can be used for storage reporting. These 809 parameters and their semantics vary from storage type to storage type and 810 are just propagated in this function. 811 @return: the results of the application of the storage space function (see 812 _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that 813 storage type 814 @raises NotImplementedError: for storage types who don't support space 815 reporting yet 816 """ 817 fn = _STORAGE_TYPE_INFO_FN[storage_type] 818 if fn is not None: 819 return fn(storage_key, *args) 820 else: 821 raise NotImplementedError

822

823 824 -def _CheckExclusivePvs(pvi_list):

825 """Check that PVs are not shared among LVs 826 827 @type pvi_list: list of L{objects.LvmPvInfo} objects 828 @param pvi_list: information about the PVs 829 830 @rtype: list of tuples (string, list of strings) 831 @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) 832 833 """ 834 res = [] 835 for pvi in pvi_list: 836 if len(pvi.lv_list) > 1: 837 res.append((pvi.name, pvi.lv_list)) 838 return res

839

840 841 -def _VerifyHypervisors(what, vm_capable, result, all_hvparams, 842 get_hv_fn=hypervisor.GetHypervisor):

843 """Verifies the hypervisor. Appends the results to the 'results' list. 844 845 @type what: C{dict} 846 @param what: a dictionary of things to check 847 @type vm_capable: boolean 848 @param vm_capable: whether or not this node is vm capable 849 @type result: dict 850 @param result: dictionary of verification results; results of the 851 verifications in this function will be added here 852 @type all_hvparams: dict of dict of string 853 @param all_hvparams: dictionary mapping hypervisor names to hvparams 854 @type get_hv_fn: function 855 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 856 857 """ 858 if not vm_capable: 859 return 860 861 if constants.NV_HYPERVISOR in what: 862 result[constants.NV_HYPERVISOR] = {} 863 for hv_name in what[constants.NV_HYPERVISOR]: 864 hvparams = all_hvparams[hv_name] 865 try: 866 val = get_hv_fn(hv_name).Verify(hvparams=hvparams) 867 except errors.HypervisorError, err: 868 val = "Error while checking hypervisor: %s" % str(err) 869 result[constants.NV_HYPERVISOR][hv_name] = val

870

871 872 -def _VerifyHvparams(what, vm_capable, result, 873 get_hv_fn=hypervisor.GetHypervisor):

874 """Verifies the hvparams. Appends the results to the 'results' list. 875 876 @type what: C{dict} 877 @param what: a dictionary of things to check 878 @type vm_capable: boolean 879 @param vm_capable: whether or not this node is vm capable 880 @type result: dict 881 @param result: dictionary of verification results; results of the 882 verifications in this function will be added here 883 @type get_hv_fn: function 884 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 885 886 """ 887 if not vm_capable: 888 return 889 890 if constants.NV_HVPARAMS in what: 891 result[constants.NV_HVPARAMS] = [] 892 for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: 893 try: 894 logging.info("Validating hv %s, %s", hv_name, hvparms) 895 get_hv_fn(hv_name).ValidateParameters(hvparms) 896 except errors.HypervisorError, err: 897 result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))

898

899 900 -def _VerifyInstanceList(what, vm_capable, result, all_hvparams):

901 """Verifies the instance list. 902 903 @type what: C{dict} 904 @param what: a dictionary of things to check 905 @type vm_capable: boolean 906 @param vm_capable: whether or not this node is vm capable 907 @type result: dict 908 @param result: dictionary of verification results; results of the 909 verifications in this function will be added here 910 @type all_hvparams: dict of dict of string 911 @param all_hvparams: dictionary mapping hypervisor names to hvparams 912 913 """ 914 if constants.NV_INSTANCELIST in what and vm_capable: 915 # GetInstanceList can fail 916 try: 917 val = GetInstanceList(what[constants.NV_INSTANCELIST], 918 all_hvparams=all_hvparams) 919 except RPCFail, err: 920 val = str(err) 921 result[constants.NV_INSTANCELIST] = val

922

923 924 -def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):

925 """Verifies the node info. 926 927 @type what: C{dict} 928 @param what: a dictionary of things to check 929 @type vm_capable: boolean 930 @param vm_capable: whether or not this node is vm capable 931 @type result: dict 932 @param result: dictionary of verification results; results of the 933 verifications in this function will be added here 934 @type all_hvparams: dict of dict of string 935 @param all_hvparams: dictionary mapping hypervisor names to hvparams 936 937 """ 938 if constants.NV_HVINFO in what and vm_capable: 939 hvname = what[constants.NV_HVINFO] 940 hyper = hypervisor.GetHypervisor(hvname) 941 hvparams = all_hvparams[hvname] 942 result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)

943

944 945 -def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):

946 """Verify the existance and validity of the client SSL certificate. 947 948 Also, verify that the client certificate is not self-signed. Self- 949 signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4 950 and should be replaced by client certificates signed by the server 951 certificate. Hence we output a warning when we encounter a self-signed 952 one. 953 954 """ 955 create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates" 956 if not os.path.exists(cert_file): 957 return (constants.CV_ERROR, 958 "The client certificate does not exist. Run '%s' to create" 959 " client certificates for all nodes." % create_cert_cmd) 960 961 (errcode, msg) = utils.VerifyCertificate(cert_file) 962 if errcode is not None: 963 return (errcode, msg) 964 965 (errcode, msg) = utils.IsCertificateSelfSigned(cert_file) 966 if errcode is not None: 967 return (errcode, msg) 968 969 # if everything is fine, we return the digest to be compared to the config 970 return (None, utils.GetCertificateDigest(cert_filename=cert_file))

971

972 973 -def _VerifySshSetup(node_status_list, my_name, ssh_key_type, 974 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS):

975 """Verifies the state of the SSH key files. 976 977 @type node_status_list: list of tuples 978 @param node_status_list: list of nodes of the cluster associated with a 979 couple of flags: (uuid, name, is_master_candidate, 980 is_potential_master_candidate, online) 981 @type my_name: str 982 @param my_name: name of this node 983 @type ssh_key_type: one of L{constants.SSHK_ALL} 984 @param ssh_key_type: type of key used on nodes 985 @type ganeti_pub_keys_file: str 986 @param ganeti_pub_keys_file: filename of the public keys file 987 988 """ 989 if node_status_list is None: 990 return ["No node list to check against the pub_key_file received."] 991 992 my_status_list = [(my_uuid, name, mc, pot_mc, online) for 993 (my_uuid, name, mc, pot_mc, online) 994 in node_status_list if name == my_name] 995 if len(my_status_list) == 0: 996 return ["Cannot find node information for node '%s'." % my_name] 997 (my_uuid, _, _, potential_master_candidate, online) = \ 998 my_status_list[0] 999 1000 result = [] 1001 1002 if not os.path.exists(ganeti_pub_keys_file): 1003 result.append("The public key file '%s' does not exist. Consider running" 1004 " 'gnt-cluster renew-crypto --new-ssh-keys" 1005 " [--no-ssh-key-check]' to fix this." % ganeti_pub_keys_file) 1006 return result 1007 1008 pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list] 1009 offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list 1010 if not online] 1011 pub_keys = ssh.QueryPubKeyFile(None, key_file=ganeti_pub_keys_file) 1012 1013 if potential_master_candidate: 1014 # Check that the set of potential master candidates matches the 1015 # public key file 1016 pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes) 1017 pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes) 1018 missing_uuids = set([]) 1019 if pub_uuids_set != pot_mc_uuids_set: 1020 unknown_uuids = pub_uuids_set - pot_mc_uuids_set 1021 if unknown_uuids: 1022 result.append("The following node UUIDs are listed in the public key" 1023 " file on node '%s', but are not potential master" 1024 " candidates: %s." 1025 % (my_name, ", ".join(list(unknown_uuids)))) 1026 missing_uuids = pot_mc_uuids_set - pub_uuids_set 1027 if missing_uuids: 1028 result.append("The following node UUIDs of potential master candidates" 1029 " are missing in the public key file on node %s: %s." 1030 % (my_name, ", ".join(list(missing_uuids)))) 1031 1032 (_, key_files) = \ 1033 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1034 (_, node_pub_key_file) = key_files[ssh_key_type] 1035 1036 my_keys = pub_keys[my_uuid] 1037 1038 node_pub_key = utils.ReadFile(node_pub_key_file) 1039 if node_pub_key.strip() not in my_keys: 1040 result.append("The dsa key of node %s does not match this node's key" 1041 " in the pub key file." % my_name) 1042 if len(my_keys) != 1: 1043 result.append("There is more than one key for node %s in the public key" 1044 " file." % my_name) 1045 else: 1046 if len(pub_keys.keys()) > 0: 1047 result.append("The public key file of node '%s' is not empty, although" 1048 " the node is not a potential master candidate." 1049 % my_name) 1050 1051 # Check that all master candidate keys are in the authorized_keys file 1052 (auth_key_file, _) = \ 1053 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1054 for (uuid, name, mc, _, online) in node_status_list: 1055 if not online: 1056 continue 1057 if uuid in missing_uuids: 1058 continue 1059 if mc: 1060 for key in pub_keys[uuid]: 1061 if not ssh.HasAuthorizedKey(auth_key_file, key): 1062 result.append("A SSH key of master candidate '%s' (UUID: '%s') is" 1063 " not in the 'authorized_keys' file of node '%s'." 1064 % (name, uuid, my_name)) 1065 else: 1066 for key in pub_keys[uuid]: 1067 if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key): 1068 result.append("A SSH key of normal node '%s' (UUID: '%s') is in the" 1069 " 'authorized_keys' file of node '%s'." 1070 % (name, uuid, my_name)) 1071 if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key): 1072 result.append("A SSH key of normal node '%s' (UUID: '%s') is not" 1073 " in the 'authorized_keys' file of itself." 1074 % (my_name, uuid)) 1075 1076 return result

1077

1078 1079 -def _VerifySshClutter(node_status_list, my_name):

1080 """Verifies that the 'authorized_keys' files are not cluttered up. 1081 1082 @type node_status_list: list of tuples 1083 @param node_status_list: list of nodes of the cluster associated with a 1084 couple of flags: (uuid, name, is_master_candidate, 1085 is_potential_master_candidate, online) 1086 @type my_name: str 1087 @param my_name: name of this node 1088 1089 """ 1090 result = [] 1091 (auth_key_file, _) = \ 1092 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1093 node_names = [name for (_, name, _, _) in node_status_list] 1094 multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names) 1095 if multiple_occurrences: 1096 msg = "There are hosts which have more than one SSH key stored for the" \ 1097 " same user in the 'authorized_keys' file of node %s. This can be" \ 1098 " due to an unsuccessful operation which cluttered up the" \ 1099 " 'authorized_keys' file. We recommend to clean this up manually. " \ 1100 % my_name 1101 for host, occ in multiple_occurrences.items(): 1102 msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ)) 1103 result.append(msg) 1104 1105 return result

1106

1107 1108 -def VerifyNode(what, cluster_name, all_hvparams):

1109 """Verify the status of the local node. 1110 1111 Based on the input L{what} parameter, various checks are done on the 1112 local node. 1113 1114 If the I{filelist} key is present, this list of 1115 files is checksummed and the file/checksum pairs are returned. 1116 1117 If the I{nodelist} key is present, we check that we have 1118 connectivity via ssh with the target nodes (and check the hostname 1119 report). 1120 1121 If the I{node-net-test} key is present, we check that we have 1122 connectivity to the given nodes via both primary IP and, if 1123 applicable, secondary IPs. 1124 1125 @type what: C{dict} 1126 @param what: a dictionary of things to check: 1127 - filelist: list of files for which to compute checksums 1128 - nodelist: list of nodes we should check ssh communication with 1129 - node-net-test: list of nodes we should check node daemon port 1130 connectivity with 1131 - hypervisor: list with hypervisors to run the verify for 1132 @type cluster_name: string 1133 @param cluster_name: the cluster's name 1134 @type all_hvparams: dict of dict of strings 1135 @param all_hvparams: a dictionary mapping hypervisor names to hvparams 1136 @rtype: dict 1137 @return: a dictionary with the same keys as the input dict, and 1138 values representing the result of the checks 1139 1140 """ 1141 result = {} 1142 my_name = netutils.Hostname.GetSysName() 1143 port = netutils.GetDaemonPort(constants.NODED) 1144 vm_capable = my_name not in what.get(constants.NV_NONVMNODES, []) 1145 1146 _VerifyHypervisors(what, vm_capable, result, all_hvparams) 1147 _VerifyHvparams(what, vm_capable, result) 1148 1149 if constants.NV_FILELIST in what: 1150 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, 1151 what[constants.NV_FILELIST])) 1152 result[constants.NV_FILELIST] = \ 1153 dict((vcluster.MakeVirtualPath(key), value) 1154 for (key, value) in fingerprints.items()) 1155 1156 if constants.NV_CLIENT_CERT in what: 1157 result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate() 1158 1159 if constants.NV_SSH_SETUP in what: 1160 node_status_list, key_type = what[constants.NV_SSH_SETUP] 1161 result[constants.NV_SSH_SETUP] = \ 1162 _VerifySshSetup(node_status_list, my_name, key_type) 1163 if constants.NV_SSH_CLUTTER in what: 1164 result[constants.NV_SSH_CLUTTER] = \ 1165 _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name) 1166 1167 if constants.NV_NODELIST in what: 1168 (nodes, bynode, mcs) = what[constants.NV_NODELIST] 1169 1170 # Add nodes from other groups (different for each node) 1171 try: 1172 nodes.extend(bynode[my_name]) 1173 except KeyError: 1174 pass 1175 1176 # Use a random order 1177 random.shuffle(nodes) 1178 1179 # Try to contact all nodes 1180 val = {} 1181 ssh_port_map = ssconf.SimpleStore().GetSshPortMap() 1182 for node in nodes: 1183 # We only test if master candidates can communicate to other nodes. 1184 # We cannot test if normal nodes cannot communicate with other nodes, 1185 # because the administrator might have installed additional SSH keys, 1186 # over which Ganeti has no power. 1187 if my_name in mcs: 1188 success, message = _GetSshRunner(cluster_name). \ 1189 VerifyNodeHostname(node, ssh_port_map[node]) 1190 if not success: 1191 val[node] = message 1192 1193 result[constants.NV_NODELIST] = val 1194 1195 if constants.NV_NODENETTEST in what: 1196 result[constants.NV_NODENETTEST] = tmp = {} 1197 my_pip = my_sip = None 1198 for name, pip, sip in what[constants.NV_NODENETTEST]: 1199 if name == my_name: 1200 my_pip = pip 1201 my_sip = sip 1202 break 1203 if not my_pip: 1204 tmp[my_name] = ("Can't find my own primary/secondary IP" 1205 " in the node list") 1206 else: 1207 for name, pip, sip in what[constants.NV_NODENETTEST]: 1208 fail = [] 1209 if not netutils.TcpPing(pip, port, source=my_pip): 1210 fail.append("primary") 1211 if sip != pip: 1212 if not netutils.TcpPing(sip, port, source=my_sip): 1213 fail.append("secondary") 1214 if fail: 1215 tmp[name] = ("failure using the %s interface(s)" % 1216 " and ".join(fail)) 1217 1218 if constants.NV_MASTERIP in what: 1219 # FIXME: add checks on incoming data structures (here and in the 1220 # rest of the function) 1221 master_name, master_ip = what[constants.NV_MASTERIP] 1222 if master_name == my_name: 1223 source = constants.IP4_ADDRESS_LOCALHOST 1224 else: 1225 source = None 1226 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, 1227 source=source) 1228 1229 if constants.NV_USERSCRIPTS in what: 1230 result[constants.NV_USERSCRIPTS] = \ 1231 [script for script in what[constants.NV_USERSCRIPTS] 1232 if not utils.IsExecutable(script)] 1233 1234 if constants.NV_OOB_PATHS in what: 1235 result[constants.NV_OOB_PATHS] = tmp = [] 1236 for path in what[constants.NV_OOB_PATHS]: 1237 try: 1238 st = os.stat(path) 1239 except OSError, err: 1240 tmp.append("error stating out of band helper: %s" % err) 1241 else: 1242 if stat.S_ISREG(st.st_mode): 1243 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR: 1244 tmp.append(None) 1245 else: 1246 tmp.append("out of band helper %s is not executable" % path) 1247 else: 1248 tmp.append("out of band helper %s is not a file" % path) 1249 1250 if constants.NV_LVLIST in what and vm_capable: 1251 try: 1252 val = GetVolumeList(utils.ListVolumeGroups().keys()) 1253 except RPCFail, err: 1254 val = str(err) 1255 result[constants.NV_LVLIST] = val 1256 1257 _VerifyInstanceList(what, vm_capable, result, all_hvparams) 1258 1259 if constants.NV_VGLIST in what and vm_capable: 1260 result[constants.NV_VGLIST] = utils.ListVolumeGroups() 1261 1262 if constants.NV_PVLIST in what and vm_capable: 1263 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what 1264 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], 1265 filter_allocatable=False, 1266 include_lvs=check_exclusive_pvs) 1267 if check_exclusive_pvs: 1268 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) 1269 for pvi in val: 1270 # Avoid sending useless data on the wire 1271 pvi.lv_list = [] 1272 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) 1273 1274 if constants.NV_VERSION in what: 1275 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, 1276 constants.RELEASE_VERSION) 1277 1278 _VerifyNodeInfo(what, vm_capable, result, all_hvparams) 1279 1280 if constants.NV_DRBDVERSION in what and vm_capable: 1281 try: 1282 drbd_version = DRBD8.GetProcInfo().GetVersionString() 1283 except errors.BlockDeviceError, err: 1284 logging.warning("Can't get DRBD version", exc_info=True) 1285 drbd_version = str(err) 1286 result[constants.NV_DRBDVERSION] = drbd_version 1287 1288 if constants.NV_DRBDLIST in what and vm_capable: 1289 try: 1290 used_minors = drbd.DRBD8.GetUsedDevs() 1291 except errors.BlockDeviceError, err: 1292 logging.warning("Can't get used minors list", exc_info=True) 1293 used_minors = str(err) 1294 result[constants.NV_DRBDLIST] = used_minors 1295 1296 if constants.NV_DRBDHELPER in what and vm_capable: 1297 status = True 1298 try: 1299 payload = drbd.DRBD8.GetUsermodeHelper() 1300 except errors.BlockDeviceError, err: 1301 logging.error("Can't get DRBD usermode helper: %s", str(err)) 1302 status = False 1303 payload = str(err) 1304 result[constants.NV_DRBDHELPER] = (status, payload) 1305 1306 if constants.NV_NODESETUP in what: 1307 result[constants.NV_NODESETUP] = tmpr = [] 1308 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"): 1309 tmpr.append("The sysfs filesytem doesn't seem to be mounted" 1310 " under /sys, missing required directories /sys/block" 1311 " and /sys/class/net") 1312 if (not os.path.isdir("/proc/sys") or 1313 not os.path.isfile("/proc/sysrq-trigger")): 1314 tmpr.append("The procfs filesystem doesn't seem to be mounted" 1315 " under /proc, missing required directory /proc/sys and" 1316 " the file /proc/sysrq-trigger") 1317 1318 if constants.NV_TIME in what: 1319 result[constants.NV_TIME] = utils.SplitTime(time.time()) 1320 1321 if constants.NV_OSLIST in what and vm_capable: 1322 result[constants.NV_OSLIST] = DiagnoseOS() 1323 1324 if constants.NV_BRIDGES in what and vm_capable: 1325 result[constants.NV_BRIDGES] = [bridge 1326 for bridge in what[constants.NV_BRIDGES] 1327 if not utils.BridgeExists(bridge)] 1328 1329 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name: 1330 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 1331 filestorage.ComputeWrongFileStoragePaths() 1332 1333 if what.get(constants.NV_FILE_STORAGE_PATH): 1334 pathresult = filestorage.CheckFileStoragePath( 1335 what[constants.NV_FILE_STORAGE_PATH]) 1336 if pathresult: 1337 result[constants.NV_FILE_STORAGE_PATH] = pathresult 1338 1339 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH): 1340 pathresult = filestorage.CheckFileStoragePath( 1341 what[constants.NV_SHARED_FILE_STORAGE_PATH]) 1342 if pathresult: 1343 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult 1344 1345 return result

1346

1347 1348 -def GetCryptoTokens(token_requests):

1349 """Perform actions on the node's cryptographic tokens. 1350 1351 Token types can be 'ssl' or 'ssh'. So far only some actions are implemented 1352 for 'ssl'. Action 'get' returns the digest of the public client ssl 1353 certificate. Action 'create' creates a new client certificate and private key 1354 and also returns the digest of the certificate. The third parameter of a 1355 token request are optional parameters for the actions, so far only the 1356 filename is supported. 1357 1358 @type token_requests: list of tuples of (string, string, dict), where the 1359 first string is in constants.CRYPTO_TYPES, the second in 1360 constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string 1361 to string. 1362 @param token_requests: list of requests of cryptographic tokens and actions 1363 to perform on them. The actions come with a dictionary of options. 1364 @rtype: list of tuples (string, string) 1365 @return: list of tuples of the token type and the public crypto token 1366 1367 """ 1368 tokens = [] 1369 for (token_type, action, _) in token_requests: 1370 if token_type not in constants.CRYPTO_TYPES: 1371 raise errors.ProgrammerError("Token type '%s' not supported." % 1372 token_type) 1373 if action not in constants.CRYPTO_ACTIONS: 1374 raise errors.ProgrammerError("Action '%s' is not supported." % 1375 action) 1376 if token_type == constants.CRYPTO_TYPE_SSL_DIGEST: 1377 tokens.append((token_type, 1378 utils.GetCertificateDigest())) 1379 return tokens

1380

1381 1382 -def EnsureDaemon(daemon_name, run):

1383 """Ensures the given daemon is running or stopped. 1384 1385 @type daemon_name: string 1386 @param daemon_name: name of the daemon (e.g., constants.KVMD) 1387 1388 @type run: bool 1389 @param run: whether to start or stop the daemon 1390 1391 @rtype: bool 1392 @return: 'True' if daemon successfully started/stopped, 1393 'False' otherwise 1394 1395 """ 1396 allowed_daemons = [constants.KVMD] 1397 1398 if daemon_name not in allowed_daemons: 1399 fn = lambda _: False 1400 elif run: 1401 fn = utils.EnsureDaemon 1402 else: 1403 fn = utils.StopDaemon 1404 1405 return fn(daemon_name)

1406

1407 1408 -def _InitSshUpdateData(data, noded_cert_file, ssconf_store):

1409 (_, noded_cert) = \ 1410 utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file)) 1411 data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert 1412 1413 cluster_name = ssconf_store.GetClusterName() 1414 data[constants.SSHS_CLUSTER_NAME] = cluster_name

1415

1416 1417 -def AddNodeSshKey(node_uuid, node_name, 1418 potential_master_candidates, 1419 to_authorized_keys=False, 1420 to_public_keys=False, 1421 get_public_keys=False, 1422 pub_key_file=pathutils.SSH_PUB_KEYS, 1423 ssconf_store=None, 1424 noded_cert_file=pathutils.NODED_CERT_FILE, 1425 run_cmd_fn=ssh.RunSshCmdWithStdin):

1426 """Distributes a node's public SSH key across the cluster. 1427 1428 Note that this function should only be executed on the master node, which 1429 then will copy the new node's key to all nodes in the cluster via SSH. 1430 1431 Also note: at least one of the flags C{to_authorized_keys}, 1432 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for 1433 the function to actually perform any actions. 1434 1435 @type node_uuid: str 1436 @param node_uuid: the UUID of the node whose key is added 1437 @type node_name: str 1438 @param node_name: the name of the node whose key is added 1439 @type potential_master_candidates: list of str 1440 @param potential_master_candidates: list of node names of potential master 1441 candidates; this should match the list of uuids in the public key file 1442 @type to_authorized_keys: boolean 1443 @param to_authorized_keys: whether the key should be added to the 1444 C{authorized_keys} file of all nodes 1445 @type to_public_keys: boolean 1446 @param to_public_keys: whether the keys should be added to the public key file 1447 @type get_public_keys: boolean 1448 @param get_public_keys: whether the node should add the clusters' public keys 1449 to its {ganeti_pub_keys} file 1450 1451 """ 1452 node_list = [SshAddNodeInfo(name=node_name, uuid=node_uuid, 1453 to_authorized_keys=to_authorized_keys, 1454 to_public_keys=to_public_keys, 1455 get_public_keys=get_public_keys)] 1456 return AddNodeSshKeyBulk(node_list, 1457 potential_master_candidates, 1458 pub_key_file=pub_key_file, 1459 ssconf_store=ssconf_store, 1460 noded_cert_file=noded_cert_file, 1461 run_cmd_fn=run_cmd_fn)

1462 1463 1464 # Node info named tuple specifically for the use with AddNodeSshKeyBulk 1465 SshAddNodeInfo = collections.namedtuple( 1466 "SshAddNodeInfo", 1467 ["uuid", 1468 "name", 1469 "to_authorized_keys", 1470 "to_public_keys", 1471 "get_public_keys"])

1472 1473 1474 -def AddNodeSshKeyBulk(node_list, 1475 potential_master_candidates, 1476 pub_key_file=pathutils.SSH_PUB_KEYS, 1477 ssconf_store=None, 1478 noded_cert_file=pathutils.NODED_CERT_FILE, 1479 run_cmd_fn=ssh.RunSshCmdWithStdin):

1480 """Distributes a node's public SSH key across the cluster. 1481 1482 Note that this function should only be executed on the master node, which 1483 then will copy the new node's key to all nodes in the cluster via SSH. 1484 1485 Also note: at least one of the flags C{to_authorized_keys}, 1486 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for 1487 the function to actually perform any actions. 1488 1489 @type node_list: list of SshAddNodeInfo tuples 1490 @param node_list: list of tuples containing the necessary node information for 1491 adding their keys 1492 @type potential_master_candidates: list of str 1493 @param potential_master_candidates: list of node names of potential master 1494 candidates; this should match the list of uuids in the public key file 1495 1496 """ 1497 # whether there are any keys to be added or retrieved at all 1498 to_authorized_keys = any([node_info.to_authorized_keys for node_info in 1499 node_list]) 1500 to_public_keys = any([node_info.to_public_keys for node_info in 1501 node_list]) 1502 1503 if not ssconf_store: 1504 ssconf_store = ssconf.SimpleStore() 1505 1506 for node_info in node_list: 1507 # replacement not necessary for keys that are not supposed to be in the 1508 # list of public keys 1509 if not node_info.to_public_keys: 1510 continue 1511 # Check and fix sanity of key file 1512 keys_by_name = ssh.QueryPubKeyFile([node_info.name], key_file=pub_key_file) 1513 keys_by_uuid = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file) 1514 1515 if (not keys_by_name or node_info.name not in keys_by_name) \ 1516 and (not keys_by_uuid or node_info.uuid not in keys_by_uuid): 1517 raise errors.SshUpdateError( 1518 "No keys found for the new node '%s' (UUID %s) in the list of public" 1519 " SSH keys, neither for the name or the UUID" % 1520 (node_info.name, node_info.uuid)) 1521 else: 1522 if node_info.name in keys_by_name: 1523 # Replace the name by UUID in the file as the name should only be used 1524 # temporarily 1525 ssh.ReplaceNameByUuid(node_info.uuid, node_info.name, 1526 error_fn=errors.SshUpdateError, 1527 key_file=pub_key_file) 1528 1529 # Retrieve updated map of UUIDs to keys 1530 keys_by_uuid = ssh.QueryPubKeyFile( 1531 [node_info.uuid for node_info in node_list], key_file=pub_key_file) 1532 1533 # Update the master node's key files 1534 (auth_key_file, _) = \ 1535 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1536 for node_info in node_list: 1537 if node_info.to_authorized_keys: 1538 ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_info.uuid]) 1539 1540 base_data = {} 1541 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1542 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1543 1544 ssh_port_map = ssconf_store.GetSshPortMap() 1545 1546 # Update the target nodes themselves 1547 for node_info in node_list: 1548 logging.debug("Updating SSH key files of target node '%s'.", node_info.name) 1549 if node_info.get_public_keys: 1550 node_data = {} 1551 _InitSshUpdateData(node_data, noded_cert_file, ssconf_store) 1552 all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file) 1553 node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1554 (constants.SSHS_OVERRIDE, all_keys) 1555 1556 try: 1557 utils.RetryByNumberOfTimes( 1558 constants.SSHS_MAX_RETRIES, 1559 errors.SshUpdateError, 1560 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE, 1561 ssh_port_map.get(node_info.name), node_data, 1562 debug=False, verbose=False, use_cluster_key=False, 1563 ask_key=False, strict_host_check=False) 1564 except errors.SshUpdateError as e: 1565 # Clean up the master's public key file if adding key fails 1566 if node_info.to_public_keys: 1567 ssh.RemovePublicKey(node_info.uuid) 1568 raise e 1569 1570 # Update all nodes except master and the target nodes 1571 keys_by_uuid_auth = ssh.QueryPubKeyFile( 1572 [node_info.uuid for node_info in node_list 1573 if node_info.to_authorized_keys], 1574 key_file=pub_key_file) 1575 if to_authorized_keys: 1576 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1577 (constants.SSHS_ADD, keys_by_uuid_auth) 1578 1579 pot_mc_data = base_data.copy() 1580 keys_by_uuid_pub = ssh.QueryPubKeyFile( 1581 [node_info.uuid for node_info in node_list 1582 if node_info.to_public_keys], 1583 key_file=pub_key_file) 1584 if to_public_keys: 1585 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1586 (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid_pub) 1587 1588 all_nodes = ssconf_store.GetNodeList() 1589 master_node = ssconf_store.GetMasterNode() 1590 online_nodes = ssconf_store.GetOnlineNodeList() 1591 1592 node_errors = [] 1593 for node in all_nodes: 1594 if node == master_node: 1595 logging.debug("Skipping master node '%s'.", master_node) 1596 continue 1597 if node not in online_nodes: 1598 logging.debug("Skipping offline node '%s'.", node) 1599 continue 1600 if node in potential_master_candidates: 1601 logging.debug("Updating SSH key files of node '%s'.", node) 1602 try: 1603 utils.RetryByNumberOfTimes( 1604 constants.SSHS_MAX_RETRIES, 1605 errors.SshUpdateError, 1606 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1607 ssh_port_map.get(node), pot_mc_data, 1608 debug=False, verbose=False, use_cluster_key=False, 1609 ask_key=False, strict_host_check=False) 1610 except errors.SshUpdateError as last_exception: 1611 error_msg = ("When adding the key of node '%s', updating SSH key" 1612 " files of node '%s' failed after %s retries." 1613 " Not trying again. Last error was: %s." % 1614 (node, node_info.name, constants.SSHS_MAX_RETRIES, 1615 last_exception)) 1616 node_errors.append((node, error_msg)) 1617 # We only log the error and don't throw an exception, because 1618 # one unreachable node shall not abort the entire procedure. 1619 logging.error(error_msg) 1620 1621 else: 1622 if to_authorized_keys: 1623 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, 1624 ssh_port_map.get(node), base_data, 1625 debug=False, verbose=False, use_cluster_key=False, 1626 ask_key=False, strict_host_check=False) 1627 1628 return node_errors

1629

1630 1631 -def RemoveNodeSshKey(node_uuid, node_name, 1632 master_candidate_uuids, 1633 potential_master_candidates, 1634 master_uuid=None, 1635 keys_to_remove=None, 1636 from_authorized_keys=False, 1637 from_public_keys=False, 1638 clear_authorized_keys=False, 1639 clear_public_keys=False, 1640 pub_key_file=pathutils.SSH_PUB_KEYS, 1641 ssconf_store=None, 1642 noded_cert_file=pathutils.NODED_CERT_FILE, 1643 readd=False, 1644 run_cmd_fn=ssh.RunSshCmdWithStdin):

1645 """Removes the node's SSH keys from the key files and distributes those. 1646 1647 Note that at least one of the flags C{from_authorized_keys}, 1648 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1649 has to be set to C{True} for the function to perform any action at all. 1650 Not doing so will trigger an assertion in the function. 1651 1652 @type node_uuid: str 1653 @param node_uuid: UUID of the node whose key is removed 1654 @type node_name: str 1655 @param node_name: name of the node whose key is remove 1656 @type master_candidate_uuids: list of str 1657 @param master_candidate_uuids: list of UUIDs of the current master candidates 1658 @type potential_master_candidates: list of str 1659 @param potential_master_candidates: list of names of potential master 1660 candidates 1661 @type keys_to_remove: dict of str to list of str 1662 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1663 to be removed. This list is supposed to be used only if the keys are not 1664 in the public keys file. This is for example the case when removing a 1665 master node's key. 1666 @type from_authorized_keys: boolean 1667 @param from_authorized_keys: whether or not the key should be removed 1668 from the C{authorized_keys} file 1669 @type from_public_keys: boolean 1670 @param from_public_keys: whether or not the key should be remove from 1671 the C{ganeti_pub_keys} file 1672 @type clear_authorized_keys: boolean 1673 @param clear_authorized_keys: whether or not the C{authorized_keys} file 1674 should be cleared on the node whose keys are removed 1675 @type clear_public_keys: boolean 1676 @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file 1677 @type readd: boolean 1678 @param readd: whether this is called during a readd operation. 1679 @rtype: list of string 1680 @returns: list of feedback messages 1681 1682 """ 1683 node_list = [SshRemoveNodeInfo(uuid=node_uuid, 1684 name=node_name, 1685 from_authorized_keys=from_authorized_keys, 1686 from_public_keys=from_public_keys, 1687 clear_authorized_keys=clear_authorized_keys, 1688 clear_public_keys=clear_public_keys)] 1689 return RemoveNodeSshKeyBulk(node_list, 1690 master_candidate_uuids, 1691 potential_master_candidates, 1692 master_uuid=master_uuid, 1693 keys_to_remove=keys_to_remove, 1694 pub_key_file=pub_key_file, 1695 ssconf_store=ssconf_store, 1696 noded_cert_file=noded_cert_file, 1697 readd=readd, 1698 run_cmd_fn=run_cmd_fn)

1699 1700 1701 # Node info named tuple specifically for the use with RemoveNodeSshKeyBulk 1702 SshRemoveNodeInfo = collections.namedtuple( 1703 "SshRemoveNodeInfo", 1704 ["uuid", 1705 "name", 1706 "from_authorized_keys", 1707 "from_public_keys", 1708 "clear_authorized_keys", 1709 "clear_public_keys"])

1710 1711 1712 -def RemoveNodeSshKeyBulk(node_list, 1713 master_candidate_uuids, 1714 potential_master_candidates, 1715 master_uuid=None, 1716 keys_to_remove=None, 1717 pub_key_file=pathutils.SSH_PUB_KEYS, 1718 ssconf_store=None, 1719 noded_cert_file=pathutils.NODED_CERT_FILE, 1720 readd=False, 1721 run_cmd_fn=ssh.RunSshCmdWithStdin):

1722 """Removes the node's SSH keys from the key files and distributes those. 1723 1724 Note that at least one of the flags C{from_authorized_keys}, 1725 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1726 of at least one node has to be set to C{True} for the function to perform any 1727 action at all. Not doing so will trigger an assertion in the function. 1728 1729 @type node_list: list of C{SshRemoveNodeInfo}. 1730 @param node_list: list of information about nodes whose keys are being removed 1731 @type master_candidate_uuids: list of str 1732 @param master_candidate_uuids: list of UUIDs of the current master candidates 1733 @type potential_master_candidates: list of str 1734 @param potential_master_candidates: list of names of potential master 1735 candidates 1736 @type keys_to_remove: dict of str to list of str 1737 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1738 to be removed. This list is supposed to be used only if the keys are not 1739 in the public keys file. This is for example the case when removing a 1740 master node's key. 1741 @type readd: boolean 1742 @param readd: whether this is called during a readd operation. 1743 @rtype: list of string 1744 @returns: list of feedback messages 1745 1746 """ 1747 # Non-disruptive error messages, list of (node, msg) pairs 1748 result_msgs = [] 1749 1750 # whether there are any keys to be added or retrieved at all 1751 from_authorized_keys = any([node_info.from_authorized_keys for node_info in 1752 node_list]) 1753 from_public_keys = any([node_info.from_public_keys for node_info in 1754 node_list]) 1755 clear_authorized_keys = any([node_info.clear_authorized_keys for node_info in 1756 node_list]) 1757 clear_public_keys = any([node_info.clear_public_keys for node_info in 1758 node_list]) 1759 1760 # Make sure at least one of these flags is true. 1761 if not (from_authorized_keys or from_public_keys or clear_authorized_keys 1762 or clear_public_keys): 1763 raise errors.SshUpdateError("No removal from any key file was requested.") 1764 1765 if not ssconf_store: 1766 ssconf_store = ssconf.SimpleStore() 1767 1768 master_node = ssconf_store.GetMasterNode() 1769 ssh_port_map = ssconf_store.GetSshPortMap() 1770 1771 all_keys_to_remove = {} 1772 if from_authorized_keys or from_public_keys: 1773 for node_info in node_list: 1774 # Skip nodes that don't actually need any keys to be removed. 1775 if not (node_info.from_authorized_keys or node_info.from_public_keys): 1776 continue 1777 if node_info.name == master_node and not keys_to_remove: 1778 raise errors.SshUpdateError("Cannot remove the master node's keys.") 1779 if keys_to_remove: 1780 keys = keys_to_remove 1781 else: 1782 keys = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file) 1783 if (not keys or node_info.uuid not in keys) and not readd: 1784 raise errors.SshUpdateError("Node '%s' not found in the list of" 1785 " public SSH keys. It seems someone" 1786 " tries to remove a key from outside" 1787 " the cluster!" % node_info.uuid) 1788 # During an upgrade all nodes have the master key. In this case we 1789 # should not remove it to avoid accidentally shutting down cluster 1790 # SSH communication 1791 master_keys = None 1792 if master_uuid: 1793 master_keys = ssh.QueryPubKeyFile([master_uuid], 1794 key_file=pub_key_file) 1795 for master_key in master_keys: 1796 if master_key in keys[node_info.uuid]: 1797 keys[node_info.uuid].remove(master_key) 1798 1799 all_keys_to_remove.update(keys) 1800 1801 if all_keys_to_remove: 1802 base_data = {} 1803 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1804 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1805 1806 if from_authorized_keys: 1807 # UUIDs of nodes that are supposed to be removed from the 1808 # authorized_keys files. 1809 nodes_remove_from_authorized_keys = [ 1810 node_info.uuid for node_info in node_list 1811 if node_info.from_authorized_keys] 1812 keys_to_remove_from_authorized_keys = dict([ 1813 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1814 if uuid in nodes_remove_from_authorized_keys]) 1815 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1816 (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys) 1817 (auth_key_file, _) = \ 1818 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, 1819 dircheck=False) 1820 1821 for uuid in nodes_remove_from_authorized_keys: 1822 ssh.RemoveAuthorizedKeys(auth_key_file, 1823 keys_to_remove_from_authorized_keys[uuid]) 1824 1825 pot_mc_data = base_data.copy() 1826 1827 if from_public_keys: 1828 nodes_remove_from_public_keys = [ 1829 node_info.uuid for node_info in node_list 1830 if node_info.from_public_keys] 1831 keys_to_remove_from_public_keys = dict([ 1832 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1833 if uuid in nodes_remove_from_public_keys]) 1834 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1835 (constants.SSHS_REMOVE, keys_to_remove_from_public_keys) 1836 1837 all_nodes = ssconf_store.GetNodeList() 1838 online_nodes = ssconf_store.GetOnlineNodeList() 1839 all_nodes_to_remove = [node_info.name for node_info in node_list] 1840 logging.debug("Removing keys of nodes '%s' from all nodes but itself and" 1841 " master.", ", ".join(all_nodes_to_remove)) 1842 for node in all_nodes: 1843 if node == master_node: 1844 logging.debug("Skipping master node '%s'.", master_node) 1845 continue 1846 if node not in online_nodes: 1847 logging.debug("Skipping offline node '%s'.", node) 1848 continue 1849 if node in all_nodes_to_remove: 1850 logging.debug("Skipping node whose key is removed itself '%s'.", node) 1851 continue 1852 ssh_port = ssh_port_map.get(node) 1853 if not ssh_port: 1854 raise errors.OpExecError("No SSH port information available for" 1855 " node '%s', map: %s." % 1856 (node, ssh_port_map)) 1857 error_msg_final = ("When removing the key of node '%s', updating the" 1858 " SSH key files of node '%s' failed. Last error" 1859 " was: %s.") 1860 if node in potential_master_candidates: 1861 logging.debug("Updating key setup of potential master candidate node" 1862 " %s.", node) 1863 try: 1864 utils.RetryByNumberOfTimes( 1865 constants.SSHS_MAX_RETRIES, 1866 errors.SshUpdateError, 1867 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1868 ssh_port, pot_mc_data, 1869 debug=False, verbose=False, use_cluster_key=False, 1870 ask_key=False, strict_host_check=False) 1871 except errors.SshUpdateError as last_exception: 1872 error_msg = error_msg_final % ( 1873 node_info.name, node, last_exception) 1874 result_msgs.append((node, error_msg)) 1875 logging.error(error_msg) 1876 1877 else: 1878 if from_authorized_keys: 1879 logging.debug("Updating key setup of normal node %s.", node) 1880 try: 1881 utils.RetryByNumberOfTimes( 1882 constants.SSHS_MAX_RETRIES, 1883 errors.SshUpdateError, 1884 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1885 ssh_port, base_data, 1886 debug=False, verbose=False, use_cluster_key=False, 1887 ask_key=False, strict_host_check=False) 1888 except errors.SshUpdateError as last_exception: 1889 error_msg = error_msg_final % ( 1890 node_info.name, node, last_exception) 1891 result_msgs.append((node, error_msg)) 1892 logging.error(error_msg) 1893 1894 for node_info in node_list: 1895 if node_info.clear_authorized_keys or node_info.from_public_keys or \ 1896 node_info.clear_public_keys: 1897 data = {} 1898 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1899 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1900 ssh_port = ssh_port_map.get(node_info.name) 1901 if not ssh_port: 1902 raise errors.OpExecError("No SSH port information available for" 1903 " node '%s', which is leaving the cluster.") 1904 1905 if node_info.clear_authorized_keys: 1906 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore, 1907 # we have to specify exactly which keys to clear to leave keys untouched 1908 # that were not added by Ganeti. 1909 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids 1910 if uuid != node_info.uuid] 1911 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, 1912 key_file=pub_key_file) 1913 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1914 (constants.SSHS_REMOVE, candidate_keys) 1915 1916 if node_info.clear_public_keys: 1917 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1918 (constants.SSHS_CLEAR, {}) 1919 elif node_info.from_public_keys: 1920 # Since clearing the public keys subsumes removing just a single key, 1921 # we only do it if clear_public_keys is 'False'. 1922 1923 if all_keys_to_remove: 1924 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1925 (constants.SSHS_REMOVE, all_keys_to_remove) 1926 1927 # If we have no changes to any keyfile, just return 1928 if not (constants.SSHS_SSH_PUBLIC_KEYS in data or 1929 constants.SSHS_SSH_AUTHORIZED_KEYS in data): 1930 return 1931 1932 logging.debug("Updating SSH key setup of target node '%s'.", 1933 node_info.name) 1934 try: 1935 utils.RetryByNumberOfTimes( 1936 constants.SSHS_MAX_RETRIES, 1937 errors.SshUpdateError, 1938 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE, 1939 ssh_port, data, 1940 debug=False, verbose=False, use_cluster_key=False, 1941 ask_key=False, strict_host_check=False) 1942 except errors.SshUpdateError as last_exception: 1943 result_msgs.append( 1944 (node_info.name, 1945 ("Removing SSH keys from node '%s' failed." 1946 " This can happen when the node is already unreachable." 1947 " Error: %s" % (node_info.name, last_exception)))) 1948 1949 if all_keys_to_remove and from_public_keys: 1950 for node_uuid in nodes_remove_from_public_keys: 1951 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 1952 1953 return result_msgs

1954

1955 1956 -def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, ssh_key_type, 1957 ssh_key_bits, pub_key_file=pathutils.SSH_PUB_KEYS, 1958 ssconf_store=None, 1959 noded_cert_file=pathutils.NODED_CERT_FILE, 1960 run_cmd_fn=ssh.RunSshCmdWithStdin, 1961 suffix=""):

1962 """Generates the root SSH key pair on the node. 1963 1964 @type node_uuid: str 1965 @param node_uuid: UUID of the node whose key is removed 1966 @type node_name: str 1967 @param node_name: name of the node whose key is remove 1968 @type ssh_port_map: dict of str to int 1969 @param ssh_port_map: mapping of node names to their SSH port 1970 @type ssh_key_type: One of L{constants.SSHK_ALL} 1971 @param ssh_key_type: the type of SSH key to be generated 1972 @type ssh_key_bits: int 1973 @param ssh_key_bits: the length of the key to be generated 1974 1975 """ 1976 if not ssconf_store: 1977 ssconf_store = ssconf.SimpleStore() 1978 1979 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1980 if not keys_by_uuid or node_uuid not in keys_by_uuid: 1981 raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to" 1982 " be regenerated is not registered in the" 1983 " public keys file." % (node_name, node_uuid)) 1984 1985 data = {} 1986 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1987 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1988 data[constants.SSHS_GENERATE] = (ssh_key_type, ssh_key_bits, suffix) 1989 1990 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, 1991 ssh_port_map.get(node_name), data, 1992 debug=False, verbose=False, use_cluster_key=False, 1993 ask_key=False, strict_host_check=False)

1994

1995 1996 -def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):

1997 master_node_uuids = [node_uuid for (node_uuid, node_name) 1998 in node_uuid_name_map 1999 if node_name == master_node_name] 2000 if len(master_node_uuids) != 1: 2001 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 2002 " name: '%s', Master UUID: '%s'" % 2003 (master_node_name, master_node_uuids)) 2004 return master_node_uuids[0]

2005

2006 2007 -def _GetOldMasterKeys(master_node_uuid, pub_key_file):

2008 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid], 2009 key_file=pub_key_file) 2010 if not old_master_keys_by_uuid: 2011 raise errors.SshUpdateError("No public key of the master node (UUID '%s')" 2012 " found, not generating a new key." 2013 % master_node_uuid) 2014 return old_master_keys_by_uuid

2015

2016 2017 -def _GetNewMasterKey(root_keyfiles, master_node_uuid):

2018 new_master_keys = [] 2019 for (_, (_, public_key_file)) in root_keyfiles.items(): 2020 public_key_dir = os.path.dirname(public_key_file) 2021 public_key_file_tmp_filename = \ 2022 os.path.splitext(os.path.basename(public_key_file))[0] \ 2023 + constants.SSHS_MASTER_SUFFIX + ".pub" 2024 public_key_path_tmp = os.path.join(public_key_dir, 2025 public_key_file_tmp_filename) 2026 if os.path.exists(public_key_path_tmp): 2027 # for some key types, there might not be any keys 2028 key = utils.ReadFile(public_key_path_tmp) 2029 new_master_keys.append(key) 2030 if not new_master_keys: 2031 raise errors.SshUpdateError("Cannot find any type of temporary SSH key.") 2032 return {master_node_uuid: new_master_keys}

2033

2034 2035 -def _ReplaceMasterKeyOnMaster(root_keyfiles):

2036 number_of_moves = 0 2037 for (_, (private_key_file, public_key_file)) in root_keyfiles.items(): 2038 key_dir = os.path.dirname(public_key_file) 2039 private_key_file_tmp = \ 2040 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX 2041 public_key_file_tmp = private_key_file_tmp + ".pub" 2042 private_key_path_tmp = os.path.join(key_dir, 2043 private_key_file_tmp) 2044 public_key_path_tmp = os.path.join(key_dir, 2045 public_key_file_tmp) 2046 if os.path.exists(public_key_file): 2047 utils.CreateBackup(public_key_file) 2048 utils.RemoveFile(public_key_file) 2049 if os.path.exists(private_key_file): 2050 utils.CreateBackup(private_key_file) 2051 utils.RemoveFile(private_key_file) 2052 if os.path.exists(public_key_path_tmp) and \ 2053 os.path.exists(private_key_path_tmp): 2054 # for some key types, there might not be any keys 2055 shutil.move(public_key_path_tmp, public_key_file) 2056 shutil.move(private_key_path_tmp, private_key_file) 2057 number_of_moves += 1 2058 if not number_of_moves: 2059 raise errors.SshUpdateError("Could not move at least one master SSH key.")

2060

2061 2062 -def RenewSshKeys(node_uuids, node_names, master_candidate_uuids, 2063 potential_master_candidates, old_key_type, new_key_type, 2064 new_key_bits, 2065 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS, 2066 ssconf_store=None, 2067 noded_cert_file=pathutils.NODED_CERT_FILE, 2068 run_cmd_fn=ssh.RunSshCmdWithStdin):

2069 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys. 2070 2071 @type node_uuids: list of str 2072 @param node_uuids: list of node UUIDs whose keys should be renewed 2073 @type node_names: list of str 2074 @param node_names: list of node names whose keys should be removed. This list 2075 should match the C{node_uuids} parameter 2076 @type master_candidate_uuids: list of str 2077 @param master_candidate_uuids: list of UUIDs of master candidates or 2078 master node 2079 @type old_key_type: One of L{constants.SSHK_ALL} 2080 @param old_key_type: the type of SSH key already present on nodes 2081 @type new_key_type: One of L{constants.SSHK_ALL} 2082 @param new_key_type: the type of SSH key to be generated 2083 @type new_key_bits: int 2084 @param new_key_bits: the length of the key to be generated 2085 @type ganeti_pub_keys_file: str 2086 @param ganeti_pub_keys_file: file path of the the public key file 2087 @type noded_cert_file: str 2088 @param noded_cert_file: path of the noded SSL certificate file 2089 @type run_cmd_fn: function 2090 @param run_cmd_fn: function to run commands on remote nodes via SSH 2091 @raises ProgrammerError: if node_uuids and node_names don't match; 2092 SshUpdateError if a node's key is missing from the public key file, 2093 if a node's new SSH key could not be fetched from it, if there is 2094 none or more than one entry in the public key list for the master 2095 node. 2096 2097 """ 2098 if not ssconf_store: 2099 ssconf_store = ssconf.SimpleStore() 2100 cluster_name = ssconf_store.GetClusterName() 2101 2102 if not len(node_uuids) == len(node_names): 2103 raise errors.ProgrammerError("List of nodes UUIDs and node names" 2104 " does not match in length.") 2105 2106 (_, root_keyfiles) = \ 2107 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 2108 (_, old_pub_keyfile) = root_keyfiles[old_key_type] 2109 (_, new_pub_keyfile) = root_keyfiles[new_key_type] 2110 old_master_key = utils.ReadFile(old_pub_keyfile) 2111 2112 node_uuid_name_map = zip(node_uuids, node_names) 2113 2114 master_node_name = ssconf_store.GetMasterNode() 2115 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name) 2116 ssh_port_map = ssconf_store.GetSshPortMap() 2117 # List of all node errors that happened, but which did not abort the 2118 # procedure as a whole. It is important that this is a list to have a 2119 # somewhat chronological history of events. 2120 all_node_errors = [] 2121 2122 # process non-master nodes 2123 2124 # keys to add in bulk at the end 2125 node_keys_to_add = [] 2126 2127 # list of all nodes 2128 node_list = [] 2129 2130 # list of keys to be removed before generating new keys 2131 node_info_to_remove = [] 2132 2133 for node_uuid, node_name in node_uuid_name_map: 2134 if node_name == master_node_name: 2135 continue 2136 master_candidate = node_uuid in master_candidate_uuids 2137 potential_master_candidate = node_name in potential_master_candidates 2138 node_list.append((node_uuid, node_name, master_candidate, 2139 potential_master_candidate)) 2140 2141 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], 2142 key_file=ganeti_pub_keys_file) 2143 if not keys_by_uuid: 2144 raise errors.SshUpdateError("No public key of node %s (UUID %s) found," 2145 " not generating a new key." 2146 % (node_name, node_uuid)) 2147 2148 if master_candidate: 2149 logging.debug("Fetching old SSH key from node '%s'.", node_name) 2150 old_pub_key = ssh.ReadRemoteSshPubKeys(old_pub_keyfile, 2151 node_name, cluster_name, 2152 ssh_port_map[node_name], 2153 False, # ask_key 2154 False) # key_check 2155 if old_pub_key != old_master_key: 2156 # If we are already in a multi-key setup (that is past Ganeti 2.12), 2157 # we can safely remove the old key of the node. Otherwise, we cannot 2158 # remove that node's key, because it is also the master node's key 2159 # and that would terminate all communication from the master to the 2160 # node. 2161 node_info_to_remove.append(SshRemoveNodeInfo( 2162 uuid=node_uuid, 2163 name=node_name, 2164 from_authorized_keys=master_candidate, 2165 from_public_keys=False, 2166 clear_authorized_keys=False, 2167 clear_public_keys=False)) 2168 else: 2169 logging.debug("Old key of node '%s' is the same as the current master" 2170 " key. Not deleting that key on the node.", node_name) 2171 2172 logging.debug("Removing old SSH keys of all master candidates.") 2173 if node_info_to_remove: 2174 node_errors = RemoveNodeSshKeyBulk( 2175 node_info_to_remove, 2176 master_candidate_uuids, 2177 potential_master_candidates, 2178 master_uuid=master_node_uuid) 2179 if node_errors: 2180 all_node_errors = all_node_errors + node_errors 2181 2182 for (node_uuid, node_name, master_candidate, potential_master_candidate) \ 2183 in node_list: 2184 2185 logging.debug("Generating new SSH key for node '%s'.", node_name) 2186 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, new_key_type, 2187 new_key_bits, pub_key_file=ganeti_pub_keys_file, 2188 ssconf_store=ssconf_store, 2189 noded_cert_file=noded_cert_file, 2190 run_cmd_fn=run_cmd_fn) 2191 2192 try: 2193 logging.debug("Fetching newly created SSH key from node '%s'.", node_name) 2194 pub_key = ssh.ReadRemoteSshPubKeys(new_pub_keyfile, 2195 node_name, cluster_name, 2196 ssh_port_map[node_name], 2197 False, # ask_key 2198 False) # key_check 2199 except: 2200 raise errors.SshUpdateError("Could not fetch key of node %s" 2201 " (UUID %s)" % (node_name, node_uuid)) 2202 2203 if potential_master_candidate: 2204 ssh.RemovePublicKey(node_uuid, key_file=ganeti_pub_keys_file) 2205 ssh.AddPublicKey(node_uuid, pub_key, key_file=ganeti_pub_keys_file) 2206 2207 node_info = SshAddNodeInfo(name=node_name, 2208 uuid=node_uuid, 2209 to_authorized_keys=master_candidate, 2210 to_public_keys=potential_master_candidate, 2211 get_public_keys=True) 2212 node_keys_to_add.append(node_info) 2213 2214 node_errors = AddNodeSshKeyBulk( 2215 node_keys_to_add, potential_master_candidates, 2216 pub_key_file=ganeti_pub_keys_file, ssconf_store=ssconf_store, 2217 noded_cert_file=noded_cert_file, 2218 run_cmd_fn=run_cmd_fn) 2219 if node_errors: 2220 all_node_errors = all_node_errors + node_errors 2221 2222 # Renewing the master node's key 2223 2224 # Preserve the old keys for now 2225 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, 2226 ganeti_pub_keys_file) 2227 2228 # Generate a new master key with a suffix, don't touch the old one for now 2229 logging.debug("Generate new ssh key of master.") 2230 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map, 2231 new_key_type, new_key_bits, 2232 pub_key_file=ganeti_pub_keys_file, 2233 ssconf_store=ssconf_store, 2234 noded_cert_file=noded_cert_file, 2235 run_cmd_fn=run_cmd_fn, 2236 suffix=constants.SSHS_MASTER_SUFFIX) 2237 # Read newly created master key 2238 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid) 2239 2240 # Replace master key in the master nodes' public key file 2241 ssh.RemovePublicKey(master_node_uuid, key_file=ganeti_pub_keys_file) 2242 for pub_key in new_master_key_dict[master_node_uuid]: 2243 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=ganeti_pub_keys_file) 2244 2245 # Add new master key to all node's public and authorized keys 2246 logging.debug("Add new master key to all nodes.") 2247 node_errors = AddNodeSshKey( 2248 master_node_uuid, master_node_name, potential_master_candidates, 2249 to_authorized_keys=True, to_public_keys=True, 2250 get_public_keys=False, pub_key_file=ganeti_pub_keys_file, 2251 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, 2252 run_cmd_fn=run_cmd_fn) 2253 if node_errors: 2254 all_node_errors = all_node_errors + node_errors 2255 2256 # Remove the old key file and rename the new key to the non-temporary filename 2257 _ReplaceMasterKeyOnMaster(root_keyfiles) 2258 2259 # Remove old key from authorized keys 2260 (auth_key_file, _) = \ 2261 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 2262 ssh.RemoveAuthorizedKeys(auth_key_file, 2263 old_master_keys_by_uuid[master_node_uuid]) 2264 2265 # Remove the old key from all node's authorized keys file 2266 logging.debug("Remove the old master key from all nodes.") 2267 node_errors = RemoveNodeSshKey( 2268 master_node_uuid, master_node_name, master_candidate_uuids, 2269 potential_master_candidates, 2270 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True, 2271 from_public_keys=False, clear_authorized_keys=False, 2272 clear_public_keys=False) 2273 if node_errors: 2274 all_node_errors = all_node_errors + node_errors 2275 2276 return all_node_errors

2277

2278 2279 -def GetBlockDevSizes(devices):

2280 """Return the size of the given block devices 2281 2282 @type devices: list 2283 @param devices: list of block device nodes to query 2284 @rtype: dict 2285 @return: 2286 dictionary of all block devices under /dev (key). The value is their 2287 size in MiB. 2288 2289 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 2290 2291 """ 2292 DEV_PREFIX = "/dev/" 2293 blockdevs = {} 2294 2295 for devpath in devices: 2296 if not utils.IsBelowDir(DEV_PREFIX, devpath): 2297 continue 2298 2299 try: 2300 st = os.stat(devpath) 2301 except EnvironmentError, err: 2302 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 2303 continue 2304 2305 if stat.S_ISBLK(st.st_mode): 2306 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 2307 if result.failed: 2308 # We don't want to fail, just do not list this device as available 2309 logging.warning("Cannot get size for block device %s", devpath) 2310 continue 2311 2312 size = int(result.stdout) / (1024 * 1024) 2313 blockdevs[devpath] = size 2314 return blockdevs

2315

2316 2317 -def GetVolumeList(vg_names):

2318 """Compute list of logical volumes and their size. 2319 2320 @type vg_names: list 2321 @param vg_names: the volume groups whose LVs we should list, or 2322 empty for all volume groups 2323 @rtype: dict 2324 @return: 2325 dictionary of all partions (key) with value being a tuple of 2326 their size (in MiB), inactive and online status:: 2327 2328 {'xenvg/test1': ('20.06', True, True)} 2329 2330 in case of errors, a string is returned with the error 2331 details. 2332 2333 """ 2334 lvs = {} 2335 sep = "|" 2336 if not vg_names: 2337 vg_names = [] 2338 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2339 "--separator=%s" % sep, 2340 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 2341 if result.failed: 2342 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 2343 2344 for line in result.stdout.splitlines(): 2345 line = line.strip() 2346 match = _LVSLINE_REGEX.match(line) 2347 if not match: 2348 logging.error("Invalid line returned from lvs output: '%s'", line) 2349 continue 2350 vg_name, name, size, attr = match.groups() 2351 inactive = attr[4] == "-" 2352 online = attr[5] == "o" 2353 virtual = attr[0] == "v" 2354 if virtual: 2355 # we don't want to report such volumes as existing, since they 2356 # don't really hold data 2357 continue 2358 lvs[vg_name + "/" + name] = (size, inactive, online) 2359 2360 return lvs

2361

2362 2363 -def ListVolumeGroups():

2364 """List the volume groups and their size. 2365 2366 @rtype: dict 2367 @return: dictionary with keys volume name and values the 2368 size of the volume 2369 2370 """ 2371 return utils.ListVolumeGroups()

2372

2373 2374 -def NodeVolumes():

2375 """List all volumes on this node. 2376 2377 @rtype: list 2378 @return: 2379 A list of dictionaries, each having four keys: 2380 - name: the logical volume name, 2381 - size: the size of the logical volume 2382 - dev: the physical device on which the LV lives 2383 - vg: the volume group to which it belongs 2384 2385 In case of errors, we return an empty list and log the 2386 error. 2387 2388 Note that since a logical volume can live on multiple physical 2389 volumes, the resulting list might include a logical volume 2390 multiple times. 2391 2392 """ 2393 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2394 "--separator=|", 2395 "--options=lv_name,lv_size,devices,vg_name"]) 2396 if result.failed: 2397 _Fail("Failed to list logical volumes, lvs output: %s", 2398 result.output) 2399 2400 def parse_dev(dev): 2401 return dev.split("(")[0]

2402 2403 def handle_dev(dev): 2404 return [parse_dev(x) for x in dev.split(",")] 2405 2406 def map_line(line): 2407 line = [v.strip() for v in line] 2408 return [{"name": line[0], "size": line[1], 2409 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 2410 2411 all_devs = [] 2412 for line in result.stdout.splitlines(): 2413 if line.count("|") >= 3: 2414 all_devs.extend(map_line(line.split("|"))) 2415 else: 2416 logging.warning("Strange line in the output from lvs: '%s'", line) 2417 return all_devs 2418

2419 2420 -def BridgesExist(bridges_list):

2421 """Check if a list of bridges exist on the current node. 2422 2423 @rtype: boolean 2424 @return: C{True} if all of them exist, C{False} otherwise 2425 2426 """ 2427 missing = [] 2428 for bridge in bridges_list: 2429 if not utils.BridgeExists(bridge): 2430 missing.append(bridge) 2431 2432 if missing: 2433 _Fail("Missing bridges %s", utils.CommaJoin(missing))

2434

2435 2436 -def GetInstanceListForHypervisor(hname, hvparams=None, 2437 get_hv_fn=hypervisor.GetHypervisor):

2438 """Provides a list of instances of the given hypervisor. 2439 2440 @type hname: string 2441 @param hname: name of the hypervisor 2442 @type hvparams: dict of strings 2443 @param hvparams: hypervisor parameters for the given hypervisor 2444 @type get_hv_fn: function 2445 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2446 name; optional parameter to increase testability 2447 2448 @rtype: list 2449 @return: a list of all running instances on the current node 2450 - instance1.example.com 2451 - instance2.example.com 2452 2453 """ 2454 try: 2455 return get_hv_fn(hname).ListInstances(hvparams=hvparams) 2456 except errors.HypervisorError, err: 2457 _Fail("Error enumerating instances (hypervisor %s): %s", 2458 hname, err, exc=True)

2459

2460 2461 -def GetInstanceList(hypervisor_list, all_hvparams=None, 2462 get_hv_fn=hypervisor.GetHypervisor):

2463 """Provides a list of instances. 2464 2465 @type hypervisor_list: list 2466 @param hypervisor_list: the list of hypervisors to query information 2467 @type all_hvparams: dict of dict of strings 2468 @param all_hvparams: a dictionary mapping hypervisor types to respective 2469 cluster-wide hypervisor parameters 2470 @type get_hv_fn: function 2471 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2472 name; optional parameter to increase testability 2473 2474 @rtype: list 2475 @return: a list of all running instances on the current node 2476 - instance1.example.com 2477 - instance2.example.com 2478 2479 """ 2480 results = [] 2481 for hname in hypervisor_list: 2482 hvparams = all_hvparams[hname] 2483 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 2484 get_hv_fn=get_hv_fn)) 2485 return results

2486

2487 2488 -def GetInstanceInfo(instance, hname, hvparams=None):

2489 """Gives back the information about an instance as a dictionary. 2490 2491 @type instance: string 2492 @param instance: the instance name 2493 @type hname: string 2494 @param hname: the hypervisor type of the instance 2495 @type hvparams: dict of strings 2496 @param hvparams: the instance's hvparams 2497 2498 @rtype: dict 2499 @return: dictionary with the following keys: 2500 - memory: memory size of instance (int) 2501 - state: state of instance (HvInstanceState) 2502 - time: cpu time of instance (float) 2503 - vcpus: the number of vcpus (int) 2504 2505 """ 2506 output = {} 2507 2508 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 2509 hvparams=hvparams) 2510 if iinfo is not None: 2511 output["memory"] = iinfo[2] 2512 output["vcpus"] = iinfo[3] 2513 output["state"] = iinfo[4] 2514 output["time"] = iinfo[5] 2515 2516 return output

2517

2518 2519 -def GetInstanceMigratable(instance):

2520 """Computes whether an instance can be migrated. 2521 2522 @type instance: L{objects.Instance} 2523 @param instance: object representing the instance to be checked. 2524 2525 @rtype: tuple 2526 @return: tuple of (result, description) where: 2527 - result: whether the instance can be migrated or not 2528 - description: a description of the issue, if relevant 2529 2530 """ 2531 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2532 iname = instance.name 2533 if iname not in hyper.ListInstances(hvparams=instance.hvparams): 2534 _Fail("Instance %s is not running", iname) 2535 2536 for idx in range(len(instance.disks_info)): 2537 link_name = _GetBlockDevSymlinkPath(iname, idx) 2538 if not os.path.islink(link_name): 2539 logging.warning("Instance %s is missing symlink %s for disk %d", 2540 iname, link_name, idx)

2541

2542 2543 -def GetAllInstancesInfo(hypervisor_list, all_hvparams):

2544 """Gather data about all instances. 2545 2546 This is the equivalent of L{GetInstanceInfo}, except that it 2547 computes data for all instances at once, thus being faster if one 2548 needs data about more than one instance. 2549 2550 @type hypervisor_list: list 2551 @param hypervisor_list: list of hypervisors to query for instance data 2552 @type all_hvparams: dict of dict of strings 2553 @param all_hvparams: mapping of hypervisor names to hvparams 2554 2555 @rtype: dict 2556 @return: dictionary of instance: data, with data having the following keys: 2557 - memory: memory size of instance (int) 2558 - state: xen state of instance (string) 2559 - time: cpu time of instance (float) 2560 - vcpus: the number of vcpus 2561 2562 """ 2563 output = {} 2564 for hname in hypervisor_list: 2565 hvparams = all_hvparams[hname] 2566 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 2567 if iinfo: 2568 for name, _, memory, vcpus, state, times in iinfo: 2569 value = { 2570 "memory": memory, 2571 "vcpus": vcpus, 2572 "state": state, 2573 "time": times, 2574 } 2575 if name in output: 2576 # we only check static parameters, like memory and vcpus, 2577 # and not state and time which can change between the 2578 # invocations of the different hypervisors 2579 for key in "memory", "vcpus": 2580 if value[key] != output[name][key]: 2581 _Fail("Instance %s is running twice" 2582 " with different parameters", name) 2583 output[name] = value 2584 2585 return output

2586

2587 2588 -def GetInstanceConsoleInfo(instance_param_dict, 2589 get_hv_fn=hypervisor.GetHypervisor):

2590 """Gather data about the console access of a set of instances of this node. 2591 2592 This function assumes that the caller already knows which instances are on 2593 this node, by calling a function such as L{GetAllInstancesInfo} or 2594 L{GetInstanceList}. 2595 2596 For every instance, a large amount of configuration data needs to be 2597 provided to the hypervisor interface in order to receive the console 2598 information. Whether this could or should be cut down can be discussed. 2599 The information is provided in a dictionary indexed by instance name, 2600 allowing any number of instance queries to be done. 2601 2602 @type instance_param_dict: dict of string to tuple of dictionaries, where the 2603 dictionaries represent: L{objects.Instance}, L{objects.Node}, 2604 L{objects.NodeGroup}, HvParams, BeParams 2605 @param instance_param_dict: mapping of instance name to parameters necessary 2606 for console information retrieval 2607 2608 @rtype: dict 2609 @return: dictionary of instance: data, with data having the following keys: 2610 - instance: instance name 2611 - kind: console kind 2612 - message: used with kind == CONS_MESSAGE, indicates console to be 2613 unavailable, supplies error message 2614 - host: host to connect to 2615 - port: port to use 2616 - user: user for login 2617 - command: the command, broken into parts as an array 2618 - display: unknown, potentially unused? 2619 2620 """ 2621 2622 output = {} 2623 for inst_name in instance_param_dict: 2624 instance = instance_param_dict[inst_name]["instance"] 2625 pnode = instance_param_dict[inst_name]["node"] 2626 group = instance_param_dict[inst_name]["group"] 2627 hvparams = instance_param_dict[inst_name]["hvParams"] 2628 beparams = instance_param_dict[inst_name]["beParams"] 2629 2630 instance = objects.Instance.FromDict(instance) 2631 pnode = objects.Node.FromDict(pnode) 2632 group = objects.NodeGroup.FromDict(group) 2633 2634 h = get_hv_fn(instance.hypervisor) 2635 output[inst_name] = h.GetInstanceConsole(instance, pnode, group, 2636 hvparams, beparams).ToDict() 2637 2638 return output

2639

2640 2641 -def _InstanceLogName(kind, os_name, instance, component):

2642 """Compute the OS log filename for a given instance and operation. 2643 2644 The instance name and os name are passed in as strings since not all 2645 operations have these as part of an instance object. 2646 2647 @type kind: string 2648 @param kind: the operation type (e.g. add, import, etc.) 2649 @type os_name: string 2650 @param os_name: the os name 2651 @type instance: string 2652 @param instance: the name of the instance being imported/added/etc. 2653 @type component: string or None 2654 @param component: the name of the component of the instance being 2655 transferred 2656 2657 """ 2658 # TODO: Use tempfile.mkstemp to create unique filename 2659 if component: 2660 assert "/" not in component 2661 c_msg = "-%s" % component 2662 else: 2663 c_msg = "" 2664 base = ("%s-%s-%s%s-%s.log" % 2665 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 2666 return utils.PathJoin(pathutils.LOG_OS_DIR, base)

2667

2668 2669 -def InstanceOsAdd(instance, reinstall, debug):

2670 """Add an OS to an instance. 2671 2672 @type instance: L{objects.Instance} 2673 @param instance: Instance whose OS is to be installed 2674 @type reinstall: boolean 2675 @param reinstall: whether this is an instance reinstall 2676 @type debug: integer 2677 @param debug: debug level, passed to the OS scripts 2678 @rtype: None 2679 2680 """ 2681 inst_os = OSFromDisk(instance.os) 2682 2683 create_env = OSEnvironment(instance, inst_os, debug) 2684 if reinstall: 2685 create_env["INSTANCE_REINSTALL"] = "1" 2686 2687 logfile = _InstanceLogName("add", instance.os, instance.name, None) 2688 2689 result = utils.RunCmd([inst_os.create_script], env=create_env, 2690 cwd=inst_os.path, output=logfile, reset_env=True) 2691 if result.failed: 2692 logging.error("os create command '%s' returned error: %s, logfile: %s," 2693 " output: %s", result.cmd, result.fail_reason, logfile, 2694 result.output) 2695 lines = [utils.SafeEncode(val) 2696 for val in utils.TailFile(logfile, lines=20)] 2697 _Fail("OS create script failed (%s), last lines in the" 2698 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)

2699

2700 2701 -def RunRenameInstance(instance, old_name, debug):

2702 """Run the OS rename script for an instance. 2703 2704 @type instance: L{objects.Instance} 2705 @param instance: Instance whose OS is to be installed 2706 @type old_name: string 2707 @param old_name: previous instance name 2708 @type debug: integer 2709 @param debug: debug level, passed to the OS scripts 2710 @rtype: boolean 2711 @return: the success of the operation 2712 2713 """ 2714 inst_os = OSFromDisk(instance.os) 2715 2716 rename_env = OSEnvironment(instance, inst_os, debug) 2717 rename_env["OLD_INSTANCE_NAME"] = old_name 2718 2719 logfile = _InstanceLogName("rename", instance.os, 2720 "%s-%s" % (old_name, instance.name), None) 2721 2722 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 2723 cwd=inst_os.path, output=logfile, reset_env=True) 2724 2725 if result.failed: 2726 logging.error("os create command '%s' returned error: %s output: %s", 2727 result.cmd, result.fail_reason, result.output) 2728 lines = [utils.SafeEncode(val) 2729 for val in utils.TailFile(logfile, lines=20)] 2730 _Fail("OS rename script failed (%s), last lines in the" 2731 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)

2732

2733 2734 -def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):

2735 """Returns symlink path for block device. 2736 2737 """ 2738 if _dir is None: 2739 _dir = pathutils.DISK_LINKS_DIR 2740 2741 return utils.PathJoin(_dir, 2742 ("%s%s%s" % 2743 (instance_name, constants.DISK_SEPARATOR, idx)))

2744

2745 2746 -def _SymlinkBlockDev(instance_name, device_path, idx):

2747 """Set up symlinks to a instance's block device. 2748 2749 This is an auxiliary function run when an instance is start (on the primary 2750 node) or when an instance is migrated (on the target node). 2751 2752 2753 @param instance_name: the name of the target instance 2754 @param device_path: path of the physical block device, on the node 2755 @param idx: the disk index 2756 @return: absolute path to the disk's symlink 2757 2758 """ 2759 # In case we have only a userspace access URI, device_path is None 2760 if not device_path: 2761 return None 2762 2763 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2764 try: 2765 os.symlink(device_path, link_name) 2766 except OSError, err: 2767 if err.errno == errno.EEXIST: 2768 if (not os.path.islink(link_name) or 2769 os.readlink(link_name) != device_path): 2770 os.remove(link_name) 2771 os.symlink(device_path, link_name) 2772 else: 2773 raise 2774 2775 return link_name

2776

2777 2778 -def _RemoveBlockDevLinks(instance_name, disks):

2779 """Remove the block device symlinks belonging to the given instance. 2780 2781 """ 2782 for idx, _ in enumerate(disks): 2783 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2784 if os.path.islink(link_name): 2785 try: 2786 os.remove(link_name) 2787 except OSError: 2788 logging.exception("Can't remove symlink '%s'", link_name)

2789

2790 2791 -def _CalculateDeviceURI(instance, disk, device):

2792 """Get the URI for the device. 2793 2794 @type instance: L{objects.Instance} 2795 @param instance: the instance which disk belongs to 2796 @type disk: L{objects.Disk} 2797 @param disk: the target disk object 2798 @type device: L{bdev.BlockDev} 2799 @param device: the corresponding BlockDevice 2800 @rtype: string 2801 @return: the device uri if any else None 2802 2803 """ 2804 access_mode = disk.params.get(constants.LDP_ACCESS, 2805 constants.DISK_KERNELSPACE) 2806 if access_mode == constants.DISK_USERSPACE: 2807 # This can raise errors.BlockDeviceError 2808 return device.GetUserspaceAccessUri(instance.hypervisor) 2809 else: 2810 return None

2811

2812 2813 -def _GatherAndLinkBlockDevs(instance):

2814 """Set up an instance's block device(s). 2815 2816 This is run on the primary node at instance startup. The block 2817 devices must be already assembled. 2818 2819 @type instance: L{objects.Instance} 2820 @param instance: the instance whose disks we should assemble 2821 @rtype: list 2822 @return: list of (disk_object, link_name, drive_uri) 2823 2824 """ 2825 block_devices = [] 2826 for idx, disk in enumerate(instance.disks_info): 2827 device = _RecursiveFindBD(disk) 2828 if device is None: 2829 raise errors.BlockDeviceError("Block device '%s' is not set up." % 2830 str(disk)) 2831 device.Open() 2832 try: 2833 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx) 2834 except OSError, e: 2835 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 2836 e.strerror) 2837 uri = _CalculateDeviceURI(instance, disk, device) 2838 2839 block_devices.append((disk, link_name, uri)) 2840 2841 return block_devices

2842

2843 2844 -def _IsInstanceUserDown(instance_info):

2845 return instance_info and \ 2846 "state" in instance_info and \ 2847 hv_base.HvInstanceState.IsShutdown(instance_info["state"])

2848

2849 2850 -def _GetInstanceInfo(instance):

2851 """Helper function L{GetInstanceInfo}""" 2852 return GetInstanceInfo(instance.name, instance.hypervisor, 2853 hvparams=instance.hvparams)

2854

2855 2856 -def StartInstance(instance, startup_paused, reason, store_reason=True):

2857 """Start an instance. 2858 2859 @type instance: L{objects.Instance} 2860 @param instance: the instance object 2861 @type startup_paused: bool 2862 @param instance: pause instance at startup? 2863 @type reason: list of reasons 2864 @param reason: the reason trail for this startup 2865 @type store_reason: boolean 2866 @param store_reason: whether to store the shutdown reason trail on file 2867 @rtype: None 2868 2869 """ 2870 instance_info = _GetInstanceInfo(instance) 2871 2872 if instance_info and not _IsInstanceUserDown(instance_info): 2873 logging.info("Instance '%s' already running, not starting", instance.name) 2874 return 2875 2876 try: 2877 block_devices = _GatherAndLinkBlockDevs(instance) 2878 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2879 hyper.StartInstance(instance, block_devices, startup_paused) 2880 if store_reason: 2881 _StoreInstReasonTrail(instance.name, reason) 2882 except errors.BlockDeviceError, err: 2883 _Fail("Block device error: %s", err, exc=True) 2884 except errors.HypervisorError, err: 2885 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2886 _Fail("Hypervisor error: %s", err, exc=True)

2887

2888 2889 -def InstanceShutdown(instance, timeout, reason, store_reason=True):

2890 """Shut an instance down. 2891 2892 @note: this functions uses polling with a hardcoded timeout. 2893 2894 @type instance: L{objects.Instance} 2895 @param instance: the instance object 2896 @type timeout: integer 2897 @param timeout: maximum timeout for soft shutdown 2898 @type reason: list of reasons 2899 @param reason: the reason trail for this shutdown 2900 @type store_reason: boolean 2901 @param store_reason: whether to store the shutdown reason trail on file 2902 @rtype: None 2903 2904 """ 2905 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2906 2907 if not _GetInstanceInfo(instance): 2908 logging.info("Instance '%s' not running, doing nothing", instance.name) 2909 return 2910 2911 class _TryShutdown(object): 2912 def __init__(self): 2913 self.tried_once = False

2914 2915 def __call__(self): 2916 try: 2917 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 2918 if store_reason: 2919 _StoreInstReasonTrail(instance.name, reason) 2920 except errors.HypervisorError, err: 2921 # if the instance does no longer exist, consider this success and go to 2922 # cleanup, otherwise fail without retrying 2923 if _GetInstanceInfo(instance): 2924 _Fail("Failed to stop instance '%s': %s", instance.name, err) 2925 return 2926 2927 # TODO: Cleanup hypervisor implementations to prevent them from failing 2928 # silently. We could easily decide if we want to retry or not by using 2929 # HypervisorSoftError()/HypervisorHardError() 2930 self.tried_once = True 2931 if _GetInstanceInfo(instance): 2932 raise utils.RetryAgain() 2933 2934 try: 2935 utils.Retry(_TryShutdown(), 5, timeout) 2936 except utils.RetryTimeout: 2937 # the shutdown did not succeed 2938 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name) 2939 2940 try: 2941 hyper.StopInstance(instance, force=True) 2942 except errors.HypervisorError, err: 2943 # only raise an error if the instance still exists, otherwise 2944 # the error could simply be "instance ... unknown"! 2945 if _GetInstanceInfo(instance): 2946 _Fail("Failed to force stop instance '%s': %s", instance.name, err) 2947 2948 time.sleep(1) 2949 2950 if _GetInstanceInfo(instance): 2951 _Fail("Could not shutdown instance '%s' even by destroy", instance.name) 2952 2953 try: 2954 hyper.CleanupInstance(instance.name) 2955 except errors.HypervisorError, err: 2956 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 2957 2958 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2959

2960 2961 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):

2962 """Reboot an instance. 2963 2964 @type instance: L{objects.Instance} 2965 @param instance: the instance object to reboot 2966 @type reboot_type: str 2967 @param reboot_type: the type of reboot, one the following 2968 constants: 2969 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 2970 instance OS, do not recreate the VM 2971 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 2972 restart the VM (at the hypervisor level) 2973 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 2974 not accepted here, since that mode is handled differently, in 2975 cmdlib, and translates into full stop and start of the 2976 instance (instead of a call_instance_reboot RPC) 2977 @type shutdown_timeout: integer 2978 @param shutdown_timeout: maximum timeout for soft shutdown 2979 @type reason: list of reasons 2980 @param reason: the reason trail for this reboot 2981 @rtype: None 2982 2983 """ 2984 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown' 2985 # because those functions simply 'return' on error whereas this one 2986 # raises an exception with '_Fail' 2987 if not _GetInstanceInfo(instance): 2988 _Fail("Cannot reboot instance '%s' that is not running", instance.name) 2989 2990 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2991 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 2992 try: 2993 hyper.RebootInstance(instance) 2994 except errors.HypervisorError, err: 2995 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err) 2996 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 2997 try: 2998 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 2999 result = StartInstance(instance, False, reason, store_reason=False) 3000 _StoreInstReasonTrail(instance.name, reason) 3001 return result 3002 except errors.HypervisorError, err: 3003 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err) 3004 else: 3005 _Fail("Invalid reboot_type received: '%s'", reboot_type)

3006

3007 3008 -def InstanceBalloonMemory(instance, memory):

3009 """Resize an instance's memory. 3010 3011 @type instance: L{objects.Instance} 3012 @param instance: the instance object 3013 @type memory: int 3014 @param memory: new memory amount in MB 3015 @rtype: None 3016 3017 """ 3018 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3019 running = hyper.ListInstances(hvparams=instance.hvparams) 3020 if instance.name not in running: 3021 logging.info("Instance %s is not running, cannot balloon", instance.name) 3022 return 3023 try: 3024 hyper.BalloonInstanceMemory(instance, memory) 3025 except errors.HypervisorError, err: 3026 _Fail("Failed to balloon instance memory: %s", err, exc=True)

3027

3028 3029 -def MigrationInfo(instance):

3030 """Gather information about an instance to be migrated. 3031 3032 @type instance: L{objects.Instance} 3033 @param instance: the instance definition 3034 3035 """ 3036 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3037 try: 3038 info = hyper.MigrationInfo(instance) 3039 except errors.HypervisorError, err: 3040 _Fail("Failed to fetch migration information: %s", err, exc=True) 3041 return info

3042

3043 3044 -def AcceptInstance(instance, info, target):

3045 """Prepare the node to accept an instance. 3046 3047 @type instance: L{objects.Instance} 3048 @param instance: the instance definition 3049 @type info: string/data (opaque) 3050 @param info: migration information, from the source node 3051 @type target: string 3052 @param target: target host (usually ip), on this node 3053 3054 """ 3055 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3056 try: 3057 hyper.AcceptInstance(instance, info, target) 3058 except errors.HypervisorError, err: 3059 _Fail("Failed to accept instance: %s", err, exc=True)

3060

3061 3062 -def FinalizeMigrationDst(instance, info, success):

3063 """Finalize any preparation to accept an instance. 3064 3065 @type instance: L{objects.Instance} 3066 @param instance: the instance definition 3067 @type info: string/data (opaque) 3068 @param info: migration information, from the source node 3069 @type success: boolean 3070 @param success: whether the migration was a success or a failure 3071 3072 """ 3073 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3074 try: 3075 hyper.FinalizeMigrationDst(instance, info, success) 3076 except errors.HypervisorError, err: 3077 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)

3078

3079 3080 -def MigrateInstance(cluster_name, instance, target, live):

3081 """Migrates an instance to another node. 3082 3083 @type cluster_name: string 3084 @param cluster_name: name of the cluster 3085 @type instance: L{objects.Instance} 3086 @param instance: the instance definition 3087 @type target: string 3088 @param target: the target node name 3089 @type live: boolean 3090 @param live: whether the migration should be done live or not (the 3091 interpretation of this parameter is left to the hypervisor) 3092 @raise RPCFail: if migration fails for some reason 3093 3094 """ 3095 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3096 3097 try: 3098 hyper.MigrateInstance(cluster_name, instance, target, live) 3099 except errors.HypervisorError, err: 3100 _Fail("Failed to migrate instance: %s", err, exc=True)

3101

3102 3103 -def FinalizeMigrationSource(instance, success, live):

3104 """Finalize the instance migration on the source node. 3105 3106 @type instance: L{objects.Instance} 3107 @param instance: the instance definition of the migrated instance 3108 @type success: bool 3109 @param success: whether the migration succeeded or not 3110 @type live: bool 3111 @param live: whether the user requested a live migration or not 3112 @raise RPCFail: If the execution fails for some reason 3113 3114 """ 3115 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3116 3117 try: 3118 hyper.FinalizeMigrationSource(instance, success, live) 3119 except Exception, err: # pylint: disable=W0703 3120 _Fail("Failed to finalize the migration on the source node: %s", err, 3121 exc=True)

3122

3123 3124 -def GetMigrationStatus(instance):

3125 """Get the migration status 3126 3127 @type instance: L{objects.Instance} 3128 @param instance: the instance that is being migrated 3129 @rtype: L{objects.MigrationStatus} 3130 @return: the status of the current migration (one of 3131 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 3132 progress info that can be retrieved from the hypervisor 3133 @raise RPCFail: If the migration status cannot be retrieved 3134 3135 """ 3136 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3137 try: 3138 return hyper.GetMigrationStatus(instance) 3139 except Exception, err: # pylint: disable=W0703 3140 _Fail("Failed to get migration status: %s", err, exc=True)

3141

3142 3143 -def HotplugDevice(instance, action, dev_type, device, extra, seq):

3144 """Hotplug a device 3145 3146 Hotplug is currently supported only for KVM Hypervisor. 3147 @type instance: L{objects.Instance} 3148 @param instance: the instance to which we hotplug a device 3149 @type action: string 3150 @param action: the hotplug action to perform 3151 @type dev_type: string 3152 @param dev_type: the device type to hotplug 3153 @type device: either L{objects.NIC} or L{objects.Disk} 3154 @param device: the device object to hotplug 3155 @type extra: tuple 3156 @param extra: extra info used for disk hotplug (disk link, drive uri) 3157 @type seq: int 3158 @param seq: the index of the device from master perspective 3159 @raise RPCFail: in case instance does not have KVM hypervisor 3160 3161 """ 3162 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3163 try: 3164 hyper.VerifyHotplugSupport(instance, action, dev_type) 3165 except errors.HotplugError, err: 3166 _Fail("Hotplug is not supported: %s", err) 3167 3168 if action == constants.HOTPLUG_ACTION_ADD: 3169 fn = hyper.HotAddDevice 3170 elif action == constants.HOTPLUG_ACTION_REMOVE: 3171 fn = hyper.HotDelDevice 3172 elif action == constants.HOTPLUG_ACTION_MODIFY: 3173 fn = hyper.HotModDevice 3174 else: 3175 assert action in constants.HOTPLUG_ALL_ACTIONS 3176 3177 return fn(instance, dev_type, device, extra, seq)

3178

3179 3180 -def HotplugSupported(instance):

3181 """Checks if hotplug is generally supported. 3182 3183 """ 3184 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3185 try: 3186 hyper.HotplugSupported(instance) 3187 except errors.HotplugError, err: 3188 _Fail("Hotplug is not supported: %s", err)

3189

3190 3191 -def ModifyInstanceMetadata(metadata):

3192 """Sends instance data to the metadata daemon. 3193 3194 Uses the Luxi transport layer to communicate with the metadata 3195 daemon configuration server. It starts the metadata daemon if it is 3196 not running. 3197 The daemon must be enabled during at configuration time. 3198 3199 @type metadata: dict 3200 @param metadata: instance metadata obtained by calling 3201 L{objects.Instance.ToDict} on an instance object 3202 3203 """ 3204 if not constants.ENABLE_METAD: 3205 raise errors.ProgrammerError("The metadata deamon is disabled, yet" 3206 " ModifyInstanceMetadata has been called") 3207 3208 if not utils.IsDaemonAlive(constants.METAD): 3209 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD]) 3210 if result.failed: 3211 raise errors.HypervisorError("Failed to start metadata daemon") 3212 3213 with contextlib.closing(metad.Client()) as client: 3214 client.UpdateConfig(metadata)

3215

3216 3217 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):

3218 """Creates a block device for an instance. 3219 3220 @type disk: L{objects.Disk} 3221 @param disk: the object describing the disk we should create 3222 @type size: int 3223 @param size: the size of the physical underlying device, in MiB 3224 @type owner: str 3225 @param owner: the name of the instance for which disk is created, 3226 used for device cache data 3227 @type on_primary: boolean 3228 @param on_primary: indicates if it is the primary node or not 3229 @type info: string 3230 @param info: string that will be sent to the physical device 3231 creation, used for example to set (LVM) tags on LVs 3232 @type excl_stor: boolean 3233 @param excl_stor: Whether exclusive_storage is active 3234 3235 @return: the new unique_id of the device (this can sometime be 3236 computed only after creation), or None. On secondary nodes, 3237 it's not required to return anything. 3238 3239 """ 3240 # TODO: remove the obsolete "size" argument 3241 # pylint: disable=W0613 3242 clist = [] 3243 if disk.children: 3244 for child in disk.children: 3245 try: 3246 crdev = _RecursiveAssembleBD(child, owner, on_primary) 3247 except errors.BlockDeviceError, err: 3248 _Fail("Can't assemble device %s: %s", child, err) 3249 if on_primary or disk.AssembleOnSecondary(): 3250 # we need the children open in case the device itself has to 3251 # be assembled 3252 try: 3253 # pylint: disable=E1103 3254 crdev.Open() 3255 except errors.BlockDeviceError, err: 3256 _Fail("Can't make child '%s' read-write: %s", child, err) 3257 clist.append(crdev) 3258 3259 try: 3260 device = bdev.Create(disk, clist, excl_stor) 3261 except errors.BlockDeviceError, err: 3262 _Fail("Can't create block device: %s", err) 3263 3264 if on_primary or disk.AssembleOnSecondary(): 3265 try: 3266 device.Assemble() 3267 except errors.BlockDeviceError, err: 3268 _Fail("Can't assemble device after creation, unusual event: %s", err) 3269 if on_primary or disk.OpenOnSecondary(): 3270 try: 3271 device.Open(force=True) 3272 except errors.BlockDeviceError, err: 3273 _Fail("Can't make device r/w after creation, unusual event: %s", err) 3274 DevCacheManager.UpdateCache(device.dev_path, owner, 3275 on_primary, disk.iv_name) 3276 3277 device.SetInfo(info) 3278 3279 return device.unique_id

3280

3281 3282 -def _DumpDevice(source_path, target_path, offset, size, truncate):

3283 """This function images/wipes the device using a local file. 3284 3285 @type source_path: string 3286 @param source_path: path of the image or data source (e.g., "/dev/zero") 3287 3288 @type target_path: string 3289 @param target_path: path of the device to image/wipe 3290 3291 @type offset: int 3292 @param offset: offset in MiB in the output file 3293 3294 @type size: int 3295 @param size: maximum size in MiB to write (data source might be smaller) 3296 3297 @type truncate: bool 3298 @param truncate: whether the file should be truncated 3299 3300 @return: None 3301 @raise RPCFail: in case of failure 3302 3303 """ 3304 # Internal sizes are always in Mebibytes; if the following "dd" command 3305 # should use a different block size the offset and size given to this 3306 # function must be adjusted accordingly before being passed to "dd". 3307 block_size = constants.DD_BLOCK_SIZE 3308 3309 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset, 3310 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path, 3311 "count=%d" % size] 3312 3313 if not truncate: 3314 cmd.append("conv=notrunc") 3315 3316 result = utils.RunCmd(cmd) 3317 3318 if result.failed: 3319 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd, 3320 result.fail_reason, result.output)

3321

3322 3323 -def _DownloadAndDumpDevice(source_url, target_path, size):

3324 """This function images a device using a downloaded image file. 3325 3326 @type source_url: string 3327 @param source_url: URL of image to dump to disk 3328 3329 @type target_path: string 3330 @param target_path: path of the device to image 3331 3332 @type size: int 3333 @param size: maximum size in MiB to write (data source might be smaller) 3334 3335 @rtype: NoneType 3336 @return: None 3337 @raise RPCFail: in case of download or write failures 3338 3339 """ 3340 class DDParams(object): 3341 def __init__(self, current_size, total_size): 3342 self.current_size = current_size 3343 self.total_size = total_size 3344 self.image_size_error = False

3345 3346 def dd_write(ddparams, out): 3347 if ddparams.current_size < ddparams.total_size: 3348 ddparams.current_size += len(out) 3349 target_file.write(out) 3350 else: 3351 ddparams.image_size_error = True 3352 return -1 3353 3354 target_file = open(target_path, "r+") 3355 ddparams = DDParams(0, 1024 * 1024 * size) 3356 3357 curl = pycurl.Curl() 3358 curl.setopt(pycurl.VERBOSE, True) 3359 curl.setopt(pycurl.NOSIGNAL, True) 3360 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION) 3361 curl.setopt(pycurl.URL, source_url) 3362 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out)) 3363 3364 try: 3365 curl.perform() 3366 except pycurl.error: 3367 if ddparams.image_size_error: 3368 _Fail("Disk image larger than the disk") 3369 else: 3370 raise 3371 3372 target_file.close() 3373

3374 3375 -def BlockdevConvert(src_disk, target_disk):

3376 """Copies data from source block device to target. 3377 3378 This function gets the export and import commands from the source and 3379 target devices respectively, and then concatenates them to a single 3380 command using a pipe ("|"). Finally, executes the unified command that 3381 will transfer the data between the devices during the disk template 3382 conversion operation. 3383 3384 @type src_disk: L{objects.Disk} 3385 @param src_disk: the disk object we want to copy from 3386 @type target_disk: L{objects.Disk} 3387 @param target_disk: the disk object we want to copy to 3388 3389 @rtype: NoneType 3390 @return: None 3391 @raise RPCFail: in case of failure 3392 3393 """ 3394 src_dev = _RecursiveFindBD(src_disk) 3395 if src_dev is None: 3396 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid) 3397 3398 dest_dev = _RecursiveFindBD(target_disk) 3399 if dest_dev is None: 3400 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid) 3401 3402 src_cmd = src_dev.Export() 3403 dest_cmd = dest_dev.Import() 3404 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd), 3405 utils.ShellQuoteArgs(dest_cmd)) 3406 3407 result = utils.RunCmd(command) 3408 if result.failed: 3409 _Fail("Disk conversion command '%s' exited with error: %s; output: %s", 3410 result.cmd, result.fail_reason, result.output)

3411

3412 3413 -def BlockdevWipe(disk, offset, size):

3414 """Wipes a block device. 3415 3416 @type disk: L{objects.Disk} 3417 @param disk: the disk object we want to wipe 3418 @type offset: int 3419 @param offset: The offset in MiB in the file 3420 @type size: int 3421 @param size: The size in MiB to write 3422 3423 """ 3424 try: 3425 rdev = _RecursiveFindBD(disk) 3426 except errors.BlockDeviceError: 3427 rdev = None 3428 3429 if not rdev: 3430 _Fail("Cannot wipe device %s: device not found", disk.iv_name) 3431 if offset < 0: 3432 _Fail("Negative offset") 3433 if size < 0: 3434 _Fail("Negative size") 3435 if offset > rdev.size: 3436 _Fail("Wipe offset is bigger than device size") 3437 if (offset + size) > rdev.size: 3438 _Fail("Wipe offset and size are bigger than device size") 3439 3440 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)

3441

3442 3443 -def BlockdevImage(disk, image, size):

3444 """Images a block device either by dumping a local file or 3445 downloading a URL. 3446 3447 @type disk: L{objects.Disk} 3448 @param disk: the disk object we want to image 3449 3450 @type image: string 3451 @param image: file path to the disk image be dumped 3452 3453 @type size: int 3454 @param size: The size in MiB to write 3455 3456 @rtype: NoneType 3457 @return: None 3458 @raise RPCFail: in case of failure 3459 3460 """ 3461 if not (utils.IsUrl(image) or os.path.exists(image)): 3462 _Fail("Image '%s' not found", image) 3463 3464 try: 3465 rdev = _RecursiveFindBD(disk) 3466 except errors.BlockDeviceError: 3467 rdev = None 3468 3469 if not rdev: 3470 _Fail("Cannot image device %s: device not found", disk.iv_name) 3471 if size < 0: 3472 _Fail("Negative size") 3473 if size > rdev.size: 3474 _Fail("Image size is bigger than device size") 3475 3476 if utils.IsUrl(image): 3477 _DownloadAndDumpDevice(image, rdev.dev_path, size) 3478 else: 3479 _DumpDevice(image, rdev.dev_path, 0, size, False)

3480

3481 3482 -def BlockdevPauseResumeSync(disks, pause):

3483 """Pause or resume the sync of the block device. 3484 3485 @type disks: list of L{objects.Disk} 3486 @param disks: the disks object we want to pause/resume 3487 @type pause: bool 3488 @param pause: Wheater to pause or resume 3489 3490 """ 3491 success = [] 3492 for disk in disks: 3493 try: 3494 rdev = _RecursiveFindBD(disk) 3495 except errors.BlockDeviceError: 3496 rdev = None 3497 3498 if not rdev: 3499 success.append((False, ("Cannot change sync for device %s:" 3500 " device not found" % disk.iv_name))) 3501 continue 3502 3503 result = rdev.PauseResumeSync(pause) 3504 3505 if result: 3506 success.append((result, None)) 3507 else: 3508 if pause: 3509 msg = "Pause" 3510 else: 3511 msg = "Resume" 3512 success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) 3513 3514 return success

3515

3516 3517 -def BlockdevRemove(disk):

3518 """Remove a block device. 3519 3520 @note: This is intended to be called recursively. 3521 3522 @type disk: L{objects.Disk} 3523 @param disk: the disk object we should remove 3524 @rtype: boolean 3525 @return: the success of the operation 3526 3527 """ 3528 msgs = [] 3529 try: 3530 rdev = _RecursiveFindBD(disk) 3531 except errors.BlockDeviceError, err: 3532 # probably can't attach 3533 logging.info("Can't attach to device %s in remove", disk) 3534 rdev = None 3535 if rdev is not None: 3536 r_path = rdev.dev_path 3537 3538 def _TryRemove(): 3539 try: 3540 rdev.Remove() 3541 return [] 3542 except errors.BlockDeviceError, err: 3543 return [str(err)]

3544 3545 msgs.extend(utils.SimpleRetry([], _TryRemove, 3546 constants.DISK_REMOVE_RETRY_INTERVAL, 3547 constants.DISK_REMOVE_RETRY_TIMEOUT)) 3548 3549 if not msgs: 3550 DevCacheManager.RemoveCache(r_path) 3551 3552 if disk.children: 3553 for child in disk.children: 3554 try: 3555 BlockdevRemove(child) 3556 except RPCFail, err: 3557 msgs.append(str(err)) 3558 3559 if msgs: 3560 _Fail("; ".join(msgs)) 3561

3562 3563 -def _RecursiveAssembleBD(disk, owner, as_primary):

3564 """Activate a block device for an instance. 3565 3566 This is run on the primary and secondary nodes for an instance. 3567 3568 @note: this function is called recursively. 3569 3570 @type disk: L{objects.Disk} 3571 @param disk: the disk we try to assemble 3572 @type owner: str 3573 @param owner: the name of the instance which owns the disk 3574 @type as_primary: boolean 3575 @param as_primary: if we should make the block device 3576 read/write 3577 3578 @return: the assembled device or None (in case no device 3579 was assembled) 3580 @raise errors.BlockDeviceError: in case there is an error 3581 during the activation of the children or the device 3582 itself 3583 3584 """ 3585 children = [] 3586 if disk.children: 3587 mcn = disk.ChildrenNeeded() 3588 if mcn == -1: 3589 mcn = 0 # max number of Nones allowed 3590 else: 3591 mcn = len(disk.children) - mcn # max number of Nones 3592 for chld_disk in disk.children: 3593 try: 3594 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) 3595 except errors.BlockDeviceError, err: 3596 if children.count(None) >= mcn: 3597 raise 3598 cdev = None 3599 logging.error("Error in child activation (but continuing): %s", 3600 str(err)) 3601 children.append(cdev) 3602 3603 if as_primary or disk.AssembleOnSecondary(): 3604 r_dev = bdev.Assemble(disk, children) 3605 result = r_dev 3606 if as_primary or disk.OpenOnSecondary(): 3607 r_dev.Open() 3608 DevCacheManager.UpdateCache(r_dev.dev_path, owner, 3609 as_primary, disk.iv_name) 3610 3611 else: 3612 result = True 3613 return result

3614

3615 3616 -def BlockdevAssemble(disk, instance, as_primary, idx):

3617 """Activate a block device for an instance. 3618 3619 This is a wrapper over _RecursiveAssembleBD. 3620 3621 @rtype: str or boolean 3622 @return: a tuple with the C{/dev/...} path and the created symlink 3623 for primary nodes, and (C{True}, C{True}) for secondary nodes 3624 3625 """ 3626 try: 3627 result = _RecursiveAssembleBD(disk, instance.name, as_primary) 3628 if isinstance(result, BlockDev): 3629 # pylint: disable=E1103 3630 dev_path = result.dev_path 3631 link_name = None 3632 uri = None 3633 if as_primary: 3634 link_name = _SymlinkBlockDev(instance.name, dev_path, idx) 3635 uri = _CalculateDeviceURI(instance, disk, result) 3636 elif result: 3637 return result, result 3638 else: 3639 _Fail("Unexpected result from _RecursiveAssembleBD") 3640 except errors.BlockDeviceError, err: 3641 _Fail("Error while assembling disk: %s", err, exc=True) 3642 except OSError, err: 3643 _Fail("Error while symlinking disk: %s", err, exc=True) 3644 3645 return dev_path, link_name, uri

3646

3647 3648 -def BlockdevShutdown(disk):

3649 """Shut down a block device. 3650 3651 First, if the device is assembled (Attach() is successful), then 3652 the device is shutdown. Then the children of the device are 3653 shutdown. 3654 3655 This function is called recursively. Note that we don't cache the 3656 children or such, as oppossed to assemble, shutdown of different 3657 devices doesn't require that the upper device was active. 3658 3659 @type disk: L{objects.Disk} 3660 @param disk: the description of the disk we should 3661 shutdown 3662 @rtype: None 3663 3664 """ 3665 msgs = [] 3666 r_dev = _RecursiveFindBD(disk) 3667 if r_dev is not None: 3668 r_path = r_dev.dev_path 3669 try: 3670 r_dev.Shutdown() 3671 DevCacheManager.RemoveCache(r_path) 3672 except errors.BlockDeviceError, err: 3673 msgs.append(str(err)) 3674 3675 if disk.children: 3676 for child in disk.children: 3677 try: 3678 BlockdevShutdown(child) 3679 except RPCFail, err: 3680 msgs.append(str(err)) 3681 3682 if msgs: 3683 _Fail("; ".join(msgs))

3684

3685 3686 -def BlockdevAddchildren(parent_cdev, new_cdevs):

3687 """Extend a mirrored block device. 3688 3689 @type parent_cdev: L{objects.Disk} 3690 @param parent_cdev: the disk to which we should add children 3691 @type new_cdevs: list of L{objects.Disk} 3692 @param new_cdevs: the list of children which we should add 3693 @rtype: None 3694 3695 """ 3696 parent_bdev = _RecursiveFindBD(parent_cdev) 3697 if parent_bdev is None: 3698 _Fail("Can't find parent device '%s' in add children", parent_cdev) 3699 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] 3700 if new_bdevs.count(None) > 0: 3701 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs) 3702 parent_bdev.AddChildren(new_bdevs)

3703

3704 3705 -def BlockdevRemovechildren(parent_cdev, new_cdevs):

3706 """Shrink a mirrored block device. 3707 3708 @type parent_cdev: L{objects.Disk} 3709 @param parent_cdev: the disk from which we should remove children 3710 @type new_cdevs: list of L{objects.Disk} 3711 @param new_cdevs: the list of children which we should remove 3712 @rtype: None 3713 3714 """ 3715 parent_bdev = _RecursiveFindBD(parent_cdev) 3716 if parent_bdev is None: 3717 _Fail("Can't find parent device '%s' in remove children", parent_cdev) 3718 devs = [] 3719 for disk in new_cdevs: 3720 rpath = disk.StaticDevPath() 3721 if rpath is None: 3722 bd = _RecursiveFindBD(disk) 3723 if bd is None: 3724 _Fail("Can't find device %s while removing children", disk) 3725 else: 3726 devs.append(bd.dev_path) 3727 else: 3728 if not utils.IsNormAbsPath(rpath): 3729 _Fail("Strange path returned from StaticDevPath: '%s'", rpath) 3730 devs.append(rpath) 3731 parent_bdev.RemoveChildren(devs)

3732

3733 3734 -def BlockdevGetmirrorstatus(disks):

3735 """Get the mirroring status of a list of devices. 3736 3737 @type disks: list of L{objects.Disk} 3738 @param disks: the list of disks which we should query 3739 @rtype: disk 3740 @return: List of L{objects.BlockDevStatus}, one for each disk 3741 @raise errors.BlockDeviceError: if any of the disks cannot be 3742 found 3743 3744 """ 3745 stats = [] 3746 for dsk in disks: 3747 rbd = _RecursiveFindBD(dsk) 3748 if rbd is None: 3749 _Fail("Can't find device %s", dsk) 3750 3751 stats.append(rbd.CombinedSyncStatus()) 3752 3753 return stats

3754

3755 3756 -def BlockdevGetmirrorstatusMulti(disks):

3757 """Get the mirroring status of a list of devices. 3758 3759 @type disks: list of L{objects.Disk} 3760 @param disks: the list of disks which we should query 3761 @rtype: disk 3762 @return: List of tuples, (bool, status), one for each disk; bool denotes 3763 success/failure, status is L{objects.BlockDevStatus} on success, string 3764 otherwise 3765 3766 """ 3767 result = [] 3768 for disk in disks: 3769 try: 3770 rbd = _RecursiveFindBD(disk) 3771 if rbd is None: 3772 result.append((False, "Can't find device %s" % disk)) 3773 continue 3774 3775 status = rbd.CombinedSyncStatus() 3776 except errors.BlockDeviceError, err: 3777 logging.exception("Error while getting disk status") 3778 result.append((False, str(err))) 3779 else: 3780 result.append((True, status)) 3781 3782 assert len(disks) == len(result) 3783 3784 return result

3785

3786 3787 -def _RecursiveFindBD(disk):

3788 """Check if a device is activated. 3789 3790 If so, return information about the real device. 3791 3792 @type disk: L{objects.Disk} 3793 @param disk: the disk object we need to find 3794 3795 @return: None if the device can't be found, 3796 otherwise the device instance 3797 3798 """ 3799 children = [] 3800 if disk.children: 3801 for chdisk in disk.children: 3802 children.append(_RecursiveFindBD(chdisk)) 3803 3804 return bdev.FindDevice(disk, children)

3805

3806 3807 -def _OpenRealBD(disk):

3808 """Opens the underlying block device of a disk. 3809 3810 @type disk: L{objects.Disk} 3811 @param disk: the disk object we want to open 3812 3813 """ 3814 real_disk = _RecursiveFindBD(disk) 3815 if real_disk is None: 3816 _Fail("Block device '%s' is not set up", disk) 3817 3818 real_disk.Open() 3819 3820 return real_disk

3821

3822 3823 -def BlockdevFind(disk):

3824 """Check if a device is activated. 3825 3826 If it is, return information about the real device. 3827 3828 @type disk: L{objects.Disk} 3829 @param disk: the disk to find 3830 @rtype: None or objects.BlockDevStatus 3831 @return: None if the disk cannot be found, otherwise a the current 3832 information 3833 3834 """ 3835 try: 3836 rbd = _RecursiveFindBD(disk) 3837 except errors.BlockDeviceError, err: 3838 _Fail("Failed to find device: %s", err, exc=True) 3839 3840 if rbd is None: 3841 return None 3842 3843 return rbd.GetSyncStatus()

3844

3845 3846 -def BlockdevGetdimensions(disks):

3847 """Computes the size of the given disks. 3848 3849 If a disk is not found, returns None instead. 3850 3851 @type disks: list of L{objects.Disk} 3852 @param disks: the list of disk to compute the size for 3853 @rtype: list 3854 @return: list with elements None if the disk cannot be found, 3855 otherwise the pair (size, spindles), where spindles is None if the 3856 device doesn't support that 3857 3858 """ 3859 result = [] 3860 for cf in disks: 3861 try: 3862 rbd = _RecursiveFindBD(cf) 3863 except errors.BlockDeviceError: 3864 result.append(None) 3865 continue 3866 if rbd is None: 3867 result.append(None) 3868 else: 3869 result.append(rbd.GetActualDimensions()) 3870 return result

3871

3872 3873 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):

3874 """Write a file to the filesystem. 3875 3876 This allows the master to overwrite(!) a file. It will only perform 3877 the operation if the file belongs to a list of configuration files. 3878 3879 @type file_name: str 3880 @param file_name: the target file name 3881 @type data: str 3882 @param data: the new contents of the file 3883 @type mode: int 3884 @param mode: the mode to give the file (can be None) 3885 @type uid: string 3886 @param uid: the owner of the file 3887 @type gid: string 3888 @param gid: the group of the file 3889 @type atime: float 3890 @param atime: the atime to set on the file (can be None) 3891 @type mtime: float 3892 @param mtime: the mtime to set on the file (can be None) 3893 @rtype: None 3894 3895 """ 3896 file_name = vcluster.LocalizeVirtualPath(file_name) 3897 3898 if not os.path.isabs(file_name): 3899 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) 3900 3901 if file_name not in _ALLOWED_UPLOAD_FILES: 3902 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'", 3903 file_name) 3904 3905 raw_data = _Decompress(data) 3906 3907 if not (isinstance(uid, basestring) and isinstance(gid, basestring)): 3908 _Fail("Invalid username/groupname type") 3909 3910 getents = runtime.GetEnts() 3911 uid = getents.LookupUser(uid) 3912 gid = getents.LookupGroup(gid) 3913 3914 utils.SafeWriteFile(file_name, None, 3915 data=raw_data, mode=mode, uid=uid, gid=gid, 3916 atime=atime, mtime=mtime)

3917

3918 3919 -def RunOob(oob_program, command, node, timeout):

3920 """Executes oob_program with given command on given node. 3921 3922 @param oob_program: The path to the executable oob_program 3923 @param command: The command to invoke on oob_program 3924 @param node: The node given as an argument to the program 3925 @param timeout: Timeout after which we kill the oob program 3926 3927 @return: stdout 3928 @raise RPCFail: If execution fails for some reason 3929 3930 """ 3931 result = utils.RunCmd([oob_program, command, node], timeout=timeout) 3932 3933 if result.failed: 3934 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, 3935 result.fail_reason, result.output) 3936 3937 return result.stdout

3938

3939 3940 -def _OSOndiskAPIVersion(os_dir):

3941 """Compute and return the API version of a given OS. 3942 3943 This function will try to read the API version of the OS residing in 3944 the 'os_dir' directory. 3945 3946 @type os_dir: str 3947 @param os_dir: the directory in which we should look for the OS 3948 @rtype: tuple 3949 @return: tuple (status, data) with status denoting the validity and 3950 data holding either the valid versions or an error message 3951 3952 """ 3953 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE) 3954 3955 try: 3956 st = os.stat(api_file) 3957 except EnvironmentError, err: 3958 return False, ("Required file '%s' not found under path %s: %s" % 3959 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) 3960 3961 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3962 return False, ("File '%s' in %s is not a regular file" % 3963 (constants.OS_API_FILE, os_dir)) 3964 3965 try: 3966 api_versions = utils.ReadFile(api_file).splitlines() 3967 except EnvironmentError, err: 3968 return False, ("Error while reading the API version file at %s: %s" % 3969 (api_file, utils.ErrnoOrStr(err))) 3970 3971 try: 3972 api_versions = [int(version.strip()) for version in api_versions] 3973 except (TypeError, ValueError), err: 3974 return False, ("API version(s) can't be converted to integer: %s" % 3975 str(err)) 3976 3977 return True, api_versions

3978

3979 3980 -def DiagnoseOS(top_dirs=None):

3981 """Compute the validity for all OSes. 3982 3983 @type top_dirs: list 3984 @param top_dirs: the list of directories in which to 3985 search (if not given defaults to 3986 L{pathutils.OS_SEARCH_PATH}) 3987 @rtype: list of L{objects.OS} 3988 @return: a list of tuples (name, path, status, diagnose, variants, 3989 parameters, api_version) for all (potential) OSes under all 3990 search paths, where: 3991 - name is the (potential) OS name 3992 - path is the full path to the OS 3993 - status True/False is the validity of the OS 3994 - diagnose is the error message for an invalid OS, otherwise empty 3995 - variants is a list of supported OS variants, if any 3996 - parameters is a list of (name, help) parameters, if any 3997 - api_version is a list of support OS API versions 3998 3999 """ 4000 if top_dirs is None: 4001 top_dirs = pathutils.OS_SEARCH_PATH 4002 4003 result = [] 4004 for dir_name in top_dirs: 4005 if os.path.isdir(dir_name): 4006 try: 4007 f_names = utils.ListVisibleFiles(dir_name) 4008 except EnvironmentError, err: 4009 logging.exception("Can't list the OS directory %s: %s", dir_name, err) 4010 break 4011 for name in f_names: 4012 os_path = utils.PathJoin(dir_name, name) 4013 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name) 4014 if status: 4015 diagnose = "" 4016 variants = os_inst.supported_variants 4017 parameters = os_inst.supported_parameters 4018 api_versions = os_inst.api_versions 4019 trusted = False if os_inst.create_script_untrusted else True 4020 else: 4021 diagnose = os_inst 4022 variants = parameters = api_versions = [] 4023 trusted = True 4024 result.append((name, os_path, status, diagnose, variants, 4025 parameters, api_versions, trusted)) 4026 4027 return result

4028

4029 4030 -def _TryOSFromDisk(name, base_dir=None):

4031 """Create an OS instance from disk. 4032 4033 This function will return an OS instance if the given name is a 4034 valid OS name. 4035 4036 @type base_dir: string 4037 @keyword base_dir: Base directory containing OS installations. 4038 Defaults to a search in all the OS_SEARCH_PATH dirs. 4039 @rtype: tuple 4040 @return: success and either the OS instance if we find a valid one, 4041 or error message 4042 4043 """ 4044 if base_dir is None: 4045 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) 4046 else: 4047 os_dir = utils.FindFile(name, [base_dir], os.path.isdir) 4048 4049 if os_dir is None: 4050 return False, "Directory for OS %s not found in search path" % name 4051 4052 status, api_versions = _OSOndiskAPIVersion(os_dir) 4053 if not status: 4054 # push the error up 4055 return status, api_versions 4056 4057 if not constants.OS_API_VERSIONS.intersection(api_versions): 4058 return False, ("API version mismatch for path '%s': found %s, want %s." % 4059 (os_dir, api_versions, constants.OS_API_VERSIONS)) 4060 4061 # OS Files dictionary, we will populate it with the absolute path 4062 # names; if the value is True, then it is a required file, otherwise 4063 # an optional one 4064 os_files = dict.fromkeys(constants.OS_SCRIPTS, True) 4065 4066 os_files[constants.OS_SCRIPT_CREATE] = False 4067 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False 4068 4069 if max(api_versions) >= constants.OS_API_V15: 4070 os_files[constants.OS_VARIANTS_FILE] = False 4071 4072 if max(api_versions) >= constants.OS_API_V20: 4073 os_files[constants.OS_PARAMETERS_FILE] = True 4074 else: 4075 del os_files[constants.OS_SCRIPT_VERIFY] 4076 4077 for (filename, required) in os_files.items(): 4078 os_files[filename] = utils.PathJoin(os_dir, filename) 4079 4080 try: 4081 st = os.stat(os_files[filename]) 4082 except EnvironmentError, err: 4083 if err.errno == errno.ENOENT and not required: 4084 del os_files[filename] 4085 continue 4086 return False, ("File '%s' under path '%s' is missing (%s)" % 4087 (filename, os_dir, utils.ErrnoOrStr(err))) 4088 4089 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 4090 return False, ("File '%s' under path '%s' is not a regular file" % 4091 (filename, os_dir)) 4092 4093 if filename in constants.OS_SCRIPTS: 4094 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: 4095 return False, ("File '%s' under path '%s' is not executable" % 4096 (filename, os_dir)) 4097 4098 if not constants.OS_SCRIPT_CREATE in os_files and \ 4099 not constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files: 4100 return False, ("A create script (trusted or untrusted) under path '%s'" 4101 " must exist" % os_dir) 4102 4103 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None) 4104 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED, 4105 None) 4106 4107 variants = [] 4108 if constants.OS_VARIANTS_FILE in os_files: 4109 variants_file = os_files[constants.OS_VARIANTS_FILE] 4110 try: 4111 variants = \ 4112 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) 4113 except EnvironmentError, err: 4114 # we accept missing files, but not other errors 4115 if err.errno != errno.ENOENT: 4116 return False, ("Error while reading the OS variants file at %s: %s" % 4117 (variants_file, utils.ErrnoOrStr(err))) 4118 4119 parameters = [] 4120 if constants.OS_PARAMETERS_FILE in os_files: 4121 parameters_file = os_files[constants.OS_PARAMETERS_FILE] 4122 try: 4123 parameters = utils.ReadFile(parameters_file).splitlines() 4124 except EnvironmentError, err: 4125 return False, ("Error while reading the OS parameters file at %s: %s" % 4126 (parameters_file, utils.ErrnoOrStr(err))) 4127 parameters = [v.split(None, 1) for v in parameters] 4128 4129 os_obj = objects.OS(name=name, path=os_dir, 4130 create_script=create_script, 4131 create_script_untrusted=create_script_untrusted, 4132 export_script=os_files[constants.OS_SCRIPT_EXPORT], 4133 import_script=os_files[constants.OS_SCRIPT_IMPORT], 4134 rename_script=os_files[constants.OS_SCRIPT_RENAME], 4135 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, 4136 None), 4137 supported_variants=variants, 4138 supported_parameters=parameters, 4139 api_versions=api_versions) 4140 return True, os_obj

4141

4142 4143 -def OSFromDisk(name, base_dir=None):

4144 """Create an OS instance from disk. 4145 4146 This function will return an OS instance if the given name is a 4147 valid OS name. Otherwise, it will raise an appropriate 4148 L{RPCFail} exception, detailing why this is not a valid OS. 4149 4150 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise 4151 an exception but returns true/false status data. 4152 4153 @type base_dir: string 4154 @keyword base_dir: Base directory containing OS installations. 4155 Defaults to a search in all the OS_SEARCH_PATH dirs. 4156 @rtype: L{objects.OS} 4157 @return: the OS instance if we find a valid one 4158 @raise RPCFail: if we don't find a valid OS 4159 4160 """ 4161 name_only = objects.OS.GetName(name) 4162 status, payload = _TryOSFromDisk(name_only, base_dir) 4163 4164 if not status: 4165 _Fail(payload) 4166 4167 return payload

4168

4169 4170 -def OSCoreEnv(os_name, inst_os, os_params, debug=0):

4171 """Calculate the basic environment for an os script. 4172 4173 @type os_name: str 4174 @param os_name: full operating system name (including variant) 4175 @type inst_os: L{objects.OS} 4176 @param inst_os: operating system for which the environment is being built 4177 @type os_params: dict 4178 @param os_params: the OS parameters 4179 @type debug: integer 4180 @param debug: debug level (0 or 1, for OS Api 10) 4181 @rtype: dict 4182 @return: dict of environment variables 4183 @raise errors.BlockDeviceError: if the block device 4184 cannot be found 4185 4186 """ 4187 result = {} 4188 api_version = \ 4189 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) 4190 result["OS_API_VERSION"] = "%d" % api_version 4191 result["OS_NAME"] = inst_os.name 4192 result["DEBUG_LEVEL"] = "%d" % debug 4193 4194 # OS variants 4195 if api_version >= constants.OS_API_V15 and inst_os.supported_variants: 4196 variant = objects.OS.GetVariant(os_name) 4197 if not variant: 4198 variant = inst_os.supported_variants[0] 4199 else: 4200 variant = "" 4201 result["OS_VARIANT"] = variant 4202 4203 # OS params 4204 for pname, pvalue in os_params.items(): 4205 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue 4206 4207 # Set a default path otherwise programs called by OS scripts (or 4208 # even hooks called from OS scripts) might break, and we don't want 4209 # to have each script require setting a PATH variable 4210 result["PATH"] = constants.HOOKS_PATH 4211 4212 return result

4213

4214 4215 -def OSEnvironment(instance, inst_os, debug=0):

4216 """Calculate the environment for an os script. 4217 4218 @type instance: L{objects.Instance} 4219 @param instance: target instance for the os script run 4220 @type inst_os: L{objects.OS} 4221 @param inst_os: operating system for which the environment is being built 4222 @type debug: integer 4223 @param debug: debug level (0 or 1, for OS Api 10) 4224 @rtype: dict 4225 @return: dict of environment variables 4226 @raise errors.BlockDeviceError: if the block device 4227 cannot be found 4228 4229 """ 4230 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams, 4231 instance.osparams_private.Unprivate()), debug=debug) 4232 4233 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: 4234 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) 4235 4236 result["HYPERVISOR"] = instance.hypervisor 4237 result["DISK_COUNT"] = "%d" % len(instance.disks_info) 4238 result["NIC_COUNT"] = "%d" % len(instance.nics) 4239 result["INSTANCE_SECONDARY_NODES"] = \ 4240 ("%s" % " ".join(instance.secondary_nodes)) 4241 4242 # Disks 4243 for idx, disk in enumerate(instance.disks_info): 4244 real_disk = _OpenRealBD(disk) 4245 uri = _CalculateDeviceURI(instance, disk, real_disk) 4246 result["DISK_%d_ACCESS" % idx] = disk.mode 4247 result["DISK_%d_UUID" % idx] = disk.uuid 4248 if real_disk.dev_path: 4249 result["DISK_%d_PATH" % idx] = real_disk.dev_path 4250 if uri: 4251 result["DISK_%d_URI" % idx] = uri 4252 if disk.name: 4253 result["DISK_%d_NAME" % idx] = disk.name 4254 if constants.HV_DISK_TYPE in instance.hvparams: 4255 result["DISK_%d_FRONTEND_TYPE" % idx] = \ 4256 instance.hvparams[constants.HV_DISK_TYPE] 4257 if disk.dev_type in constants.DTS_BLOCK: 4258 result["DISK_%d_BACKEND_TYPE" % idx] = "block" 4259 elif disk.dev_type in constants.DTS_FILEBASED: 4260 result["DISK_%d_BACKEND_TYPE" % idx] = \ 4261 "file:%s" % disk.logical_id[0] 4262 4263 # NICs 4264 for idx, nic in enumerate(instance.nics): 4265 result["NIC_%d_MAC" % idx] = nic.mac 4266 result["NIC_%d_UUID" % idx] = nic.uuid 4267 if nic.name: 4268 result["NIC_%d_NAME" % idx] = nic.name 4269 if nic.ip: 4270 result["NIC_%d_IP" % idx] = nic.ip 4271 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] 4272 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 4273 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] 4274 if nic.nicparams[constants.NIC_LINK]: 4275 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] 4276 if nic.netinfo: 4277 nobj = objects.Network.FromDict(nic.netinfo) 4278 result.update(nobj.HooksDict("NIC_%d_" % idx)) 4279 if constants.HV_NIC_TYPE in instance.hvparams: 4280 result["NIC_%d_FRONTEND_TYPE" % idx] = \ 4281 instance.hvparams[constants.HV_NIC_TYPE] 4282 4283 # HV/BE params 4284 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: 4285 for key, value in source.items(): 4286 result["INSTANCE_%s_%s" % (kind, key)] = str(value) 4287 4288 return result

4289

4290 4291 -def DiagnoseExtStorage(top_dirs=None):

4292 """Compute the validity for all ExtStorage Providers. 4293 4294 @type top_dirs: list 4295 @param top_dirs: the list of directories in which to 4296 search (if not given defaults to 4297 L{pathutils.ES_SEARCH_PATH}) 4298 @rtype: list of L{objects.ExtStorage} 4299 @return: a list of tuples (name, path, status, diagnose, parameters) 4300 for all (potential) ExtStorage Providers under all 4301 search paths, where: 4302 - name is the (potential) ExtStorage Provider 4303 - path is the full path to the ExtStorage Provider 4304 - status True/False is the validity of the ExtStorage Provider 4305 - diagnose is the error message for an invalid ExtStorage Provider, 4306 otherwise empty 4307 - parameters is a list of (name, help) parameters, if any 4308 4309 """ 4310 if top_dirs is None: 4311 top_dirs = pathutils.ES_SEARCH_PATH 4312 4313 result = [] 4314 for dir_name in top_dirs: 4315 if os.path.isdir(dir_name): 4316 try: 4317 f_names = utils.ListVisibleFiles(dir_name) 4318 except EnvironmentError, err: 4319 logging.exception("Can't list the ExtStorage directory %s: %s", 4320 dir_name, err) 4321 break 4322 for name in f_names: 4323 es_path = utils.PathJoin(dir_name, name) 4324 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name) 4325 if status: 4326 diagnose = "" 4327 parameters = es_inst.supported_parameters 4328 else: 4329 diagnose = es_inst 4330 parameters = [] 4331 result.append((name, es_path, status, diagnose, parameters)) 4332 4333 return result

4334

4335 4336 -def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):

4337 """Grow a stack of block devices. 4338 4339 This function is called recursively, with the childrens being the 4340 first ones to resize. 4341 4342 @type disk: L{objects.Disk} 4343 @param disk: the disk to be grown 4344 @type amount: integer 4345 @param amount: the amount (in mebibytes) to grow with 4346 @type dryrun: boolean 4347 @param dryrun: whether to execute the operation in simulation mode 4348 only, without actually increasing the size 4349 @param backingstore: whether to execute the operation on backing storage 4350 only, or on "logical" storage only; e.g. DRBD is logical storage, 4351 whereas LVM, file, RBD are backing storage 4352 @rtype: (status, result) 4353 @type excl_stor: boolean 4354 @param excl_stor: Whether exclusive_storage is active 4355 @return: a tuple with the status of the operation (True/False), and 4356 the errors message if status is False 4357 4358 """ 4359 r_dev = _RecursiveFindBD(disk) 4360 if r_dev is None: 4361 _Fail("Cannot find block device %s", disk) 4362 4363 try: 4364 r_dev.Grow(amount, dryrun, backingstore, excl_stor) 4365 except errors.BlockDeviceError, err: 4366 _Fail("Failed to grow block device: %s", err, exc=True)

4367

4368 4369 -def BlockdevSnapshot(disk, snap_name, snap_size):

4370 """Create a snapshot copy of a block device. 4371 4372 This function is called recursively, and the snapshot is actually created 4373 just for the leaf lvm backend device. 4374 4375 @type disk: L{objects.Disk} 4376 @param disk: the disk to be snapshotted 4377 @type snap_name: string 4378 @param snap_name: the name of the snapshot 4379 @type snap_size: int 4380 @param snap_size: the size of the snapshot 4381 @rtype: string 4382 @return: snapshot disk ID as (vg, lv) 4383 4384 """ 4385 def _DiskSnapshot(disk, snap_name=None, snap_size=None): 4386 r_dev = _RecursiveFindBD(disk) 4387 if r_dev is not None: 4388 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size) 4389 else: 4390 _Fail("Cannot find block device %s", disk)

4391 4392 if disk.SupportsSnapshots(): 4393 if disk.dev_type == constants.DT_DRBD8: 4394 if not disk.children: 4395 _Fail("DRBD device '%s' without backing storage cannot be snapshotted", 4396 disk.unique_id) 4397 return BlockdevSnapshot(disk.children[0], snap_name, snap_size) 4398 else: 4399 return _DiskSnapshot(disk, snap_name, snap_size) 4400 else: 4401 _Fail("Cannot snapshot block device '%s' of type '%s'", 4402 disk.logical_id, disk.dev_type) 4403

4404 4405 -def BlockdevSetInfo(disk, info):

4406 """Sets 'metadata' information on block devices. 4407 4408 This function sets 'info' metadata on block devices. Initial 4409 information is set at device creation; this function should be used 4410 for example after renames. 4411 4412 @type disk: L{objects.Disk} 4413 @param disk: the disk to be grown 4414 @type info: string 4415 @param info: new 'info' metadata 4416 @rtype: (status, result) 4417 @return: a tuple with the status of the operation (True/False), and 4418 the errors message if status is False 4419 4420 """ 4421 r_dev = _RecursiveFindBD(disk) 4422 if r_dev is None: 4423 _Fail("Cannot find block device %s", disk) 4424 4425 try: 4426 r_dev.SetInfo(info) 4427 except errors.BlockDeviceError, err: 4428 _Fail("Failed to set information on block device: %s", err, exc=True)

4429

4430 4431 -def FinalizeExport(instance, snap_disks):

4432 """Write out the export configuration information. 4433 4434 @type instance: L{objects.Instance} 4435 @param instance: the instance which we export, used for 4436 saving configuration 4437 @type snap_disks: list of L{objects.Disk} 4438 @param snap_disks: list of snapshot block devices, which 4439 will be used to get the actual name of the dump file 4440 4441 @rtype: None 4442 4443 """ 4444 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") 4445 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) 4446 disk_template = utils.GetDiskTemplate(snap_disks) 4447 4448 config = objects.SerializableConfigParser() 4449 4450 config.add_section(constants.INISECT_EXP) 4451 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION)) 4452 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) 4453 config.set(constants.INISECT_EXP, "source", instance.primary_node) 4454 config.set(constants.INISECT_EXP, "os", instance.os) 4455 config.set(constants.INISECT_EXP, "compression", "none") 4456 4457 config.add_section(constants.INISECT_INS) 4458 config.set(constants.INISECT_INS, "name", instance.name) 4459 config.set(constants.INISECT_INS, "maxmem", "%d" % 4460 instance.beparams[constants.BE_MAXMEM]) 4461 config.set(constants.INISECT_INS, "minmem", "%d" % 4462 instance.beparams[constants.BE_MINMEM]) 4463 # "memory" is deprecated, but useful for exporting to old ganeti versions 4464 config.set(constants.INISECT_INS, "memory", "%d" % 4465 instance.beparams[constants.BE_MAXMEM]) 4466 config.set(constants.INISECT_INS, "vcpus", "%d" % 4467 instance.beparams[constants.BE_VCPUS]) 4468 config.set(constants.INISECT_INS, "disk_template", disk_template) 4469 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) 4470 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) 4471 4472 nic_total = 0 4473 for nic_count, nic in enumerate(instance.nics): 4474 nic_total += 1 4475 config.set(constants.INISECT_INS, "nic%d_mac" % 4476 nic_count, "%s" % nic.mac) 4477 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) 4478 config.set(constants.INISECT_INS, "nic%d_network" % nic_count, 4479 "%s" % nic.network) 4480 config.set(constants.INISECT_INS, "nic%d_name" % nic_count, 4481 "%s" % nic.name) 4482 for param in constants.NICS_PARAMETER_TYPES: 4483 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), 4484 "%s" % nic.nicparams.get(param, None)) 4485 # TODO: redundant: on load can read nics until it doesn't exist 4486 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) 4487 4488 disk_total = 0 4489 for disk_count, disk in enumerate(snap_disks): 4490 if disk: 4491 disk_total += 1 4492 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, 4493 ("%s" % disk.iv_name)) 4494 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, 4495 ("%s" % disk.uuid)) 4496 config.set(constants.INISECT_INS, "disk%d_size" % disk_count, 4497 ("%d" % disk.size)) 4498 config.set(constants.INISECT_INS, "disk%d_name" % disk_count, 4499 "%s" % disk.name) 4500 4501 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) 4502 4503 # New-style hypervisor/backend parameters 4504 4505 config.add_section(constants.INISECT_HYP) 4506 for name, value in instance.hvparams.items(): 4507 if name not in constants.HVC_GLOBALS: 4508 config.set(constants.INISECT_HYP, name, str(value)) 4509 4510 config.add_section(constants.INISECT_BEP) 4511 for name, value in instance.beparams.items(): 4512 config.set(constants.INISECT_BEP, name, str(value)) 4513 4514 config.add_section(constants.INISECT_OSP) 4515 for name, value in instance.osparams.items(): 4516 config.set(constants.INISECT_OSP, name, str(value)) 4517 4518 config.add_section(constants.INISECT_OSP_PRIVATE) 4519 for name, value in instance.osparams_private.items(): 4520 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get())) 4521 4522 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), 4523 data=config.Dumps()) 4524 shutil.rmtree(finaldestdir, ignore_errors=True) 4525 shutil.move(destdir, finaldestdir)

4526

4527 4528 -def ExportInfo(dest):

4529 """Get export configuration information. 4530 4531 @type dest: str 4532 @param dest: directory containing the export 4533 4534 @rtype: L{objects.SerializableConfigParser} 4535 @return: a serializable config file containing the 4536 export info 4537 4538 """ 4539 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) 4540 4541 config = objects.SerializableConfigParser() 4542 config.read(cff) 4543 4544 if (not config.has_section(constants.INISECT_EXP) or 4545 not config.has_section(constants.INISECT_INS)): 4546 _Fail("Export info file doesn't have the required fields") 4547 4548 return config.Dumps()

4549

4550 4551 -def ListExports():

4552 """Return a list of exports currently available on this machine. 4553 4554 @rtype: list 4555 @return: list of the exports 4556 4557 """ 4558 if os.path.isdir(pathutils.EXPORT_DIR): 4559 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) 4560 else: 4561 _Fail("No exports directory")

4562

4563 4564 -def RemoveExport(export):

4565 """Remove an existing export from the node. 4566 4567 @type export: str 4568 @param export: the name of the export to remove 4569 @rtype: None 4570 4571 """ 4572 target = utils.PathJoin(pathutils.EXPORT_DIR, export) 4573 4574 try: 4575 shutil.rmtree(target) 4576 except EnvironmentError, err: 4577 _Fail("Error while removing the export: %s", err, exc=True)

4578

4579 4580 -def BlockdevRename(devlist):

4581 """Rename a list of block devices. 4582 4583 @type devlist: list of tuples 4584 @param devlist: list of tuples of the form (disk, new_unique_id); disk is 4585 an L{objects.Disk} object describing the current disk, and new 4586 unique_id is the name we rename it to 4587 @rtype: boolean 4588 @return: True if all renames succeeded, False otherwise 4589 4590 """ 4591 msgs = [] 4592 result = True 4593 for disk, unique_id in devlist: 4594 dev = _RecursiveFindBD(disk) 4595 if dev is None: 4596 msgs.append("Can't find device %s in rename" % str(disk)) 4597 result = False 4598 continue 4599 try: 4600 old_rpath = dev.dev_path 4601 dev.Rename(unique_id) 4602 new_rpath = dev.dev_path 4603 if old_rpath != new_rpath: 4604 DevCacheManager.RemoveCache(old_rpath) 4605 # FIXME: we should add the new cache information here, like: 4606 # DevCacheManager.UpdateCache(new_rpath, owner, ...) 4607 # but we don't have the owner here - maybe parse from existing 4608 # cache? for now, we only lose lvm data when we rename, which 4609 # is less critical than DRBD or MD 4610 except errors.BlockDeviceError, err: 4611 msgs.append("Can't rename device '%s' to '%s': %s" % 4612 (dev, unique_id, err)) 4613 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) 4614 result = False 4615 if not result: 4616 _Fail("; ".join(msgs))

4617

4618 4619 -def _TransformFileStorageDir(fs_dir):

4620 """Checks whether given file_storage_dir is valid. 4621 4622 Checks wheter the given fs_dir is within the cluster-wide default 4623 file_storage_dir or the shared_file_storage_dir, which are stored in 4624 SimpleStore. Only paths under those directories are allowed. 4625 4626 @type fs_dir: str 4627 @param fs_dir: the path to check 4628 4629 @return: the normalized path if valid, None otherwise 4630 4631 """ 4632 filestorage.CheckFileStoragePath(fs_dir) 4633 4634 return os.path.normpath(fs_dir)

4635

4636 4637 -def CreateFileStorageDir(file_storage_dir):

4638 """Create file storage directory. 4639 4640 @type file_storage_dir: str 4641 @param file_storage_dir: directory to create 4642 4643 @rtype: tuple 4644 @return: tuple with first element a boolean indicating wheter dir 4645 creation was successful or not 4646 4647 """ 4648 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4649 if os.path.exists(file_storage_dir): 4650 if not os.path.isdir(file_storage_dir): 4651 _Fail("Specified storage dir '%s' is not a directory", 4652 file_storage_dir) 4653 else: 4654 try: 4655 os.makedirs(file_storage_dir, 0750) 4656 except OSError, err: 4657 _Fail("Cannot create file storage directory '%s': %s", 4658 file_storage_dir, err, exc=True)

4659

4660 4661 -def RemoveFileStorageDir(file_storage_dir):

4662 """Remove file storage directory. 4663 4664 Remove it only if it's empty. If not log an error and return. 4665 4666 @type file_storage_dir: str 4667 @param file_storage_dir: the directory we should cleanup 4668 @rtype: tuple (success,) 4669 @return: tuple of one element, C{success}, denoting 4670 whether the operation was successful 4671 4672 """ 4673 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4674 if os.path.exists(file_storage_dir): 4675 if not os.path.isdir(file_storage_dir): 4676 _Fail("Specified Storage directory '%s' is not a directory", 4677 file_storage_dir) 4678 # deletes dir only if empty, otherwise we want to fail the rpc call 4679 try: 4680 os.rmdir(file_storage_dir) 4681 except OSError, err: 4682 _Fail("Cannot remove file storage directory '%s': %s", 4683 file_storage_dir, err)

4684

4685 4686 -def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):

4687 """Rename the file storage directory. 4688 4689 @type old_file_storage_dir: str 4690 @param old_file_storage_dir: the current path 4691 @type new_file_storage_dir: str 4692 @param new_file_storage_dir: the name we should rename to 4693 @rtype: tuple (success,) 4694 @return: tuple of one element, C{success}, denoting 4695 whether the operation was successful 4696 4697 """ 4698 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) 4699 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) 4700 if not os.path.exists(new_file_storage_dir): 4701 if os.path.isdir(old_file_storage_dir): 4702 try: 4703 os.rename(old_file_storage_dir, new_file_storage_dir) 4704 except OSError, err: 4705 _Fail("Cannot rename '%s' to '%s': %s", 4706 old_file_storage_dir, new_file_storage_dir, err) 4707 else: 4708 _Fail("Specified storage dir '%s' is not a directory", 4709 old_file_storage_dir) 4710 else: 4711 if os.path.exists(old_file_storage_dir): 4712 _Fail("Cannot rename '%s' to '%s': both locations exist", 4713 old_file_storage_dir, new_file_storage_dir)

4714

4715 4716 -def _EnsureJobQueueFile(file_name):

4717 """Checks whether the given filename is in the queue directory. 4718 4719 @type file_name: str 4720 @param file_name: the file name we should check 4721 @rtype: None 4722 @raises RPCFail: if the file is not valid 4723 4724 """ 4725 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): 4726 _Fail("Passed job queue file '%s' does not belong to" 4727 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)

4728

4729 4730 -def JobQueueUpdate(file_name, content):

4731 """Updates a file in the queue directory. 4732 4733 This is just a wrapper over L{utils.io.WriteFile}, with proper 4734 checking. 4735 4736 @type file_name: str 4737 @param file_name: the job file name 4738 @type content: str 4739 @param content: the new job contents 4740 @rtype: boolean 4741 @return: the success of the operation 4742 4743 """ 4744 file_name = vcluster.LocalizeVirtualPath(file_name) 4745 4746 _EnsureJobQueueFile(file_name) 4747 getents = runtime.GetEnts() 4748 4749 # Write and replace the file atomically 4750 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, 4751 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)

4752

4753 4754 -def JobQueueRename(old, new):

4755 """Renames a job queue file. 4756 4757 This is just a wrapper over os.rename with proper checking. 4758 4759 @type old: str 4760 @param old: the old (actual) file name 4761 @type new: str 4762 @param new: the desired file name 4763 @rtype: tuple 4764 @return: the success of the operation and payload 4765 4766 """ 4767 old = vcluster.LocalizeVirtualPath(old) 4768 new = vcluster.LocalizeVirtualPath(new) 4769 4770 _EnsureJobQueueFile(old) 4771 _EnsureJobQueueFile(new) 4772 4773 getents = runtime.GetEnts() 4774 4775 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, 4776 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)

4777

4778 4779 -def BlockdevClose(instance_name, disks):

4780 """Closes the given block devices. 4781 4782 This means they will be switched to secondary mode (in case of 4783 DRBD). 4784 4785 @param instance_name: if the argument is not empty, the symlinks 4786 of this instance will be removed 4787 @type disks: list of L{objects.Disk} 4788 @param disks: the list of disks to be closed 4789 @rtype: tuple (success, message) 4790 @return: a tuple of success and message, where success 4791 indicates the succes of the operation, and message 4792 which will contain the error details in case we 4793 failed 4794 4795 """ 4796 bdevs = [] 4797 for cf in disks: 4798 rd = _RecursiveFindBD(cf) 4799 if rd is None: 4800 _Fail("Can't find device %s", cf) 4801 bdevs.append(rd) 4802 4803 msg = [] 4804 for rd in bdevs: 4805 try: 4806 rd.Close() 4807 except errors.BlockDeviceError, err: 4808 msg.append(str(err)) 4809 if msg: 4810 _Fail("Can't close devices: %s", ",".join(msg)) 4811 else: 4812 if instance_name: 4813 _RemoveBlockDevLinks(instance_name, disks)

4814

4815 4816 -def BlockdevOpen(instance_name, disks, exclusive):

4817 """Opens the given block devices. 4818 4819 """ 4820 bdevs = [] 4821 for cf in disks: 4822 rd = _RecursiveFindBD(cf) 4823 if rd is None: 4824 _Fail("Can't find device %s", cf) 4825 bdevs.append(rd) 4826 4827 msg = [] 4828 for idx, rd in enumerate(bdevs): 4829 try: 4830 rd.Open(exclusive=exclusive) 4831 _SymlinkBlockDev(instance_name, rd.dev_path, idx) 4832 except errors.BlockDeviceError, err: 4833 msg.append(str(err)) 4834 4835 if msg: 4836 _Fail("Can't open devices: %s", ",".join(msg))

4837

4838 4839 -def ValidateHVParams(hvname, hvparams):

4840 """Validates the given hypervisor parameters. 4841 4842 @type hvname: string 4843 @param hvname: the hypervisor name 4844 @type hvparams: dict 4845 @param hvparams: the hypervisor parameters to be validated 4846 @rtype: None 4847 4848 """ 4849 try: 4850 hv_type = hypervisor.GetHypervisor(hvname) 4851 hv_type.ValidateParameters(hvparams) 4852 except errors.HypervisorError, err: 4853 _Fail(str(err), log=False)

4854

4855 4856 -def _CheckOSPList(os_obj, parameters):

4857 """Check whether a list of parameters is supported by the OS. 4858 4859 @type os_obj: L{objects.OS} 4860 @param os_obj: OS object to check 4861 @type parameters: list 4862 @param parameters: the list of parameters to check 4863 4864 """ 4865 supported = [v[0] for v in os_obj.supported_parameters] 4866 delta = frozenset(parameters).difference(supported) 4867 if delta: 4868 _Fail("The following parameters are not supported" 4869 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))

4870

4871 4872 -def _CheckOSVariant(os_obj, name):

4873 """Check whether an OS name conforms to the os variants specification. 4874 4875 @type os_obj: L{objects.OS} 4876 @param os_obj: OS object to check 4877 4878 @type name: string 4879 @param name: OS name passed by the user, to check for validity 4880 4881 @rtype: NoneType 4882 @return: None 4883 @raise RPCFail: if OS variant is not valid 4884 4885 """ 4886 variant = objects.OS.GetVariant(name) 4887 4888 if not os_obj.supported_variants: 4889 if variant: 4890 _Fail("OS '%s' does not support variants ('%s' passed)" % 4891 (os_obj.name, variant)) 4892 else: 4893 return 4894 4895 if not variant: 4896 _Fail("OS name '%s' must include a variant" % name) 4897 4898 if variant not in os_obj.supported_variants: 4899 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))

4900

4901 4902 -def ValidateOS(required, osname, checks, osparams, force_variant):

4903 """Validate the given OS parameters. 4904 4905 @type required: boolean 4906 @param required: whether absence of the OS should translate into 4907 failure or not 4908 @type osname: string 4909 @param osname: the OS to be validated 4910 @type checks: list 4911 @param checks: list of the checks to run (currently only 'parameters') 4912 @type osparams: dict 4913 @param osparams: dictionary with OS parameters, some of which may be 4914 private. 4915 @rtype: boolean 4916 @return: True if the validation passed, or False if the OS was not 4917 found and L{required} was false 4918 4919 """ 4920 if not constants.OS_VALIDATE_CALLS.issuperset(checks): 4921 _Fail("Unknown checks required for OS %s: %s", osname, 4922 set(checks).difference(constants.OS_VALIDATE_CALLS)) 4923 4924 name_only = objects.OS.GetName(osname) 4925 status, tbv = _TryOSFromDisk(name_only, None) 4926 4927 if not status: 4928 if required: 4929 _Fail(tbv) 4930 else: 4931 return False 4932 4933 if not force_variant: 4934 _CheckOSVariant(tbv, osname) 4935 4936 if max(tbv.api_versions) < constants.OS_API_V20: 4937 return True 4938 4939 if constants.OS_VALIDATE_PARAMETERS in checks: 4940 _CheckOSPList(tbv, osparams.keys()) 4941 4942 validate_env = OSCoreEnv(osname, tbv, osparams) 4943 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, 4944 cwd=tbv.path, reset_env=True) 4945 if result.failed: 4946 logging.error("os validate command '%s' returned error: %s output: %s", 4947 result.cmd, result.fail_reason, result.output) 4948 _Fail("OS validation script failed (%s), output: %s", 4949 result.fail_reason, result.output, log=False) 4950 4951 return True

4952

4953 4954 -def ExportOS(instance, override_env):

4955 """Creates a GZIPed tarball with an OS definition and environment. 4956 4957 The archive contains a file with the environment variables needed by 4958 the OS scripts. 4959 4960 @type instance: L{objects.Instance} 4961 @param instance: instance for which the OS definition is exported 4962 4963 @type override_env: dict of string to string 4964 @param override_env: if supplied, it overrides the environment on a 4965 key-by-key basis that is part of the archive 4966 4967 @rtype: string 4968 @return: filepath of the archive 4969 4970 """ 4971 assert instance 4972 assert instance.os 4973 4974 temp_dir = tempfile.mkdtemp() 4975 inst_os = OSFromDisk(instance.os) 4976 4977 result = utils.RunCmd(["ln", "-s", inst_os.path, 4978 utils.PathJoin(temp_dir, "os")]) 4979 if result.failed: 4980 _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'", 4981 inst_os, temp_dir, result.fail_reason, result.output) 4982 4983 env = OSEnvironment(instance, inst_os) 4984 env.update(override_env) 4985 4986 with open(utils.PathJoin(temp_dir, "environment"), "w") as f: 4987 for var in env: 4988 f.write(var + "=" + env[var] + "\n") 4989 4990 (fd, os_package) = tempfile.mkstemp(suffix=".tgz") 4991 os.close(fd) 4992 4993 result = utils.RunCmd(["tar", "--dereference", "-czv", 4994 "-f", os_package, 4995 "-C", temp_dir, 4996 "."]) 4997 if result.failed: 4998 _Fail("Failed to create OS archive '%s': %s, output '%s'", 4999 os_package, result.fail_reason, result.output) 5000 5001 result = utils.RunCmd(["rm", "-rf", temp_dir]) 5002 if result.failed: 5003 _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'", 5004 inst_os, temp_dir, result.fail_reason, result.output) 5005 5006 return os_package

5007

5008 5009 -def DemoteFromMC():

5010 """Demotes the current node from master candidate role. 5011 5012 """ 5013 # try to ensure we're not the master by mistake 5014 master, myself = ssconf.GetMasterAndMyself() 5015 if master == myself: 5016 _Fail("ssconf status shows I'm the master node, will not demote") 5017 5018 result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) 5019 if not result.failed: 5020 _Fail("The master daemon is running, will not demote") 5021 5022 try: 5023 if os.path.isfile(pathutils.CLUSTER_CONF_FILE): 5024 utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) 5025 except EnvironmentError, err: 5026 if err.errno != errno.ENOENT: 5027 _Fail("Error while backing up cluster file: %s", err, exc=True) 5028 5029 utils.RemoveFile(pathutils.CLUSTER_CONF_FILE)

5030

5031 5032 -def _GetX509Filenames(cryptodir, name):

5033 """Returns the full paths for the private key and certificate. 5034 5035 """ 5036 return (utils.PathJoin(cryptodir, name), 5037 utils.PathJoin(cryptodir, name, _X509_KEY_FILE), 5038 utils.PathJoin(cryptodir, name, _X509_CERT_FILE))

5039

5040 5041 -def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):

5042 """Creates a new X509 certificate for SSL/TLS. 5043 5044 @type validity: int 5045 @param validity: Validity in seconds 5046 @rtype: tuple; (string, string) 5047 @return: Certificate name and public part 5048 5049 """ 5050 serial_no = int(time.time()) 5051 (key_pem, cert_pem) = \ 5052 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), 5053 min(validity, _MAX_SSL_CERT_VALIDITY), 5054 serial_no) 5055 5056 cert_dir = tempfile.mkdtemp(dir=cryptodir, 5057 prefix="x509-%s-" % utils.TimestampForFilename()) 5058 try: 5059 name = os.path.basename(cert_dir) 5060 assert len(name) > 5 5061 5062 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 5063 5064 utils.WriteFile(key_file, mode=0400, data=key_pem) 5065 utils.WriteFile(cert_file, mode=0400, data=cert_pem) 5066 5067 # Never return private key as it shouldn't leave the node 5068 return (name, cert_pem) 5069 except Exception: 5070 shutil.rmtree(cert_dir, ignore_errors=True) 5071 raise

5072

5073 5074 -def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):

5075 """Removes a X509 certificate. 5076 5077 @type name: string 5078 @param name: Certificate name 5079 5080 """ 5081 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 5082 5083 utils.RemoveFile(key_file) 5084 utils.RemoveFile(cert_file) 5085 5086 try: 5087 os.rmdir(cert_dir) 5088 except EnvironmentError, err: 5089 _Fail("Cannot remove certificate directory '%s': %s", 5090 cert_dir, err)

5091

5092 5093 -def _GetImportExportIoCommand(instance, mode, ieio, ieargs):

5094 """Returns the command for the requested input/output. 5095 5096 @type instance: L{objects.Instance} 5097 @param instance: The instance object 5098 @param mode: Import/export mode 5099 @param ieio: Input/output type 5100 @param ieargs: Input/output arguments 5101 5102 """ 5103 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) 5104 5105 env = None 5106 prefix = None 5107 suffix = None 5108 exp_size = None 5109 5110 if ieio == constants.IEIO_FILE: 5111 (filename, ) = ieargs 5112 5113 if not utils.IsNormAbsPath(filename): 5114 _Fail("Path '%s' is not normalized or absolute", filename) 5115 5116 real_filename = os.path.realpath(filename) 5117 directory = os.path.dirname(real_filename) 5118 5119 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): 5120 _Fail("File '%s' is not under exports directory '%s': %s", 5121 filename, pathutils.EXPORT_DIR, real_filename) 5122 5123 # Create directory 5124 utils.Makedirs(directory, mode=0750) 5125 5126 quoted_filename = utils.ShellQuote(filename) 5127 5128 if mode == constants.IEM_IMPORT: 5129 suffix = "> %s" % quoted_filename 5130 elif mode == constants.IEM_EXPORT: 5131 suffix = "< %s" % quoted_filename 5132 5133 # Retrieve file size 5134 try: 5135 st = os.stat(filename) 5136 except EnvironmentError, err: 5137 logging.error("Can't stat(2) %s: %s", filename, err) 5138 else: 5139 exp_size = utils.BytesToMebibyte(st.st_size) 5140 5141 elif ieio == constants.IEIO_RAW_DISK: 5142 (disk, ) = ieargs 5143 real_disk = _OpenRealBD(disk) 5144 5145 if mode == constants.IEM_IMPORT: 5146 suffix = "| %s" % utils.ShellQuoteArgs(real_disk.Import()) 5147 5148 elif mode == constants.IEM_EXPORT: 5149 prefix = "%s |" % utils.ShellQuoteArgs(real_disk.Export()) 5150 exp_size = disk.size 5151 5152 elif ieio == constants.IEIO_SCRIPT: 5153 (disk, disk_index, ) = ieargs 5154 5155 assert isinstance(disk_index, (int, long)) 5156 5157 inst_os = OSFromDisk(instance.os) 5158 env = OSEnvironment(instance, inst_os) 5159 5160 if mode == constants.IEM_IMPORT: 5161 disk_path_var = "DISK_%d_PATH" % disk_index 5162 if disk_path_var in env: 5163 env["IMPORT_DEVICE"] = env[disk_path_var] 5164 env["IMPORT_DISK_PATH"] = env[disk_path_var] 5165 5166 disk_uri_var = "DISK_%d_URI" % disk_index 5167 if disk_uri_var in env: 5168 env["IMPORT_DISK_URI"] = env[disk_uri_var] 5169 5170 env["IMPORT_INDEX"] = str(disk_index) 5171 script = inst_os.import_script 5172 5173 elif mode == constants.IEM_EXPORT: 5174 disk_path_var = "DISK_%d_PATH" % disk_index 5175 if disk_path_var in env: 5176 env["EXPORT_DEVICE"] = env[disk_path_var] 5177 env["EXPORT_DISK_PATH"] = env[disk_path_var] 5178 5179 disk_uri_var = "DISK_%d_URI" % disk_index 5180 if disk_uri_var in env: 5181 env["EXPORT_DISK_URI"] = env[disk_uri_var] 5182 5183 env["EXPORT_INDEX"] = str(disk_index) 5184 script = inst_os.export_script 5185 5186 # TODO: Pass special environment only to script 5187 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) 5188 5189 if mode == constants.IEM_IMPORT: 5190 suffix = "| %s" % script_cmd 5191 5192 elif mode == constants.IEM_EXPORT: 5193 prefix = "%s |" % script_cmd 5194 5195 # Let script predict size 5196 exp_size = constants.IE_CUSTOM_SIZE 5197 5198 else: 5199 _Fail("Invalid %s I/O mode %r", mode, ieio) 5200 5201 return (env, prefix, suffix, exp_size)

5202

5203 5204 -def _CreateImportExportStatusDir(prefix):

5205 """Creates status directory for import/export. 5206 5207 """ 5208 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, 5209 prefix=("%s-%s-" % 5210 (prefix, utils.TimestampForFilename())))

5211

5212 5213 -def StartImportExportDaemon(mode, opts, host, port, instance, component, 5214 ieio, ieioargs):

5215 """Starts an import or export daemon. 5216 5217 @param mode: Import/output mode 5218 @type opts: L{objects.ImportExportOptions} 5219 @param opts: Daemon options 5220 @type host: string 5221 @param host: Remote host for export (None for import) 5222 @type port: int 5223 @param port: Remote port for export (None for import) 5224 @type instance: L{objects.Instance} 5225 @param instance: Instance object 5226 @type component: string 5227 @param component: which part of the instance is transferred now, 5228 e.g. 'disk/0' 5229 @param ieio: Input/output type 5230 @param ieioargs: Input/output arguments 5231 5232 """ 5233 5234 # Use Import/Export over socat. 5235 # 5236 # Export() gives a command that produces a flat stream. 5237 # Import() gives a command that reads a flat stream to a disk template. 5238 if mode == constants.IEM_IMPORT: 5239 prefix = "import" 5240 5241 if not (host is None and port is None): 5242 _Fail("Can not specify host or port on import") 5243 5244 elif mode == constants.IEM_EXPORT: 5245 prefix = "export" 5246 5247 if host is None or port is None: 5248 _Fail("Host and port must be specified for an export") 5249 5250 else: 5251 _Fail("Invalid mode %r", mode) 5252 5253 if (opts.key_name is None) ^ (opts.ca_pem is None): 5254 _Fail("Cluster certificate can only be used for both key and CA") 5255 5256 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ 5257 _GetImportExportIoCommand(instance, mode, ieio, ieioargs) 5258 5259 if opts.key_name is None: 5260 # Use server.pem 5261 key_path = pathutils.NODED_CERT_FILE 5262 cert_path = pathutils.NODED_CERT_FILE 5263 assert opts.ca_pem is None 5264 else: 5265 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, 5266 opts.key_name) 5267 assert opts.ca_pem is not None 5268 5269 for i in [key_path, cert_path]: 5270 if not os.path.exists(i): 5271 _Fail("File '%s' does not exist" % i) 5272 5273 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) 5274 try: 5275 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) 5276 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) 5277 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) 5278 5279 if opts.ca_pem is None: 5280 # Use server.pem 5281 ca = utils.ReadFile(pathutils.NODED_CERT_FILE) 5282 else: 5283 ca = opts.ca_pem 5284 5285 # Write CA file 5286 utils.WriteFile(ca_file, data=ca, mode=0400) 5287 5288 cmd = [ 5289 pathutils.IMPORT_EXPORT_DAEMON, 5290 status_file, mode, 5291 "--key=%s" % key_path, 5292 "--cert=%s" % cert_path, 5293 "--ca=%s" % ca_file, 5294 ] 5295 5296 if host: 5297 cmd.append("--host=%s" % host) 5298 5299 if port: 5300 cmd.append("--port=%s" % port) 5301 5302 if opts.ipv6: 5303 cmd.append("--ipv6") 5304 else: 5305 cmd.append("--ipv4") 5306 5307 if opts.compress: 5308 cmd.append("--compress=%s" % opts.compress) 5309 5310 if opts.magic: 5311 cmd.append("--magic=%s" % opts.magic) 5312 5313 if exp_size is not None: 5314 cmd.append("--expected-size=%s" % exp_size) 5315 5316 if cmd_prefix: 5317 cmd.append("--cmd-prefix=%s" % cmd_prefix) 5318 5319 if cmd_suffix: 5320 cmd.append("--cmd-suffix=%s" % cmd_suffix) 5321 5322 if mode == constants.IEM_EXPORT: 5323 # Retry connection a few times when connecting to remote peer 5324 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) 5325 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) 5326 elif opts.connect_timeout is not None: 5327 assert mode == constants.IEM_IMPORT 5328 # Overall timeout for establishing connection while listening 5329 cmd.append("--connect-timeout=%s" % opts.connect_timeout) 5330 5331 logfile = _InstanceLogName(prefix, instance.os, instance.name, component) 5332 5333 # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has 5334 # support for receiving a file descriptor for output 5335 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, 5336 output=logfile) 5337 5338 # The import/export name is simply the status directory name 5339 return os.path.basename(status_dir) 5340 5341 except Exception: 5342 shutil.rmtree(status_dir, ignore_errors=True) 5343 raise

5344

5345 5346 -def GetImportExportStatus(names):

5347 """Returns import/export daemon status. 5348 5349 @type names: sequence 5350 @param names: List of names 5351 @rtype: List of dicts 5352 @return: Returns a list of the state of each named import/export or None if a 5353 status couldn't be read 5354 5355 """ 5356 result = [] 5357 5358 for name in names: 5359 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, 5360 _IES_STATUS_FILE) 5361 5362 try: 5363 data = utils.ReadFile(status_file) 5364 except EnvironmentError, err: 5365 if err.errno != errno.ENOENT: 5366 raise 5367 data = None 5368 5369 if not data: 5370 result.append(None) 5371 continue 5372 5373 result.append(serializer.LoadJson(data)) 5374 5375 return result

5376

5377 5378 -def AbortImportExport(name):

5379 """Sends SIGTERM to a running import/export daemon. 5380 5381 """ 5382 logging.info("Abort import/export %s", name) 5383 5384 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5385 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5386 5387 if pid: 5388 logging.info("Import/export %s is running with PID %s, sending SIGTERM", 5389 name, pid) 5390 utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)

5391

5392 5393 -def CleanupImportExport(name):

5394 """Cleanup after an import or export. 5395 5396 If the import/export daemon is still running it's killed. Afterwards the 5397 whole status directory is removed. 5398 5399 """ 5400 logging.info("Finalizing import/export %s", name) 5401 5402 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5403 5404 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5405 5406 if pid: 5407 logging.info("Import/export %s is still running with PID %s", 5408 name, pid) 5409 utils.KillProcess(pid, waitpid=False) 5410 5411 shutil.rmtree(status_dir, ignore_errors=True)

5412

5413 5414 -def _FindDisks(disks):

5415 """Finds attached L{BlockDev}s for the given disks. 5416 5417 @type disks: list of L{objects.Disk} 5418 @param disks: the disk objects we need to find 5419 5420 @return: list of L{BlockDev} objects or C{None} if a given disk 5421 was not found or was no attached. 5422 5423 """ 5424 bdevs = [] 5425 5426 for disk in disks: 5427 rd = _RecursiveFindBD(disk) 5428 if rd is None: 5429 _Fail("Can't find device %s", disk) 5430 bdevs.append(rd) 5431 return bdevs

5432

5433 5434 -def DrbdDisconnectNet(disks):

5435 """Disconnects the network on a list of drbd devices. 5436 5437 """ 5438 bdevs = _FindDisks(disks) 5439 5440 # disconnect disks 5441 for rd in bdevs: 5442 try: 5443 rd.DisconnectNet() 5444 except errors.BlockDeviceError, err: 5445 _Fail("Can't change network configuration to standalone mode: %s", 5446 err, exc=True)

5447

5448 5449 -def DrbdAttachNet(disks, multimaster):

5450 """Attaches the network on a list of drbd devices. 5451 5452 """ 5453 bdevs = _FindDisks(disks) 5454 5455 # reconnect disks, switch to new master configuration and if 5456 # needed primary mode 5457 for rd in bdevs: 5458 try: 5459 rd.AttachNet(multimaster) 5460 except errors.BlockDeviceError, err: 5461 _Fail("Can't change network configuration: %s", err) 5462 5463 # wait until the disks are connected; we need to retry the re-attach 5464 # if the device becomes standalone, as this might happen if the one 5465 # node disconnects and reconnects in a different mode before the 5466 # other node reconnects; in this case, one or both of the nodes will 5467 # decide it has wrong configuration and switch to standalone 5468 5469 def _Attach(): 5470 all_connected = True 5471 5472 for rd in bdevs: 5473 stats = rd.GetProcStatus() 5474 5475 if multimaster: 5476 # In the multimaster case we have to wait explicitly until 5477 # the resource is Connected and UpToDate/UpToDate, because 5478 # we promote *both nodes* to primary directly afterwards. 5479 # Being in resync is not enough, since there is a race during which we 5480 # may promote a node with an Outdated disk to primary, effectively 5481 # tearing down the connection. 5482 all_connected = (all_connected and 5483 stats.is_connected and 5484 stats.is_disk_uptodate and 5485 stats.peer_disk_uptodate) 5486 else: 5487 all_connected = (all_connected and 5488 (stats.is_connected or stats.is_in_resync)) 5489 5490 if stats.is_standalone: 5491 # peer had different config info and this node became 5492 # standalone, even though this should not happen with the 5493 # new staged way of changing disk configs 5494 try: 5495 rd.AttachNet(multimaster) 5496 except errors.BlockDeviceError, err: 5497 _Fail("Can't change network configuration: %s", err) 5498 5499 if not all_connected: 5500 raise utils.RetryAgain()

5501 5502 try: 5503 # Start with a delay of 100 miliseconds and go up to 5 seconds 5504 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60) 5505 except utils.RetryTimeout: 5506 _Fail("Timeout in disk reconnecting") 5507

5508 5509 -def DrbdWaitSync(disks):

5510 """Wait until DRBDs have synchronized. 5511 5512 """ 5513 def _helper(rd): 5514 stats = rd.GetProcStatus() 5515 if not (stats.is_connected or stats.is_in_resync): 5516 raise utils.RetryAgain() 5517 return stats

5518 5519 bdevs = _FindDisks(disks) 5520 5521 min_resync = 100 5522 alldone = True 5523 for rd in bdevs: 5524 try: 5525 # poll each second for 15 seconds 5526 stats = utils.Retry(_helper, 1, 15, args=[rd]) 5527 except utils.RetryTimeout: 5528 stats = rd.GetProcStatus() 5529 # last check 5530 if not (stats.is_connected or stats.is_in_resync): 5531 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats) 5532 alldone = alldone and (not stats.is_in_resync) 5533 if stats.sync_percent is not None: 5534 min_resync = min(min_resync, stats.sync_percent) 5535 5536 return (alldone, min_resync) 5537

5538 5539 -def DrbdNeedsActivation(disks):

5540 """Checks which of the passed disks needs activation and returns their UUIDs. 5541 5542 """ 5543 faulty_disks = [] 5544 5545 for disk in disks: 5546 rd = _RecursiveFindBD(disk) 5547 if rd is None: 5548 faulty_disks.append(disk) 5549 continue 5550 5551 stats = rd.GetProcStatus() 5552 if stats.is_standalone or stats.is_diskless: 5553 faulty_disks.append(disk) 5554 5555 return [disk.uuid for disk in faulty_disks]

5556

5557 5558 -def GetDrbdUsermodeHelper():

5559 """Returns DRBD usermode helper currently configured. 5560 5561 """ 5562 try: 5563 return drbd.DRBD8.GetUsermodeHelper() 5564 except errors.BlockDeviceError, err: 5565 _Fail(str(err))

5566

5567 5568 -def PowercycleNode(hypervisor_type, hvparams=None):

5569 """Hard-powercycle the node. 5570 5571 Because we need to return first, and schedule the powercycle in the 5572 background, we won't be able to report failures nicely. 5573 5574 """ 5575 hyper = hypervisor.GetHypervisor(hypervisor_type) 5576 try: 5577 pid = os.fork() 5578 except OSError: 5579 # if we can't fork, we'll pretend that we're in the child process 5580 pid = 0 5581 if pid > 0: 5582 return "Reboot scheduled in 5 seconds" 5583 # ensure the child is running on ram 5584 try: 5585 utils.Mlockall() 5586 except Exception: # pylint: disable=W0703 5587 pass 5588 time.sleep(5) 5589 hyper.PowercycleNode(hvparams=hvparams)

5590

5591 5592 -def _VerifyRestrictedCmdName(cmd):

5593 """Verifies a restricted command name. 5594 5595 @type cmd: string 5596 @param cmd: Command name 5597 @rtype: tuple; (boolean, string or None) 5598 @return: The tuple's first element is the status; if C{False}, the second 5599 element is an error message string, otherwise it's C{None} 5600 5601 """ 5602 if not cmd.strip(): 5603 return (False, "Missing command name") 5604 5605 if os.path.basename(cmd) != cmd: 5606 return (False, "Invalid command name") 5607 5608 if not constants.EXT_PLUGIN_MASK.match(cmd): 5609 return (False, "Command name contains forbidden characters") 5610 5611 return (True, None)

5612

5613 5614 -def _CommonRestrictedCmdCheck(path, owner):

5615 """Common checks for restricted command file system directories and files. 5616 5617 @type path: string 5618 @param path: Path to check 5619 @param owner: C{None} or tuple containing UID and GID 5620 @rtype: tuple; (boolean, string or C{os.stat} result) 5621 @return: The tuple's first element is the status; if C{False}, the second 5622 element is an error message string, otherwise it's the result of C{os.stat} 5623 5624 """ 5625 if owner is None: 5626 # Default to root as owner 5627 owner = (0, 0) 5628 5629 try: 5630 st = os.stat(path) 5631 except EnvironmentError, err: 5632 return (False, "Can't stat(2) '%s': %s" % (path, err)) 5633 5634 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): 5635 return (False, "Permissions on '%s' are too permissive" % path) 5636 5637 if (st.st_uid, st.st_gid) != owner: 5638 (owner_uid, owner_gid) = owner 5639 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) 5640 5641 return (True, st)

5642

5643 5644 -def _VerifyRestrictedCmdDirectory(path, _owner=None):

5645 """Verifies restricted command directory. 5646 5647 @type path: string 5648 @param path: Path to check 5649 @rtype: tuple; (boolean, string or None) 5650 @return: The tuple's first element is the status; if C{False}, the second 5651 element is an error message string, otherwise it's C{None} 5652 5653 """ 5654 (status, value) = _CommonRestrictedCmdCheck(path, _owner) 5655 5656 if not status: 5657 return (False, value) 5658 5659 if not stat.S_ISDIR(value.st_mode): 5660 return (False, "Path '%s' is not a directory" % path) 5661 5662 return (True, None)

5663

5664 5665 -def _VerifyRestrictedCmd(path, cmd, _owner=None):

5666 """Verifies a whole restricted command and returns its executable filename. 5667 5668 @type path: string 5669 @param path: Directory containing restricted commands 5670 @type cmd: string 5671 @param cmd: Command name 5672 @rtype: tuple; (boolean, string) 5673 @return: The tuple's first element is the status; if C{False}, the second 5674 element is an error message string, otherwise the second element is the 5675 absolute path to the executable 5676 5677 """ 5678 executable = utils.PathJoin(path, cmd) 5679 5680 (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) 5681 5682 if not status: 5683 return (False, msg) 5684 5685 if not utils.IsExecutable(executable): 5686 return (False, "access(2) thinks '%s' can't be executed" % executable) 5687 5688 return (True, executable)

5689

5690 5691 -def _PrepareRestrictedCmd(path, cmd, 5692 _verify_dir=_VerifyRestrictedCmdDirectory, 5693 _verify_name=_VerifyRestrictedCmdName, 5694 _verify_cmd=_VerifyRestrictedCmd):

5695 """Performs a number of tests on a restricted command. 5696 5697 @type path: string 5698 @param path: Directory containing restricted commands 5699 @type cmd: string 5700 @param cmd: Command name 5701 @return: Same as L{_VerifyRestrictedCmd} 5702 5703 """ 5704 # Verify the directory first 5705 (status, msg) = _verify_dir(path) 5706 if status: 5707 # Check command if everything was alright 5708 (status, msg) = _verify_name(cmd) 5709 5710 if not status: 5711 return (False, msg) 5712 5713 # Check actual executable 5714 return _verify_cmd(path, cmd)

5715

5716 5717 -def RunRestrictedCmd(cmd, 5718 _lock_timeout=_RCMD_LOCK_TIMEOUT, 5719 _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, 5720 _path=pathutils.RESTRICTED_COMMANDS_DIR, 5721 _sleep_fn=time.sleep, 5722 _prepare_fn=_PrepareRestrictedCmd, 5723 _runcmd_fn=utils.RunCmd, 5724 _enabled=constants.ENABLE_RESTRICTED_COMMANDS):

5725 """Executes a restricted command after performing strict tests. 5726 5727 @type cmd: string 5728 @param cmd: Command name 5729 @rtype: string 5730 @return: Command output 5731 @raise RPCFail: In case of an error 5732 5733 """ 5734 logging.info("Preparing to run restricted command '%s'", cmd) 5735 5736 if not _enabled: 5737 _Fail("Restricted commands disabled at configure time") 5738 5739 lock = None 5740 try: 5741 cmdresult = None 5742 try: 5743 lock = utils.FileLock.Open(_lock_file) 5744 lock.Exclusive(blocking=True, timeout=_lock_timeout) 5745 5746 (status, value) = _prepare_fn(_path, cmd) 5747 5748 if status: 5749 cmdresult = _runcmd_fn([value], env={}, reset_env=True, 5750 postfork_fn=lambda _: lock.Unlock()) 5751 else: 5752 logging.error(value) 5753 except Exception: # pylint: disable=W0703 5754 # Keep original error in log 5755 logging.exception("Caught exception") 5756 5757 if cmdresult is None: 5758 logging.info("Sleeping for %0.1f seconds before returning", 5759 _RCMD_INVALID_DELAY) 5760 _sleep_fn(_RCMD_INVALID_DELAY) 5761 5762 # Do not include original error message in returned error 5763 _Fail("Executing command '%s' failed" % cmd) 5764 elif cmdresult.failed or cmdresult.fail_reason: 5765 _Fail("Restricted command '%s' failed: %s; output: %s", 5766 cmd, cmdresult.fail_reason, cmdresult.output) 5767 else: 5768 return cmdresult.output 5769 finally: 5770 if lock is not None: 5771 # Release lock at last 5772 lock.Close() 5773 lock = None

5774

5775 5776 -def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):

5777 """Creates or removes the watcher pause file. 5778 5779 @type until: None or number 5780 @param until: Unix timestamp saying until when the watcher shouldn't run 5781 5782 """ 5783 if until is None: 5784 logging.info("Received request to no longer pause watcher") 5785 utils.RemoveFile(_filename) 5786 else: 5787 logging.info("Received request to pause watcher until %s", until) 5788 5789 if not ht.TNumber(until): 5790 _Fail("Duration must be numeric") 5791 5792 utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)

5793

5794 5795 -def ConfigureOVS(ovs_name, ovs_link):

5796 """Creates a OpenvSwitch on the node. 5797 5798 This function sets up a OpenvSwitch on the node with given name nad 5799 connects it via a given eth device. 5800 5801 @type ovs_name: string 5802 @param ovs_name: Name of the OpenvSwitch to create. 5803 @type ovs_link: None or string 5804 @param ovs_link: Ethernet device for outside connection (can be missing) 5805 5806 """ 5807 # Initialize the OpenvSwitch 5808 result = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name]) 5809 if result.failed: 5810 _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'" 5811 % (result.exit_code, result.output), log=True) 5812 5813 # And connect it to a physical interface, if given 5814 if ovs_link: 5815 result = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link]) 5816 if result.failed: 5817 _Fail("Failed to connect openvswitch to interface %s. Script return" 5818 " value: %s, output: '%s'" % (ovs_link, result.exit_code, 5819 result.output), log=True)

5820

5821 5822 -def GetFileInfo(file_path):

5823 """ Checks if a file exists and returns information related to it. 5824 5825 Currently returned information: 5826 - file size: int, size in bytes 5827 5828 @type file_path: string 5829 @param file_path: Name of file to examine. 5830 5831 @rtype: tuple of bool, dict 5832 @return: Whether the file exists, and a dictionary of information about the 5833 file gathered by os.stat. 5834 5835 """ 5836 try: 5837 stat_info = os.stat(file_path) 5838 values_dict = { 5839 constants.STAT_SIZE: stat_info.st_size, 5840 } 5841 return True, values_dict 5842 except IOError: 5843 return False, {}

5844

5845 5846 -class HooksRunner(object):

5847 """Hook runner. 5848 5849 This class is instantiated on the node side (ganeti-noded) and not 5850 on the master side. 5851 5852 """

5853 - def __init__(self, hooks_base_dir=None):

5854 """Constructor for hooks runner. 5855 5856 @type hooks_base_dir: str or None 5857 @param hooks_base_dir: if not None, this overrides the 5858 L{pathutils.HOOKS_BASE_DIR} (useful for unittests) 5859 5860 """ 5861 if hooks_base_dir is None: 5862 hooks_base_dir = pathutils.HOOKS_BASE_DIR 5863 # yeah, _BASE_DIR is not valid for attributes, we use it like a 5864 # constant 5865 self._BASE_DIR = hooks_base_dir # pylint: disable=C0103

5866

5867 - def RunLocalHooks(self, node_list, hpath, phase, env):

5868 """Check that the hooks will be run only locally and then run them. 5869 5870 """ 5871 assert len(node_list) == 1 5872 node = node_list[0] 5873 _, myself = ssconf.GetMasterAndMyself() 5874 assert node == myself 5875 5876 results = self.RunHooks(hpath, phase, env) 5877 5878 # Return values in the form expected by HooksMaster 5879 return {node: (None, False, results)}

5880

5881 - def RunHooks(self, hpath, phase, env):

5882 """Run the scripts in the hooks directory. 5883 5884 @type hpath: str 5885 @param hpath: the path to the hooks directory which 5886 holds the scripts 5887 @type phase: str 5888 @param phase: either L{constants.HOOKS_PHASE_PRE} or 5889 L{constants.HOOKS_PHASE_POST} 5890 @type env: dict 5891 @param env: dictionary with the environment for the hook 5892 @rtype: list 5893 @return: list of 3-element tuples: 5894 - script path 5895 - script result, either L{constants.HKR_SUCCESS} or 5896 L{constants.HKR_FAIL} 5897 - output of the script 5898 5899 @raise errors.ProgrammerError: for invalid input 5900 parameters 5901 5902 """ 5903 if phase == constants.HOOKS_PHASE_PRE: 5904 suffix = "pre" 5905 elif phase == constants.HOOKS_PHASE_POST: 5906 suffix = "post" 5907 else: 5908 _Fail("Unknown hooks phase '%s'", phase) 5909 5910 subdir = "%s-%s.d" % (hpath, suffix) 5911 dir_name = utils.PathJoin(self._BASE_DIR, subdir) 5912 5913 results = [] 5914 5915 if not os.path.isdir(dir_name): 5916 # for non-existing/non-dirs, we simply exit instead of logging a 5917 # warning at every operation 5918 return results 5919 5920 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) 5921 5922 for (relname, relstatus, runresult) in runparts_results: 5923 if relstatus == constants.RUNPARTS_SKIP: 5924 rrval = constants.HKR_SKIP 5925 output = "" 5926 elif relstatus == constants.RUNPARTS_ERR: 5927 rrval = constants.HKR_FAIL 5928 output = "Hook script execution error: %s" % runresult 5929 elif relstatus == constants.RUNPARTS_RUN: 5930 if runresult.failed: 5931 rrval = constants.HKR_FAIL 5932 else: 5933 rrval = constants.HKR_SUCCESS 5934 output = utils.SafeEncode(runresult.output.strip()) 5935 results.append(("%s/%s" % (subdir, relname), rrval, output)) 5936 5937 return results

5938

5939 5940 -class IAllocatorRunner(object):

5941 """IAllocator runner. 5942 5943 This class is instantiated on the node side (ganeti-noded) and not on 5944 the master side. 5945 5946 """ 5947 @staticmethod

5948 - def Run(name, idata, ial_params):

5949 """Run an iallocator script. 5950 5951 @type name: str 5952 @param name: the iallocator script name 5953 @type idata: str 5954 @param idata: the allocator input data 5955 @type ial_params: list 5956 @param ial_params: the iallocator parameters 5957 5958 @rtype: tuple 5959 @return: two element tuple of: 5960 - status 5961 - either error message or stdout of allocator (for success) 5962 5963 """ 5964 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH, 5965 os.path.isfile) 5966 if alloc_script is None: 5967 _Fail("iallocator module '%s' not found in the search path", name) 5968 5969 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.") 5970 try: 5971 os.write(fd, idata) 5972 os.close(fd) 5973 result = utils.RunCmd([alloc_script, fin_name] + ial_params) 5974 if result.failed: 5975 _Fail("iallocator module '%s' failed: %s, output '%s'", 5976 name, result.fail_reason, result.output) 5977 finally: 5978 os.unlink(fin_name) 5979 5980 return result.stdout

5981

5982 5983 -class DevCacheManager(object):

5984 """Simple class for managing a cache of block device information. 5985 5986 """ 5987 _DEV_PREFIX = "/dev/" 5988 _ROOT_DIR = pathutils.BDEV_CACHE_DIR 5989 5990 @classmethod

5991 - def _ConvertPath(cls, dev_path):

5992 """Converts a /dev/name path to the cache file name. 5993 5994 This replaces slashes with underscores and strips the /dev 5995 prefix. It then returns the full path to the cache file. 5996 5997 @type dev_path: str 5998 @param dev_path: the C{/dev/} path name 5999 @rtype: str 6000 @return: the converted path name 6001 6002 """ 6003 if dev_path.startswith(cls._DEV_PREFIX): 6004 dev_path = dev_path[len(cls._DEV_PREFIX):] 6005 dev_path = dev_path.replace("/", "_") 6006 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path) 6007 return fpath

6008 6009 @classmethod

6010 - def UpdateCache(cls, dev_path, owner, on_primary, iv_name):

6011 """Updates the cache information for a given device. 6012 6013 @type dev_path: str 6014 @param dev_path: the pathname of the device 6015 @type owner: str 6016 @param owner: the owner (instance name) of the device 6017 @type on_primary: bool 6018 @param on_primary: whether this is the primary 6019 node nor not 6020 @type iv_name: str 6021 @param iv_name: the instance-visible name of the 6022 device, as in objects.Disk.iv_name 6023 6024 @rtype: None 6025 6026 """ 6027 if dev_path is None: 6028 logging.error("DevCacheManager.UpdateCache got a None dev_path") 6029 return 6030 fpath = cls._ConvertPath(dev_path) 6031 if on_primary: 6032 state = "primary" 6033 else: 6034 state = "secondary" 6035 if iv_name is None: 6036 iv_name = "not_visible" 6037 fdata = "%s %s %s\n" % (str(owner), state, iv_name) 6038 try: 6039 utils.WriteFile(fpath, data=fdata) 6040 except EnvironmentError, err: 6041 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

6042 6043 @classmethod

6044 - def RemoveCache(cls, dev_path):

6045 """Remove data for a dev_path. 6046 6047 This is just a wrapper over L{utils.io.RemoveFile} with a converted 6048 path name and logging. 6049 6050 @type dev_path: str 6051 @param dev_path: the pathname of the device 6052 6053 @rtype: None 6054 6055 """ 6056 if dev_path is None: 6057 logging.error("DevCacheManager.RemoveCache got a None dev_path") 6058 return 6059 fpath = cls._ConvertPath(dev_path) 6060 try: 6061 utils.RemoveFile(fpath) 6062 except EnvironmentError, err: 6063 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

6064