Package ganeti :: Module backend

Source Code for Module ganeti.backend

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30   
  31  """Functions used by the node daemon 
  32   
  33  @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in 
  34       the L{UploadFile} function 
  35  @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted 
  36       in the L{_CleanDirectory} function 
  37   
  38  """ 
  39   
  40  # pylint: disable=E1103,C0302 
  41   
  42  # E1103: %s %r has no %r member (but some types could not be 
  43  # inferred), because the _TryOSFromDisk returns either (True, os_obj) 
  44  # or (False, "string") which confuses pylint 
  45   
  46  # C0302: This module has become too big and should be split up 
  47   
  48   
  49  import base64 
  50  import errno 
  51  import logging 
  52  import os 
  53  import os.path 
  54  import pycurl 
  55  import random 
  56  import re 
  57  import shutil 
  58  import signal 
  59  import stat 
  60  import tempfile 
  61  import time 
  62  import zlib 
  63  import contextlib 
  64  import collections 
  65   
  66  from ganeti import errors 
  67  from ganeti import http 
  68  from ganeti import utils 
  69  from ganeti import ssh 
  70  from ganeti import hypervisor 
  71  from ganeti.hypervisor import hv_base 
  72  from ganeti import constants 
  73  from ganeti.storage import bdev 
  74  from ganeti.storage import drbd 
  75  from ganeti.storage import extstorage 
  76  from ganeti.storage import filestorage 
  77  from ganeti import objects 
  78  from ganeti import ssconf 
  79  from ganeti import serializer 
  80  from ganeti import netutils 
  81  from ganeti import runtime 
  82  from ganeti import compat 
  83  from ganeti import pathutils 
  84  from ganeti import vcluster 
  85  from ganeti import ht 
  86  from ganeti.storage.base import BlockDev 
  87  from ganeti.storage.drbd import DRBD8 
  88  from ganeti import hooksmaster 
  89  import ganeti.metad as metad 
  90   
  91   
  92  _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" 
  93  _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ 
  94    pathutils.DATA_DIR, 
  95    pathutils.JOB_QUEUE_ARCHIVE_DIR, 
  96    pathutils.QUEUE_DIR, 
  97    pathutils.CRYPTO_KEYS_DIR, 
  98    ]) 
  99  _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 
 100  _X509_KEY_FILE = "key" 
 101  _X509_CERT_FILE = "cert" 
 102  _IES_STATUS_FILE = "status" 
 103  _IES_PID_FILE = "pid" 
 104  _IES_CA_FILE = "ca" 
 105   
 106  #: Valid LVS output line regex 
 107  _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") 
 108   
 109  # Actions for the master setup script 
 110  _MASTER_START = "start" 
 111  _MASTER_STOP = "stop" 
 112   
 113  #: Maximum file permissions for restricted command directory and executables 
 114  _RCMD_MAX_MODE = (stat.S_IRWXU | 
 115                    stat.S_IRGRP | stat.S_IXGRP | 
 116                    stat.S_IROTH | stat.S_IXOTH) 
 117   
 118  #: Delay before returning an error for restricted commands 
 119  _RCMD_INVALID_DELAY = 10 
 120   
 121  #: How long to wait to acquire lock for restricted commands (shorter than 
 122  #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many 
 123  #: command requests arrive 
 124  _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8 
  125 
  126 
  127 class RPCFail(Exception): 
  128   """Class denoting RPC failure. 
  129 
  130   Its argument is the error message. 
  131 
  132   """ 
  133 
  134 
  135 def _GetInstReasonFilename(instance_name): 
  136   """Path of the file containing the reason of the instance status change. 
  137 
  138   @type instance_name: string 
  139   @param instance_name: The name of the instance 
  140   @rtype: string 
  141   @return: The path of the file 
  142 
  143   """ 
  144   return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name) 
  145 
  146 
  147 def _StoreInstReasonTrail(instance_name, trail): 
  148   """Serialize a reason trail related to an instance change of state to file. 
  149 
  150   The exact location of the file depends on the name of the instance and on 
  151   the configuration of the Ganeti cluster defined at deploy time. 
  152 
  153   @type instance_name: string 
  154   @param instance_name: The name of the instance 
  155 
  156   @type trail: list of reasons 
  157   @param trail: reason trail 
  158 
  159   @rtype: None 
  160 
  161   """ 
  162   json = serializer.DumpJson(trail) 
  163   filename = _GetInstReasonFilename(instance_name) 
  164   utils.WriteFile(filename, data=json) 
  165 
  166 
  167 def _Fail(msg, *args, **kwargs): 
  168   """Log an error and then raise an RPCFail exception. 
  169 
  170   This exception is then handled specially in the ganeti daemon and 
  171   turned into a 'failed' return type. As such, this function is a 
  172   useful shortcut for logging the error and returning it to the master 
  173   daemon. 
  174 
  175   @type msg: string 
  176   @param msg: the text of the exception 
  177   @raise RPCFail 
  178 
  179   """ 
  180   if args: 
  181     msg = msg % args 
  182   if "log" not in kwargs or kwargs["log"]:  # if we should log this error 
  183     if "exc" in kwargs and kwargs["exc"]: 
  184       logging.exception(msg) 
  185     else: 
  186       logging.error(msg) 
  187   raise RPCFail(msg) 
  188 
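A usage sketch (the disk name is a made-up example): positional arguments are %-interpolated into the message, and the optional log/exc keywords control logging before the RPCFail is raised.

    try:
      _Fail("Disk %s not found", "/dev/xvdb", log=False)  # log=False: skip logging
    except RPCFail, err:
      print str(err)  # "Disk /dev/xvdb not found"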
  189 
  190 def _GetConfig(): 
  191   """Simple wrapper to return a SimpleStore. 
  192 
  193   @rtype: L{ssconf.SimpleStore} 
  194   @return: a SimpleStore instance 
  195 
  196   """ 
  197   return ssconf.SimpleStore() 
  198 
  199 
  200 def _GetSshRunner(cluster_name): 
  201   """Simple wrapper to return an SshRunner. 
  202 
  203   @type cluster_name: str 
  204   @param cluster_name: the cluster name, which is needed 
  205       by the SshRunner constructor 
  206   @rtype: L{ssh.SshRunner} 
  207   @return: an SshRunner instance 
  208 
  209   """ 
  210   return ssh.SshRunner(cluster_name) 
  211 
  212 
  213 def _Decompress(data): 
  214   """Unpacks data compressed by the RPC client. 
  215 
  216   @type data: list or tuple 
  217   @param data: Data sent by RPC client 
  218   @rtype: str 
  219   @return: Decompressed data 
  220 
  221   """ 
  222   assert isinstance(data, (list, tuple)) 
  223   assert len(data) == 2 
  224   (encoding, content) = data 
  225   if encoding == constants.RPC_ENCODING_NONE: 
  226     return content 
  227   elif encoding == constants.RPC_ENCODING_ZLIB_BASE64: 
  228     return zlib.decompress(base64.b64decode(content)) 
  229   else: 
  230     raise AssertionError("Unknown data encoding") 
  231 
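A minimal round-trip sketch, assuming the sender tags payloads with the same encoding constants the RPC client uses (plain, or zlib followed by base64):

    plain = (constants.RPC_ENCODING_NONE, "abc")
    packed = (constants.RPC_ENCODING_ZLIB_BASE64,
              base64.b64encode(zlib.compress("abc")))
    assert _Decompress(plain) == _Decompress(packed) == "abc"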
  232 
  233 def _CleanDirectory(path, exclude=None): 
  234   """Removes all regular files in a directory. 
  235 
  236   @type path: str 
  237   @param path: the directory to clean 
  238   @type exclude: list 
  239   @param exclude: list of files to be excluded, defaults 
  240       to the empty list 
  241 
  242   """ 
  243   if path not in _ALLOWED_CLEAN_DIRS: 
  244     _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'", 
  245           path) 
  246 
  247   if not os.path.isdir(path): 
  248     return 
  249   if exclude is None: 
  250     exclude = [] 
  251   else: 
  252     # Normalize excluded paths 
  253     exclude = [os.path.normpath(i) for i in exclude] 
  254 
  255   for rel_name in utils.ListVisibleFiles(path): 
  256     full_name = utils.PathJoin(path, rel_name) 
  257     if full_name in exclude: 
  258       continue 
  259     if os.path.isfile(full_name) and not os.path.islink(full_name): 
  260       utils.RemoveFile(full_name) 
  261 
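A sketch of the whitelist behaviour (the /tmp call is purely illustrative): only directories listed in _ALLOWED_CLEAN_DIRS may be cleaned, anything else fails the RPC.

    # Allowed: wipes the queue directory but keeps the lock file
    _CleanDirectory(pathutils.QUEUE_DIR,
                    exclude=[pathutils.JOB_QUEUE_LOCK_FILE])

    # Refused: /tmp is not in _ALLOWED_CLEAN_DIRS
    try:
      _CleanDirectory("/tmp")
    except RPCFail, err:
      logging.info("refused as expected: %s", err)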
262 263 -def _BuildUploadFileList():
264 """Build the list of allowed upload files. 265 266 This is abstracted so that it's built only once at module import time. 267 268 """ 269 allowed_files = set([ 270 pathutils.CLUSTER_CONF_FILE, 271 pathutils.ETC_HOSTS, 272 pathutils.SSH_KNOWN_HOSTS_FILE, 273 pathutils.VNC_PASSWORD_FILE, 274 pathutils.RAPI_CERT_FILE, 275 pathutils.SPICE_CERT_FILE, 276 pathutils.SPICE_CACERT_FILE, 277 pathutils.RAPI_USERS_FILE, 278 pathutils.CONFD_HMAC_KEY, 279 pathutils.CLUSTER_DOMAIN_SECRET_FILE, 280 ]) 281 282 for hv_name in constants.HYPER_TYPES: 283 hv_class = hypervisor.GetHypervisorClass(hv_name) 284 allowed_files.update(hv_class.GetAncillaryFiles()[0]) 285 286 assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ 287 "Allowed file storage paths should never be uploaded via RPC" 288 289 return frozenset(allowed_files)
290 291 292 _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
  293 
  294 
  295 def JobQueuePurge(): 
  296   """Removes job queue files and archived jobs. 
  297 
  298   @rtype: None 
  299   @return: None 
  300 
  301   """ 
  302   _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) 
  303   _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR) 
  304 
305 306 -def GetMasterNodeName():
307 """Returns the master node name. 308 309 @rtype: string 310 @return: name of the master node 311 @raise RPCFail: in case of errors 312 313 """ 314 try: 315 return _GetConfig().GetMasterNode() 316 except errors.ConfigurationError, err: 317 _Fail("Cluster configuration incomplete: %s", err, exc=True)
318
319 320 -def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
321 """Decorator that runs hooks before and after the decorated function. 322 323 @type hook_opcode: string 324 @param hook_opcode: opcode of the hook 325 @type hooks_path: string 326 @param hooks_path: path of the hooks 327 @type env_builder_fn: function 328 @param env_builder_fn: function that returns a dictionary containing the 329 environment variables for the hooks. Will get all the parameters of the 330 decorated function. 331 @raise RPCFail: in case of pre-hook failure 332 333 """ 334 def decorator(fn): 335 def wrapper(*args, **kwargs): 336 _, myself = ssconf.GetMasterAndMyself() 337 nodes = ([myself], [myself]) # these hooks run locally 338 339 env_fn = compat.partial(env_builder_fn, *args, **kwargs) 340 341 cfg = _GetConfig() 342 hr = HooksRunner() 343 hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, 344 hr.RunLocalHooks, None, env_fn, None, 345 logging.warning, cfg.GetClusterName(), 346 cfg.GetMasterNode()) 347 hm.RunPhase(constants.HOOKS_PHASE_PRE) 348 result = fn(*args, **kwargs) 349 hm.RunPhase(constants.HOOKS_PHASE_POST) 350 351 return result
352 return wrapper 353 return decorator 354
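A decorator usage sketch with a hypothetical hooks directory and environment builder (the real call sites, e.g. ActivateMasterIp below, follow the same pattern):

    def _BuildExampleEnv(param):
      return {"EXAMPLE_PARAM": str(param)}

    @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "example-hooks-dir",
                   _BuildExampleEnv)
    def _ExampleAction(param):
      return param * 2

    _ExampleAction(21)  # runs pre-hooks, the function body, then post-hooks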
355 356 -def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
357 """Builds environment variables for master IP hooks. 358 359 @type master_params: L{objects.MasterNetworkParameters} 360 @param master_params: network parameters of the master 361 @type use_external_mip_script: boolean 362 @param use_external_mip_script: whether to use an external master IP 363 address setup script (unused, but necessary per the implementation of the 364 _RunLocalHooks decorator) 365 366 """ 367 # pylint: disable=W0613 368 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) 369 env = { 370 "MASTER_NETDEV": master_params.netdev, 371 "MASTER_IP": master_params.ip, 372 "MASTER_NETMASK": str(master_params.netmask), 373 "CLUSTER_IP_VERSION": str(ver), 374 } 375 376 return env
377
378 379 -def _RunMasterSetupScript(master_params, action, use_external_mip_script):
380 """Execute the master IP address setup script. 381 382 @type master_params: L{objects.MasterNetworkParameters} 383 @param master_params: network parameters of the master 384 @type action: string 385 @param action: action to pass to the script. Must be one of 386 L{backend._MASTER_START} or L{backend._MASTER_STOP} 387 @type use_external_mip_script: boolean 388 @param use_external_mip_script: whether to use an external master IP 389 address setup script 390 @raise backend.RPCFail: if there are errors during the execution of the 391 script 392 393 """ 394 env = _BuildMasterIpEnv(master_params) 395 396 if use_external_mip_script: 397 setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT 398 else: 399 setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT 400 401 result = utils.RunCmd([setup_script, action], env=env, reset_env=True) 402 403 if result.failed: 404 _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % 405 (action, result.exit_code, result.output), log=True)
406 407 408 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", 409 _BuildMasterIpEnv)
410 -def ActivateMasterIp(master_params, use_external_mip_script):
411 """Activate the IP address of the master daemon. 412 413 @type master_params: L{objects.MasterNetworkParameters} 414 @param master_params: network parameters of the master 415 @type use_external_mip_script: boolean 416 @param use_external_mip_script: whether to use an external master IP 417 address setup script 418 @raise RPCFail: in case of errors during the IP startup 419 420 """ 421 _RunMasterSetupScript(master_params, _MASTER_START, 422 use_external_mip_script)
423
  424 
  425 def StartMasterDaemons(no_voting): 
  426   """Activate local node as master node. 
  427 
  428   The function starts the master daemons (ganeti-wconfd, ganeti-luxid, ganeti-rapi). 
  429 
  430   @type no_voting: boolean 
  431   @param no_voting: whether to start the master daemons without a node vote 
  432       but still non-interactively 
  433   @rtype: None 
  434 
  435   """ 
  436 
  437   if no_voting: 
  438     daemon_args = "--no-voting --yes-do-it" 
  439   else: 
  440     daemon_args = "" 
  441 
  442   env = { 
  443     "EXTRA_LUXID_ARGS": daemon_args, 
  444     "EXTRA_WCONFD_ARGS": daemon_args, 
  445     } 
  446 
  447   result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) 
  448   if result.failed: 
  449     msg = "Can't start Ganeti master: %s" % result.output 
  450     logging.error(msg) 
  451     _Fail(msg) 
452 453 454 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", 455 _BuildMasterIpEnv)
456 -def DeactivateMasterIp(master_params, use_external_mip_script):
457 """Deactivate the master IP on this node. 458 459 @type master_params: L{objects.MasterNetworkParameters} 460 @param master_params: network parameters of the master 461 @type use_external_mip_script: boolean 462 @param use_external_mip_script: whether to use an external master IP 463 address setup script 464 @raise RPCFail: in case of errors during the IP turndown 465 466 """ 467 _RunMasterSetupScript(master_params, _MASTER_STOP, 468 use_external_mip_script)
469
  470 
  471 def StopMasterDaemons(): 
  472   """Stop the master daemons on this node. 
  473 
  474   Stop the master daemons (ganeti-wconfd, ganeti-luxid, ganeti-rapi) on this node. 
  475 
  476   @rtype: None 
  477 
  478   """ 
  479   # TODO: log and report back to the caller the failures; we 
  480   # need to decide in which case we fail the RPC for this 
  481 
  482   result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) 
  483   if result.failed: 
  484     logging.error("Could not stop Ganeti master, command %s had exitcode %s" 
  485                   " and error %s", 
  486                   result.cmd, result.exit_code, result.output) 
  487 
488 489 -def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
490 """Change the netmask of the master IP. 491 492 @param old_netmask: the old value of the netmask 493 @param netmask: the new value of the netmask 494 @param master_ip: the master IP 495 @param master_netdev: the master network device 496 497 """ 498 if old_netmask == netmask: 499 return 500 501 if not netutils.IPAddress.Own(master_ip): 502 _Fail("The master IP address is not up, not attempting to change its" 503 " netmask") 504 505 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", 506 "%s/%s" % (master_ip, netmask), 507 "dev", master_netdev, "label", 508 "%s:0" % master_netdev]) 509 if result.failed: 510 _Fail("Could not set the new netmask on the master IP address") 511 512 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", 513 "%s/%s" % (master_ip, old_netmask), 514 "dev", master_netdev, "label", 515 "%s:0" % master_netdev]) 516 if result.failed: 517 _Fail("Could not bring down the master IP address with the old netmask")
518
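With example values (master IP 192.0.2.1 moving from /24 to /25 on eth0, and assuming the master IP is currently active on this node), the two RunCmd calls above amount to:

    # ip address add 192.0.2.1/25 dev eth0 label eth0:0
    # ip address del 192.0.2.1/24 dev eth0 label eth0:0
    ChangeMasterNetmask(24, 25, "192.0.2.1", "eth0")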
  519 
  520 def EtcHostsModify(mode, host, ip): 
  521   """Modify a host entry in /etc/hosts. 
  522 
  523   @param mode: The mode to operate. Either add or remove an entry 
  524   @param host: The host to operate on 
  525   @param ip: The IP associated with the entry 
  526 
  527   """ 
  528   if mode == constants.ETC_HOSTS_ADD: 
  529     if not ip: 
  530       _Fail("Mode 'add' needs 'ip' parameter, but parameter not" 
  531             " present") 
  532     utils.AddHostToEtcHosts(host, ip) 
  533   elif mode == constants.ETC_HOSTS_REMOVE: 
  534     if ip: 
  535       _Fail("Mode 'remove' does not allow 'ip' parameter, but" 
  536             " parameter is present") 
  537     utils.RemoveHostFromEtcHosts(host) 
  538   else: 
  539     _Fail("Mode not supported") 
  540 
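Usage sketch with a made-up host: 'add' requires an IP, while 'remove' forbids one.

    EtcHostsModify(constants.ETC_HOSTS_ADD, "node4.example.com", "192.0.2.14")
    EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node4.example.com", None)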
541 542 -def LeaveCluster(modify_ssh_setup):
543 """Cleans up and remove the current node. 544 545 This function cleans up and prepares the current node to be removed 546 from the cluster. 547 548 If processing is successful, then it raises an 549 L{errors.QuitGanetiException} which is used as a special case to 550 shutdown the node daemon. 551 552 @param modify_ssh_setup: boolean 553 554 """ 555 _CleanDirectory(pathutils.DATA_DIR) 556 _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) 557 JobQueuePurge() 558 559 if modify_ssh_setup: 560 try: 561 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) 562 563 ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) 564 565 utils.RemoveFile(priv_key) 566 utils.RemoveFile(pub_key) 567 except errors.OpExecError: 568 logging.exception("Error while processing ssh files") 569 except IOError: 570 logging.exception("At least one SSH file was not accessible.") 571 572 try: 573 utils.RemoveFile(pathutils.CONFD_HMAC_KEY) 574 utils.RemoveFile(pathutils.RAPI_CERT_FILE) 575 utils.RemoveFile(pathutils.SPICE_CERT_FILE) 576 utils.RemoveFile(pathutils.SPICE_CACERT_FILE) 577 utils.RemoveFile(pathutils.NODED_CERT_FILE) 578 except: # pylint: disable=W0702 579 logging.exception("Error while removing cluster secrets") 580 581 utils.StopDaemon(constants.CONFD) 582 utils.StopDaemon(constants.MOND) 583 utils.StopDaemon(constants.KVMD) 584 585 # Raise a custom exception (handled in ganeti-noded) 586 raise errors.QuitGanetiException(True, "Shutdown scheduled")
587
  588 
  589 def _CheckStorageParams(params, num_params): 
  590   """Performs sanity checks for storage parameters. 
  591 
  592   @type params: list 
  593   @param params: list of storage parameters 
  594   @type num_params: int 
  595   @param num_params: expected number of parameters 
  596 
  597   """ 
  598   if params is None: 
  599     raise errors.ProgrammerError("No storage parameters for storage" 
  600                                  " reporting are provided.") 
  601   if not isinstance(params, list): 
  602     raise errors.ProgrammerError("The storage parameters are not of type" 
  603                                  " list: '%s'" % params) 
  604   if len(params) != num_params: 
  605     raise errors.ProgrammerError("Did not receive the expected number of" 
  606                                  " storage parameters: expected %s," 
  607                                  " received '%s'" % (num_params, len(params))) 
  608 
609 610 -def _CheckLvmStorageParams(params):
611 """Performs sanity check for the 'exclusive storage' flag. 612 613 @see: C{_CheckStorageParams} 614 615 """ 616 _CheckStorageParams(params, 1) 617 excl_stor = params[0] 618 if not isinstance(params[0], bool): 619 raise errors.ProgrammerError("Exclusive storage parameter is not" 620 " boolean: '%s'." % excl_stor) 621 return excl_stor
622
  623 
  624 def _GetLvmVgSpaceInfo(name, params): 
  625   """Wrapper around C{_GetVgInfo} which checks the storage parameters. 
  626 
  627   @type name: string 
  628   @param name: name of the volume group 
  629   @type params: list 
  630   @param params: list of storage parameters, which in this case should 
  631       contain only one: the exclusive storage flag 
  632 
  633   """ 
  634   excl_stor = _CheckLvmStorageParams(params) 
  635   return _GetVgInfo(name, excl_stor) 
  636 
  637 
  638 def _GetVgInfo( 
  639     name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo): 
  640   """Retrieves information about an LVM volume group. 
  641 
  642   """ 
  643   # TODO: GetVGInfo supports returning information for multiple VGs at once 
  644   vginfo = info_fn([name], excl_stor) 
  645   if vginfo: 
  646     vg_free = int(round(vginfo[0][0], 0)) 
  647     vg_size = int(round(vginfo[0][1], 0)) 
  648   else: 
  649     vg_free = None 
  650     vg_size = None 
  651 
  652   return { 
  653     "type": constants.ST_LVM_VG, 
  654     "name": name, 
  655     "storage_free": vg_free, 
  656     "storage_size": vg_size, 
  657     } 
  658 
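The info_fn default exists mainly as an injection point for tests. A sketch with a stub, assuming GetVGInfo yields one (free_MiB, size_MiB, name) tuple per requested VG (the VG name is an example):

    def _StubVgInfo(names, excl_stor):
      return [(20480.0, 102400.0, names[0])]  # 20 GiB free of 100 GiB

    _GetVgInfo("xenvg", False, info_fn=_StubVgInfo)
    # -> {"type": constants.ST_LVM_VG, "name": "xenvg",
    #     "storage_free": 20480, "storage_size": 102400}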
659 660 -def _GetLvmPvSpaceInfo(name, params):
661 """Wrapper around C{_GetVgSpindlesInfo} with sanity checks. 662 663 @see: C{_GetLvmVgSpaceInfo} 664 665 """ 666 excl_stor = _CheckLvmStorageParams(params) 667 return _GetVgSpindlesInfo(name, excl_stor)
668
  669 
  670 def _GetVgSpindlesInfo( 
  671     name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo): 
  672   """Retrieves information about spindles in an LVM volume group. 
  673 
  674   @type name: string 
  675   @param name: VG name 
  676   @type excl_stor: bool 
  677   @param excl_stor: exclusive storage 
  678   @rtype: dict 
  679   @return: dictionary with keys "type", "name", "storage_free" and 
  680       "storage_size", for storage type, VG name, free and total spindles 
  681 
  682   """ 
  683   if excl_stor: 
  684     (vg_free, vg_size) = info_fn(name) 
  685   else: 
  686     vg_free = 0 
  687     vg_size = 0 
  688   return { 
  689     "type": constants.ST_LVM_PV, 
  690     "name": name, 
  691     "storage_free": vg_free, 
  692     "storage_size": vg_size, 
  693     } 
  694 
  695 
  696 def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor): 
  697   """Retrieves node information from a hypervisor. 
  698 
  699   The information returned depends on the hypervisor. Common items: 
  700 
  701     - vg_size is the size of the configured volume group in MiB 
  702     - vg_free is the free size of the volume group in MiB 
  703     - memory_dom0 is the memory allocated for domain0 in MiB 
  704     - memory_free is the currently available (free) ram in MiB 
  705     - memory_total is the total amount of ram in MiB 
  706     - hv_version: the hypervisor version, if available 
  707 
  708   @type hvparams: dict of string 
  709   @param hvparams: the hypervisor's hvparams 
  710 
  711   """ 
  712   return get_hv_fn(name).GetNodeInfo(hvparams=hvparams) 
  713 
  714 
  715 def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor): 
  716   """Retrieves node information for all hypervisors. 
  717 
  718   See C{_GetHvInfo} for information on the output. 
  719 
  720   @type hv_specs: list of pairs (string, dict of strings) 
  721   @param hv_specs: list of pairs of a hypervisor's name and its hvparams 
  722 
  723   """ 
  724   if hv_specs is None: 
  725     return None 
  726 
  727   result = [] 
  728   for hvname, hvparams in hv_specs: 
  729     result.append(_GetHvInfo(hvname, hvparams, get_hv_fn)) 
  730   return result 
  731 
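Shape of the input, using stock hypervisor constants and empty hvparams for brevity:

    hv_specs = [(constants.HT_KVM, {}), (constants.HT_XEN_PVM, {})]
    infos = _GetHvInfoAll(hv_specs)  # one node-info dict per hypervisor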
  732 
  733 def _GetNamedNodeInfo(names, fn): 
  734   """Calls C{fn} for all names in C{names} and returns a list of results. 
  735 
  736   @rtype: None or list 
  737 
  738   """ 
  739   if names is None: 
  740     return None 
  741   else: 
  742     return map(fn, names) 
  743 
  744 
  745 def GetNodeInfo(storage_units, hv_specs): 
  746   """Gives back a hash with different information about the node. 
  747 
  748   @type storage_units: list of tuples (string, string, list) 
  749   @param storage_units: List of tuples (storage type, identifier, parameters) to 
  750       ask for disk space information. In case of lvm-vg, the identifier is 
  751       the VG name. The parameters can contain additional, storage-type-specific 
  752       parameters, for example exclusive storage for lvm storage. 
  753   @type hv_specs: list of pairs (string, dict of strings) 
  754   @param hv_specs: list of pairs of a hypervisor's name and its hvparams 
  755   @rtype: tuple; (string, None/list, None/list) 
  756   @return: Tuple containing boot ID, storage information and hypervisor 
  757       information 
  758 
  759   """ 
  760   bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") 
  761   storage_info = _GetNamedNodeInfo( 
  762     storage_units, 
  763     (lambda (storage_type, storage_key, storage_params): 
  764      _ApplyStorageInfoFunction(storage_type, storage_key, storage_params))) 
  765   hv_info = _GetHvInfoAll(hv_specs) 
  766   return (bootid, storage_info, hv_info) 
  767 
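A sketch of a typical call; the VG name and storage path are placeholders:

    storage_units = [
      (constants.ST_LVM_VG, "xenvg", [True]),  # True = exclusive storage
      (constants.ST_FILE, "/srv/ganeti/file-storage", []),
      ]
    hv_specs = [(constants.HT_KVM, {})]
    (boot_id, storage_info, hv_info) = GetNodeInfo(storage_units, hv_specs)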
768 769 -def _GetFileStorageSpaceInfo(path, params):
770 """Wrapper around filestorage.GetSpaceInfo. 771 772 The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo 773 and ignore the *args parameter to not leak it into the filestorage 774 module's code. 775 776 @see: C{filestorage.GetFileStorageSpaceInfo} for description of the 777 parameters. 778 779 """ 780 _CheckStorageParams(params, 0) 781 return filestorage.GetFileStorageSpaceInfo(path)
782 783 784 # FIXME: implement storage reporting for all missing storage types. 785 _STORAGE_TYPE_INFO_FN = { 786 constants.ST_BLOCK: None, 787 constants.ST_DISKLESS: None, 788 constants.ST_EXT: None, 789 constants.ST_FILE: _GetFileStorageSpaceInfo, 790 constants.ST_LVM_PV: _GetLvmPvSpaceInfo, 791 constants.ST_LVM_VG: _GetLvmVgSpaceInfo, 792 constants.ST_SHARED_FILE: None, 793 constants.ST_GLUSTER: None, 794 constants.ST_RADOS: None, 795 }
  796 
  797 
  798 def _ApplyStorageInfoFunction(storage_type, storage_key, *args): 
  799   """Looks up and applies the correct function to calculate free and total 
  800   storage for the given storage type. 
  801 
  802   @type storage_type: string 
  803   @param storage_type: the storage type for which the storage shall be reported. 
  804   @type storage_key: string 
  805   @param storage_key: identifier of a storage unit, e.g. the volume group name 
  806       of an LVM storage unit 
  807   @type args: any 
  808   @param args: various parameters that can be used for storage reporting. These 
  809       parameters and their semantics vary from storage type to storage type and 
  810       are just propagated in this function. 
  811   @return: the results of the application of the storage space function (see 
  812       _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that 
  813       storage type 
  814   @raises NotImplementedError: for storage types that do not support space 
  815       reporting yet 
  816   """ 
  817   fn = _STORAGE_TYPE_INFO_FN[storage_type] 
  818   if fn is not None: 
  819     return fn(storage_key, *args) 
  820   else: 
  821     raise NotImplementedError 
  822 
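Dispatch sketch (the VG name is an example): implemented types resolve to their reporting function via _STORAGE_TYPE_INFO_FN, unimplemented ones raise.

    _ApplyStorageInfoFunction(constants.ST_LVM_VG, "xenvg", [False])
    try:
      _ApplyStorageInfoFunction(constants.ST_RADOS, "rbd", [])
    except NotImplementedError:
      pass  # no space reporting for RADOS yet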
  823 
  824 def _CheckExclusivePvs(pvi_list): 
  825   """Check that PVs are not shared among LVs. 
  826 
  827   @type pvi_list: list of L{objects.LvmPvInfo} objects 
  828   @param pvi_list: information about the PVs 
  829 
  830   @rtype: list of tuples (string, list of strings) 
  831   @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) 
  832 
  833   """ 
  834   res = [] 
  835   for pvi in pvi_list: 
  836     if len(pvi.lv_list) > 1: 
  837       res.append((pvi.name, pvi.lv_list)) 
  838   return res 
  839 
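A small illustration with made-up PV objects; the second PV carries two LVs and is therefore reported as shared:

    pvs = [objects.LvmPvInfo(name="/dev/sda1", lv_list=["lv1"]),
           objects.LvmPvInfo(name="/dev/sdb1", lv_list=["lv2", "lv3"])]
    _CheckExclusivePvs(pvs)  # -> [("/dev/sdb1", ["lv2", "lv3"])]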
840 841 -def _VerifyHypervisors(what, vm_capable, result, all_hvparams, 842 get_hv_fn=hypervisor.GetHypervisor):
843 """Verifies the hypervisor. Appends the results to the 'results' list. 844 845 @type what: C{dict} 846 @param what: a dictionary of things to check 847 @type vm_capable: boolean 848 @param vm_capable: whether or not this node is vm capable 849 @type result: dict 850 @param result: dictionary of verification results; results of the 851 verifications in this function will be added here 852 @type all_hvparams: dict of dict of string 853 @param all_hvparams: dictionary mapping hypervisor names to hvparams 854 @type get_hv_fn: function 855 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 856 857 """ 858 if not vm_capable: 859 return 860 861 if constants.NV_HYPERVISOR in what: 862 result[constants.NV_HYPERVISOR] = {} 863 for hv_name in what[constants.NV_HYPERVISOR]: 864 hvparams = all_hvparams[hv_name] 865 try: 866 val = get_hv_fn(hv_name).Verify(hvparams=hvparams) 867 except errors.HypervisorError, err: 868 val = "Error while checking hypervisor: %s" % str(err) 869 result[constants.NV_HYPERVISOR][hv_name] = val
870
871 872 -def _VerifyHvparams(what, vm_capable, result, 873 get_hv_fn=hypervisor.GetHypervisor):
874 """Verifies the hvparams. Appends the results to the 'results' list. 875 876 @type what: C{dict} 877 @param what: a dictionary of things to check 878 @type vm_capable: boolean 879 @param vm_capable: whether or not this node is vm capable 880 @type result: dict 881 @param result: dictionary of verification results; results of the 882 verifications in this function will be added here 883 @type get_hv_fn: function 884 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 885 886 """ 887 if not vm_capable: 888 return 889 890 if constants.NV_HVPARAMS in what: 891 result[constants.NV_HVPARAMS] = [] 892 for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: 893 try: 894 logging.info("Validating hv %s, %s", hv_name, hvparms) 895 get_hv_fn(hv_name).ValidateParameters(hvparms) 896 except errors.HypervisorError, err: 897 result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))
898
899 900 -def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
901 """Verifies the instance list. 902 903 @type what: C{dict} 904 @param what: a dictionary of things to check 905 @type vm_capable: boolean 906 @param vm_capable: whether or not this node is vm capable 907 @type result: dict 908 @param result: dictionary of verification results; results of the 909 verifications in this function will be added here 910 @type all_hvparams: dict of dict of string 911 @param all_hvparams: dictionary mapping hypervisor names to hvparams 912 913 """ 914 if constants.NV_INSTANCELIST in what and vm_capable: 915 # GetInstanceList can fail 916 try: 917 val = GetInstanceList(what[constants.NV_INSTANCELIST], 918 all_hvparams=all_hvparams) 919 except RPCFail, err: 920 val = str(err) 921 result[constants.NV_INSTANCELIST] = val
922
923 924 -def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
925 """Verifies the node info. 926 927 @type what: C{dict} 928 @param what: a dictionary of things to check 929 @type vm_capable: boolean 930 @param vm_capable: whether or not this node is vm capable 931 @type result: dict 932 @param result: dictionary of verification results; results of the 933 verifications in this function will be added here 934 @type all_hvparams: dict of dict of string 935 @param all_hvparams: dictionary mapping hypervisor names to hvparams 936 937 """ 938 if constants.NV_HVINFO in what and vm_capable: 939 hvname = what[constants.NV_HVINFO] 940 hyper = hypervisor.GetHypervisor(hvname) 941 hvparams = all_hvparams[hvname] 942 result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)
943
  944 
  945 def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE): 
  946   """Verify the existence and validity of the client SSL certificate. 
  947 
  948   Also, verify that the client certificate is not self-signed. Self- 
  949   signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4 
  950   and should be replaced by client certificates signed by the server 
  951   certificate. Hence we output a warning when we encounter a self-signed 
  952   one. 
  953 
  954   """ 
  955   create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates" 
  956   if not os.path.exists(cert_file): 
  957     return (constants.CV_ERROR, 
  958             "The client certificate does not exist. Run '%s' to create" 
  959             " client certificates for all nodes." % create_cert_cmd) 
  960 
  961   (errcode, msg) = utils.VerifyCertificate(cert_file) 
  962   if errcode is not None: 
  963     return (errcode, msg) 
  964 
  965   (errcode, msg) = utils.IsCertificateSelfSigned(cert_file) 
  966   if errcode is not None: 
  967     return (errcode, msg) 
  968 
  969   # if everything is fine, we return the digest to be compared to the config 
  970   return (None, utils.GetCertificateDigest(cert_filename=cert_file)) 
  971 
972 973 -def _VerifySshSetup(node_status_list, my_name, ssh_key_type, 974 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS):
975 """Verifies the state of the SSH key files. 976 977 @type node_status_list: list of tuples 978 @param node_status_list: list of nodes of the cluster associated with a 979 couple of flags: (uuid, name, is_master_candidate, 980 is_potential_master_candidate, online) 981 @type my_name: str 982 @param my_name: name of this node 983 @type ssh_key_type: one of L{constants.SSHK_ALL} 984 @param ssh_key_type: type of key used on nodes 985 @type ganeti_pub_keys_file: str 986 @param ganeti_pub_keys_file: filename of the public keys file 987 988 """ 989 if node_status_list is None: 990 return ["No node list to check against the pub_key_file received."] 991 992 my_status_list = [(my_uuid, name, mc, pot_mc, online) for 993 (my_uuid, name, mc, pot_mc, online) 994 in node_status_list if name == my_name] 995 if len(my_status_list) == 0: 996 return ["Cannot find node information for node '%s'." % my_name] 997 (my_uuid, _, _, potential_master_candidate, online) = \ 998 my_status_list[0] 999 1000 result = [] 1001 1002 if not os.path.exists(ganeti_pub_keys_file): 1003 result.append("The public key file '%s' does not exist. Consider running" 1004 " 'gnt-cluster renew-crypto --new-ssh-keys" 1005 " [--no-ssh-key-check]' to fix this." % ganeti_pub_keys_file) 1006 return result 1007 1008 pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list] 1009 offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list 1010 if not online] 1011 pub_keys = ssh.QueryPubKeyFile(None, key_file=ganeti_pub_keys_file) 1012 1013 if potential_master_candidate: 1014 # Check that the set of potential master candidates matches the 1015 # public key file 1016 pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes) 1017 pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes) 1018 missing_uuids = set([]) 1019 if pub_uuids_set != pot_mc_uuids_set: 1020 unknown_uuids = pub_uuids_set - pot_mc_uuids_set 1021 if unknown_uuids: 1022 result.append("The following node UUIDs are listed in the public key" 1023 " file on node '%s', but are not potential master" 1024 " candidates: %s." 1025 % (my_name, ", ".join(list(unknown_uuids)))) 1026 missing_uuids = pot_mc_uuids_set - pub_uuids_set 1027 if missing_uuids: 1028 result.append("The following node UUIDs of potential master candidates" 1029 " are missing in the public key file on node %s: %s." 1030 % (my_name, ", ".join(list(missing_uuids)))) 1031 1032 (_, key_files) = \ 1033 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1034 (_, node_pub_key_file) = key_files[ssh_key_type] 1035 1036 my_keys = pub_keys[my_uuid] 1037 1038 node_pub_key = utils.ReadFile(node_pub_key_file) 1039 if node_pub_key.strip() not in my_keys: 1040 result.append("The dsa key of node %s does not match this node's key" 1041 " in the pub key file." % my_name) 1042 if len(my_keys) != 1: 1043 result.append("There is more than one key for node %s in the public key" 1044 " file." % my_name) 1045 else: 1046 if len(pub_keys.keys()) > 0: 1047 result.append("The public key file of node '%s' is not empty, although" 1048 " the node is not a potential master candidate." 
1049 % my_name) 1050 1051 # Check that all master candidate keys are in the authorized_keys file 1052 (auth_key_file, _) = \ 1053 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1054 for (uuid, name, mc, _, online) in node_status_list: 1055 if not online: 1056 continue 1057 if uuid in missing_uuids: 1058 continue 1059 if mc: 1060 for key in pub_keys[uuid]: 1061 if not ssh.HasAuthorizedKey(auth_key_file, key): 1062 result.append("A SSH key of master candidate '%s' (UUID: '%s') is" 1063 " not in the 'authorized_keys' file of node '%s'." 1064 % (name, uuid, my_name)) 1065 else: 1066 for key in pub_keys[uuid]: 1067 if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key): 1068 result.append("A SSH key of normal node '%s' (UUID: '%s') is in the" 1069 " 'authorized_keys' file of node '%s'." 1070 % (name, uuid, my_name)) 1071 if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key): 1072 result.append("A SSH key of normal node '%s' (UUID: '%s') is not" 1073 " in the 'authorized_keys' file of itself." 1074 % (my_name, uuid)) 1075 1076 return result
1077
  1078 
  1079 def _VerifySshClutter(node_status_list, my_name): 
  1080   """Verifies that the 'authorized_keys' files are not cluttered up. 
  1081 
  1082   @type node_status_list: list of tuples 
  1083   @param node_status_list: list of nodes of the cluster associated with a 
  1084       couple of flags: (uuid, name, is_master_candidate, 
  1085       is_potential_master_candidate, online) 
  1086   @type my_name: str 
  1087   @param my_name: name of this node 
  1088 
  1089   """ 
  1090   result = [] 
  1091   (auth_key_file, _) = \ 
  1092     ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 
  1093   node_names = [name for (_, name, _, _, _) in node_status_list] 
  1094   multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names) 
  1095   if multiple_occurrences: 
  1096     msg = "There are hosts which have more than one SSH key stored for the" \ 
  1097           " same user in the 'authorized_keys' file of node %s. This can be" \ 
  1098           " due to an unsuccessful operation which cluttered up the" \ 
  1099           " 'authorized_keys' file. We recommend cleaning this up manually. " \ 
  1100           % my_name 
  1101     for host, occ in multiple_occurrences.items(): 
  1102       msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ)) 
  1103     result.append(msg) 
  1104 
  1105   return result 
  1106 
1107 1108 -def VerifyNode(what, cluster_name, all_hvparams):
1109 """Verify the status of the local node. 1110 1111 Based on the input L{what} parameter, various checks are done on the 1112 local node. 1113 1114 If the I{filelist} key is present, this list of 1115 files is checksummed and the file/checksum pairs are returned. 1116 1117 If the I{nodelist} key is present, we check that we have 1118 connectivity via ssh with the target nodes (and check the hostname 1119 report). 1120 1121 If the I{node-net-test} key is present, we check that we have 1122 connectivity to the given nodes via both primary IP and, if 1123 applicable, secondary IPs. 1124 1125 @type what: C{dict} 1126 @param what: a dictionary of things to check: 1127 - filelist: list of files for which to compute checksums 1128 - nodelist: list of nodes we should check ssh communication with 1129 - node-net-test: list of nodes we should check node daemon port 1130 connectivity with 1131 - hypervisor: list with hypervisors to run the verify for 1132 @type cluster_name: string 1133 @param cluster_name: the cluster's name 1134 @type all_hvparams: dict of dict of strings 1135 @param all_hvparams: a dictionary mapping hypervisor names to hvparams 1136 @rtype: dict 1137 @return: a dictionary with the same keys as the input dict, and 1138 values representing the result of the checks 1139 1140 """ 1141 result = {} 1142 my_name = netutils.Hostname.GetSysName() 1143 port = netutils.GetDaemonPort(constants.NODED) 1144 vm_capable = my_name not in what.get(constants.NV_NONVMNODES, []) 1145 1146 _VerifyHypervisors(what, vm_capable, result, all_hvparams) 1147 _VerifyHvparams(what, vm_capable, result) 1148 1149 if constants.NV_FILELIST in what: 1150 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, 1151 what[constants.NV_FILELIST])) 1152 result[constants.NV_FILELIST] = \ 1153 dict((vcluster.MakeVirtualPath(key), value) 1154 for (key, value) in fingerprints.items()) 1155 1156 if constants.NV_CLIENT_CERT in what: 1157 result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate() 1158 1159 if constants.NV_SSH_SETUP in what: 1160 node_status_list, key_type = what[constants.NV_SSH_SETUP] 1161 result[constants.NV_SSH_SETUP] = \ 1162 _VerifySshSetup(node_status_list, my_name, key_type) 1163 if constants.NV_SSH_CLUTTER in what: 1164 result[constants.NV_SSH_CLUTTER] = \ 1165 _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name) 1166 1167 if constants.NV_NODELIST in what: 1168 (nodes, bynode, mcs) = what[constants.NV_NODELIST] 1169 1170 # Add nodes from other groups (different for each node) 1171 try: 1172 nodes.extend(bynode[my_name]) 1173 except KeyError: 1174 pass 1175 1176 # Use a random order 1177 random.shuffle(nodes) 1178 1179 # Try to contact all nodes 1180 val = {} 1181 ssh_port_map = ssconf.SimpleStore().GetSshPortMap() 1182 for node in nodes: 1183 # We only test if master candidates can communicate to other nodes. 1184 # We cannot test if normal nodes cannot communicate with other nodes, 1185 # because the administrator might have installed additional SSH keys, 1186 # over which Ganeti has no power. 1187 if my_name in mcs: 1188 success, message = _GetSshRunner(cluster_name). 
\ 1189 VerifyNodeHostname(node, ssh_port_map[node]) 1190 if not success: 1191 val[node] = message 1192 1193 result[constants.NV_NODELIST] = val 1194 1195 if constants.NV_NODENETTEST in what: 1196 result[constants.NV_NODENETTEST] = tmp = {} 1197 my_pip = my_sip = None 1198 for name, pip, sip in what[constants.NV_NODENETTEST]: 1199 if name == my_name: 1200 my_pip = pip 1201 my_sip = sip 1202 break 1203 if not my_pip: 1204 tmp[my_name] = ("Can't find my own primary/secondary IP" 1205 " in the node list") 1206 else: 1207 for name, pip, sip in what[constants.NV_NODENETTEST]: 1208 fail = [] 1209 if not netutils.TcpPing(pip, port, source=my_pip): 1210 fail.append("primary") 1211 if sip != pip: 1212 if not netutils.TcpPing(sip, port, source=my_sip): 1213 fail.append("secondary") 1214 if fail: 1215 tmp[name] = ("failure using the %s interface(s)" % 1216 " and ".join(fail)) 1217 1218 if constants.NV_MASTERIP in what: 1219 # FIXME: add checks on incoming data structures (here and in the 1220 # rest of the function) 1221 master_name, master_ip = what[constants.NV_MASTERIP] 1222 if master_name == my_name: 1223 source = constants.IP4_ADDRESS_LOCALHOST 1224 else: 1225 source = None 1226 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, 1227 source=source) 1228 1229 if constants.NV_USERSCRIPTS in what: 1230 result[constants.NV_USERSCRIPTS] = \ 1231 [script for script in what[constants.NV_USERSCRIPTS] 1232 if not utils.IsExecutable(script)] 1233 1234 if constants.NV_OOB_PATHS in what: 1235 result[constants.NV_OOB_PATHS] = tmp = [] 1236 for path in what[constants.NV_OOB_PATHS]: 1237 try: 1238 st = os.stat(path) 1239 except OSError, err: 1240 tmp.append("error stating out of band helper: %s" % err) 1241 else: 1242 if stat.S_ISREG(st.st_mode): 1243 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR: 1244 tmp.append(None) 1245 else: 1246 tmp.append("out of band helper %s is not executable" % path) 1247 else: 1248 tmp.append("out of band helper %s is not a file" % path) 1249 1250 if constants.NV_LVLIST in what and vm_capable: 1251 try: 1252 val = GetVolumeList(utils.ListVolumeGroups().keys()) 1253 except RPCFail, err: 1254 val = str(err) 1255 result[constants.NV_LVLIST] = val 1256 1257 _VerifyInstanceList(what, vm_capable, result, all_hvparams) 1258 1259 if constants.NV_VGLIST in what and vm_capable: 1260 result[constants.NV_VGLIST] = utils.ListVolumeGroups() 1261 1262 if constants.NV_PVLIST in what and vm_capable: 1263 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what 1264 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], 1265 filter_allocatable=False, 1266 include_lvs=check_exclusive_pvs) 1267 if check_exclusive_pvs: 1268 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) 1269 for pvi in val: 1270 # Avoid sending useless data on the wire 1271 pvi.lv_list = [] 1272 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) 1273 1274 if constants.NV_VERSION in what: 1275 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, 1276 constants.RELEASE_VERSION) 1277 1278 _VerifyNodeInfo(what, vm_capable, result, all_hvparams) 1279 1280 if constants.NV_DRBDVERSION in what and vm_capable: 1281 try: 1282 drbd_version = DRBD8.GetProcInfo().GetVersionString() 1283 except errors.BlockDeviceError, err: 1284 logging.warning("Can't get DRBD version", exc_info=True) 1285 drbd_version = str(err) 1286 result[constants.NV_DRBDVERSION] = drbd_version 1287 1288 if constants.NV_DRBDLIST in what and vm_capable: 1289 try: 1290 used_minors = drbd.DRBD8.GetUsedDevs() 1291 except 
errors.BlockDeviceError, err: 1292 logging.warning("Can't get used minors list", exc_info=True) 1293 used_minors = str(err) 1294 result[constants.NV_DRBDLIST] = used_minors 1295 1296 if constants.NV_DRBDHELPER in what and vm_capable: 1297 status = True 1298 try: 1299 payload = drbd.DRBD8.GetUsermodeHelper() 1300 except errors.BlockDeviceError, err: 1301 logging.error("Can't get DRBD usermode helper: %s", str(err)) 1302 status = False 1303 payload = str(err) 1304 result[constants.NV_DRBDHELPER] = (status, payload) 1305 1306 if constants.NV_NODESETUP in what: 1307 result[constants.NV_NODESETUP] = tmpr = [] 1308 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"): 1309 tmpr.append("The sysfs filesystem doesn't seem to be mounted" 1310 " under /sys, missing required directories /sys/block" 1311 " and /sys/class/net") 1312 if (not os.path.isdir("/proc/sys") or 1313 not os.path.isfile("/proc/sysrq-trigger")): 1314 tmpr.append("The procfs filesystem doesn't seem to be mounted" 1315 " under /proc, missing required directory /proc/sys and" 1316 " the file /proc/sysrq-trigger") 1317 1318 if constants.NV_TIME in what: 1319 result[constants.NV_TIME] = utils.SplitTime(time.time()) 1320 1321 if constants.NV_OSLIST in what and vm_capable: 1322 result[constants.NV_OSLIST] = DiagnoseOS() 1323 1324 if constants.NV_BRIDGES in what and vm_capable: 1325 result[constants.NV_BRIDGES] = [bridge 1326 for bridge in what[constants.NV_BRIDGES] 1327 if not utils.BridgeExists(bridge)] 1328 1329 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name: 1330 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 1331 filestorage.ComputeWrongFileStoragePaths() 1332 1333 if what.get(constants.NV_FILE_STORAGE_PATH): 1334 pathresult = filestorage.CheckFileStoragePath( 1335 what[constants.NV_FILE_STORAGE_PATH]) 1336 if pathresult: 1337 result[constants.NV_FILE_STORAGE_PATH] = pathresult 1338 1339 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH): 1340 pathresult = filestorage.CheckFileStoragePath( 1341 what[constants.NV_SHARED_FILE_STORAGE_PATH]) 1342 if pathresult: 1343 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult 1344 1345 return result
1346
1347 1348 -def GetCryptoTokens(token_requests):
1349 """Perform actions on the node's cryptographic tokens. 1350 1351 Token types can be 'ssl' or 'ssh'. So far only some actions are implemented 1352 for 'ssl'. Action 'get' returns the digest of the public client ssl 1353 certificate. Action 'create' creates a new client certificate and private key 1354 and also returns the digest of the certificate. The third parameter of a 1355 token request are optional parameters for the actions, so far only the 1356 filename is supported. 1357 1358 @type token_requests: list of tuples of (string, string, dict), where the 1359 first string is in constants.CRYPTO_TYPES, the second in 1360 constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string 1361 to string. 1362 @param token_requests: list of requests of cryptographic tokens and actions 1363 to perform on them. The actions come with a dictionary of options. 1364 @rtype: list of tuples (string, string) 1365 @return: list of tuples of the token type and the public crypto token 1366 1367 """ 1368 tokens = [] 1369 for (token_type, action, _) in token_requests: 1370 if token_type not in constants.CRYPTO_TYPES: 1371 raise errors.ProgrammerError("Token type '%s' not supported." % 1372 token_type) 1373 if action not in constants.CRYPTO_ACTIONS: 1374 raise errors.ProgrammerError("Action '%s' is not supported." % 1375 action) 1376 if token_type == constants.CRYPTO_TYPE_SSL_DIGEST: 1377 tokens.append((token_type, 1378 utils.GetCertificateDigest())) 1379 return tokens
1380
  1381 
  1382 def EnsureDaemon(daemon_name, run): 
  1383   """Ensures the given daemon is running or stopped. 
  1384 
  1385   @type daemon_name: string 
  1386   @param daemon_name: name of the daemon (e.g., constants.KVMD) 
  1387 
  1388   @type run: bool 
  1389   @param run: whether to start or stop the daemon 
  1390 
  1391   @rtype: bool 
  1392   @return: 'True' if daemon successfully started/stopped, 
  1393       'False' otherwise 
  1394 
  1395   """ 
  1396   allowed_daemons = [constants.KVMD] 
  1397 
  1398   if daemon_name not in allowed_daemons: 
  1399     fn = lambda _: False 
  1400   elif run: 
  1401     fn = utils.EnsureDaemon 
  1402   else: 
  1403     fn = utils.StopDaemon 
  1404 
  1405   return fn(daemon_name) 
  1406 
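Usage sketch: only whitelisted daemons (currently just KVMD) can be toggled; any other name yields False.

    EnsureDaemon(constants.KVMD, True)   # start kvmd, True on success
    EnsureDaemon(constants.KVMD, False)  # stop kvmd
    EnsureDaemon(constants.NODED, True)  # not whitelisted -> False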
1407 1408 -def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
1409 (_, noded_cert) = \ 1410 utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file)) 1411 data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert 1412 1413 cluster_name = ssconf_store.GetClusterName() 1414 data[constants.SSHS_CLUSTER_NAME] = cluster_name
1415
1416 1417 -def AddNodeSshKey(node_uuid, node_name, 1418 potential_master_candidates, 1419 to_authorized_keys=False, 1420 to_public_keys=False, 1421 get_public_keys=False, 1422 pub_key_file=pathutils.SSH_PUB_KEYS, 1423 ssconf_store=None, 1424 noded_cert_file=pathutils.NODED_CERT_FILE, 1425 run_cmd_fn=ssh.RunSshCmdWithStdin, 1426 ssh_update_debug=False, 1427 ssh_update_verbose=False):
1428 """Distributes a node's public SSH key across the cluster. 1429 1430 Note that this function should only be executed on the master node, which 1431 then will copy the new node's key to all nodes in the cluster via SSH. 1432 1433 Also note: at least one of the flags C{to_authorized_keys}, 1434 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for 1435 the function to actually perform any actions. 1436 1437 @type node_uuid: str 1438 @param node_uuid: the UUID of the node whose key is added 1439 @type node_name: str 1440 @param node_name: the name of the node whose key is added 1441 @type potential_master_candidates: list of str 1442 @param potential_master_candidates: list of node names of potential master 1443 candidates; this should match the list of uuids in the public key file 1444 @type to_authorized_keys: boolean 1445 @param to_authorized_keys: whether the key should be added to the 1446 C{authorized_keys} file of all nodes 1447 @type to_public_keys: boolean 1448 @param to_public_keys: whether the keys should be added to the public key file 1449 @type get_public_keys: boolean 1450 @param get_public_keys: whether the node should add the clusters' public keys 1451 to its {ganeti_pub_keys} file 1452 1453 """ 1454 node_list = [SshAddNodeInfo(name=node_name, uuid=node_uuid, 1455 to_authorized_keys=to_authorized_keys, 1456 to_public_keys=to_public_keys, 1457 get_public_keys=get_public_keys)] 1458 return AddNodeSshKeyBulk(node_list, 1459 potential_master_candidates, 1460 pub_key_file=pub_key_file, 1461 ssconf_store=ssconf_store, 1462 noded_cert_file=noded_cert_file, 1463 run_cmd_fn=run_cmd_fn, 1464 ssh_update_debug=ssh_update_debug, 1465 ssh_update_verbose=ssh_update_verbose)
1466 1467 1468 # Node info named tuple specifically for the use with AddNodeSshKeyBulk 1469 SshAddNodeInfo = collections.namedtuple( 1470 "SshAddNodeInfo", 1471 ["uuid", 1472 "name", 1473 "to_authorized_keys", 1474 "to_public_keys", 1475 "get_public_keys"])
1476 1477 1478 -def AddNodeSshKeyBulk(node_list, 1479 potential_master_candidates, 1480 pub_key_file=pathutils.SSH_PUB_KEYS, 1481 ssconf_store=None, 1482 noded_cert_file=pathutils.NODED_CERT_FILE, 1483 run_cmd_fn=ssh.RunSshCmdWithStdin, 1484 ssh_update_debug=False, 1485 ssh_update_verbose=False):
1486 """Distributes a node's public SSH key across the cluster. 1487 1488 Note that this function should only be executed on the master node, which 1489 then will copy the new node's key to all nodes in the cluster via SSH. 1490 1491 Also note: at least one of the flags C{to_authorized_keys}, 1492 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for 1493 the function to actually perform any actions. 1494 1495 @type node_list: list of SshAddNodeInfo tuples 1496 @param node_list: list of tuples containing the necessary node information for 1497 adding their keys 1498 @type potential_master_candidates: list of str 1499 @param potential_master_candidates: list of node names of potential master 1500 candidates; this should match the list of uuids in the public key file 1501 1502 """ 1503 # whether there are any keys to be added or retrieved at all 1504 to_authorized_keys = any([node_info.to_authorized_keys for node_info in 1505 node_list]) 1506 to_public_keys = any([node_info.to_public_keys for node_info in 1507 node_list]) 1508 1509 if not ssconf_store: 1510 ssconf_store = ssconf.SimpleStore() 1511 1512 for node_info in node_list: 1513 # replacement not necessary for keys that are not supposed to be in the 1514 # list of public keys 1515 if not node_info.to_public_keys: 1516 continue 1517 # Check and fix sanity of key file 1518 keys_by_name = ssh.QueryPubKeyFile([node_info.name], key_file=pub_key_file) 1519 keys_by_uuid = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file) 1520 1521 if (not keys_by_name or node_info.name not in keys_by_name) \ 1522 and (not keys_by_uuid or node_info.uuid not in keys_by_uuid): 1523 raise errors.SshUpdateError( 1524 "No keys found for the new node '%s' (UUID %s) in the list of public" 1525 " SSH keys, neither for the name or the UUID" % 1526 (node_info.name, node_info.uuid)) 1527 else: 1528 if node_info.name in keys_by_name: 1529 # Replace the name by UUID in the file as the name should only be used 1530 # temporarily 1531 ssh.ReplaceNameByUuid(node_info.uuid, node_info.name, 1532 error_fn=errors.SshUpdateError, 1533 key_file=pub_key_file) 1534 1535 # Retrieve updated map of UUIDs to keys 1536 keys_by_uuid = ssh.QueryPubKeyFile( 1537 [node_info.uuid for node_info in node_list], key_file=pub_key_file) 1538 1539 # Update the master node's key files 1540 (auth_key_file, _) = \ 1541 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1542 for node_info in node_list: 1543 if node_info.to_authorized_keys: 1544 ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_info.uuid]) 1545 1546 base_data = {} 1547 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1548 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1549 1550 ssh_port_map = ssconf_store.GetSshPortMap() 1551 1552 # Update the target nodes themselves 1553 for node_info in node_list: 1554 logging.debug("Updating SSH key files of target node '%s'.", node_info.name) 1555 if node_info.get_public_keys: 1556 node_data = {} 1557 _InitSshUpdateData(node_data, noded_cert_file, ssconf_store) 1558 all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file) 1559 node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1560 (constants.SSHS_OVERRIDE, all_keys) 1561 1562 try: 1563 backoff = 5 # seconds 1564 utils.RetryByNumberOfTimes( 1565 constants.SSHS_MAX_RETRIES, backoff, 1566 errors.SshUpdateError, 1567 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE, 1568 ssh_port_map.get(node_info.name), node_data, 1569 debug=ssh_update_debug, 
verbose=ssh_update_verbose, 1570 use_cluster_key=False, ask_key=False, strict_host_check=False) 1571 except errors.SshUpdateError as e: 1572 # Clean up the master's public key file if adding key fails 1573 if node_info.to_public_keys: 1574 ssh.RemovePublicKey(node_info.uuid) 1575 raise e 1576 1577 # Update all nodes except master and the target nodes 1578 keys_by_uuid_auth = ssh.QueryPubKeyFile( 1579 [node_info.uuid for node_info in node_list 1580 if node_info.to_authorized_keys], 1581 key_file=pub_key_file) 1582 if to_authorized_keys: 1583 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1584 (constants.SSHS_ADD, keys_by_uuid_auth) 1585 1586 pot_mc_data = base_data.copy() 1587 keys_by_uuid_pub = ssh.QueryPubKeyFile( 1588 [node_info.uuid for node_info in node_list 1589 if node_info.to_public_keys], 1590 key_file=pub_key_file) 1591 if to_public_keys: 1592 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1593 (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid_pub) 1594 1595 all_nodes = ssconf_store.GetNodeList() 1596 master_node = ssconf_store.GetMasterNode() 1597 online_nodes = ssconf_store.GetOnlineNodeList() 1598 1599 node_errors = [] 1600 for node in all_nodes: 1601 if node == master_node: 1602 logging.debug("Skipping master node '%s'.", master_node) 1603 continue 1604 if node not in online_nodes: 1605 logging.debug("Skipping offline node '%s'.", node) 1606 continue 1607 if node in potential_master_candidates: 1608 logging.debug("Updating SSH key files of node '%s'.", node) 1609 try: 1610 backoff = 5 # seconds 1611 utils.RetryByNumberOfTimes( 1612 constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError, 1613 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1614 ssh_port_map.get(node), pot_mc_data, 1615 debug=ssh_update_debug, verbose=ssh_update_verbose, 1616 use_cluster_key=False, ask_key=False, strict_host_check=False) 1617 except errors.SshUpdateError as last_exception: 1618 error_msg = ("When adding the key of node '%s', updating SSH key" 1619 " files of node '%s' failed after %s retries." 1620 " Not trying again. Last error was: %s." % 1621 (node, node_info.name, constants.SSHS_MAX_RETRIES, 1622 last_exception)) 1623 node_errors.append((node, error_msg)) 1624 # We only log the error and don't throw an exception, because 1625 # one unreachable node shall not abort the entire procedure. 1626 logging.error(error_msg) 1627 1628 else: 1629 if to_authorized_keys: 1630 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, 1631 ssh_port_map.get(node), base_data, 1632 debug=ssh_update_debug, verbose=ssh_update_verbose, 1633 use_cluster_key=False, ask_key=False, 1634 strict_host_check=False) 1635 1636 return node_errors
1637
1638 1639 # TODO: will be fixed with pending patch series. 1640 # pylint: disable=R0913 1641 def RemoveNodeSshKey(node_uuid, node_name, 1642 master_candidate_uuids, 1643 potential_master_candidates, 1644 master_uuid=None, 1645 keys_to_remove=None, 1646 from_authorized_keys=False, 1647 from_public_keys=False, 1648 clear_authorized_keys=False, 1649 clear_public_keys=False, 1650 pub_key_file=pathutils.SSH_PUB_KEYS, 1651 ssconf_store=None, 1652 noded_cert_file=pathutils.NODED_CERT_FILE, 1653 readd=False, 1654 run_cmd_fn=ssh.RunSshCmdWithStdin, 1655 ssh_update_debug=False, 1656 ssh_update_verbose=False):
1657 """Removes the node's SSH keys from the key files and distributes those. 1658 1659 Note that at least one of the flags C{from_authorized_keys}, 1660 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1661 has to be set to C{True} for the function to perform any action at all. 1662 Not doing so will make the function raise an C{SshUpdateError}. 1663 1664 @type node_uuid: str 1665 @param node_uuid: UUID of the node whose key is removed 1666 @type node_name: str 1667 @param node_name: name of the node whose key is removed 1668 @type master_candidate_uuids: list of str 1669 @param master_candidate_uuids: list of UUIDs of the current master candidates 1670 @type potential_master_candidates: list of str 1671 @param potential_master_candidates: list of names of potential master 1672 candidates 1673 @type keys_to_remove: dict of str to list of str 1674 @param keys_to_remove: a dictionary mapping node UUIDs to lists of SSH keys 1675 to be removed. This list is supposed to be used only if the keys are not 1676 in the public keys file. This is for example the case when removing a 1677 master node's key. 1678 @type from_authorized_keys: boolean 1679 @param from_authorized_keys: whether or not the key should be removed 1680 from the C{authorized_keys} file 1681 @type from_public_keys: boolean 1682 @param from_public_keys: whether or not the key should be removed from 1683 the C{ganeti_pub_keys} file 1684 @type clear_authorized_keys: boolean 1685 @param clear_authorized_keys: whether or not the C{authorized_keys} file 1686 should be cleared on the node whose keys are removed 1687 @type clear_public_keys: boolean 1688 @param clear_public_keys: whether to clear the node's C{ganeti_pub_keys} file 1689 @type readd: boolean 1690 @param readd: whether this is called during a readd operation. 1691 @rtype: list of string 1692 @returns: list of feedback messages 1693 1694 """ 1695 node_list = [SshRemoveNodeInfo(uuid=node_uuid, 1696 name=node_name, 1697 from_authorized_keys=from_authorized_keys, 1698 from_public_keys=from_public_keys, 1699 clear_authorized_keys=clear_authorized_keys, 1700 clear_public_keys=clear_public_keys)] 1701 return RemoveNodeSshKeyBulk(node_list, 1702 master_candidate_uuids, 1703 potential_master_candidates, 1704 master_uuid=master_uuid, 1705 keys_to_remove=keys_to_remove, 1706 pub_key_file=pub_key_file, 1707 ssconf_store=ssconf_store, 1708 noded_cert_file=noded_cert_file, 1709 readd=readd, 1710 run_cmd_fn=run_cmd_fn, 1711 ssh_update_debug=ssh_update_debug, 1712 ssh_update_verbose=ssh_update_verbose)
1713 1714 1715 # Node info named tuple specifically for use with RemoveNodeSshKeyBulk 1716 SshRemoveNodeInfo = collections.namedtuple( 1717 "SshRemoveNodeInfo", 1718 ["uuid", 1719 "name", 1720 "from_authorized_keys", 1721 "from_public_keys", 1722 "clear_authorized_keys", 1723 "clear_public_keys"])
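To make the bulk-versus-single pattern concrete, here is a minimal self-contained sketch; remove_bulk and remove_single are hypothetical stand-ins for RemoveNodeSshKeyBulk and RemoveNodeSshKey, and only the field list of SshRemoveNodeInfo is taken from the source above.

import collections

# Mirrors the namedtuple defined above.
SshRemoveNodeInfo = collections.namedtuple(
    "SshRemoveNodeInfo",
    ["uuid", "name", "from_authorized_keys", "from_public_keys",
     "clear_authorized_keys", "clear_public_keys"])

def remove_bulk(node_list):
  # Stand-in for the bulk operation: report what would be touched.
  return [(info.name, info.from_authorized_keys, info.from_public_keys)
          for info in node_list]

def remove_single(node_uuid, node_name, **flags):
  # The single-node call is just a one-element bulk call.
  return remove_bulk([SshRemoveNodeInfo(uuid=node_uuid, name=node_name,
                                        **flags)])

print(remove_single("0000-1111", "node1.example.com",
                    from_authorized_keys=True, from_public_keys=True,
                    clear_authorized_keys=False, clear_public_keys=False))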
1724 1725 1726 def RemoveNodeSshKeyBulk(node_list, 1727 master_candidate_uuids, 1728 potential_master_candidates, 1729 master_uuid=None, 1730 keys_to_remove=None, 1731 pub_key_file=pathutils.SSH_PUB_KEYS, 1732 ssconf_store=None, 1733 noded_cert_file=pathutils.NODED_CERT_FILE, 1734 readd=False, 1735 run_cmd_fn=ssh.RunSshCmdWithStdin, 1736 ssh_update_debug=False, 1737 ssh_update_verbose=False):
1738 """Removes the node's SSH keys from the key files and distributes those. 1739 1740 Note that at least one of the flags C{from_authorized_keys}, 1741 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1742 of at least one node has to be set to C{True} for the function to perform any 1743 action at all. Not doing so will make the function raise an C{SshUpdateError}. 1744 1745 @type node_list: list of C{SshRemoveNodeInfo}. 1746 @param node_list: list of information about nodes whose keys are being removed 1747 @type master_candidate_uuids: list of str 1748 @param master_candidate_uuids: list of UUIDs of the current master candidates 1749 @type potential_master_candidates: list of str 1750 @param potential_master_candidates: list of names of potential master 1751 candidates 1752 @type keys_to_remove: dict of str to list of str 1753 @param keys_to_remove: a dictionary mapping node UUIDs to lists of SSH keys 1754 to be removed. This list is supposed to be used only if the keys are not 1755 in the public keys file. This is for example the case when removing a 1756 master node's key. 1757 @type readd: boolean 1758 @param readd: whether this is called during a readd operation. 1759 @rtype: list of string 1760 @returns: list of feedback messages 1761 1762 """ 1763 # Non-disruptive error messages, list of (node, msg) pairs 1764 result_msgs = [] 1765 1766 # whether there are any keys to be removed or cleared at all 1767 from_authorized_keys = any([node_info.from_authorized_keys for node_info in 1768 node_list]) 1769 from_public_keys = any([node_info.from_public_keys for node_info in 1770 node_list]) 1771 clear_authorized_keys = any([node_info.clear_authorized_keys for node_info in 1772 node_list]) 1773 clear_public_keys = any([node_info.clear_public_keys for node_info in 1774 node_list]) 1775 1776 # Make sure at least one of these flags is true. 1777 if not (from_authorized_keys or from_public_keys or clear_authorized_keys 1778 or clear_public_keys): 1779 raise errors.SshUpdateError("No removal from any key file was requested.") 1780 1781 if not ssconf_store: 1782 ssconf_store = ssconf.SimpleStore() 1783 1784 master_node = ssconf_store.GetMasterNode() 1785 ssh_port_map = ssconf_store.GetSshPortMap() 1786 1787 all_keys_to_remove = {} 1788 if from_authorized_keys or from_public_keys: 1789 for node_info in node_list: 1790 # Skip nodes that don't actually need any keys to be removed. 1791 if not (node_info.from_authorized_keys or node_info.from_public_keys): 1792 continue 1793 if node_info.name == master_node and not keys_to_remove: 1794 raise errors.SshUpdateError("Cannot remove the master node's keys.") 1795 if keys_to_remove: 1796 keys = keys_to_remove 1797 else: 1798 keys = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file) 1799 if (not keys or node_info.uuid not in keys) and not readd: 1800 raise errors.SshUpdateError("Node '%s' not found in the list of" 1801 " public SSH keys. It seems someone" 1802 " is trying to remove a key from outside" 1803 " the cluster!" % node_info.uuid) 1804 # During an upgrade all nodes have the master key.
In this case we 1805 # should not remove it to avoid accidentally shutting down cluster 1806 # SSH communication 1807 master_keys = None 1808 if master_uuid: 1809 master_keys = ssh.QueryPubKeyFile([master_uuid], 1810 key_file=pub_key_file) 1811 for master_key in master_keys: 1812 if master_key in keys[node_info.uuid]: 1813 keys[node_info.uuid].remove(master_key) 1814 1815 all_keys_to_remove.update(keys) 1816 1817 if all_keys_to_remove: 1818 base_data = {} 1819 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1820 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1821 1822 if from_authorized_keys: 1823 # UUIDs of nodes that are supposed to be removed from the 1824 # authorized_keys files. 1825 nodes_remove_from_authorized_keys = [ 1826 node_info.uuid for node_info in node_list 1827 if node_info.from_authorized_keys] 1828 keys_to_remove_from_authorized_keys = dict([ 1829 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1830 if uuid in nodes_remove_from_authorized_keys]) 1831 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1832 (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys) 1833 (auth_key_file, _) = \ 1834 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, 1835 dircheck=False) 1836 1837 for uuid in nodes_remove_from_authorized_keys: 1838 ssh.RemoveAuthorizedKeys(auth_key_file, 1839 keys_to_remove_from_authorized_keys[uuid]) 1840 1841 pot_mc_data = base_data.copy() 1842 1843 if from_public_keys: 1844 nodes_remove_from_public_keys = [ 1845 node_info.uuid for node_info in node_list 1846 if node_info.from_public_keys] 1847 keys_to_remove_from_public_keys = dict([ 1848 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1849 if uuid in nodes_remove_from_public_keys]) 1850 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1851 (constants.SSHS_REMOVE, keys_to_remove_from_public_keys) 1852 1853 all_nodes = ssconf_store.GetNodeList() 1854 online_nodes = ssconf_store.GetOnlineNodeList() 1855 all_nodes_to_remove = [node_info.name for node_info in node_list] 1856 logging.debug("Removing keys of nodes '%s' from all nodes but itself and" 1857 " master.", ", ".join(all_nodes_to_remove)) 1858 for node in all_nodes: 1859 if node == master_node: 1860 logging.debug("Skipping master node '%s'.", master_node) 1861 continue 1862 if node not in online_nodes: 1863 logging.debug("Skipping offline node '%s'.", node) 1864 continue 1865 if node in all_nodes_to_remove: 1866 logging.debug("Skipping node whose key is removed itself '%s'.", node) 1867 continue 1868 ssh_port = ssh_port_map.get(node) 1869 if not ssh_port: 1870 raise errors.OpExecError("No SSH port information available for" 1871 " node '%s', map: %s." % 1872 (node, ssh_port_map)) 1873 error_msg_final = ("When removing the key of node '%s', updating the" 1874 " SSH key files of node '%s' failed. 
Last error" 1875 " was: %s.") 1876 if node in potential_master_candidates: 1877 logging.debug("Updating key setup of potential master candidate node" 1878 " %s.", node) 1879 try: 1880 backoff = 5 # seconds 1881 utils.RetryByNumberOfTimes( 1882 constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError, 1883 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1884 ssh_port, pot_mc_data, 1885 debug=ssh_update_debug, verbose=ssh_update_verbose, 1886 use_cluster_key=False, ask_key=False, strict_host_check=False) 1887 except errors.SshUpdateError as last_exception: 1888 error_msg = error_msg_final % ( 1889 node_info.name, node, last_exception) 1890 result_msgs.append((node, error_msg)) 1891 logging.error(error_msg) 1892 1893 else: 1894 if from_authorized_keys: 1895 logging.debug("Updating key setup of normal node %s.", node) 1896 try: 1897 backoff = 5 # seconds 1898 utils.RetryByNumberOfTimes( 1899 constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError, 1900 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1901 ssh_port, base_data, 1902 debug=ssh_update_debug, verbose=ssh_update_verbose, 1903 use_cluster_key=False, ask_key=False, strict_host_check=False) 1904 except errors.SshUpdateError as last_exception: 1905 error_msg = error_msg_final % ( 1906 node_info.name, node, last_exception) 1907 result_msgs.append((node, error_msg)) 1908 logging.error(error_msg) 1909 1910 for node_info in node_list: 1911 if node_info.clear_authorized_keys or node_info.from_public_keys or \ 1912 node_info.clear_public_keys: 1913 data = {} 1914 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1915 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1916 ssh_port = ssh_port_map.get(node_info.name) 1917 if not ssh_port: 1918 raise errors.OpExecError("No SSH port information available for" 1919 " node '%s', which is leaving the cluster." % node_info.name) 1920 1921 if node_info.clear_authorized_keys: 1922 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore, 1923 # we have to specify exactly which keys to clear to leave keys untouched 1924 # that were not added by Ganeti. 1925 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids 1926 if uuid != node_info.uuid] 1927 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, 1928 key_file=pub_key_file) 1929 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1930 (constants.SSHS_REMOVE, candidate_keys) 1931 1932 if node_info.clear_public_keys: 1933 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1934 (constants.SSHS_CLEAR, {}) 1935 elif node_info.from_public_keys: 1936 # Since clearing the public keys subsumes removing just a single key, 1937 # we only remove the individual key if clear_public_keys is 'False'.
1938 1939 if all_keys_to_remove: 1940 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1941 (constants.SSHS_REMOVE, all_keys_to_remove) 1942 1943 # If we have no changes to any key file for this node, move on to the next 1944 if not (constants.SSHS_SSH_PUBLIC_KEYS in data or 1945 constants.SSHS_SSH_AUTHORIZED_KEYS in data): 1946 continue 1947 1948 logging.debug("Updating SSH key setup of target node '%s'.", 1949 node_info.name) 1950 try: 1951 backoff = 5 # seconds 1952 utils.RetryByNumberOfTimes( 1953 constants.SSHS_MAX_RETRIES, backoff, 1954 errors.SshUpdateError, 1955 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE, 1956 ssh_port, data, 1957 debug=ssh_update_debug, verbose=ssh_update_verbose, 1958 use_cluster_key=False, ask_key=False, strict_host_check=False) 1959 except errors.SshUpdateError as last_exception: 1960 result_msgs.append( 1961 (node_info.name, 1962 ("Removing SSH keys from node '%s' failed." 1963 " This can happen when the node is already unreachable." 1964 " Error: %s" % (node_info.name, last_exception)))) 1965 1966 if all_keys_to_remove and from_public_keys: 1967 for node_uuid in nodes_remove_from_public_keys: 1968 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 1969 1970 return result_msgs
1971 # pylint: enable=R0913
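The retry-with-backoff-and-collect pattern used throughout the two bulk functions above can be sketched in isolation as follows; retry_n_times is a simplified, hypothetical stand-in for utils.RetryByNumberOfTimes, whose real signature is not shown in this excerpt.

import logging
import time

def retry_n_times(max_retries, backoff, exc_type, fn, *args, **kwargs):
  # Call fn until it succeeds or has failed max_retries times with exc_type.
  for attempt in range(1, max_retries + 1):
    try:
      return fn(*args, **kwargs)
    except exc_type as err:
      logging.warning("Attempt %d/%d failed: %s", attempt, max_retries, err)
      if attempt == max_retries:
        raise
      time.sleep(backoff)

def flaky():
  raise RuntimeError("node unreachable")

# Per-node failures are collected instead of raised, so one unreachable
# node does not abort the whole key distribution.
node_errors = []
for node in ["node1.example.com", "node2.example.com"]:
  try:
    retry_n_times(3, 0.01, RuntimeError, flaky)
  except RuntimeError as err:
    node_errors.append((node, str(err)))
print(node_errors)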
1972 1973 1974 def RemoveSshKeyFromPublicKeyFile(node_name, 1975 pub_key_file=pathutils.SSH_PUB_KEYS, 1976 ssconf_store=None):
1977 """Removes an SSH key from the master's public key file. 1978 1979 This is an operation that is only used to clean up after failed operations 1980 (for example failed hooks before adding a node). To avoid abuse of this 1981 function (and the matching RPC call), we add a safety check to make sure 1982 that the only keys that can be removed are stray keys of nodes that are 1983 no longer in the cluster. 1984 1985 @type node_name: string 1986 @param node_name: the name of the node whose key is removed 1987 1988 """ 1989 if not ssconf_store: 1990 ssconf_store = ssconf.SimpleStore() 1991 1992 node_list = ssconf_store.GetNodeList() 1993 1994 if node_name in node_list: 1995 raise errors.SshUpdateError("Cannot remove key of node '%s'," 1996 " because it still belongs to the cluster." 1997 % node_name) 1998 1999 keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file) 2000 if not keys_by_name or node_name not in keys_by_name: 2001 logging.info("The node '%s' whose key is supposed to be removed does not" 2002 " have an entry in the public key file. Hence, there is" 2003 " nothing left to do.", node_name) 2004 2005 ssh.RemovePublicKey(node_name, key_file=pub_key_file)
2006
2007 2008 def _GenerateNodeSshKey(node_name, ssh_port_map, ssh_key_type, ssh_key_bits, 2009 ssconf_store=None, 2010 noded_cert_file=pathutils.NODED_CERT_FILE, 2011 run_cmd_fn=ssh.RunSshCmdWithStdin, 2012 suffix="", 2013 ssh_update_debug=False, 2014 ssh_update_verbose=False):
2015 """Generates the root SSH key pair on the node. 2016 2017 @type node_name: str 2018 @param node_name: name of the node whose key is generated 2019 @type ssh_port_map: dict of str to int 2020 @param ssh_port_map: mapping of node names to their SSH port 2021 @type ssh_key_type: One of L{constants.SSHK_ALL} 2022 @param ssh_key_type: the type of SSH key to be generated 2023 @type ssh_key_bits: int 2024 @param ssh_key_bits: the length of the key to be generated 2025 2026 """ 2027 if not ssconf_store: 2028 ssconf_store = ssconf.SimpleStore() 2029 2030 data = {} 2031 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 2032 cluster_name = data[constants.SSHS_CLUSTER_NAME] 2033 data[constants.SSHS_GENERATE] = (ssh_key_type, ssh_key_bits, suffix) 2034 2035 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, 2036 ssh_port_map.get(node_name), data, 2037 debug=ssh_update_debug, verbose=ssh_update_verbose, 2038 use_cluster_key=False, ask_key=False, strict_host_check=False)
2039
2040 2041 def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
2042 master_node_uuids = [node_uuid for (node_uuid, node_name) 2043 in node_uuid_name_map 2044 if node_name == master_node_name] 2045 if len(master_node_uuids) != 1: 2046 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 2047 " name: '%s', matching UUIDs: %s" % 2048 (master_node_name, master_node_uuids)) 2049 return master_node_uuids[0]
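A standalone usage sketch of the selection logic above; note that callers build node_uuid_name_map with zip(), which is only re-iterable under Python 2, where zip returns a list.

def get_master_node_uuid(node_uuid_name_map, master_node_name):
  # Same logic as _GetMasterNodeUUID above, without the ganeti error class.
  matches = [uuid for (uuid, name) in node_uuid_name_map
             if name == master_node_name]
  if len(matches) != 1:
    raise ValueError("no unique master UUID, candidates: %s" % matches)
  return matches[0]

pairs = [("uuid-1", "node1"), ("uuid-2", "master"), ("uuid-3", "node3")]
print(get_master_node_uuid(pairs, "master"))  # -> uuid-2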
2050
2051 2052 def _GetOldMasterKeys(master_node_uuid, pub_key_file):
2053 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid], 2054 key_file=pub_key_file) 2055 if not old_master_keys_by_uuid: 2056 raise errors.SshUpdateError("No public key of the master node (UUID '%s')" 2057 " found, not generating a new key." 2058 % master_node_uuid) 2059 return old_master_keys_by_uuid
2060
2061 2062 def RenewSshKeys(node_uuids, node_names, master_candidate_uuids, 2063 potential_master_candidates, old_key_type, new_key_type, 2064 new_key_bits, 2065 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS, 2066 ssconf_store=None, 2067 noded_cert_file=pathutils.NODED_CERT_FILE, 2068 run_cmd_fn=ssh.RunSshCmdWithStdin, 2069 ssh_update_debug=False, 2070 ssh_update_verbose=False):
2071 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys. 2072 2073 @type node_uuids: list of str 2074 @param node_uuids: list of node UUIDs whose keys should be renewed 2075 @type node_names: list of str 2076 @param node_names: list of node names whose keys should be renewed. This list 2077 should match the C{node_uuids} parameter 2078 @type master_candidate_uuids: list of str 2079 @param master_candidate_uuids: list of UUIDs of master candidates or 2080 master node 2081 @type old_key_type: One of L{constants.SSHK_ALL} 2082 @param old_key_type: the type of SSH key already present on nodes 2083 @type new_key_type: One of L{constants.SSHK_ALL} 2084 @param new_key_type: the type of SSH key to be generated 2085 @type new_key_bits: int 2086 @param new_key_bits: the length of the key to be generated 2087 @type ganeti_pub_keys_file: str 2088 @param ganeti_pub_keys_file: file path of the public key file 2089 @type noded_cert_file: str 2090 @param noded_cert_file: path of the noded SSL certificate file 2091 @type run_cmd_fn: function 2092 @param run_cmd_fn: function to run commands on remote nodes via SSH 2093 @raises ProgrammerError: if node_uuids and node_names don't match; 2094 SshUpdateError if a node's key is missing from the public key file, 2095 if a node's new SSH key could not be fetched from it, or if there is 2096 no entry or more than one entry in the public key list for the master 2097 node. 2098 2099 """ 2100 if not ssconf_store: 2101 ssconf_store = ssconf.SimpleStore() 2102 cluster_name = ssconf_store.GetClusterName() 2103 2104 if len(node_uuids) != len(node_names): 2105 raise errors.ProgrammerError("List of node UUIDs and node names" 2106 " does not match in length.") 2107 2108 old_pub_keyfile = ssh.GetSshPubKeyFilename(old_key_type) 2109 new_pub_keyfile = ssh.GetSshPubKeyFilename(new_key_type) 2110 old_master_key = ssh.ReadLocalSshPubKeys([old_key_type]) 2111 2112 node_uuid_name_map = zip(node_uuids, node_names) 2113 2114 master_node_name = ssconf_store.GetMasterNode() 2115 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name) 2116 ssh_port_map = ssconf_store.GetSshPortMap() 2117 # List of all node errors that happened, but which did not abort the 2118 # procedure as a whole. It is important that this is a list to have a 2119 # somewhat chronological history of events. 2120 all_node_errors = [] 2121 2122 # process non-master nodes 2123 2124 # keys to add in bulk at the end 2125 node_keys_to_add = [] 2126 2127 # list of all nodes 2128 node_list = [] 2129 2130 # list of keys to be removed before generating new keys 2131 node_info_to_remove = [] 2132 2133 for node_uuid, node_name in node_uuid_name_map: 2134 if node_name == master_node_name: 2135 continue 2136 master_candidate = node_uuid in master_candidate_uuids 2137 potential_master_candidate = node_name in potential_master_candidates 2138 node_list.append((node_uuid, node_name, master_candidate, 2139 potential_master_candidate)) 2140 2141 if master_candidate: 2142 logging.debug("Fetching old SSH key from node '%s'.", node_name) 2143 old_pub_key = ssh.ReadRemoteSshPubKey(old_pub_keyfile, 2144 node_name, cluster_name, 2145 ssh_port_map[node_name], 2146 False, # ask_key 2147 False) # key_check 2148 if old_pub_key != old_master_key: 2149 # If we are already in a multi-key setup (that is past Ganeti 2.12), 2150 # we can safely remove the old key of the node.
Otherwise, we cannot 2151 # remove that node's key, because it is also the master node's key 2152 # and that would terminate all communication from the master to the 2153 # node. 2154 node_info_to_remove.append(SshRemoveNodeInfo( 2155 uuid=node_uuid, 2156 name=node_name, 2157 from_authorized_keys=master_candidate, 2158 from_public_keys=False, 2159 clear_authorized_keys=False, 2160 clear_public_keys=False)) 2161 else: 2162 logging.debug("Old key of node '%s' is the same as the current master" 2163 " key. Not deleting that key on the node.", node_name) 2164 2165 logging.debug("Removing old SSH keys of all master candidates.") 2166 if node_info_to_remove: 2167 node_errors = RemoveNodeSshKeyBulk( 2168 node_info_to_remove, 2169 master_candidate_uuids, 2170 potential_master_candidates, 2171 master_uuid=master_node_uuid, 2172 pub_key_file=ganeti_pub_keys_file, 2173 ssconf_store=ssconf_store, 2174 noded_cert_file=noded_cert_file, 2175 run_cmd_fn=run_cmd_fn, 2176 ssh_update_debug=ssh_update_debug, 2177 ssh_update_verbose=ssh_update_verbose) 2178 if node_errors: 2179 all_node_errors = all_node_errors + node_errors 2180 2181 for (node_uuid, node_name, master_candidate, potential_master_candidate) \ 2182 in node_list: 2183 2184 logging.debug("Generating new SSH key for node '%s'.", node_name) 2185 _GenerateNodeSshKey(node_name, ssh_port_map, new_key_type, new_key_bits, 2186 ssconf_store=ssconf_store, 2187 noded_cert_file=noded_cert_file, 2188 run_cmd_fn=run_cmd_fn, 2189 ssh_update_verbose=ssh_update_verbose, 2190 ssh_update_debug=ssh_update_debug) 2191 2192 try: 2193 logging.debug("Fetching newly created SSH key from node '%s'.", node_name) 2194 pub_key = ssh.ReadRemoteSshPubKey(new_pub_keyfile, 2195 node_name, cluster_name, 2196 ssh_port_map[node_name], 2197 False, # ask_key 2198 False) # key_check 2199 except: 2200 raise errors.SshUpdateError("Could not fetch key of node %s" 2201 " (UUID %s)" % (node_name, node_uuid)) 2202 2203 if potential_master_candidate: 2204 ssh.RemovePublicKey(node_uuid, key_file=ganeti_pub_keys_file) 2205 ssh.AddPublicKey(node_uuid, pub_key, key_file=ganeti_pub_keys_file) 2206 2207 node_info = SshAddNodeInfo(name=node_name, 2208 uuid=node_uuid, 2209 to_authorized_keys=master_candidate, 2210 to_public_keys=potential_master_candidate, 2211 get_public_keys=True) 2212 node_keys_to_add.append(node_info) 2213 2214 node_errors = AddNodeSshKeyBulk( 2215 node_keys_to_add, potential_master_candidates, 2216 pub_key_file=ganeti_pub_keys_file, ssconf_store=ssconf_store, 2217 noded_cert_file=noded_cert_file, 2218 run_cmd_fn=run_cmd_fn, 2219 ssh_update_debug=ssh_update_debug, 2220 ssh_update_verbose=ssh_update_verbose) 2221 if node_errors: 2222 all_node_errors = all_node_errors + node_errors 2223 2224 # Renewing the master node's key 2225 2226 # Preserve the old keys for now 2227 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, 2228 ganeti_pub_keys_file) 2229 2230 # Generate a new master key with a suffix, don't touch the old one for now 2231 logging.debug("Generate new ssh key of master.") 2232 _GenerateNodeSshKey(master_node_name, ssh_port_map, 2233 new_key_type, new_key_bits, 2234 ssconf_store=ssconf_store, 2235 noded_cert_file=noded_cert_file, 2236 run_cmd_fn=run_cmd_fn, 2237 suffix=constants.SSHS_MASTER_SUFFIX, 2238 ssh_update_debug=ssh_update_debug, 2239 ssh_update_verbose=ssh_update_verbose) 2240 # Read newly created master key 2241 new_master_keys = ssh.ReadLocalSshPubKeys( 2242 [new_key_type], suffix=constants.SSHS_MASTER_SUFFIX) 2243 2244 # Replace master key in the 
master node's public key file 2245 ssh.RemovePublicKey(master_node_uuid, key_file=ganeti_pub_keys_file) 2246 for pub_key in new_master_keys: 2247 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=ganeti_pub_keys_file) 2248 2249 # Add the new master key to all nodes' public and authorized keys 2250 logging.debug("Add new master key to all nodes.") 2251 node_errors = AddNodeSshKey( 2252 master_node_uuid, master_node_name, potential_master_candidates, 2253 to_authorized_keys=True, to_public_keys=True, 2254 get_public_keys=False, pub_key_file=ganeti_pub_keys_file, 2255 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, 2256 run_cmd_fn=run_cmd_fn, 2257 ssh_update_debug=ssh_update_debug, 2258 ssh_update_verbose=ssh_update_verbose) 2259 if node_errors: 2260 all_node_errors = all_node_errors + node_errors 2261 2262 # Remove the old key file and rename the new key to the non-temporary filename 2263 ssh.ReplaceSshKeys(new_key_type, new_key_type, 2264 src_key_suffix=constants.SSHS_MASTER_SUFFIX) 2265 2266 # Remove the old key from the master's authorized keys 2267 (auth_key_file, _) = \ 2268 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 2269 ssh.RemoveAuthorizedKeys(auth_key_file, 2270 old_master_keys_by_uuid[master_node_uuid]) 2271 2272 # Remove the old key from all nodes' authorized keys files 2273 logging.debug("Remove the old master key from all nodes.") 2274 node_errors = RemoveNodeSshKey( 2275 master_node_uuid, master_node_name, master_candidate_uuids, 2276 potential_master_candidates, 2277 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True, 2278 from_public_keys=False, clear_authorized_keys=False, 2279 clear_public_keys=False, 2280 pub_key_file=ganeti_pub_keys_file, 2281 ssconf_store=ssconf_store, 2282 noded_cert_file=noded_cert_file, 2283 run_cmd_fn=run_cmd_fn, 2284 ssh_update_debug=ssh_update_debug, 2285 ssh_update_verbose=ssh_update_verbose) 2286 if node_errors: 2287 all_node_errors = all_node_errors + node_errors 2288 2289 return all_node_errors
2290
2291 2292 def GetBlockDevSizes(devices):
2293 """Return the size of the given block devices 2294 2295 @type devices: list 2296 @param devices: list of block device nodes to query 2297 @rtype: dict 2298 @return: 2299 dictionary of all block devices under /dev (key). The value is their 2300 size in MiB. 2301 2302 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 2303 2304 """ 2305 DEV_PREFIX = "/dev/" 2306 blockdevs = {} 2307 2308 for devpath in devices: 2309 if not utils.IsBelowDir(DEV_PREFIX, devpath): 2310 continue 2311 2312 try: 2313 st = os.stat(devpath) 2314 except EnvironmentError, err: 2315 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 2316 continue 2317 2318 if stat.S_ISBLK(st.st_mode): 2319 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 2320 if result.failed: 2321 # We don't want to fail, just do not list this device as available 2322 logging.warning("Cannot get size for block device %s", devpath) 2323 continue 2324 2325 size = int(result.stdout) / (1024 * 1024) 2326 blockdevs[devpath] = size 2327 return blockdevs
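The same probing strategy, reduced to a standalone helper: stat() the path, confirm it is a block device, then ask blockdev for the byte size (typically needs root).

import os
import stat
import subprocess

def blockdev_size_mib(devpath):
  # Returns the device size in MiB, or None if it cannot be determined.
  try:
    st = os.stat(devpath)
  except EnvironmentError:
    return None
  if not stat.S_ISBLK(st.st_mode):
    return None
  try:
    out = subprocess.check_output(["blockdev", "--getsize64", devpath])
  except (EnvironmentError, subprocess.CalledProcessError):
    return None
  return int(out) // (1024 * 1024)

print(blockdev_size_mib("/dev/sda"))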
2328
2329 2330 def GetVolumeList(vg_names):
2331 """Compute the list of logical volumes and their size. 2332 2333 @type vg_names: list 2334 @param vg_names: the volume groups whose LVs we should list, or 2335 empty for all volume groups 2336 @rtype: dict 2337 @return: 2338 dictionary of all partitions (key) with value being a tuple of 2339 their size (in MiB), inactive and online status:: 2340 2341 {'xenvg/test1': ('20.06', True, True)} 2342 2343 in case of errors, a string is returned with the error 2344 details. 2345 2346 """ 2347 lvs = {} 2348 sep = "|" 2349 if not vg_names: 2350 vg_names = [] 2351 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2352 "--separator=%s" % sep, 2353 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 2354 if result.failed: 2355 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 2356 2357 for line in result.stdout.splitlines(): 2358 line = line.strip() 2359 match = _LVSLINE_REGEX.match(line) 2360 if not match: 2361 logging.error("Invalid line returned from lvs output: '%s'", line) 2362 continue 2363 vg_name, name, size, attr = match.groups() 2364 inactive = attr[4] == "-" 2365 online = attr[5] == "o" 2366 virtual = attr[0] == "v" 2367 if virtual: 2368 # we don't want to report such volumes as existing, since they 2369 # don't really hold data 2370 continue 2371 lvs[vg_name + "/" + name] = (size, inactive, online) 2372 2373 return lvs
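_LVSLINE_REGEX itself is defined elsewhere in the module and not shown in this excerpt; below is a plausible reconstruction together with one parsed sample line.

import re

# Illustrative pattern for "vg|lv|size|attr" as produced by the lvs call
# above; the real _LVSLINE_REGEX may differ in detail.
LVS_LINE = re.compile(r"^\s*([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})$")

match = LVS_LINE.match("  xenvg|test1|20.06|-wi-ao")
vg_name, name, size, attr = match.groups()
inactive = attr[4] == "-"  # fifth attr char: 'a' means active
online = attr[5] == "o"    # sixth attr char: 'o' means open
print({vg_name + "/" + name: (size, inactive, online)})
# -> {'xenvg/test1': ('20.06', False, True)}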
2374
2375 2376 def ListVolumeGroups():
2377 """List the volume groups and their size. 2378 2379 @rtype: dict 2380 @return: dictionary with volume group names as keys and their 2381 sizes as values 2382 2383 """ 2384 return utils.ListVolumeGroups()
2385
2386 2387 def NodeVolumes():
2388 """List all volumes on this node. 2389 2390 @rtype: list 2391 @return: 2392 A list of dictionaries, each having four keys: 2393 - name: the logical volume name, 2394 - size: the size of the logical volume 2395 - dev: the physical device on which the LV lives 2396 - vg: the volume group to which it belongs 2397 2398 In case of errors, we return an empty list and log the 2399 error. 2400 2401 Note that since a logical volume can live on multiple physical 2402 volumes, the resulting list might include a logical volume 2403 multiple times. 2404 2405 """ 2406 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2407 "--separator=|", 2408 "--options=lv_name,lv_size,devices,vg_name"]) 2409 if result.failed: 2410 _Fail("Failed to list logical volumes, lvs output: %s", 2411 result.output) 2412 2413 def parse_dev(dev): 2414 return dev.split("(")[0]
2415 2416 def handle_dev(dev): 2417 return [parse_dev(x) for x in dev.split(",")] 2418 2419 def map_line(line): 2420 line = [v.strip() for v in line] 2421 return [{"name": line[0], "size": line[1], 2422 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 2423 2424 all_devs = [] 2425 for line in result.stdout.splitlines(): 2426 if line.count("|") >= 3: 2427 all_devs.extend(map_line(line.split("|"))) 2428 else: 2429 logging.warning("Strange line in the output from lvs: '%s'", line) 2430 return all_devs 2431
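The helper trio above is easiest to follow on a concrete lvs line; this standalone rerun of the same parsing shows why one logical volume can yield several entries.

def parse_dev(dev):
  # "/dev/sda1(0)" -> "/dev/sda1": drop the LVM extent suffix.
  return dev.split("(")[0]

def handle_dev(dev):
  # An LV may span several physical devices, comma-separated.
  return [parse_dev(x) for x in dev.split(",")]

sample = " test1 | 20.06 | /dev/sda1(0),/dev/sdb1(0) | xenvg "
fields = [v.strip() for v in sample.split("|")]
print([{"name": fields[0], "size": fields[1], "dev": dev, "vg": fields[3]}
       for dev in handle_dev(fields[2])])
# Two dictionaries for the same LV, one per physical device.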
2432 2433 def BridgesExist(bridges_list):
2434 """Check whether the given bridges exist on the current node. 2435 2436 @raise RPCFail: if any of the given bridges 2437 is missing 2438 2439 """ 2440 missing = [] 2441 for bridge in bridges_list: 2442 if not utils.BridgeExists(bridge): 2443 missing.append(bridge) 2444 2445 if missing: 2446 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2447
2448 2449 def GetInstanceListForHypervisor(hname, hvparams=None, 2450 get_hv_fn=hypervisor.GetHypervisor):
2451 """Provides a list of instances of the given hypervisor. 2452 2453 @type hname: string 2454 @param hname: name of the hypervisor 2455 @type hvparams: dict of strings 2456 @param hvparams: hypervisor parameters for the given hypervisor 2457 @type get_hv_fn: function 2458 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2459 name; optional parameter to increase testability 2460 2461 @rtype: list 2462 @return: a list of all running instances on the current node 2463 - instance1.example.com 2464 - instance2.example.com 2465 2466 """ 2467 try: 2468 return get_hv_fn(hname).ListInstances(hvparams=hvparams) 2469 except errors.HypervisorError, err: 2470 _Fail("Error enumerating instances (hypervisor %s): %s", 2471 hname, err, exc=True)
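The get_hv_fn parameter exists purely for dependency injection; here is a sketch of how a unit test might exploit it (FakeHypervisor is hypothetical).

class FakeHypervisor(object):
  # Minimal stub with the one method the function under test calls.
  def ListInstances(self, hvparams=None):
    return ["instance1.example.com", "instance2.example.com"]

def get_instance_list_for_hypervisor(hname, hvparams=None, get_hv_fn=None):
  # Same core as GetInstanceListForHypervisor, minus the error handling.
  return get_hv_fn(hname).ListInstances(hvparams=hvparams)

print(get_instance_list_for_hypervisor("fake",
                                       get_hv_fn=lambda _: FakeHypervisor()))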
2472
2473 2474 def GetInstanceList(hypervisor_list, all_hvparams=None, 2475 get_hv_fn=hypervisor.GetHypervisor):
2476 """Provides a list of instances. 2477 2478 @type hypervisor_list: list 2479 @param hypervisor_list: the list of hypervisors to query information 2480 @type all_hvparams: dict of dict of strings 2481 @param all_hvparams: a dictionary mapping hypervisor types to respective 2482 cluster-wide hypervisor parameters 2483 @type get_hv_fn: function 2484 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2485 name; optional parameter to increase testability 2486 2487 @rtype: list 2488 @return: a list of all running instances on the current node 2489 - instance1.example.com 2490 - instance2.example.com 2491 2492 """ 2493 results = [] 2494 for hname in hypervisor_list: 2495 hvparams = all_hvparams[hname] 2496 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 2497 get_hv_fn=get_hv_fn)) 2498 return results
2499
2500 2501 def GetInstanceInfo(instance, hname, hvparams=None):
2502 """Gives back the information about an instance as a dictionary. 2503 2504 @type instance: string 2505 @param instance: the instance name 2506 @type hname: string 2507 @param hname: the hypervisor type of the instance 2508 @type hvparams: dict of strings 2509 @param hvparams: the instance's hvparams 2510 2511 @rtype: dict 2512 @return: dictionary with the following keys: 2513 - memory: memory size of instance (int) 2514 - state: state of instance (HvInstanceState) 2515 - time: cpu time of instance (float) 2516 - vcpus: the number of vcpus (int) 2517 2518 """ 2519 output = {} 2520 2521 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 2522 hvparams=hvparams) 2523 if iinfo is not None: 2524 output["memory"] = iinfo[2] 2525 output["vcpus"] = iinfo[3] 2526 output["state"] = iinfo[4] 2527 output["time"] = iinfo[5] 2528 2529 return output
2530
2531 2532 def GetInstanceMigratable(instance):
2533 """Computes whether an instance can be migrated. 2534 2535 @type instance: L{objects.Instance} 2536 @param instance: object representing the instance to be checked. 2537 2538 @rtype: None 2539 @raise RPCFail: if the instance is not running; a missing disk 2540 symlink is only logged as a warning and does not make the 2541 instance non-migratable 2542 2543 """ 2544 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2545 iname = instance.name 2546 if iname not in hyper.ListInstances(hvparams=instance.hvparams): 2547 _Fail("Instance %s is not running", iname) 2548 2549 for idx in range(len(instance.disks_info)): 2550 link_name = _GetBlockDevSymlinkPath(iname, idx) 2551 if not os.path.islink(link_name): 2552 logging.warning("Instance %s is missing symlink %s for disk %d", 2553 iname, link_name, idx)
2554
2555 2556 def GetAllInstancesInfo(hypervisor_list, all_hvparams):
2557 """Gather data about all instances. 2558 2559 This is the equivalent of L{GetInstanceInfo}, except that it 2560 computes data for all instances at once, thus being faster if one 2561 needs data about more than one instance. 2562 2563 @type hypervisor_list: list 2564 @param hypervisor_list: list of hypervisors to query for instance data 2565 @type all_hvparams: dict of dict of strings 2566 @param all_hvparams: mapping of hypervisor names to hvparams 2567 2568 @rtype: dict 2569 @return: dictionary of instance: data, with data having the following keys: 2570 - memory: memory size of instance (int) 2571 - state: xen state of instance (string) 2572 - time: cpu time of instance (float) 2573 - vcpus: the number of vcpus 2574 2575 """ 2576 output = {} 2577 for hname in hypervisor_list: 2578 hvparams = all_hvparams[hname] 2579 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 2580 if iinfo: 2581 for name, _, memory, vcpus, state, times in iinfo: 2582 value = { 2583 "memory": memory, 2584 "vcpus": vcpus, 2585 "state": state, 2586 "time": times, 2587 } 2588 if name in output: 2589 # we only check static parameters, like memory and vcpus, 2590 # and not state and time which can change between the 2591 # invocations of the different hypervisors 2592 for key in "memory", "vcpus": 2593 if value[key] != output[name][key]: 2594 _Fail("Instance %s is running twice" 2595 " with different parameters", name) 2596 output[name] = value 2597 2598 return output
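The duplicate-instance check above can be isolated as follows; only the static parameters must agree, since state and CPU time naturally differ between hypervisor invocations.

def merge_instance_info(per_hypervisor_results):
  output = {}
  for results in per_hypervisor_results:
    for name, value in results.items():
      if name in output:
        # Compare only static parameters, as the code above does.
        for key in ("memory", "vcpus"):
          if value[key] != output[name][key]:
            raise ValueError("instance %s is running twice with different"
                             " parameters" % name)
      output[name] = value
  return output

xen = {"inst1": {"memory": 512, "vcpus": 1, "state": "r", "time": 1.0}}
kvm = {"inst1": {"memory": 512, "vcpus": 1, "state": "b", "time": 2.0}}
print(merge_instance_info([xen, kvm]))  # differing state/time is accepted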
2599
2600 2601 def GetInstanceConsoleInfo(instance_param_dict, 2602 get_hv_fn=hypervisor.GetHypervisor):
2603 """Gather data about the console access of a set of instances of this node. 2604 2605 This function assumes that the caller already knows which instances are on 2606 this node, by calling a function such as L{GetAllInstancesInfo} or 2607 L{GetInstanceList}. 2608 2609 For every instance, a large amount of configuration data needs to be 2610 provided to the hypervisor interface in order to receive the console 2611 information. Whether this could or should be cut down can be discussed. 2612 The information is provided in a dictionary indexed by instance name, 2613 allowing any number of instance queries to be done. 2614 2615 @type instance_param_dict: dict of string to tuple of dictionaries, where the 2616 dictionaries represent: L{objects.Instance}, L{objects.Node}, 2617 L{objects.NodeGroup}, HvParams, BeParams 2618 @param instance_param_dict: mapping of instance name to parameters necessary 2619 for console information retrieval 2620 2621 @rtype: dict 2622 @return: dictionary of instance: data, with data having the following keys: 2623 - instance: instance name 2624 - kind: console kind 2625 - message: used with kind == CONS_MESSAGE, indicates console to be 2626 unavailable, supplies error message 2627 - host: host to connect to 2628 - port: port to use 2629 - user: user for login 2630 - command: the command, broken into parts as an array 2631 - display: unknown, potentially unused? 2632 2633 """ 2634 2635 output = {} 2636 for inst_name in instance_param_dict: 2637 instance = instance_param_dict[inst_name]["instance"] 2638 pnode = instance_param_dict[inst_name]["node"] 2639 group = instance_param_dict[inst_name]["group"] 2640 hvparams = instance_param_dict[inst_name]["hvParams"] 2641 beparams = instance_param_dict[inst_name]["beParams"] 2642 2643 instance = objects.Instance.FromDict(instance) 2644 pnode = objects.Node.FromDict(pnode) 2645 group = objects.NodeGroup.FromDict(group) 2646 2647 h = get_hv_fn(instance.hypervisor) 2648 output[inst_name] = h.GetInstanceConsole(instance, pnode, group, 2649 hvparams, beparams).ToDict() 2650 2651 return output
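For reference, the expected shape of instance_param_dict, with purely illustrative values; the inner dictionaries are the ToDict() serializations of the corresponding objects, as the docstring above describes.

instance_param_dict = {
    "instance1.example.com": {
        "instance": {"name": "instance1.example.com", "hypervisor": "kvm"},
        "node": {"name": "node1.example.com"},
        "group": {"name": "default"},
        "hvParams": {"serial_console": True},
        "beParams": {"memory": 512},
    },
}
print(sorted(instance_param_dict["instance1.example.com"]))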
2652
2653 2654 def _InstanceLogName(kind, os_name, instance, component):
2655 """Compute the OS log filename for a given instance and operation. 2656 2657 The instance name and os name are passed in as strings since not all 2658 operations have these as part of an instance object. 2659 2660 @type kind: string 2661 @param kind: the operation type (e.g. add, import, etc.) 2662 @type os_name: string 2663 @param os_name: the os name 2664 @type instance: string 2665 @param instance: the name of the instance being imported/added/etc. 2666 @type component: string or None 2667 @param component: the name of the component of the instance being 2668 transferred 2669 2670 """ 2671 # TODO: Use tempfile.mkstemp to create unique filename 2672 if component: 2673 assert "/" not in component 2674 c_msg = "-%s" % component 2675 else: 2676 c_msg = "" 2677 base = ("%s-%s-%s%s-%s.log" % 2678 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 2679 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
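A self-contained approximation of the naming scheme; the log directory and the timestamp helper are stand-ins for pathutils.LOG_OS_DIR and utils.TimestampForFilename, whose exact output is not shown in this excerpt.

import os
import time

def instance_log_name(kind, os_name, instance, component,
                      log_dir="/var/log/ganeti/os"):
  # Colon-free timestamp so the name is safe on all filesystems.
  ts = time.strftime("%Y-%m-%d_%H_%M_%S")
  c_msg = "-%s" % component if component else ""
  return os.path.join(log_dir, "%s-%s-%s%s-%s.log" %
                      (kind, os_name, instance, c_msg, ts))

print(instance_log_name("add", "debootstrap", "inst1.example.com", None))
print(instance_log_name("import", "debootstrap", "inst1.example.com", "disk0"))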
2680
2681 2682 def InstanceOsAdd(instance, reinstall, debug):
2683 """Add an OS to an instance. 2684 2685 @type instance: L{objects.Instance} 2686 @param instance: Instance whose OS is to be installed 2687 @type reinstall: boolean 2688 @param reinstall: whether this is an instance reinstall 2689 @type debug: integer 2690 @param debug: debug level, passed to the OS scripts 2691 @rtype: None 2692 2693 """ 2694 inst_os = OSFromDisk(instance.os) 2695 2696 create_env = OSEnvironment(instance, inst_os, debug) 2697 if reinstall: 2698 create_env["INSTANCE_REINSTALL"] = "1" 2699 2700 logfile = _InstanceLogName("add", instance.os, instance.name, None) 2701 2702 result = utils.RunCmd([inst_os.create_script], env=create_env, 2703 cwd=inst_os.path, output=logfile, reset_env=True) 2704 if result.failed: 2705 logging.error("os create command '%s' returned error: %s, logfile: %s," 2706 " output: %s", result.cmd, result.fail_reason, logfile, 2707 result.output) 2708 lines = [utils.SafeEncode(val) 2709 for val in utils.TailFile(logfile, lines=20)] 2710 _Fail("OS create script failed (%s), last lines in the" 2711 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2712
2713 2714 def RunRenameInstance(instance, old_name, debug):
2715 """Run the OS rename script for an instance. 2716 2717 @type instance: L{objects.Instance} 2718 @param instance: Instance being renamed 2719 @type old_name: string 2720 @param old_name: previous instance name 2721 @type debug: integer 2722 @param debug: debug level, passed to the OS scripts 2723 @rtype: None 2724 @raise RPCFail: if the rename script fails 2725 2726 """ 2727 inst_os = OSFromDisk(instance.os) 2728 2729 rename_env = OSEnvironment(instance, inst_os, debug) 2730 rename_env["OLD_INSTANCE_NAME"] = old_name 2731 2732 logfile = _InstanceLogName("rename", instance.os, 2733 "%s-%s" % (old_name, instance.name), None) 2734 2735 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 2736 cwd=inst_os.path, output=logfile, reset_env=True) 2737 2738 if result.failed: 2739 logging.error("os rename command '%s' returned error: %s output: %s", 2740 result.cmd, result.fail_reason, result.output) 2741 lines = [utils.SafeEncode(val) 2742 for val in utils.TailFile(logfile, lines=20)] 2743 _Fail("OS rename script failed (%s), last lines in the" 2744 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2745
2746 2747 def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
2748 """Returns symlink path for block device. 2749 2750 """ 2751 if _dir is None: 2752 _dir = pathutils.DISK_LINKS_DIR 2753 2754 return utils.PathJoin(_dir, 2755 ("%s%s%s" % 2756 (instance_name, constants.DISK_SEPARATOR, idx)))
2757
2758 2759 def _SymlinkBlockDev(instance_name, device_path, idx):
2760 """Set up symlinks to an instance's block device. 2761 2762 This is an auxiliary function run when an instance is started (on the 2763 primary node) or when an instance is migrated (on the target node). 2764 2765 2766 @param instance_name: the name of the target instance 2767 @param device_path: path of the physical block device, on the node 2768 @param idx: the disk index 2769 @return: absolute path to the disk's symlink 2770 2771 """ 2772 # In case we have only a userspace access URI, device_path is None 2773 if not device_path: 2774 return None 2775 2776 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2777 try: 2778 os.symlink(device_path, link_name) 2779 except OSError, err: 2780 if err.errno == errno.EEXIST: 2781 if (not os.path.islink(link_name) or 2782 os.readlink(link_name) != device_path): 2783 os.remove(link_name) 2784 os.symlink(device_path, link_name) 2785 else: 2786 raise 2787 2788 return link_name
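The EEXIST handling above is a reusable idempotent-symlink pattern; extracted into a standalone helper with a throwaway demonstration:

import errno
import os
import tempfile

def force_symlink(target, link_name):
  # Create the link; tolerate an existing one only if it already points
  # at the right target, otherwise replace it.
  try:
    os.symlink(target, link_name)
  except OSError as err:
    if err.errno != errno.EEXIST:
      raise
    if not os.path.islink(link_name) or os.readlink(link_name) != target:
      os.remove(link_name)
      os.symlink(target, link_name)
  return link_name

d = tempfile.mkdtemp()
link = os.path.join(d, "inst1.example.com:0")
force_symlink("/dev/xvda", link)
force_symlink("/dev/xvdb", link)  # second call replaces the stale link
print(os.readlink(link))          # -> /dev/xvdb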
2789 2802
2803 2804 def _CalculateDeviceURI(instance, disk, device):
2805 """Get the URI for the device. 2806 2807 @type instance: L{objects.Instance} 2808 @param instance: the instance to which the disk belongs 2809 @type disk: L{objects.Disk} 2810 @param disk: the target disk object 2811 @type device: L{bdev.BlockDev} 2812 @param device: the corresponding BlockDevice 2813 @rtype: string 2814 @return: the device URI, if any, else None 2815 2816 """ 2817 access_mode = disk.params.get(constants.LDP_ACCESS, 2818 constants.DISK_KERNELSPACE) 2819 if access_mode == constants.DISK_USERSPACE: 2820 # This can raise errors.BlockDeviceError 2821 return device.GetUserspaceAccessUri(instance.hypervisor) 2822 else: 2823 return None
2824
2825 2826 def _GatherAndLinkBlockDevs(instance):
2827 """Set up an instance's block device(s). 2828 2829 This is run on the primary node at instance startup. The block 2830 devices must be already assembled. 2831 2832 @type instance: L{objects.Instance} 2833 @param instance: the instance whose disks we should assemble 2834 @rtype: list 2835 @return: list of (disk_object, link_name, drive_uri) 2836 2837 """ 2838 block_devices = [] 2839 for idx, disk in enumerate(instance.disks_info): 2840 device = _RecursiveFindBD(disk) 2841 if device is None: 2842 raise errors.BlockDeviceError("Block device '%s' is not set up." % 2843 str(disk)) 2844 device.Open() 2845 try: 2846 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx) 2847 except OSError, e: 2848 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 2849 e.strerror) 2850 uri = _CalculateDeviceURI(instance, disk, device) 2851 2852 block_devices.append((disk, link_name, uri)) 2853 2854 return block_devices
2855
2856 2857 def _IsInstanceUserDown(instance_info):
2858 return instance_info and \ 2859 "state" in instance_info and \ 2860 hv_base.HvInstanceState.IsShutdown(instance_info["state"])
2861
2862 2863 def _GetInstanceInfo(instance):
2864 """Helper function for L{GetInstanceInfo}.""" 2865 return GetInstanceInfo(instance.name, instance.hypervisor, 2866 hvparams=instance.hvparams)
2867
2868 2869 def StartInstance(instance, startup_paused, reason, store_reason=True):
2870 """Start an instance. 2871 2872 @type instance: L{objects.Instance} 2873 @param instance: the instance object 2874 @type startup_paused: bool 2875 @param startup_paused: whether to pause the instance at startup 2876 @type reason: list of reasons 2877 @param reason: the reason trail for this startup 2878 @type store_reason: boolean 2879 @param store_reason: whether to store the startup reason trail on file 2880 @rtype: None 2881 2882 """ 2883 instance_info = _GetInstanceInfo(instance) 2884 2885 if instance_info and not _IsInstanceUserDown(instance_info): 2886 logging.info("Instance '%s' already running, not starting", instance.name) 2887 return 2888 2889 try: 2890 block_devices = _GatherAndLinkBlockDevs(instance) 2891 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2892 hyper.StartInstance(instance, block_devices, startup_paused) 2893 if store_reason: 2894 _StoreInstReasonTrail(instance.name, reason) 2895 except errors.BlockDeviceError, err: 2896 _Fail("Block device error: %s", err, exc=True) 2897 except errors.HypervisorError, err: 2898 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2899 _Fail("Hypervisor error: %s", err, exc=True)
2900
2901 2902 def InstanceShutdown(instance, timeout, reason, store_reason=True):
2903 """Shut an instance down. 2904 2905 @note: this function uses polling with a hardcoded timeout. 2906 2907 @type instance: L{objects.Instance} 2908 @param instance: the instance object 2909 @type timeout: integer 2910 @param timeout: maximum timeout for soft shutdown 2911 @type reason: list of reasons 2912 @param reason: the reason trail for this shutdown 2913 @type store_reason: boolean 2914 @param store_reason: whether to store the shutdown reason trail on file 2915 @rtype: None 2916 2917 """ 2918 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2919 2920 if not _GetInstanceInfo(instance): 2921 logging.info("Instance '%s' not running, doing nothing", instance.name) 2922 return 2923 2924 class _TryShutdown(object): 2925 def __init__(self): 2926 self.tried_once = False
2927 2928 def __call__(self): 2929 try: 2930 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 2931 if store_reason: 2932 _StoreInstReasonTrail(instance.name, reason) 2933 except errors.HypervisorError, err: 2934 # if the instance no longer exists, consider this success and go to 2935 # cleanup; otherwise fail without retrying 2936 if _GetInstanceInfo(instance): 2937 _Fail("Failed to stop instance '%s': %s", instance.name, err) 2938 return 2939 2940 # TODO: Cleanup hypervisor implementations to prevent them from failing 2941 # silently. We could easily decide if we want to retry or not by using 2942 # HypervisorSoftError()/HypervisorHardError() 2943 self.tried_once = True 2944 if _GetInstanceInfo(instance): 2945 raise utils.RetryAgain() 2946 2947 try: 2948 utils.Retry(_TryShutdown(), 5, timeout) 2949 except utils.RetryTimeout: 2950 # the shutdown did not succeed 2951 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name) 2952 2953 try: 2954 hyper.StopInstance(instance, force=True) 2955 except errors.HypervisorError, err: 2956 # only raise an error if the instance still exists, otherwise 2957 # the error could simply be "instance ... unknown"! 2958 if _GetInstanceInfo(instance): 2959 _Fail("Failed to force stop instance '%s': %s", instance.name, err) 2960 2961 time.sleep(1) 2962 2963 if _GetInstanceInfo(instance): 2964 _Fail("Could not shut down instance '%s' even by destroy", instance.name) 2965 2966 try: 2967 hyper.CleanupInstance(instance.name) 2968 except errors.HypervisorError, err: 2969 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 2970 2971 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2972
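The _TryShutdown idiom, a callable object so that state survives between polls, can be shown with a much-simplified stand-in for utils.Retry:

import time

class RetryAgain(Exception):
  pass

def retry(fn, delay, timeout):
  # Poll fn until it stops raising RetryAgain or the timeout expires.
  deadline = time.time() + timeout
  while True:
    try:
      return fn()
    except RetryAgain:
      if time.time() + delay > deadline:
        raise RuntimeError("timed out after %s seconds" % timeout)
      time.sleep(delay)

class TryShutdown(object):
  def __init__(self, polls_needed):
    # tried_once lets the real code ask the hypervisor to retry harder
    # on the second and later attempts.
    self.tried_once = False
    self.polls_needed = polls_needed

  def __call__(self):
    self.tried_once = True
    self.polls_needed -= 1
    if self.polls_needed > 0:
      raise RetryAgain()  # instance still up, poll again

retry(TryShutdown(3), 0.01, 1.0)
print("shut down")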
2973 2974 def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2975 """Reboot an instance. 2976 2977 @type instance: L{objects.Instance} 2978 @param instance: the instance object to reboot 2979 @type reboot_type: str 2980 @param reboot_type: the type of reboot, one of the following 2981 constants: 2982 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 2983 instance OS, do not recreate the VM 2984 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 2985 restart the VM (at the hypervisor level) 2986 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 2987 not accepted here, since that mode is handled differently, in 2988 cmdlib, and translates into full stop and start of the 2989 instance (instead of a call_instance_reboot RPC) 2990 @type shutdown_timeout: integer 2991 @param shutdown_timeout: maximum timeout for soft shutdown 2992 @type reason: list of reasons 2993 @param reason: the reason trail for this reboot 2994 @rtype: None 2995 2996 """ 2997 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown' 2998 # because those functions simply 'return' on error whereas this one 2999 # raises an exception with '_Fail' 3000 if not _GetInstanceInfo(instance): 3001 _Fail("Cannot reboot instance '%s' that is not running", instance.name) 3002 3003 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3004 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 3005 try: 3006 hyper.RebootInstance(instance) 3007 except errors.HypervisorError, err: 3008 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err) 3009 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 3010 try: 3011 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 3012 result = StartInstance(instance, False, reason, store_reason=False) 3013 _StoreInstReasonTrail(instance.name, reason) 3014 return result 3015 except errors.HypervisorError, err: 3016 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err) 3017 else: 3018 _Fail("Invalid reboot_type received: '%s'", reboot_type)
3019
3020 3021 def InstanceBalloonMemory(instance, memory):
3022 """Resize an instance's memory. 3023 3024 @type instance: L{objects.Instance} 3025 @param instance: the instance object 3026 @type memory: int 3027 @param memory: new memory amount in MB 3028 @rtype: None 3029 3030 """ 3031 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3032 running = hyper.ListInstances(hvparams=instance.hvparams) 3033 if instance.name not in running: 3034 logging.info("Instance %s is not running, cannot balloon", instance.name) 3035 return 3036 try: 3037 hyper.BalloonInstanceMemory(instance, memory) 3038 except errors.HypervisorError, err: 3039 _Fail("Failed to balloon instance memory: %s", err, exc=True)
3040
3041 3042 def MigrationInfo(instance):
3043 """Gather information about an instance to be migrated. 3044 3045 @type instance: L{objects.Instance} 3046 @param instance: the instance definition 3047 3048 """ 3049 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3050 try: 3051 info = hyper.MigrationInfo(instance) 3052 except errors.HypervisorError, err: 3053 _Fail("Failed to fetch migration information: %s", err, exc=True) 3054 return info
3055
3056 3057 def AcceptInstance(instance, info, target):
3058 """Prepare the node to accept an instance. 3059 3060 @type instance: L{objects.Instance} 3061 @param instance: the instance definition 3062 @type info: string/data (opaque) 3063 @param info: migration information, from the source node 3064 @type target: string 3065 @param target: target host (usually ip), on this node 3066 3067 """ 3068 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3069 try: 3070 hyper.AcceptInstance(instance, info, target) 3071 except errors.HypervisorError, err: 3072 _Fail("Failed to accept instance: %s", err, exc=True)
3073
3074 3075 def FinalizeMigrationDst(instance, info, success):
3076 """Finalize any preparation to accept an instance. 3077 3078 @type instance: L{objects.Instance} 3079 @param instance: the instance definition 3080 @type info: string/data (opaque) 3081 @param info: migration information, from the source node 3082 @type success: boolean 3083 @param success: whether the migration was a success or a failure 3084 3085 """ 3086 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3087 try: 3088 hyper.FinalizeMigrationDst(instance, info, success) 3089 except errors.HypervisorError, err: 3090 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
3091
3092 3093 def MigrateInstance(cluster_name, instance, target, live):
3094 """Migrates an instance to another node. 3095 3096 @type cluster_name: string 3097 @param cluster_name: name of the cluster 3098 @type instance: L{objects.Instance} 3099 @param instance: the instance definition 3100 @type target: string 3101 @param target: the target node name 3102 @type live: boolean 3103 @param live: whether the migration should be done live or not (the 3104 interpretation of this parameter is left to the hypervisor) 3105 @raise RPCFail: if migration fails for some reason 3106 3107 """ 3108 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3109 3110 try: 3111 hyper.MigrateInstance(cluster_name, instance, target, live) 3112 except errors.HypervisorError, err: 3113 _Fail("Failed to migrate instance: %s", err, exc=True)
3114
3115 3116 def FinalizeMigrationSource(instance, success, live):
3117 """Finalize the instance migration on the source node. 3118 3119 @type instance: L{objects.Instance} 3120 @param instance: the instance definition of the migrated instance 3121 @type success: bool 3122 @param success: whether the migration succeeded or not 3123 @type live: bool 3124 @param live: whether the user requested a live migration or not 3125 @raise RPCFail: If the execution fails for some reason 3126 3127 """ 3128 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3129 3130 try: 3131 hyper.FinalizeMigrationSource(instance, success, live) 3132 except Exception, err: # pylint: disable=W0703 3133 _Fail("Failed to finalize the migration on the source node: %s", err, 3134 exc=True)
3135
3136 3137 def GetMigrationStatus(instance):
3138 """Get the migration status 3139 3140 @type instance: L{objects.Instance} 3141 @param instance: the instance that is being migrated 3142 @rtype: L{objects.MigrationStatus} 3143 @return: the status of the current migration (one of 3144 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 3145 progress info that can be retrieved from the hypervisor 3146 @raise RPCFail: If the migration status cannot be retrieved 3147 3148 """ 3149 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3150 try: 3151 return hyper.GetMigrationStatus(instance) 3152 except Exception, err: # pylint: disable=W0703 3153 _Fail("Failed to get migration status: %s", err, exc=True)
3154
3155 3156 def HotplugDevice(instance, action, dev_type, device, extra, seq):
3157 """Hotplug a device 3158 3159 Hotplug is currently supported only for KVM Hypervisor. 3160 @type instance: L{objects.Instance} 3161 @param instance: the instance to which we hotplug a device 3162 @type action: string 3163 @param action: the hotplug action to perform 3164 @type dev_type: string 3165 @param dev_type: the device type to hotplug 3166 @type device: either L{objects.NIC} or L{objects.Disk} 3167 @param device: the device object to hotplug 3168 @type extra: tuple 3169 @param extra: extra info used for disk hotplug (disk link, drive uri) 3170 @type seq: int 3171 @param seq: the index of the device from master perspective 3172 @raise RPCFail: in case instance does not have KVM hypervisor 3173 3174 """ 3175 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3176 try: 3177 hyper.VerifyHotplugSupport(instance, action, dev_type) 3178 except errors.HotplugError, err: 3179 _Fail("Hotplug is not supported: %s", err) 3180 3181 if action == constants.HOTPLUG_ACTION_ADD: 3182 fn = hyper.HotAddDevice 3183 elif action == constants.HOTPLUG_ACTION_REMOVE: 3184 fn = hyper.HotDelDevice 3185 elif action == constants.HOTPLUG_ACTION_MODIFY: 3186 fn = hyper.HotModDevice 3187 else: 3188 assert action in constants.HOTPLUG_ALL_ACTIONS 3189 3190 return fn(instance, dev_type, device, extra, seq)
3191
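# Editor's sketch (not in the original module): hot-adding a NIC via the
# dispatcher above. `inst` is an objects.Instance and `nic` one of its
# objects.NIC entries; the `extra` tuple only carries information for disk
# hotplug, so a placeholder is passed here.
def _ExampleHotAddNic(inst, nic, seq):
  # Raises RPCFail unless the instance's hypervisor (KVM) supports this
  # action/device-type combination.
  return HotplugDevice(inst, constants.HOTPLUG_ACTION_ADD,
                       constants.HOTPLUG_TARGET_NIC, nic, (None, None), seq)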
3192 3193 -def HotplugSupported(instance):
3194 """Checks if hotplug is generally supported. 3195 3196 """ 3197 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3198 try: 3199 hyper.HotplugSupported(instance) 3200 except errors.HotplugError, err: 3201 _Fail("Hotplug is not supported: %s", err)
3202
3203 3204 -def ModifyInstanceMetadata(metadata):
3205 """Sends instance data to the metadata daemon. 3206 3207 Uses the Luxi transport layer to communicate with the metadata 3208 daemon configuration server. It starts the metadata daemon if it is 3209 not running. 3210 The daemon must be enabled during at configuration time. 3211 3212 @type metadata: dict 3213 @param metadata: instance metadata obtained by calling 3214 L{objects.Instance.ToDict} on an instance object 3215 3216 """ 3217 if not constants.ENABLE_METAD: 3218 raise errors.ProgrammerError("The metadata deamon is disabled, yet" 3219 " ModifyInstanceMetadata has been called") 3220 3221 if not utils.IsDaemonAlive(constants.METAD): 3222 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD]) 3223 if result.failed: 3224 raise errors.HypervisorError("Failed to start metadata daemon") 3225 3226 with contextlib.closing(metad.Client()) as client: 3227 client.UpdateConfig(metadata)
3228
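# Editor's sketch (not in the original module): after modifying an
# instance, its fresh definition can be pushed to the metadata daemon with
# a single call; ToDict() produces exactly the dict documented above.
def _ExamplePushMetadata(inst):
  ModifyInstanceMetadata(inst.ToDict())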
3229 3230 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
3231 """Creates a block device for an instance. 3232 3233 @type disk: L{objects.Disk} 3234 @param disk: the object describing the disk we should create 3235 @type size: int 3236 @param size: the size of the physical underlying device, in MiB 3237 @type owner: str 3238 @param owner: the name of the instance for which disk is created, 3239 used for device cache data 3240 @type on_primary: boolean 3241 @param on_primary: indicates if it is the primary node or not 3242 @type info: string 3243 @param info: string that will be sent to the physical device 3244 creation, used for example to set (LVM) tags on LVs 3245 @type excl_stor: boolean 3246 @param excl_stor: Whether exclusive_storage is active 3247 3248 @return: the new unique_id of the device (this can sometime be 3249 computed only after creation), or None. On secondary nodes, 3250 it's not required to return anything. 3251 3252 """ 3253 # TODO: remove the obsolete "size" argument 3254 # pylint: disable=W0613 3255 clist = [] 3256 if disk.children: 3257 for child in disk.children: 3258 try: 3259 crdev = _RecursiveAssembleBD(child, owner, on_primary) 3260 except errors.BlockDeviceError, err: 3261 _Fail("Can't assemble device %s: %s", child, err) 3262 if on_primary or disk.AssembleOnSecondary(): 3263 # we need the children open in case the device itself has to 3264 # be assembled 3265 try: 3266 # pylint: disable=E1103 3267 crdev.Open() 3268 except errors.BlockDeviceError, err: 3269 _Fail("Can't make child '%s' read-write: %s", child, err) 3270 clist.append(crdev) 3271 3272 try: 3273 device = bdev.Create(disk, clist, excl_stor) 3274 except errors.BlockDeviceError, err: 3275 _Fail("Can't create block device: %s", err) 3276 3277 if on_primary or disk.AssembleOnSecondary(): 3278 try: 3279 device.Assemble() 3280 except errors.BlockDeviceError, err: 3281 _Fail("Can't assemble device after creation, unusual event: %s", err) 3282 if on_primary or disk.OpenOnSecondary(): 3283 try: 3284 device.Open(force=True) 3285 except errors.BlockDeviceError, err: 3286 _Fail("Can't make device r/w after creation, unusual event: %s", err) 3287 DevCacheManager.UpdateCache(device.dev_path, owner, 3288 on_primary, disk.iv_name) 3289 3290 device.SetInfo(info) 3291 3292 return device.unique_id
3293
3294 3295 -def _DumpDevice(source_path, target_path, offset, size, truncate):
3296 """This function images/wipes the device using a local file. 3297 3298 @type source_path: string 3299 @param source_path: path of the image or data source (e.g., "/dev/zero") 3300 3301 @type target_path: string 3302 @param target_path: path of the device to image/wipe 3303 3304 @type offset: int 3305 @param offset: offset in MiB in the output file 3306 3307 @type size: int 3308 @param size: maximum size in MiB to write (data source might be smaller) 3309 3310 @type truncate: bool 3311 @param truncate: whether the file should be truncated 3312 3313 @return: None 3314 @raise RPCFail: in case of failure 3315 3316 """ 3317 # Internal sizes are always in Mebibytes; if the following "dd" command 3318 # should use a different block size the offset and size given to this 3319 # function must be adjusted accordingly before being passed to "dd". 3320 block_size = constants.DD_BLOCK_SIZE 3321 3322 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset, 3323 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path, 3324 "count=%d" % size] 3325 3326 if not truncate: 3327 cmd.append("conv=notrunc") 3328 3329 result = utils.RunCmd(cmd) 3330 3331 if result.failed: 3332 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd, 3333 result.fail_reason, result.output)
3334
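# Editor's note (not in the original module): with the usual 1 MiB block
# size, _DumpDevice("/dev/zero", "/dev/xvdb", 128, 512, True) runs roughly
#
#   dd if=/dev/zero seek=128 bs=1048576 oflag=direct of=/dev/xvdb count=512
#
# i.e. it writes 512 MiB of zeroes starting 128 MiB into the device; with
# truncate=False, "conv=notrunc" is appended so an existing target file is
# not truncated first. The device path is illustrative.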
3335 3336 -def _DownloadAndDumpDevice(source_url, target_path, size):
3337 """This function images a device using a downloaded image file. 3338 3339 @type source_url: string 3340 @param source_url: URL of image to dump to disk 3341 3342 @type target_path: string 3343 @param target_path: path of the device to image 3344 3345 @type size: int 3346 @param size: maximum size in MiB to write (data source might be smaller) 3347 3348 @rtype: NoneType 3349 @return: None 3350 @raise RPCFail: in case of download or write failures 3351 3352 """ 3353 class DDParams(object): 3354 def __init__(self, current_size, total_size): 3355 self.current_size = current_size 3356 self.total_size = total_size 3357 self.image_size_error = False
3358 3359 def dd_write(ddparams, out): 3360 if ddparams.current_size < ddparams.total_size: 3361 ddparams.current_size += len(out) 3362 target_file.write(out) 3363 else: 3364 ddparams.image_size_error = True 3365 return -1 3366 3367 target_file = open(target_path, "r+") 3368 ddparams = DDParams(0, 1024 * 1024 * size) 3369 3370 curl = pycurl.Curl() 3371 curl.setopt(pycurl.VERBOSE, True) 3372 curl.setopt(pycurl.NOSIGNAL, True) 3373 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION) 3374 curl.setopt(pycurl.URL, source_url) 3375 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out)) 3376 3377 try: 3378 curl.perform() 3379 except pycurl.error: 3380 if ddparams.image_size_error: 3381 _Fail("Disk image larger than the disk") 3382 else: 3383 raise 3384 3385 target_file.close() 3386
3387 3388 -def BlockdevConvert(src_disk, target_disk):
3389 """Copies data from source block device to target. 3390 3391 This function gets the export and import commands from the source and 3392 target devices respectively, and then concatenates them to a single 3393 command using a pipe ("|"). Finally, executes the unified command that 3394 will transfer the data between the devices during the disk template 3395 conversion operation. 3396 3397 @type src_disk: L{objects.Disk} 3398 @param src_disk: the disk object we want to copy from 3399 @type target_disk: L{objects.Disk} 3400 @param target_disk: the disk object we want to copy to 3401 3402 @rtype: NoneType 3403 @return: None 3404 @raise RPCFail: in case of failure 3405 3406 """ 3407 src_dev = _RecursiveFindBD(src_disk) 3408 if src_dev is None: 3409 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid) 3410 3411 dest_dev = _RecursiveFindBD(target_disk) 3412 if dest_dev is None: 3413 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid) 3414 3415 src_cmd = src_dev.Export() 3416 dest_cmd = dest_dev.Import() 3417 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd), 3418 utils.ShellQuoteArgs(dest_cmd)) 3419 3420 result = utils.RunCmd(command) 3421 if result.failed: 3422 _Fail("Disk conversion command '%s' exited with error: %s; output: %s", 3423 result.cmd, result.fail_reason, result.output)
3424
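# Editor's note (not in the original module): for an LVM-to-RBD template
# conversion, for instance, the composed command would look roughly like
#
#   dd if=/dev/xenvg/<lv> <export args> | rbd import - <rbd volume>
#
# with the exact halves supplied by the source device's Export() and the
# target device's Import(); both halves are shell-quoted before being
# joined with the pipe.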
3425 3426 -def BlockdevWipe(disk, offset, size):
3427 """Wipes a block device. 3428 3429 @type disk: L{objects.Disk} 3430 @param disk: the disk object we want to wipe 3431 @type offset: int 3432 @param offset: The offset in MiB in the file 3433 @type size: int 3434 @param size: The size in MiB to write 3435 3436 """ 3437 try: 3438 rdev = _RecursiveFindBD(disk) 3439 except errors.BlockDeviceError: 3440 rdev = None 3441 3442 if not rdev: 3443 _Fail("Cannot wipe device %s: device not found", disk.iv_name) 3444 if offset < 0: 3445 _Fail("Negative offset") 3446 if size < 0: 3447 _Fail("Negative size") 3448 if offset > rdev.size: 3449 _Fail("Wipe offset is bigger than device size") 3450 if (offset + size) > rdev.size: 3451 _Fail("Wipe offset and size are bigger than device size") 3452 3453 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
3454
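# Editor's sketch (not in the original module): zeroing a device end to
# end is just a zero-offset, full-size call, since disk.size and the
# offset/size arguments share the same MiB units.
def _ExampleWipeWholeDevice(disk):
  BlockdevWipe(disk, 0, disk.size)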
3455 3456 -def BlockdevImage(disk, image, size):
3457 """Images a block device either by dumping a local file or 3458 downloading a URL. 3459 3460 @type disk: L{objects.Disk} 3461 @param disk: the disk object we want to image 3462 3463 @type image: string 3464 @param image: file path to the disk image be dumped 3465 3466 @type size: int 3467 @param size: The size in MiB to write 3468 3469 @rtype: NoneType 3470 @return: None 3471 @raise RPCFail: in case of failure 3472 3473 """ 3474 if not (utils.IsUrl(image) or os.path.exists(image)): 3475 _Fail("Image '%s' not found", image) 3476 3477 try: 3478 rdev = _RecursiveFindBD(disk) 3479 except errors.BlockDeviceError: 3480 rdev = None 3481 3482 if not rdev: 3483 _Fail("Cannot image device %s: device not found", disk.iv_name) 3484 if size < 0: 3485 _Fail("Negative size") 3486 if size > rdev.size: 3487 _Fail("Image size is bigger than device size") 3488 3489 if utils.IsUrl(image): 3490 _DownloadAndDumpDevice(image, rdev.dev_path, size) 3491 else: 3492 _DumpDevice(image, rdev.dev_path, 0, size, False)
3493
3494 3495 -def BlockdevPauseResumeSync(disks, pause):
3496 """Pause or resume the sync of the block device. 3497 3498 @type disks: list of L{objects.Disk} 3499 @param disks: the disks object we want to pause/resume 3500 @type pause: bool 3501 @param pause: Wheater to pause or resume 3502 3503 """ 3504 success = [] 3505 for disk in disks: 3506 try: 3507 rdev = _RecursiveFindBD(disk) 3508 except errors.BlockDeviceError: 3509 rdev = None 3510 3511 if not rdev: 3512 success.append((False, ("Cannot change sync for device %s:" 3513 " device not found" % disk.iv_name))) 3514 continue 3515 3516 result = rdev.PauseResumeSync(pause) 3517 3518 if result: 3519 success.append((result, None)) 3520 else: 3521 if pause: 3522 msg = "Pause" 3523 else: 3524 msg = "Resume" 3525 success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) 3526 3527 return success
3528
3529 3530 -def BlockdevRemove(disk):
3531 """Remove a block device. 3532 3533 @note: This is intended to be called recursively. 3534 3535 @type disk: L{objects.Disk} 3536 @param disk: the disk object we should remove 3537 @rtype: boolean 3538 @return: the success of the operation 3539 3540 """ 3541 msgs = [] 3542 try: 3543 rdev = _RecursiveFindBD(disk) 3544 except errors.BlockDeviceError, err: 3545 # probably can't attach 3546 logging.info("Can't attach to device %s in remove", disk) 3547 rdev = None 3548 if rdev is not None: 3549 r_path = rdev.dev_path 3550 3551 def _TryRemove(): 3552 try: 3553 rdev.Remove() 3554 return [] 3555 except errors.BlockDeviceError, err: 3556 return [str(err)]
3557 3558 msgs.extend(utils.SimpleRetry([], _TryRemove, 3559 constants.DISK_REMOVE_RETRY_INTERVAL, 3560 constants.DISK_REMOVE_RETRY_TIMEOUT)) 3561 3562 if not msgs: 3563 DevCacheManager.RemoveCache(r_path) 3564 3565 if disk.children: 3566 for child in disk.children: 3567 try: 3568 BlockdevRemove(child) 3569 except RPCFail, err: 3570 msgs.append(str(err)) 3571 3572 if msgs: 3573 _Fail("; ".join(msgs)) 3574
3575 3576 -def _RecursiveAssembleBD(disk, owner, as_primary):
3577 """Activate a block device for an instance. 3578 3579 This is run on the primary and secondary nodes for an instance. 3580 3581 @note: this function is called recursively. 3582 3583 @type disk: L{objects.Disk} 3584 @param disk: the disk we try to assemble 3585 @type owner: str 3586 @param owner: the name of the instance which owns the disk 3587 @type as_primary: boolean 3588 @param as_primary: if we should make the block device 3589 read/write 3590 3591 @return: the assembled device or None (in case no device 3592 was assembled) 3593 @raise errors.BlockDeviceError: in case there is an error 3594 during the activation of the children or the device 3595 itself 3596 3597 """ 3598 children = [] 3599 if disk.children: 3600 mcn = disk.ChildrenNeeded() 3601 if mcn == -1: 3602 mcn = 0 # max number of Nones allowed 3603 else: 3604 mcn = len(disk.children) - mcn # max number of Nones 3605 for chld_disk in disk.children: 3606 try: 3607 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) 3608 except errors.BlockDeviceError, err: 3609 if children.count(None) >= mcn: 3610 raise 3611 cdev = None 3612 logging.error("Error in child activation (but continuing): %s", 3613 str(err)) 3614 children.append(cdev) 3615 3616 if as_primary or disk.AssembleOnSecondary(): 3617 r_dev = bdev.Assemble(disk, children) 3618 result = r_dev 3619 if as_primary or disk.OpenOnSecondary(): 3620 r_dev.Open() 3621 DevCacheManager.UpdateCache(r_dev.dev_path, owner, 3622 as_primary, disk.iv_name) 3623 3624 else: 3625 result = True 3626 return result
3627
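# Editor's note (not in the original module): the "mcn" arithmetic above
# bounds how many children may fail to assemble. A disk whose
# ChildrenNeeded() returns -1 gets mcn = 0, so no child may come up as
# None; one that returns k tolerates up to len(children) - k missing
# children before the error is re-raised (DRBD, which can attach with
# degraded or absent local storage, is the case this exists for).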
3628 3629 -def BlockdevAssemble(disk, instance, as_primary, idx):
3630 """Activate a block device for an instance. 3631 3632 This is a wrapper over _RecursiveAssembleBD. 3633 3634 @rtype: str or boolean 3635 @return: a tuple with the C{/dev/...} path and the created symlink 3636 for primary nodes, and (C{True}, C{True}) for secondary nodes 3637 3638 """ 3639 try: 3640 result = _RecursiveAssembleBD(disk, instance.name, as_primary) 3641 if isinstance(result, BlockDev): 3642 # pylint: disable=E1103 3643 dev_path = result.dev_path 3644 link_name = None 3645 uri = None 3646 if as_primary: 3647 link_name = _SymlinkBlockDev(instance.name, dev_path, idx) 3648 uri = _CalculateDeviceURI(instance, disk, result) 3649 elif result: 3650 return result, result 3651 else: 3652 _Fail("Unexpected result from _RecursiveAssembleBD") 3653 except errors.BlockDeviceError, err: 3654 _Fail("Error while assembling disk: %s", err, exc=True) 3655 except OSError, err: 3656 _Fail("Error while symlinking disk: %s", err, exc=True) 3657 3658 return dev_path, link_name, uri
3659
3660 3661 -def BlockdevShutdown(disk):
3662 """Shut down a block device. 3663 3664 First, if the device is assembled (Attach() is successful), then 3665 the device is shutdown. Then the children of the device are 3666 shutdown. 3667 3668 This function is called recursively. Note that we don't cache the 3669 children or such, as oppossed to assemble, shutdown of different 3670 devices doesn't require that the upper device was active. 3671 3672 @type disk: L{objects.Disk} 3673 @param disk: the description of the disk we should 3674 shutdown 3675 @rtype: None 3676 3677 """ 3678 msgs = [] 3679 r_dev = _RecursiveFindBD(disk) 3680 if r_dev is not None: 3681 r_path = r_dev.dev_path 3682 try: 3683 r_dev.Shutdown() 3684 DevCacheManager.RemoveCache(r_path) 3685 except errors.BlockDeviceError, err: 3686 msgs.append(str(err)) 3687 3688 if disk.children: 3689 for child in disk.children: 3690 try: 3691 BlockdevShutdown(child) 3692 except RPCFail, err: 3693 msgs.append(str(err)) 3694 3695 if msgs: 3696 _Fail("; ".join(msgs))
3697
3698 3699 -def BlockdevAddchildren(parent_cdev, new_cdevs):
3700 """Extend a mirrored block device. 3701 3702 @type parent_cdev: L{objects.Disk} 3703 @param parent_cdev: the disk to which we should add children 3704 @type new_cdevs: list of L{objects.Disk} 3705 @param new_cdevs: the list of children which we should add 3706 @rtype: None 3707 3708 """ 3709 parent_bdev = _RecursiveFindBD(parent_cdev) 3710 if parent_bdev is None: 3711 _Fail("Can't find parent device '%s' in add children", parent_cdev) 3712 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] 3713 if new_bdevs.count(None) > 0: 3714 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs) 3715 parent_bdev.AddChildren(new_bdevs)
3716
3717 3718 -def BlockdevRemovechildren(parent_cdev, new_cdevs):
3719 """Shrink a mirrored block device. 3720 3721 @type parent_cdev: L{objects.Disk} 3722 @param parent_cdev: the disk from which we should remove children 3723 @type new_cdevs: list of L{objects.Disk} 3724 @param new_cdevs: the list of children which we should remove 3725 @rtype: None 3726 3727 """ 3728 parent_bdev = _RecursiveFindBD(parent_cdev) 3729 if parent_bdev is None: 3730 _Fail("Can't find parent device '%s' in remove children", parent_cdev) 3731 devs = [] 3732 for disk in new_cdevs: 3733 rpath = disk.StaticDevPath() 3734 if rpath is None: 3735 bd = _RecursiveFindBD(disk) 3736 if bd is None: 3737 _Fail("Can't find device %s while removing children", disk) 3738 else: 3739 devs.append(bd.dev_path) 3740 else: 3741 if not utils.IsNormAbsPath(rpath): 3742 _Fail("Strange path returned from StaticDevPath: '%s'", rpath) 3743 devs.append(rpath) 3744 parent_bdev.RemoveChildren(devs)
3745
3746 3747 -def BlockdevGetmirrorstatus(disks):
3748 """Get the mirroring status of a list of devices. 3749 3750 @type disks: list of L{objects.Disk} 3751 @param disks: the list of disks which we should query 3752 @rtype: disk 3753 @return: List of L{objects.BlockDevStatus}, one for each disk 3754 @raise errors.BlockDeviceError: if any of the disks cannot be 3755 found 3756 3757 """ 3758 stats = [] 3759 for dsk in disks: 3760 rbd = _RecursiveFindBD(dsk) 3761 if rbd is None: 3762 _Fail("Can't find device %s", dsk) 3763 3764 stats.append(rbd.CombinedSyncStatus()) 3765 3766 return stats
3767
3768 3769 -def BlockdevGetmirrorstatusMulti(disks):
3770 """Get the mirroring status of a list of devices. 3771 3772 @type disks: list of L{objects.Disk} 3773 @param disks: the list of disks which we should query 3774 @rtype: disk 3775 @return: List of tuples, (bool, status), one for each disk; bool denotes 3776 success/failure, status is L{objects.BlockDevStatus} on success, string 3777 otherwise 3778 3779 """ 3780 result = [] 3781 for disk in disks: 3782 try: 3783 rbd = _RecursiveFindBD(disk) 3784 if rbd is None: 3785 result.append((False, "Can't find device %s" % disk)) 3786 continue 3787 3788 status = rbd.CombinedSyncStatus() 3789 except errors.BlockDeviceError, err: 3790 logging.exception("Error while getting disk status") 3791 result.append((False, str(err))) 3792 else: 3793 result.append((True, status)) 3794 3795 assert len(disks) == len(result) 3796 3797 return result
3798
3799 3800 -def _RecursiveFindBD(disk):
3801 """Check if a device is activated. 3802 3803 If so, return information about the real device. 3804 3805 @type disk: L{objects.Disk} 3806 @param disk: the disk object we need to find 3807 3808 @return: None if the device can't be found, 3809 otherwise the device instance 3810 3811 """ 3812 children = [] 3813 if disk.children: 3814 for chdisk in disk.children: 3815 children.append(_RecursiveFindBD(chdisk)) 3816 3817 return bdev.FindDevice(disk, children)
3818
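# Editor's sketch (not in the original module): the usual "look up, then
# act" pattern built on the function above, as used throughout this
# module.
def _ExampleDevPathOrNone(disk):
  rdev = _RecursiveFindBD(disk)
  if rdev is None:
    # Not assembled on this node.
    return None
  return rdev.dev_path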
3819 3820 -def _OpenRealBD(disk):
3821 """Opens the underlying block device of a disk. 3822 3823 @type disk: L{objects.Disk} 3824 @param disk: the disk object we want to open 3825 3826 """ 3827 real_disk = _RecursiveFindBD(disk) 3828 if real_disk is None: 3829 _Fail("Block device '%s' is not set up", disk) 3830 3831 real_disk.Open() 3832 3833 return real_disk
3834
3835 3836 -def BlockdevFind(disk):
3837 """Check if a device is activated. 3838 3839 If it is, return information about the real device. 3840 3841 @type disk: L{objects.Disk} 3842 @param disk: the disk to find 3843 @rtype: None or objects.BlockDevStatus 3844 @return: None if the disk cannot be found, otherwise a the current 3845 information 3846 3847 """ 3848 try: 3849 rbd = _RecursiveFindBD(disk) 3850 except errors.BlockDeviceError, err: 3851 _Fail("Failed to find device: %s", err, exc=True) 3852 3853 if rbd is None: 3854 return None 3855 3856 return rbd.GetSyncStatus()
3857
3858 3859 -def BlockdevGetdimensions(disks):
3860 """Computes the size of the given disks. 3861 3862 If a disk is not found, returns None instead. 3863 3864 @type disks: list of L{objects.Disk} 3865 @param disks: the list of disk to compute the size for 3866 @rtype: list 3867 @return: list with elements None if the disk cannot be found, 3868 otherwise the pair (size, spindles), where spindles is None if the 3869 device doesn't support that 3870 3871 """ 3872 result = [] 3873 for cf in disks: 3874 try: 3875 rbd = _RecursiveFindBD(cf) 3876 except errors.BlockDeviceError: 3877 result.append(None) 3878 continue 3879 if rbd is None: 3880 result.append(None) 3881 else: 3882 result.append(rbd.GetActualDimensions()) 3883 return result
3884
3885 3886 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
3887 """Write a file to the filesystem. 3888 3889 This allows the master to overwrite(!) a file. It will only perform 3890 the operation if the file belongs to a list of configuration files. 3891 3892 @type file_name: str 3893 @param file_name: the target file name 3894 @type data: str 3895 @param data: the new contents of the file 3896 @type mode: int 3897 @param mode: the mode to give the file (can be None) 3898 @type uid: string 3899 @param uid: the owner of the file 3900 @type gid: string 3901 @param gid: the group of the file 3902 @type atime: float 3903 @param atime: the atime to set on the file (can be None) 3904 @type mtime: float 3905 @param mtime: the mtime to set on the file (can be None) 3906 @rtype: None 3907 3908 """ 3909 file_name = vcluster.LocalizeVirtualPath(file_name) 3910 3911 if not os.path.isabs(file_name): 3912 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) 3913 3914 if file_name not in _ALLOWED_UPLOAD_FILES: 3915 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'", 3916 file_name) 3917 3918 raw_data = _Decompress(data) 3919 3920 if not (isinstance(uid, basestring) and isinstance(gid, basestring)): 3921 _Fail("Invalid username/groupname type") 3922 3923 getents = runtime.GetEnts() 3924 uid = getents.LookupUser(uid) 3925 gid = getents.LookupGroup(gid) 3926 3927 utils.SafeWriteFile(file_name, None, 3928 data=raw_data, mode=mode, uid=uid, gid=gid, 3929 atime=atime, mtime=mtime)
3930
3931 3932 -def RunOob(oob_program, command, node, timeout):
3933 """Executes oob_program with given command on given node. 3934 3935 @param oob_program: The path to the executable oob_program 3936 @param command: The command to invoke on oob_program 3937 @param node: The node given as an argument to the program 3938 @param timeout: Timeout after which we kill the oob program 3939 3940 @return: stdout 3941 @raise RPCFail: If execution fails for some reason 3942 3943 """ 3944 result = utils.RunCmd([oob_program, command, node], timeout=timeout) 3945 3946 if result.failed: 3947 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, 3948 result.fail_reason, result.output) 3949 3950 return result.stdout
3951
3952 3953 -def _OSOndiskAPIVersion(os_dir):
3954 """Compute and return the API version of a given OS. 3955 3956 This function will try to read the API version of the OS residing in 3957 the 'os_dir' directory. 3958 3959 @type os_dir: str 3960 @param os_dir: the directory in which we should look for the OS 3961 @rtype: tuple 3962 @return: tuple (status, data) with status denoting the validity and 3963 data holding either the valid versions or an error message 3964 3965 """ 3966 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE) 3967 3968 try: 3969 st = os.stat(api_file) 3970 except EnvironmentError, err: 3971 return False, ("Required file '%s' not found under path %s: %s" % 3972 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) 3973 3974 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3975 return False, ("File '%s' in %s is not a regular file" % 3976 (constants.OS_API_FILE, os_dir)) 3977 3978 try: 3979 api_versions = utils.ReadFile(api_file).splitlines() 3980 except EnvironmentError, err: 3981 return False, ("Error while reading the API version file at %s: %s" % 3982 (api_file, utils.ErrnoOrStr(err))) 3983 3984 try: 3985 api_versions = [int(version.strip()) for version in api_versions] 3986 except (TypeError, ValueError), err: 3987 return False, ("API version(s) can't be converted to integer: %s" % 3988 str(err)) 3989 3990 return True, api_versions
3991
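# Editor's note (not in the original module): the API version file is a
# plain-text list with one integer per line. A file containing the two
# lines "20" and "15" makes the function above return (True, [20, 15]);
# a non-numeric line yields (False, "API version(s) can't be converted
# to integer: ...") instead.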
3992 3993 -def DiagnoseOS(top_dirs=None):
3994 """Compute the validity for all OSes. 3995 3996 @type top_dirs: list 3997 @param top_dirs: the list of directories in which to 3998 search (if not given defaults to 3999 L{pathutils.OS_SEARCH_PATH}) 4000 @rtype: list of L{objects.OS} 4001 @return: a list of tuples (name, path, status, diagnose, variants, 4002 parameters, api_version) for all (potential) OSes under all 4003 search paths, where: 4004 - name is the (potential) OS name 4005 - path is the full path to the OS 4006 - status True/False is the validity of the OS 4007 - diagnose is the error message for an invalid OS, otherwise empty 4008 - variants is a list of supported OS variants, if any 4009 - parameters is a list of (name, help) parameters, if any 4010 - api_version is a list of support OS API versions 4011 4012 """ 4013 if top_dirs is None: 4014 top_dirs = pathutils.OS_SEARCH_PATH 4015 4016 result = [] 4017 for dir_name in top_dirs: 4018 if os.path.isdir(dir_name): 4019 try: 4020 f_names = utils.ListVisibleFiles(dir_name) 4021 except EnvironmentError, err: 4022 logging.exception("Can't list the OS directory %s: %s", dir_name, err) 4023 break 4024 for name in f_names: 4025 os_path = utils.PathJoin(dir_name, name) 4026 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name) 4027 if status: 4028 diagnose = "" 4029 variants = os_inst.supported_variants 4030 parameters = os_inst.supported_parameters 4031 api_versions = os_inst.api_versions 4032 trusted = False if os_inst.create_script_untrusted else True 4033 else: 4034 diagnose = os_inst 4035 variants = parameters = api_versions = [] 4036 trusted = True 4037 result.append((name, os_path, status, diagnose, variants, 4038 parameters, api_versions, trusted)) 4039 4040 return result
4041
4042 4043 -def _TryOSFromDisk(name, base_dir=None):
4044 """Create an OS instance from disk. 4045 4046 This function will return an OS instance if the given name is a 4047 valid OS name. 4048 4049 @type base_dir: string 4050 @keyword base_dir: Base directory containing OS installations. 4051 Defaults to a search in all the OS_SEARCH_PATH dirs. 4052 @rtype: tuple 4053 @return: success and either the OS instance if we find a valid one, 4054 or error message 4055 4056 """ 4057 if base_dir is None: 4058 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) 4059 else: 4060 os_dir = utils.FindFile(name, [base_dir], os.path.isdir) 4061 4062 if os_dir is None: 4063 return False, "Directory for OS %s not found in search path" % name 4064 4065 status, api_versions = _OSOndiskAPIVersion(os_dir) 4066 if not status: 4067 # push the error up 4068 return status, api_versions 4069 4070 if not constants.OS_API_VERSIONS.intersection(api_versions): 4071 return False, ("API version mismatch for path '%s': found %s, want %s." % 4072 (os_dir, api_versions, constants.OS_API_VERSIONS)) 4073 4074 # OS Files dictionary, we will populate it with the absolute path 4075 # names; if the value is True, then it is a required file, otherwise 4076 # an optional one 4077 os_files = dict.fromkeys(constants.OS_SCRIPTS, True) 4078 4079 os_files[constants.OS_SCRIPT_CREATE] = False 4080 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False 4081 4082 if max(api_versions) >= constants.OS_API_V15: 4083 os_files[constants.OS_VARIANTS_FILE] = False 4084 4085 if max(api_versions) >= constants.OS_API_V20: 4086 os_files[constants.OS_PARAMETERS_FILE] = True 4087 else: 4088 del os_files[constants.OS_SCRIPT_VERIFY] 4089 4090 for (filename, required) in os_files.items(): 4091 os_files[filename] = utils.PathJoin(os_dir, filename) 4092 4093 try: 4094 st = os.stat(os_files[filename]) 4095 except EnvironmentError, err: 4096 if err.errno == errno.ENOENT and not required: 4097 del os_files[filename] 4098 continue 4099 return False, ("File '%s' under path '%s' is missing (%s)" % 4100 (filename, os_dir, utils.ErrnoOrStr(err))) 4101 4102 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 4103 return False, ("File '%s' under path '%s' is not a regular file" % 4104 (filename, os_dir)) 4105 4106 if filename in constants.OS_SCRIPTS: 4107 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: 4108 return False, ("File '%s' under path '%s' is not executable" % 4109 (filename, os_dir)) 4110 4111 if not constants.OS_SCRIPT_CREATE in os_files and \ 4112 not constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files: 4113 return False, ("A create script (trusted or untrusted) under path '%s'" 4114 " must exist" % os_dir) 4115 4116 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None) 4117 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED, 4118 None) 4119 4120 variants = [] 4121 if constants.OS_VARIANTS_FILE in os_files: 4122 variants_file = os_files[constants.OS_VARIANTS_FILE] 4123 try: 4124 variants = \ 4125 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) 4126 except EnvironmentError, err: 4127 # we accept missing files, but not other errors 4128 if err.errno != errno.ENOENT: 4129 return False, ("Error while reading the OS variants file at %s: %s" % 4130 (variants_file, utils.ErrnoOrStr(err))) 4131 4132 parameters = [] 4133 if constants.OS_PARAMETERS_FILE in os_files: 4134 parameters_file = os_files[constants.OS_PARAMETERS_FILE] 4135 try: 4136 parameters = utils.ReadFile(parameters_file).splitlines() 4137 except EnvironmentError, err: 4138 return 
False, ("Error while reading the OS parameters file at %s: %s" % 4139 (parameters_file, utils.ErrnoOrStr(err))) 4140 parameters = [v.split(None, 1) for v in parameters] 4141 4142 os_obj = objects.OS(name=name, path=os_dir, 4143 create_script=create_script, 4144 create_script_untrusted=create_script_untrusted, 4145 export_script=os_files[constants.OS_SCRIPT_EXPORT], 4146 import_script=os_files[constants.OS_SCRIPT_IMPORT], 4147 rename_script=os_files[constants.OS_SCRIPT_RENAME], 4148 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, 4149 None), 4150 supported_variants=variants, 4151 supported_parameters=parameters, 4152 api_versions=api_versions) 4153 return True, os_obj
4154
4155 4156 -def OSFromDisk(name, base_dir=None):
4157 """Create an OS instance from disk. 4158 4159 This function will return an OS instance if the given name is a 4160 valid OS name. Otherwise, it will raise an appropriate 4161 L{RPCFail} exception, detailing why this is not a valid OS. 4162 4163 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise 4164 an exception but returns true/false status data. 4165 4166 @type base_dir: string 4167 @keyword base_dir: Base directory containing OS installations. 4168 Defaults to a search in all the OS_SEARCH_PATH dirs. 4169 @rtype: L{objects.OS} 4170 @return: the OS instance if we find a valid one 4171 @raise RPCFail: if we don't find a valid OS 4172 4173 """ 4174 name_only = objects.OS.GetName(name) 4175 status, payload = _TryOSFromDisk(name_only, base_dir) 4176 4177 if not status: 4178 _Fail(payload) 4179 4180 return payload
4181
4182 4183 -def OSCoreEnv(os_name, inst_os, os_params, debug=0):
4184 """Calculate the basic environment for an os script. 4185 4186 @type os_name: str 4187 @param os_name: full operating system name (including variant) 4188 @type inst_os: L{objects.OS} 4189 @param inst_os: operating system for which the environment is being built 4190 @type os_params: dict 4191 @param os_params: the OS parameters 4192 @type debug: integer 4193 @param debug: debug level (0 or 1, for OS Api 10) 4194 @rtype: dict 4195 @return: dict of environment variables 4196 @raise errors.BlockDeviceError: if the block device 4197 cannot be found 4198 4199 """ 4200 result = {} 4201 api_version = \ 4202 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) 4203 result["OS_API_VERSION"] = "%d" % api_version 4204 result["OS_NAME"] = inst_os.name 4205 result["DEBUG_LEVEL"] = "%d" % debug 4206 4207 # OS variants 4208 if api_version >= constants.OS_API_V15 and inst_os.supported_variants: 4209 variant = objects.OS.GetVariant(os_name) 4210 if not variant: 4211 variant = inst_os.supported_variants[0] 4212 else: 4213 variant = "" 4214 result["OS_VARIANT"] = variant 4215 4216 # OS params 4217 for pname, pvalue in os_params.items(): 4218 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue 4219 4220 # Set a default path otherwise programs called by OS scripts (or 4221 # even hooks called from OS scripts) might break, and we don't want 4222 # to have each script require setting a PATH variable 4223 result["PATH"] = constants.HOOKS_PATH 4224 4225 return result
4226
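# Editor's note (not in the original module): a worked example of the
# environment above, for os_name "debootstrap+default" (values are
# illustrative) with os_params {"dhcp-timeout": "30"}:
#
#   OS_API_VERSION=20        OS_NAME=debootstrap   OS_VARIANT=default
#   OSP_DHCP_TIMEOUT=30      DEBUG_LEVEL=0         PATH=<HOOKS_PATH>
#
# note the parameter-name mangling (upper-cased, "-" replaced by "_") and
# that OS_VARIANT is only non-empty when the OS declares variants.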
4227 4228 -def OSEnvironment(instance, inst_os, debug=0):
4229 """Calculate the environment for an os script. 4230 4231 @type instance: L{objects.Instance} 4232 @param instance: target instance for the os script run 4233 @type inst_os: L{objects.OS} 4234 @param inst_os: operating system for which the environment is being built 4235 @type debug: integer 4236 @param debug: debug level (0 or 1, for OS Api 10) 4237 @rtype: dict 4238 @return: dict of environment variables 4239 @raise errors.BlockDeviceError: if the block device 4240 cannot be found 4241 4242 """ 4243 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams, 4244 instance.osparams_private.Unprivate()), debug=debug) 4245 4246 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: 4247 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) 4248 4249 result["HYPERVISOR"] = instance.hypervisor 4250 result["DISK_COUNT"] = "%d" % len(instance.disks_info) 4251 result["NIC_COUNT"] = "%d" % len(instance.nics) 4252 result["INSTANCE_SECONDARY_NODES"] = \ 4253 ("%s" % " ".join(instance.secondary_nodes)) 4254 4255 # Disks 4256 for idx, disk in enumerate(instance.disks_info): 4257 real_disk = _OpenRealBD(disk) 4258 uri = _CalculateDeviceURI(instance, disk, real_disk) 4259 result["DISK_%d_ACCESS" % idx] = disk.mode 4260 result["DISK_%d_UUID" % idx] = disk.uuid 4261 if real_disk.dev_path: 4262 result["DISK_%d_PATH" % idx] = real_disk.dev_path 4263 if uri: 4264 result["DISK_%d_URI" % idx] = uri 4265 if disk.name: 4266 result["DISK_%d_NAME" % idx] = disk.name 4267 if constants.HV_DISK_TYPE in instance.hvparams: 4268 result["DISK_%d_FRONTEND_TYPE" % idx] = \ 4269 instance.hvparams[constants.HV_DISK_TYPE] 4270 if disk.dev_type in constants.DTS_BLOCK: 4271 result["DISK_%d_BACKEND_TYPE" % idx] = "block" 4272 elif disk.dev_type in constants.DTS_FILEBASED: 4273 result["DISK_%d_BACKEND_TYPE" % idx] = \ 4274 "file:%s" % disk.logical_id[0] 4275 4276 # NICs 4277 for idx, nic in enumerate(instance.nics): 4278 result["NIC_%d_MAC" % idx] = nic.mac 4279 result["NIC_%d_UUID" % idx] = nic.uuid 4280 if nic.name: 4281 result["NIC_%d_NAME" % idx] = nic.name 4282 if nic.ip: 4283 result["NIC_%d_IP" % idx] = nic.ip 4284 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] 4285 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 4286 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] 4287 if nic.nicparams[constants.NIC_LINK]: 4288 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] 4289 if nic.netinfo: 4290 nobj = objects.Network.FromDict(nic.netinfo) 4291 result.update(nobj.HooksDict("NIC_%d_" % idx)) 4292 if constants.HV_NIC_TYPE in instance.hvparams: 4293 result["NIC_%d_FRONTEND_TYPE" % idx] = \ 4294 instance.hvparams[constants.HV_NIC_TYPE] 4295 4296 # HV/BE params 4297 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: 4298 for key, value in source.items(): 4299 result["INSTANCE_%s_%s" % (kind, key)] = str(value) 4300 4301 return result
4302
4303 4304 -def DiagnoseExtStorage(top_dirs=None):
4305 """Compute the validity for all ExtStorage Providers. 4306 4307 @type top_dirs: list 4308 @param top_dirs: the list of directories in which to 4309 search (if not given defaults to 4310 L{pathutils.ES_SEARCH_PATH}) 4311 @rtype: list of L{objects.ExtStorage} 4312 @return: a list of tuples (name, path, status, diagnose, parameters) 4313 for all (potential) ExtStorage Providers under all 4314 search paths, where: 4315 - name is the (potential) ExtStorage Provider 4316 - path is the full path to the ExtStorage Provider 4317 - status True/False is the validity of the ExtStorage Provider 4318 - diagnose is the error message for an invalid ExtStorage Provider, 4319 otherwise empty 4320 - parameters is a list of (name, help) parameters, if any 4321 4322 """ 4323 if top_dirs is None: 4324 top_dirs = pathutils.ES_SEARCH_PATH 4325 4326 result = [] 4327 for dir_name in top_dirs: 4328 if os.path.isdir(dir_name): 4329 try: 4330 f_names = utils.ListVisibleFiles(dir_name) 4331 except EnvironmentError, err: 4332 logging.exception("Can't list the ExtStorage directory %s: %s", 4333 dir_name, err) 4334 break 4335 for name in f_names: 4336 es_path = utils.PathJoin(dir_name, name) 4337 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name) 4338 if status: 4339 diagnose = "" 4340 parameters = es_inst.supported_parameters 4341 else: 4342 diagnose = es_inst 4343 parameters = [] 4344 result.append((name, es_path, status, diagnose, parameters)) 4345 4346 return result
4347
4348 4349 -def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
4350 """Grow a stack of block devices. 4351 4352 This function is called recursively, with the childrens being the 4353 first ones to resize. 4354 4355 @type disk: L{objects.Disk} 4356 @param disk: the disk to be grown 4357 @type amount: integer 4358 @param amount: the amount (in mebibytes) to grow with 4359 @type dryrun: boolean 4360 @param dryrun: whether to execute the operation in simulation mode 4361 only, without actually increasing the size 4362 @param backingstore: whether to execute the operation on backing storage 4363 only, or on "logical" storage only; e.g. DRBD is logical storage, 4364 whereas LVM, file, RBD are backing storage 4365 @rtype: (status, result) 4366 @type excl_stor: boolean 4367 @param excl_stor: Whether exclusive_storage is active 4368 @return: a tuple with the status of the operation (True/False), and 4369 the errors message if status is False 4370 4371 """ 4372 r_dev = _RecursiveFindBD(disk) 4373 if r_dev is None: 4374 _Fail("Cannot find block device %s", disk) 4375 4376 try: 4377 r_dev.Grow(amount, dryrun, backingstore, excl_stor) 4378 except errors.BlockDeviceError, err: 4379 _Fail("Failed to grow block device: %s", err, exc=True)
4380
4381 4382 -def BlockdevSnapshot(disk, snap_name, snap_size):
4383 """Create a snapshot copy of a block device. 4384 4385 This function is called recursively, and the snapshot is actually created 4386 just for the leaf lvm backend device. 4387 4388 @type disk: L{objects.Disk} 4389 @param disk: the disk to be snapshotted 4390 @type snap_name: string 4391 @param snap_name: the name of the snapshot 4392 @type snap_size: int 4393 @param snap_size: the size of the snapshot 4394 @rtype: string 4395 @return: snapshot disk ID as (vg, lv) 4396 4397 """ 4398 def _DiskSnapshot(disk, snap_name=None, snap_size=None): 4399 r_dev = _RecursiveFindBD(disk) 4400 if r_dev is not None: 4401 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size) 4402 else: 4403 _Fail("Cannot find block device %s", disk)
4404 4405 if disk.SupportsSnapshots(): 4406 if disk.dev_type == constants.DT_DRBD8: 4407 if not disk.children: 4408 _Fail("DRBD device '%s' without backing storage cannot be snapshotted", 4409 disk.unique_id) 4410 return BlockdevSnapshot(disk.children[0], snap_name, snap_size) 4411 else: 4412 return _DiskSnapshot(disk, snap_name, snap_size) 4413 else: 4414 _Fail("Cannot snapshot block device '%s' of type '%s'", 4415 disk.logical_id, disk.dev_type) 4416
4417 4418 -def BlockdevSetInfo(disk, info):
4419 """Sets 'metadata' information on block devices. 4420 4421 This function sets 'info' metadata on block devices. Initial 4422 information is set at device creation; this function should be used 4423 for example after renames. 4424 4425 @type disk: L{objects.Disk} 4426 @param disk: the disk to be grown 4427 @type info: string 4428 @param info: new 'info' metadata 4429 @rtype: (status, result) 4430 @return: a tuple with the status of the operation (True/False), and 4431 the errors message if status is False 4432 4433 """ 4434 r_dev = _RecursiveFindBD(disk) 4435 if r_dev is None: 4436 _Fail("Cannot find block device %s", disk) 4437 4438 try: 4439 r_dev.SetInfo(info) 4440 except errors.BlockDeviceError, err: 4441 _Fail("Failed to set information on block device: %s", err, exc=True)
4442
4443 4444 -def FinalizeExport(instance, snap_disks):
4445 """Write out the export configuration information. 4446 4447 @type instance: L{objects.Instance} 4448 @param instance: the instance which we export, used for 4449 saving configuration 4450 @type snap_disks: list of L{objects.Disk} 4451 @param snap_disks: list of snapshot block devices, which 4452 will be used to get the actual name of the dump file 4453 4454 @rtype: None 4455 4456 """ 4457 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") 4458 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) 4459 disk_template = utils.GetDiskTemplate(snap_disks) 4460 4461 config = objects.SerializableConfigParser() 4462 4463 config.add_section(constants.INISECT_EXP) 4464 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION)) 4465 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) 4466 config.set(constants.INISECT_EXP, "source", instance.primary_node) 4467 config.set(constants.INISECT_EXP, "os", instance.os) 4468 config.set(constants.INISECT_EXP, "compression", "none") 4469 4470 config.add_section(constants.INISECT_INS) 4471 config.set(constants.INISECT_INS, "name", instance.name) 4472 config.set(constants.INISECT_INS, "maxmem", "%d" % 4473 instance.beparams[constants.BE_MAXMEM]) 4474 config.set(constants.INISECT_INS, "minmem", "%d" % 4475 instance.beparams[constants.BE_MINMEM]) 4476 # "memory" is deprecated, but useful for exporting to old ganeti versions 4477 config.set(constants.INISECT_INS, "memory", "%d" % 4478 instance.beparams[constants.BE_MAXMEM]) 4479 config.set(constants.INISECT_INS, "vcpus", "%d" % 4480 instance.beparams[constants.BE_VCPUS]) 4481 config.set(constants.INISECT_INS, "disk_template", disk_template) 4482 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) 4483 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) 4484 4485 nic_total = 0 4486 for nic_count, nic in enumerate(instance.nics): 4487 nic_total += 1 4488 config.set(constants.INISECT_INS, "nic%d_mac" % 4489 nic_count, "%s" % nic.mac) 4490 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) 4491 config.set(constants.INISECT_INS, "nic%d_network" % nic_count, 4492 "%s" % nic.network) 4493 config.set(constants.INISECT_INS, "nic%d_name" % nic_count, 4494 "%s" % nic.name) 4495 for param in constants.NICS_PARAMETER_TYPES: 4496 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), 4497 "%s" % nic.nicparams.get(param, None)) 4498 # TODO: redundant: on load can read nics until it doesn't exist 4499 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) 4500 4501 disk_total = 0 4502 for disk_count, disk in enumerate(snap_disks): 4503 if disk: 4504 disk_total += 1 4505 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, 4506 ("%s" % disk.iv_name)) 4507 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, 4508 ("%s" % disk.uuid)) 4509 config.set(constants.INISECT_INS, "disk%d_size" % disk_count, 4510 ("%d" % disk.size)) 4511 config.set(constants.INISECT_INS, "disk%d_name" % disk_count, 4512 "%s" % disk.name) 4513 4514 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) 4515 4516 # New-style hypervisor/backend parameters 4517 4518 config.add_section(constants.INISECT_HYP) 4519 for name, value in instance.hvparams.items(): 4520 if name not in constants.HVC_GLOBALS: 4521 config.set(constants.INISECT_HYP, name, str(value)) 4522 4523 config.add_section(constants.INISECT_BEP) 4524 for name, value in instance.beparams.items(): 4525 
config.set(constants.INISECT_BEP, name, str(value)) 4526 4527 config.add_section(constants.INISECT_OSP) 4528 for name, value in instance.osparams.items(): 4529 config.set(constants.INISECT_OSP, name, str(value)) 4530 4531 config.add_section(constants.INISECT_OSP_PRIVATE) 4532 for name, value in instance.osparams_private.items(): 4533 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get())) 4534 4535 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), 4536 data=config.Dumps()) 4537 shutil.rmtree(finaldestdir, ignore_errors=True) 4538 shutil.move(destdir, finaldestdir)
4539
4540 4541 -def ExportInfo(dest):
4542 """Get export configuration information. 4543 4544 @type dest: str 4545 @param dest: directory containing the export 4546 4547 @rtype: L{objects.SerializableConfigParser} 4548 @return: a serializable config file containing the 4549 export info 4550 4551 """ 4552 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) 4553 4554 config = objects.SerializableConfigParser() 4555 config.read(cff) 4556 4557 if (not config.has_section(constants.INISECT_EXP) or 4558 not config.has_section(constants.INISECT_INS)): 4559 _Fail("Export info file doesn't have the required fields") 4560 4561 return config.Dumps()
4562
4563 4564 -def ListExports():
4565 """Return a list of exports currently available on this machine. 4566 4567 @rtype: list 4568 @return: list of the exports 4569 4570 """ 4571 if os.path.isdir(pathutils.EXPORT_DIR): 4572 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) 4573 else: 4574 _Fail("No exports directory")
4575
4576 4577 -def RemoveExport(export):
4578 """Remove an existing export from the node. 4579 4580 @type export: str 4581 @param export: the name of the export to remove 4582 @rtype: None 4583 4584 """ 4585 target = utils.PathJoin(pathutils.EXPORT_DIR, export) 4586 4587 try: 4588 shutil.rmtree(target) 4589 except EnvironmentError, err: 4590 _Fail("Error while removing the export: %s", err, exc=True)
4591
4592 4593 -def BlockdevRename(devlist):
4594 """Rename a list of block devices. 4595 4596 @type devlist: list of tuples 4597 @param devlist: list of tuples of the form (disk, new_unique_id); disk is 4598 an L{objects.Disk} object describing the current disk, and new 4599 unique_id is the name we rename it to 4600 @rtype: boolean 4601 @return: True if all renames succeeded, False otherwise 4602 4603 """ 4604 msgs = [] 4605 result = True 4606 for disk, unique_id in devlist: 4607 dev = _RecursiveFindBD(disk) 4608 if dev is None: 4609 msgs.append("Can't find device %s in rename" % str(disk)) 4610 result = False 4611 continue 4612 try: 4613 old_rpath = dev.dev_path 4614 dev.Rename(unique_id) 4615 new_rpath = dev.dev_path 4616 if old_rpath != new_rpath: 4617 DevCacheManager.RemoveCache(old_rpath) 4618 # FIXME: we should add the new cache information here, like: 4619 # DevCacheManager.UpdateCache(new_rpath, owner, ...) 4620 # but we don't have the owner here - maybe parse from existing 4621 # cache? for now, we only lose lvm data when we rename, which 4622 # is less critical than DRBD or MD 4623 except errors.BlockDeviceError, err: 4624 msgs.append("Can't rename device '%s' to '%s': %s" % 4625 (dev, unique_id, err)) 4626 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) 4627 result = False 4628 if not result: 4629 _Fail("; ".join(msgs))
4630
4631 4632 -def _TransformFileStorageDir(fs_dir):
4633 """Checks whether given file_storage_dir is valid. 4634 4635 Checks wheter the given fs_dir is within the cluster-wide default 4636 file_storage_dir or the shared_file_storage_dir, which are stored in 4637 SimpleStore. Only paths under those directories are allowed. 4638 4639 @type fs_dir: str 4640 @param fs_dir: the path to check 4641 4642 @return: the normalized path if valid, None otherwise 4643 4644 """ 4645 filestorage.CheckFileStoragePath(fs_dir) 4646 4647 return os.path.normpath(fs_dir)
4648
4649 4650 -def CreateFileStorageDir(file_storage_dir):
4651 """Create file storage directory. 4652 4653 @type file_storage_dir: str 4654 @param file_storage_dir: directory to create 4655 4656 @rtype: tuple 4657 @return: tuple with first element a boolean indicating wheter dir 4658 creation was successful or not 4659 4660 """ 4661 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4662 if os.path.exists(file_storage_dir): 4663 if not os.path.isdir(file_storage_dir): 4664 _Fail("Specified storage dir '%s' is not a directory", 4665 file_storage_dir) 4666 else: 4667 try: 4668 os.makedirs(file_storage_dir, 0750) 4669 except OSError, err: 4670 _Fail("Cannot create file storage directory '%s': %s", 4671 file_storage_dir, err, exc=True)
4672
4673 4674 -def RemoveFileStorageDir(file_storage_dir):
4675 """Remove file storage directory. 4676 4677 Remove it only if it's empty. If not log an error and return. 4678 4679 @type file_storage_dir: str 4680 @param file_storage_dir: the directory we should cleanup 4681 @rtype: tuple (success,) 4682 @return: tuple of one element, C{success}, denoting 4683 whether the operation was successful 4684 4685 """ 4686 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4687 if os.path.exists(file_storage_dir): 4688 if not os.path.isdir(file_storage_dir): 4689 _Fail("Specified Storage directory '%s' is not a directory", 4690 file_storage_dir) 4691 # deletes dir only if empty, otherwise we want to fail the rpc call 4692 try: 4693 os.rmdir(file_storage_dir) 4694 except OSError, err: 4695 _Fail("Cannot remove file storage directory '%s': %s", 4696 file_storage_dir, err)
4697
4698 4699 -def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
4700 """Rename the file storage directory. 4701 4702 @type old_file_storage_dir: str 4703 @param old_file_storage_dir: the current path 4704 @type new_file_storage_dir: str 4705 @param new_file_storage_dir: the name we should rename to 4706 @rtype: tuple (success,) 4707 @return: tuple of one element, C{success}, denoting 4708 whether the operation was successful 4709 4710 """ 4711 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) 4712 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) 4713 if not os.path.exists(new_file_storage_dir): 4714 if os.path.isdir(old_file_storage_dir): 4715 try: 4716 os.rename(old_file_storage_dir, new_file_storage_dir) 4717 except OSError, err: 4718 _Fail("Cannot rename '%s' to '%s': %s", 4719 old_file_storage_dir, new_file_storage_dir, err) 4720 else: 4721 _Fail("Specified storage dir '%s' is not a directory", 4722 old_file_storage_dir) 4723 else: 4724 if os.path.exists(old_file_storage_dir): 4725 _Fail("Cannot rename '%s' to '%s': both locations exist", 4726 old_file_storage_dir, new_file_storage_dir)
4727
4728 4729 -def _EnsureJobQueueFile(file_name):
4730 """Checks whether the given filename is in the queue directory. 4731 4732 @type file_name: str 4733 @param file_name: the file name we should check 4734 @rtype: None 4735 @raises RPCFail: if the file is not valid 4736 4737 """ 4738 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): 4739 _Fail("Passed job queue file '%s' does not belong to" 4740 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
4741
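# Editor's note (not in the original module): the check above is a pure
# path-containment test; with the standard queue directory, a name such
# as <QUEUE_DIR>/job-42 passes, while anything resolving outside the
# queue directory (absolute paths elsewhere, "../" escapes) fails the
# RPC call.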
4742 4743 -def JobQueueUpdate(file_name, content):
4744 """Updates a file in the queue directory. 4745 4746 This is just a wrapper over L{utils.io.WriteFile}, with proper 4747 checking. 4748 4749 @type file_name: str 4750 @param file_name: the job file name 4751 @type content: str 4752 @param content: the new job contents 4753 @rtype: boolean 4754 @return: the success of the operation 4755 4756 """ 4757 file_name = vcluster.LocalizeVirtualPath(file_name) 4758 4759 _EnsureJobQueueFile(file_name) 4760 getents = runtime.GetEnts() 4761 4762 # Write and replace the file atomically 4763 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, 4764 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
4765
4766 4767 -def JobQueueRename(old, new):
4768 """Renames a job queue file. 4769 4770 This is just a wrapper over os.rename with proper checking. 4771 4772 @type old: str 4773 @param old: the old (actual) file name 4774 @type new: str 4775 @param new: the desired file name 4776 @rtype: tuple 4777 @return: the success of the operation and payload 4778 4779 """ 4780 old = vcluster.LocalizeVirtualPath(old) 4781 new = vcluster.LocalizeVirtualPath(new) 4782 4783 _EnsureJobQueueFile(old) 4784 _EnsureJobQueueFile(new) 4785 4786 getents = runtime.GetEnts() 4787 4788 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, 4789 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
4790
4791 4792 -def BlockdevClose(instance_name, disks):
4793 """Closes the given block devices. 4794 4795 This means they will be switched to secondary mode (in case of 4796 DRBD). 4797 4798 @param instance_name: if the argument is not empty, the symlinks 4799 of this instance will be removed 4800 @type disks: list of L{objects.Disk} 4801 @param disks: the list of disks to be closed 4802 @rtype: tuple (success, message) 4803 @return: a tuple of success and message, where success 4804 indicates the succes of the operation, and message 4805 which will contain the error details in case we 4806 failed 4807 4808 """ 4809 bdevs = [] 4810 for cf in disks: 4811 rd = _RecursiveFindBD(cf) 4812 if rd is None: 4813 _Fail("Can't find device %s", cf) 4814 bdevs.append(rd) 4815 4816 msg = [] 4817 for rd in bdevs: 4818 try: 4819 rd.Close() 4820 except errors.BlockDeviceError, err: 4821 msg.append(str(err)) 4822 if msg: 4823 _Fail("Can't close devices: %s", ",".join(msg)) 4824 else: 4825 if instance_name: 4826 _RemoveBlockDevLinks(instance_name, disks)
4827
4828 4829 -def BlockdevOpen(instance_name, disks, exclusive):
4830 """Opens the given block devices. 4831 4832 """ 4833 bdevs = [] 4834 for cf in disks: 4835 rd = _RecursiveFindBD(cf) 4836 if rd is None: 4837 _Fail("Can't find device %s", cf) 4838 bdevs.append(rd) 4839 4840 msg = [] 4841 for idx, rd in enumerate(bdevs): 4842 try: 4843 rd.Open(exclusive=exclusive) 4844 _SymlinkBlockDev(instance_name, rd.dev_path, idx) 4845 except errors.BlockDeviceError, err: 4846 msg.append(str(err)) 4847 4848 if msg: 4849 _Fail("Can't open devices: %s", ",".join(msg))
4850
4851 4852 -def ValidateHVParams(hvname, hvparams):
4853 """Validates the given hypervisor parameters. 4854 4855 @type hvname: string 4856 @param hvname: the hypervisor name 4857 @type hvparams: dict 4858 @param hvparams: the hypervisor parameters to be validated 4859 @rtype: None 4860 4861 """ 4862 try: 4863 hv_type = hypervisor.GetHypervisor(hvname) 4864 hv_type.ValidateParameters(hvparams) 4865 except errors.HypervisorError, err: 4866 _Fail(str(err), log=False)
4867
4868 4869 -def _CheckOSPList(os_obj, parameters):
4870 """Check whether a list of parameters is supported by the OS. 4871 4872 @type os_obj: L{objects.OS} 4873 @param os_obj: OS object to check 4874 @type parameters: list 4875 @param parameters: the list of parameters to check 4876 4877 """ 4878 supported = [v[0] for v in os_obj.supported_parameters] 4879 delta = frozenset(parameters).difference(supported) 4880 if delta: 4881 _Fail("The following parameters are not supported" 4882 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
4883
4884 4885 -def _CheckOSVariant(os_obj, name):
4886 """Check whether an OS name conforms to the os variants specification. 4887 4888 @type os_obj: L{objects.OS} 4889 @param os_obj: OS object to check 4890 4891 @type name: string 4892 @param name: OS name passed by the user, to check for validity 4893 4894 @rtype: NoneType 4895 @return: None 4896 @raise RPCFail: if OS variant is not valid 4897 4898 """ 4899 variant = objects.OS.GetVariant(name) 4900 4901 if not os_obj.supported_variants: 4902 if variant: 4903 _Fail("OS '%s' does not support variants ('%s' passed)" % 4904 (os_obj.name, variant)) 4905 else: 4906 return 4907 4908 if not variant: 4909 _Fail("OS name '%s' must include a variant" % name) 4910 4911 if variant not in os_obj.supported_variants: 4912 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))
4913
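# Editor's note (not in the original module): OS names carry their
# variant after a "+", e.g. "debootstrap+default" has variant "default"
# while plain "debootstrap" has none. The function above therefore
# rejects "debootstrap+default" for a variant-less OS, and plain
# "debootstrap" for an OS that declares supported variants.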
4914 4915 -def ValidateOS(required, osname, checks, osparams, force_variant):
4916 """Validate the given OS parameters. 4917 4918 @type required: boolean 4919 @param required: whether absence of the OS should translate into 4920 failure or not 4921 @type osname: string 4922 @param osname: the OS to be validated 4923 @type checks: list 4924 @param checks: list of the checks to run (currently only 'parameters') 4925 @type osparams: dict 4926 @param osparams: dictionary with OS parameters, some of which may be 4927 private. 4928 @rtype: boolean 4929 @return: True if the validation passed, or False if the OS was not 4930 found and L{required} was false 4931 4932 """ 4933 if not constants.OS_VALIDATE_CALLS.issuperset(checks): 4934 _Fail("Unknown checks required for OS %s: %s", osname, 4935 set(checks).difference(constants.OS_VALIDATE_CALLS)) 4936 4937 name_only = objects.OS.GetName(osname) 4938 status, tbv = _TryOSFromDisk(name_only, None) 4939 4940 if not status: 4941 if required: 4942 _Fail(tbv) 4943 else: 4944 return False 4945 4946 if not force_variant: 4947 _CheckOSVariant(tbv, osname) 4948 4949 if max(tbv.api_versions) < constants.OS_API_V20: 4950 return True 4951 4952 if constants.OS_VALIDATE_PARAMETERS in checks: 4953 _CheckOSPList(tbv, osparams.keys()) 4954 4955 validate_env = OSCoreEnv(osname, tbv, osparams) 4956 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, 4957 cwd=tbv.path, reset_env=True) 4958 if result.failed: 4959 logging.error("os validate command '%s' returned error: %s output: %s", 4960 result.cmd, result.fail_reason, result.output) 4961 _Fail("OS validation script failed (%s), output: %s", 4962 result.fail_reason, result.output, log=False) 4963 4964 return True


def ExportOS(instance, override_env):
  """Creates a gzipped tarball with an OS definition and environment.

  The archive contains a file with the environment variables needed by
  the OS scripts.

  @type instance: L{objects.Instance}
  @param instance: instance for which the OS definition is exported

  @type override_env: dict of string to string
  @param override_env: if supplied, it overrides the environment on a
      key-by-key basis that is part of the archive

  @rtype: string
  @return: filepath of the archive

  """
  assert instance
  assert instance.os

  temp_dir = tempfile.mkdtemp()
  inst_os = OSFromDisk(instance.os)

  result = utils.RunCmd(["ln", "-s", inst_os.path,
                         utils.PathJoin(temp_dir, "os")])
  if result.failed:
    _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'",
          inst_os, temp_dir, result.fail_reason, result.output)

  env = OSEnvironment(instance, inst_os)
  env.update(override_env)

  with open(utils.PathJoin(temp_dir, "environment"), "w") as f:
    for var in env:
      f.write(var + "=" + env[var] + "\n")

  (fd, os_package) = tempfile.mkstemp(suffix=".tgz")
  os.close(fd)

  result = utils.RunCmd(["tar", "--dereference", "-czv",
                         "-f", os_package,
                         "-C", temp_dir,
                         "."])
  if result.failed:
    _Fail("Failed to create OS archive '%s': %s, output '%s'",
          os_package, result.fail_reason, result.output)

  result = utils.RunCmd(["rm", "-rf", temp_dir])
  if result.failed:
    _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'",
          inst_os, temp_dir, result.fail_reason, result.output)

  return os_package


def DemoteFromMC():
  """Demotes the current node from master candidate role.

  """
  # try to ensure we're not the master by mistake
  master, myself = ssconf.GetMasterAndMyself()
  if master == myself:
    _Fail("ssconf status shows I'm the master node, will not demote")

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD])
  if not result.failed:
    _Fail("The master daemon is running, will not demote")

  try:
    if os.path.isfile(pathutils.CLUSTER_CONF_FILE):
      utils.CreateBackup(pathutils.CLUSTER_CONF_FILE)
  except EnvironmentError, err:
    if err.errno != errno.ENOENT:
      _Fail("Error while backing up cluster file: %s", err, exc=True)

  utils.RemoveFile(pathutils.CLUSTER_CONF_FILE)


def _GetX509Filenames(cryptodir, name):
  """Returns the full paths for the private key and certificate.

  """
  return (utils.PathJoin(cryptodir, name),
          utils.PathJoin(cryptodir, name, _X509_KEY_FILE),
          utils.PathJoin(cryptodir, name, _X509_CERT_FILE))


def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):
  """Creates a new X509 certificate for SSL/TLS.

  @type validity: int
  @param validity: Validity in seconds
  @rtype: tuple; (string, string)
  @return: Certificate name and public part

  """
  serial_no = int(time.time())
  (key_pem, cert_pem) = \
    utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
                                     min(validity, _MAX_SSL_CERT_VALIDITY),
                                     serial_no)

  cert_dir = tempfile.mkdtemp(dir=cryptodir,
                              prefix="x509-%s-" % utils.TimestampForFilename())
  try:
    name = os.path.basename(cert_dir)
    assert len(name) > 5

    (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name)

    utils.WriteFile(key_file, mode=0400, data=key_pem)
    utils.WriteFile(cert_file, mode=0400, data=cert_pem)

    # Never return private key as it shouldn't leave the node
    return (name, cert_pem)
  except Exception:
    shutil.rmtree(cert_dir, ignore_errors=True)
    raise


def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):
  """Removes an X509 certificate.

  @type name: string
  @param name: Certificate name

  """
  (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name)

  utils.RemoveFile(key_file)
  utils.RemoveFile(cert_file)

  try:
    os.rmdir(cert_dir)
  except EnvironmentError, err:
    _Fail("Cannot remove certificate directory '%s': %s",
          cert_dir, err)


def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
  """Returns the command for the requested input/output.

  @type instance: L{objects.Instance}
  @param instance: The instance object
  @param mode: Import/export mode
  @param ieio: Input/output type
  @param ieargs: Input/output arguments

  """
  assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT)

  env = None
  prefix = None
  suffix = None
  exp_size = None

  if ieio == constants.IEIO_FILE:
    (filename, ) = ieargs

    if not utils.IsNormAbsPath(filename):
      _Fail("Path '%s' is not normalized or absolute", filename)

    real_filename = os.path.realpath(filename)
    directory = os.path.dirname(real_filename)

    if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename):
      _Fail("File '%s' is not under exports directory '%s': %s",
            filename, pathutils.EXPORT_DIR, real_filename)

    # Create directory
    utils.Makedirs(directory, mode=0750)

    quoted_filename = utils.ShellQuote(filename)

    if mode == constants.IEM_IMPORT:
      suffix = "> %s" % quoted_filename
    elif mode == constants.IEM_EXPORT:
      suffix = "< %s" % quoted_filename

    # Retrieve file size
    try:
      st = os.stat(filename)
    except EnvironmentError, err:
      logging.error("Can't stat(2) %s: %s", filename, err)
    else:
      exp_size = utils.BytesToMebibyte(st.st_size)

  elif ieio == constants.IEIO_RAW_DISK:
    (disk, ) = ieargs
    real_disk = _OpenRealBD(disk)

    if mode == constants.IEM_IMPORT:
      suffix = "| %s" % utils.ShellQuoteArgs(real_disk.Import())

    elif mode == constants.IEM_EXPORT:
      prefix = "%s |" % utils.ShellQuoteArgs(real_disk.Export())
      exp_size = disk.size

  elif ieio == constants.IEIO_SCRIPT:
    (disk, disk_index, ) = ieargs

    assert isinstance(disk_index, (int, long))

    inst_os = OSFromDisk(instance.os)
    env = OSEnvironment(instance, inst_os)

    if mode == constants.IEM_IMPORT:
      disk_path_var = "DISK_%d_PATH" % disk_index
      if disk_path_var in env:
        env["IMPORT_DEVICE"] = env[disk_path_var]
        env["IMPORT_DISK_PATH"] = env[disk_path_var]

      disk_uri_var = "DISK_%d_URI" % disk_index
      if disk_uri_var in env:
        env["IMPORT_DISK_URI"] = env[disk_uri_var]

      env["IMPORT_INDEX"] = str(disk_index)
      script = inst_os.import_script

    elif mode == constants.IEM_EXPORT:
      disk_path_var = "DISK_%d_PATH" % disk_index
      if disk_path_var in env:
        env["EXPORT_DEVICE"] = env[disk_path_var]
        env["EXPORT_DISK_PATH"] = env[disk_path_var]

      disk_uri_var = "DISK_%d_URI" % disk_index
      if disk_uri_var in env:
        env["EXPORT_DISK_URI"] = env[disk_uri_var]

      env["EXPORT_INDEX"] = str(disk_index)
      script = inst_os.export_script

    # TODO: Pass special environment only to script
    script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script)

    if mode == constants.IEM_IMPORT:
      suffix = "| %s" % script_cmd

    elif mode == constants.IEM_EXPORT:
      prefix = "%s |" % script_cmd

    # Let script predict size
    exp_size = constants.IE_CUSTOM_SIZE

  else:
    _Fail("Invalid %s I/O mode %r", mode, ieio)

  return (env, prefix, suffix, exp_size)


def _CreateImportExportStatusDir(prefix):
  """Creates status directory for import/export.

  """
  return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR,
                          prefix=("%s-%s-" %
                                  (prefix, utils.TimestampForFilename())))


def StartImportExportDaemon(mode, opts, host, port, instance, component,
                            ieio, ieioargs):
  """Starts an import or export daemon.

  @param mode: Import/export mode
  @type opts: L{objects.ImportExportOptions}
  @param opts: Daemon options
  @type host: string
  @param host: Remote host for export (None for import)
  @type port: int
  @param port: Remote port for export (None for import)
  @type instance: L{objects.Instance}
  @param instance: Instance object
  @type component: string
  @param component: which part of the instance is transferred now,
      e.g. 'disk/0'
  @param ieio: Input/output type
  @param ieioargs: Input/output arguments

  """

  # Use Import/Export over socat.
  #
  # Export() gives a command that produces a flat stream.
  # Import() gives a command that reads a flat stream to a disk template.
  if mode == constants.IEM_IMPORT:
    prefix = "import"

    if not (host is None and port is None):
      _Fail("Cannot specify host or port on import")

  elif mode == constants.IEM_EXPORT:
    prefix = "export"

    if host is None or port is None:
      _Fail("Host and port must be specified for an export")

  else:
    _Fail("Invalid mode %r", mode)

  if (opts.key_name is None) ^ (opts.ca_pem is None):
    _Fail("Cluster certificate can only be used for both key and CA")

  (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
    _GetImportExportIoCommand(instance, mode, ieio, ieioargs)

  if opts.key_name is None:
    # Use server.pem
    key_path = pathutils.NODED_CERT_FILE
    cert_path = pathutils.NODED_CERT_FILE
    assert opts.ca_pem is None
  else:
    (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR,
                                                 opts.key_name)
    assert opts.ca_pem is not None

  for i in [key_path, cert_path]:
    if not os.path.exists(i):
      _Fail("File '%s' does not exist" % i)

  status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component))
  try:
    status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
    pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
    ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)

    if opts.ca_pem is None:
      # Use server.pem
      ca = utils.ReadFile(pathutils.NODED_CERT_FILE)
    else:
      ca = opts.ca_pem

    # Write CA file
    utils.WriteFile(ca_file, data=ca, mode=0400)

    cmd = [
      pathutils.IMPORT_EXPORT_DAEMON,
      status_file, mode,
      "--key=%s" % key_path,
      "--cert=%s" % cert_path,
      "--ca=%s" % ca_file,
      ]

    if host:
      cmd.append("--host=%s" % host)

    if port:
      cmd.append("--port=%s" % port)

    if opts.ipv6:
      cmd.append("--ipv6")
    else:
      cmd.append("--ipv4")

    if opts.compress:
      cmd.append("--compress=%s" % opts.compress)

    if opts.magic:
      cmd.append("--magic=%s" % opts.magic)

    if exp_size is not None:
      cmd.append("--expected-size=%s" % exp_size)

    if cmd_prefix:
      cmd.append("--cmd-prefix=%s" % cmd_prefix)

    if cmd_suffix:
      cmd.append("--cmd-suffix=%s" % cmd_suffix)

    if mode == constants.IEM_EXPORT:
      # Retry connection a few times when connecting to remote peer
      cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES)
      cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT)
    elif opts.connect_timeout is not None:
      assert mode == constants.IEM_IMPORT
      # Overall timeout for establishing connection while listening
      cmd.append("--connect-timeout=%s" % opts.connect_timeout)

    logfile = _InstanceLogName(prefix, instance.os, instance.name, component)

    # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has
    # support for receiving a file descriptor for output
    utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file,
                      output=logfile)

    # The import/export name is simply the status directory name
    return os.path.basename(status_dir)

  except Exception:
    shutil.rmtree(status_dir, ignore_errors=True)
    raise


def GetImportExportStatus(names):
  """Returns import/export daemon status.

  @type names: sequence
  @param names: List of names
  @rtype: List of dicts
  @return: Returns a list of the state of each named import/export or None if
      a status couldn't be read

  """
  result = []

  for name in names:
    status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name,
                                 _IES_STATUS_FILE)

    try:
      data = utils.ReadFile(status_file)
    except EnvironmentError, err:
      if err.errno != errno.ENOENT:
        raise
      data = None

    if not data:
      result.append(None)
      continue

    result.append(serializer.LoadJson(data))

  return result


def AbortImportExport(name):
  """Sends SIGTERM to a running import/export daemon.

  """
  logging.info("Abort import/export %s", name)

  status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name)
  pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))

  if pid:
    logging.info("Import/export %s is running with PID %s, sending SIGTERM",
                 name, pid)
    utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)


def CleanupImportExport(name):
  """Cleanup after an import or export.

  If the import/export daemon is still running it's killed. Afterwards the
  whole status directory is removed.

  """
  logging.info("Finalizing import/export %s", name)

  status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name)

  pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))

  if pid:
    logging.info("Import/export %s is still running with PID %s",
                 name, pid)
    utils.KillProcess(pid, waitpid=False)

  shutil.rmtree(status_dir, ignore_errors=True)


def _FindDisks(disks):
  """Finds attached L{BlockDev}s for the given disks.

  @type disks: list of L{objects.Disk}
  @param disks: the disk objects we need to find

  @return: list of L{BlockDev} objects or C{None} if a given disk
      was not found or was not attached.

  """
  bdevs = []

  for disk in disks:
    rd = _RecursiveFindBD(disk)
    if rd is None:
      _Fail("Can't find device %s", disk)
    bdevs.append(rd)
  return bdevs


def DrbdDisconnectNet(disks):
  """Disconnects the network on a list of drbd devices.

  """
  bdevs = _FindDisks(disks)

  # disconnect disks
  for rd in bdevs:
    try:
      rd.DisconnectNet()
    except errors.BlockDeviceError, err:
      _Fail("Can't change network configuration to standalone mode: %s",
            err, exc=True)


def DrbdAttachNet(disks, multimaster):
  """Attaches the network on a list of drbd devices.

  """
  bdevs = _FindDisks(disks)

  # reconnect disks, switch to new master configuration and if
  # needed primary mode
  for rd in bdevs:
    try:
      rd.AttachNet(multimaster)
    except errors.BlockDeviceError, err:
      _Fail("Can't change network configuration: %s", err)

  # wait until the disks are connected; we need to retry the re-attach
  # if the device becomes standalone, as this might happen if the one
  # node disconnects and reconnects in a different mode before the
  # other node reconnects; in this case, one or both of the nodes will
  # decide it has wrong configuration and switch to standalone

  def _Attach():
    all_connected = True

    for rd in bdevs:
      stats = rd.GetProcStatus()

      if multimaster:
        # In the multimaster case we have to wait explicitly until
        # the resource is Connected and UpToDate/UpToDate, because
        # we promote *both nodes* to primary directly afterwards.
        # Being in resync is not enough, since there is a race during which we
        # may promote a node with an Outdated disk to primary, effectively
        # tearing down the connection.
        all_connected = (all_connected and
                         stats.is_connected and
                         stats.is_disk_uptodate and
                         stats.peer_disk_uptodate)
      else:
        all_connected = (all_connected and
                         (stats.is_connected or stats.is_in_resync))

      if stats.is_standalone:
        # peer had different config info and this node became
        # standalone, even though this should not happen with the
        # new staged way of changing disk configs
        try:
          rd.AttachNet(multimaster)
        except errors.BlockDeviceError, err:
          _Fail("Can't change network configuration: %s", err)

    if not all_connected:
      raise utils.RetryAgain()

  try:
    # Start with a delay of 100 milliseconds and go up to 5 seconds
    utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
  except utils.RetryTimeout:
    _Fail("Timeout in disk reconnecting")
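# The (0.1, 1.5, 5.0) tuple asks utils.Retry for an exponential backoff:
# start at 0.1s, multiply the delay by 1.5 after each attempt, cap it at
# 5.0s, and give up after 2 * 60 seconds overall. A minimal sketch of the
# same pattern, assuming only the utils.Retry/RetryAgain semantics used
# above ("server_ready" is a hypothetical check):
#
#   >>> def _Ping():
#   ...   if not server_ready():
#   ...     raise utils.RetryAgain()
#   >>> utils.Retry(_Ping, (0.1, 1.5, 5.0), 120)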


def DrbdWaitSync(disks):
  """Wait until DRBDs have synchronized.

  """
  def _helper(rd):
    stats = rd.GetProcStatus()
    if not (stats.is_connected or stats.is_in_resync):
      raise utils.RetryAgain()
    return stats

  bdevs = _FindDisks(disks)

  min_resync = 100
  alldone = True
  for rd in bdevs:
    try:
      # poll each second for 15 seconds
      stats = utils.Retry(_helper, 1, 15, args=[rd])
    except utils.RetryTimeout:
      stats = rd.GetProcStatus()
      # last check
      if not (stats.is_connected or stats.is_in_resync):
        _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
    alldone = alldone and (not stats.is_in_resync)
    if stats.sync_percent is not None:
      min_resync = min(min_resync, stats.sync_percent)

  return (alldone, min_resync)


def DrbdNeedsActivation(disks):
  """Checks which of the passed disks needs activation and returns their UUIDs.

  """
  faulty_disks = []

  for disk in disks:
    rd = _RecursiveFindBD(disk)
    if rd is None:
      faulty_disks.append(disk)
      continue

    stats = rd.GetProcStatus()
    if stats.is_standalone or stats.is_diskless:
      faulty_disks.append(disk)

  return [disk.uuid for disk in faulty_disks]


def GetDrbdUsermodeHelper():
  """Returns DRBD usermode helper currently configured.

  """
  try:
    return drbd.DRBD8.GetUsermodeHelper()
  except errors.BlockDeviceError, err:
    _Fail(str(err))


def PowercycleNode(hypervisor_type, hvparams=None):
  """Hard-powercycle the node.

  Because we need to return first, and schedule the powercycle in the
  background, we won't be able to report failures nicely.

  """
  hyper = hypervisor.GetHypervisor(hypervisor_type)
  try:
    pid = os.fork()
  except OSError:
    # if we can't fork, we'll pretend that we're in the child process
    pid = 0
  if pid > 0:
    return "Reboot scheduled in 5 seconds"
  # ensure the child is running in RAM
  try:
    utils.Mlockall()
  except Exception:  # pylint: disable=W0703
    pass
  time.sleep(5)
  hyper.PowercycleNode(hvparams=hvparams)


def _VerifyRestrictedCmdName(cmd):
  """Verifies a restricted command name.

  @type cmd: string
  @param cmd: Command name
  @rtype: tuple; (boolean, string or None)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's C{None}

  """
  if not cmd.strip():
    return (False, "Missing command name")

  if os.path.basename(cmd) != cmd:
    return (False, "Invalid command name")

  if not constants.EXT_PLUGIN_MASK.match(cmd):
    return (False, "Command name contains forbidden characters")

  return (True, None)


def _CommonRestrictedCmdCheck(path, owner):
  """Common checks for restricted command file system directories and files.

  @type path: string
  @param path: Path to check
  @param owner: C{None} or tuple containing UID and GID
  @rtype: tuple; (boolean, string or C{os.stat} result)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's the result of C{os.stat}

  """
  if owner is None:
    # Default to root as owner
    owner = (0, 0)

  try:
    st = os.stat(path)
  except EnvironmentError, err:
    return (False, "Can't stat(2) '%s': %s" % (path, err))

  if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE):
    return (False, "Permissions on '%s' are too permissive" % path)

  if (st.st_uid, st.st_gid) != owner:
    (owner_uid, owner_gid) = owner
    return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid))

  return (True, st)


def _VerifyRestrictedCmdDirectory(path, _owner=None):
  """Verifies restricted command directory.

  @type path: string
  @param path: Path to check
  @rtype: tuple; (boolean, string or None)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's C{None}

  """
  (status, value) = _CommonRestrictedCmdCheck(path, _owner)

  if not status:
    return (False, value)

  if not stat.S_ISDIR(value.st_mode):
    return (False, "Path '%s' is not a directory" % path)

  return (True, None)


def _VerifyRestrictedCmd(path, cmd, _owner=None):
  """Verifies a whole restricted command and returns its executable filename.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @rtype: tuple; (boolean, string)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise the second element is the
    absolute path to the executable

  """
  executable = utils.PathJoin(path, cmd)

  (status, msg) = _CommonRestrictedCmdCheck(executable, _owner)

  if not status:
    return (False, msg)

  if not utils.IsExecutable(executable):
    return (False, "access(2) thinks '%s' can't be executed" % executable)

  return (True, executable)


def _PrepareRestrictedCmd(path, cmd,
                          _verify_dir=_VerifyRestrictedCmdDirectory,
                          _verify_name=_VerifyRestrictedCmdName,
                          _verify_cmd=_VerifyRestrictedCmd):
  """Performs a number of tests on a restricted command.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @return: Same as L{_VerifyRestrictedCmd}

  """
  # Verify the directory first
  (status, msg) = _verify_dir(path)
  if status:
    # Check command if everything was alright
    (status, msg) = _verify_name(cmd)

  if not status:
    return (False, msg)

  # Check actual executable
  return _verify_cmd(path, cmd)


def RunConstrainedCmd(cmd,
                      lock_file,
                      path,
                      inp=None,
                      _lock_timeout=_RCMD_LOCK_TIMEOUT,
                      _sleep_fn=time.sleep,
                      _prepare_fn=_PrepareRestrictedCmd,
                      _runcmd_fn=utils.RunCmd,
                      _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
  """Executes a command after performing strict tests.

  @type cmd: string
  @param cmd: Command name
  @type lock_file: string
  @param lock_file: path to the lock file
  @type path: string
  @param path: path to the directory in which the command is present
  @type inp: string
  @param inp: Input to be passed to the command
  @rtype: string
  @return: Command output
  @raise RPCFail: In case of an error

  """
  logging.info("Preparing to run restricted command '%s'", cmd)

  if not _enabled:
    _Fail("Restricted commands disabled at configure time")

  lock = None
  try:
    cmdresult = None
    try:
      lock = utils.FileLock.Open(lock_file)
      lock.Exclusive(blocking=True, timeout=_lock_timeout)

      (status, value) = _prepare_fn(path, cmd)

      if status:
        if inp:
          input_fd = tempfile.TemporaryFile()
          input_fd.write(inp)
          input_fd.flush()
          input_fd.seek(0)
        else:
          input_fd = None
        cmdresult = _runcmd_fn([value], env={}, reset_env=True,
                               postfork_fn=lambda _: lock.Unlock(),
                               input_fd=input_fd)
        if input_fd:
          input_fd.close()
      else:
        logging.error(value)
    except Exception:  # pylint: disable=W0703
      # Keep original error in log
      logging.exception("Caught exception")

    if cmdresult is None:
      logging.info("Sleeping for %0.1f seconds before returning",
                   _RCMD_INVALID_DELAY)
      _sleep_fn(_RCMD_INVALID_DELAY)

      # Do not include original error message in returned error
      _Fail("Executing command '%s' failed" % cmd)
    elif cmdresult.failed or cmdresult.fail_reason:
      _Fail("Restricted command '%s' failed: %s; output: %s",
            cmd, cmdresult.fail_reason, cmdresult.output)
    else:
      return cmdresult.output
  finally:
    if lock is not None:
      # Release lock at last
      lock.Close()
      lock = None


def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):
  """Creates or removes the watcher pause file.

  @type until: None or number
  @param until: Unix timestamp saying until when the watcher shouldn't run

  """
  if until is None:
    logging.info("Received request to no longer pause watcher")
    utils.RemoveFile(_filename)
  else:
    logging.info("Received request to pause watcher until %s", until)

    if not ht.TNumber(until):
      _Fail("Duration must be numeric")

    utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)


def ConfigureOVS(ovs_name, ovs_link):
  """Creates an OpenvSwitch on the node.

  This function sets up an OpenvSwitch on the node with the given name
  and connects it via a given Ethernet device.

  @type ovs_name: string
  @param ovs_name: Name of the OpenvSwitch to create.
  @type ovs_link: None or string
  @param ovs_link: Ethernet device for outside connection (can be missing)

  """
  # Initialize the OpenvSwitch
  result = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name])
  if result.failed:
    _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'"
          % (result.exit_code, result.output), log=True)

  # And connect it to a physical interface, if given
  if ovs_link:
    result = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link])
    if result.failed:
      _Fail("Failed to connect openvswitch to interface %s. Script return"
            " value: %s, output: '%s'" % (ovs_link, result.exit_code,
                                          result.output), log=True)


def GetFileInfo(file_path):
  """Checks if a file exists and returns information related to it.

  Currently returned information:
    - file size: int, size in bytes

  @type file_path: string
  @param file_path: Name of file to examine.

  @rtype: tuple of bool, dict
  @return: Whether the file exists, and a dictionary of information about the
      file gathered by os.stat.

  """
  try:
    stat_info = os.stat(file_path)
    values_dict = {
      constants.STAT_SIZE: stat_info.st_size,
      }
    return True, values_dict
  except EnvironmentError:
    # os.stat raises OSError rather than IOError; catching EnvironmentError
    # covers both
    return False, {}


class HooksRunner(object):
  """Hook runner.

  This class is instantiated on the node side (ganeti-noded) and not
  on the master side.

  """
  def __init__(self, hooks_base_dir=None):
    """Constructor for hooks runner.

    @type hooks_base_dir: str or None
    @param hooks_base_dir: if not None, this overrides the
        L{pathutils.HOOKS_BASE_DIR} (useful for unittests)

    """
    if hooks_base_dir is None:
      hooks_base_dir = pathutils.HOOKS_BASE_DIR
    # yeah, _BASE_DIR is not valid for attributes, we use it like a
    # constant
    self._BASE_DIR = hooks_base_dir  # pylint: disable=C0103

  def RunLocalHooks(self, node_list, hpath, phase, env):
    """Check that the hooks will be run only locally and then run them.

    """
    assert len(node_list) == 1
    node = node_list[0]
    _, myself = ssconf.GetMasterAndMyself()
    assert node == myself

    results = self.RunHooks(hpath, phase, env)

    # Return values in the form expected by HooksMaster
    return {node: (None, False, results)}

  def RunHooks(self, hpath, phase, env):
    """Run the scripts in the hooks directory.

    @type hpath: str
    @param hpath: the path to the hooks directory which
        holds the scripts
    @type phase: str
    @param phase: either L{constants.HOOKS_PHASE_PRE} or
        L{constants.HOOKS_PHASE_POST}
    @type env: dict
    @param env: dictionary with the environment for the hook
    @rtype: list
    @return: list of 3-element tuples:
      - script path
      - script result, one of L{constants.HKR_SUCCESS},
        L{constants.HKR_FAIL} or L{constants.HKR_SKIP}
      - output of the script

    @raise RPCFail: for invalid input parameters

    """
    if phase == constants.HOOKS_PHASE_PRE:
      suffix = "pre"
    elif phase == constants.HOOKS_PHASE_POST:
      suffix = "post"
    else:
      _Fail("Unknown hooks phase '%s'", phase)

    subdir = "%s-%s.d" % (hpath, suffix)
    dir_name = utils.PathJoin(self._BASE_DIR, subdir)

    results = []

    if not os.path.isdir(dir_name):
      # for non-existing/non-dirs, we simply exit instead of logging a
      # warning at every operation
      return results

    runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)

    for (relname, relstatus, runresult) in runparts_results:
      if relstatus == constants.RUNPARTS_SKIP:
        rrval = constants.HKR_SKIP
        output = ""
      elif relstatus == constants.RUNPARTS_ERR:
        rrval = constants.HKR_FAIL
        output = "Hook script execution error: %s" % runresult
      elif relstatus == constants.RUNPARTS_RUN:
        if runresult.failed:
          rrval = constants.HKR_FAIL
        else:
          rrval = constants.HKR_SUCCESS
        output = utils.SafeEncode(runresult.output.strip())
      results.append(("%s/%s" % (subdir, relname), rrval, output))

    return results


class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side.

  """
  @staticmethod
  def Run(name, idata, ial_params):
    """Run an iallocator script.

    @type name: str
    @param name: the iallocator script name
    @type idata: str
    @param idata: the allocator input data
    @type ial_params: list
    @param ial_params: the iallocator parameters

    @rtype: str
    @return: the stdout of the iallocator script; errors are reported by
       raising an RPCFail exception

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      _Fail("iallocator module '%s' not found in the search path", name)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name] + ial_params)
      if result.failed:
        _Fail("iallocator module '%s' failed: %s, output '%s'",
              name, result.fail_reason, result.output)
    finally:
      os.unlink(fin_name)

    return result.stdout


class DevCacheManager(object):
  """Simple class for managing a cache of block device information.

  """
  _DEV_PREFIX = "/dev/"
  _ROOT_DIR = pathutils.BDEV_CACHE_DIR

  @classmethod
  def _ConvertPath(cls, dev_path):
    """Converts a /dev/name path to the cache file name.

    This replaces slashes with underscores and strips the /dev
    prefix. It then returns the full path to the cache file.

    @type dev_path: str
    @param dev_path: the C{/dev/} path name
    @rtype: str
    @return: the converted path name

    """
    if dev_path.startswith(cls._DEV_PREFIX):
      dev_path = dev_path[len(cls._DEV_PREFIX):]
    dev_path = dev_path.replace("/", "_")
    fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
    return fpath

  @classmethod
  def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
    """Updates the cache information for a given device.

    @type dev_path: str
    @param dev_path: the pathname of the device
    @type owner: str
    @param owner: the owner (instance name) of the device
    @type on_primary: bool
    @param on_primary: whether this is the primary
        node or not
    @type iv_name: str
    @param iv_name: the instance-visible name of the
        device, as in objects.Disk.iv_name

    @rtype: None

    """
    if dev_path is None:
      logging.error("DevCacheManager.UpdateCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    if on_primary:
      state = "primary"
    else:
      state = "secondary"
    if iv_name is None:
      iv_name = "not_visible"
    fdata = "%s %s %s\n" % (str(owner), state, iv_name)
    try:
      utils.WriteFile(fpath, data=fdata)
    except EnvironmentError, err:
      logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

  @classmethod
  def RemoveCache(cls, dev_path):
    """Remove data for a dev_path.

    This is just a wrapper over L{utils.io.RemoveFile} with a converted
    path name and logging.

    @type dev_path: str
    @param dev_path: the pathname of the device

    @rtype: None

    """
    if dev_path is None:
      logging.error("DevCacheManager.RemoveCache got a None dev_path")
      return
    fpath = cls._ConvertPath(dev_path)
    try:
      utils.RemoveFile(fpath)
    except EnvironmentError, err:
      logging.exception("Can't remove bdev cache for %s: %s", dev_path, err)