Package ganeti :: Module backend

Source Code for Module ganeti.backend

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30   
  31  """Functions used by the node daemon 
  32   
  33  @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in 
  34       the L{UploadFile} function 
  35  @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted 
  36       in the L{_CleanDirectory} function 
  37   
  38  """ 
  39   
  40  # pylint: disable=E1103,C0302 
  41   
  42  # E1103: %s %r has no %r member (but some types could not be 
  43  # inferred), because the _TryOSFromDisk returns either (True, os_obj) 
  44  # or (False, "string") which confuses pylint 
  45   
  46  # C0302: This module has become too big and should be split up 
  47   
  48   
  49  import base64 
  50  import errno 
  51  import logging 
  52  import os 
  53  import os.path 
  54  import pycurl 
  55  import random 
  56  import re 
  57  import shutil 
  58  import signal 
  59  import stat 
  60  import tempfile 
  61  import time 
  62  import zlib 
  63  import contextlib 
  64  import collections 
  65   
  66  from ganeti import errors 
  67  from ganeti import http 
  68  from ganeti import utils 
  69  from ganeti import ssh 
  70  from ganeti import hypervisor 
  71  from ganeti.hypervisor import hv_base 
  72  from ganeti import constants 
  73  from ganeti.storage import bdev 
  74  from ganeti.storage import drbd 
  75  from ganeti.storage import extstorage 
  76  from ganeti.storage import filestorage 
  77  from ganeti import objects 
  78  from ganeti import ssconf 
  79  from ganeti import serializer 
  80  from ganeti import netutils 
  81  from ganeti import runtime 
  82  from ganeti import compat 
  83  from ganeti import pathutils 
  84  from ganeti import vcluster 
  85  from ganeti import ht 
  86  from ganeti.storage.base import BlockDev 
  87  from ganeti.storage.drbd import DRBD8 
  88  from ganeti import hooksmaster 
  89  import ganeti.metad as metad 
  90   
  91   
  92  _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" 
  93  _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ 
  94    pathutils.DATA_DIR, 
  95    pathutils.JOB_QUEUE_ARCHIVE_DIR, 
  96    pathutils.QUEUE_DIR, 
  97    pathutils.CRYPTO_KEYS_DIR, 
  98    ]) 
  99  _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 
 100  _X509_KEY_FILE = "key" 
 101  _X509_CERT_FILE = "cert" 
 102  _IES_STATUS_FILE = "status" 
 103  _IES_PID_FILE = "pid" 
 104  _IES_CA_FILE = "ca" 
 105   
 106  #: Valid LVS output line regex 
 107  _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") 
 108   
 109  # Actions for the master setup script 
 110  _MASTER_START = "start" 
 111  _MASTER_STOP = "stop" 
 112   
 113  #: Maximum file permissions for restricted command directory and executables 
 114  _RCMD_MAX_MODE = (stat.S_IRWXU | 
 115                    stat.S_IRGRP | stat.S_IXGRP | 
 116                    stat.S_IROTH | stat.S_IXOTH) 
 117   
 118  #: Delay before returning an error for restricted commands 
 119  _RCMD_INVALID_DELAY = 10 
 120   
 121  #: How long to wait to acquire lock for restricted commands (shorter than 
 122  #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many 
 123  #: command requests arrive 
 124  _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8 
  125 
  126 
  127  class RPCFail(Exception):
  128    """Class denoting RPC failure.
  129 
  130    Its argument is the error message.
  131 
  132    """
  133 
  134 
  135  def _GetInstReasonFilename(instance_name):
  136    """Path of the file containing the reason of the instance status change.
  137 
  138    @type instance_name: string
  139    @param instance_name: The name of the instance
  140    @rtype: string
  141    @return: The path of the file
  142 
  143    """
  144    return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
  145 
  146 
  147  def _StoreInstReasonTrail(instance_name, trail):
  148    """Serialize a reason trail related to an instance change of state to file.
  149 
  150    The exact location of the file depends on the name of the instance and on
  151    the configuration of the Ganeti cluster defined at deploy time.
  152 
  153    @type instance_name: string
  154    @param instance_name: The name of the instance
  155 
  156    @type trail: list of reasons
  157    @param trail: reason trail
  158 
  159    @rtype: None
  160 
  161    """
  162    json = serializer.DumpJson(trail)
  163    filename = _GetInstReasonFilename(instance_name)
  164    utils.WriteFile(filename, data=json)
  165 
  166 
  167  def _Fail(msg, *args, **kwargs):
  168    """Log an error and then raise an RPCFail exception.
  169 
  170    This exception is then handled specially in the ganeti daemon and
  171    turned into a 'failed' return type. As such, this function is a
  172    useful shortcut for logging the error and returning it to the master
  173    daemon.
  174 
  175    @type msg: string
  176    @param msg: the text of the exception
  177    @raise RPCFail
  178 
  179    """
  180    if args:
  181      msg = msg % args
  182    if "log" not in kwargs or kwargs["log"]:  # if we should log this error
  183      if "exc" in kwargs and kwargs["exc"]:
  184        logging.exception(msg)
  185      else:
  186        logging.error(msg)
  187    raise RPCFail(msg)
  188 
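As an aside, the typical call patterns for C{_Fail} follow directly from the
code above (the argument values here are illustrative placeholders, not code
from this module):

# Interpolates args into msg, logs the message, then raises RPCFail:
#   _Fail("Instance %s not found", instance_name)
# Logs the message together with the current exception's traceback:
#   _Fail("OS error: %s", err, exc=True)
# Raises without logging at all:
#   _Fail("handled by the caller", log=False)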
  189 
  190  def _GetConfig():
  191    """Simple wrapper to return a SimpleStore.
  192 
  193    @rtype: L{ssconf.SimpleStore}
  194    @return: a SimpleStore instance
  195 
  196    """
  197    return ssconf.SimpleStore()
  198 
  199 
  200  def _GetSshRunner(cluster_name):
  201    """Simple wrapper to return an SshRunner.
  202 
  203    @type cluster_name: str
  204    @param cluster_name: the cluster name, which is needed
  205        by the SshRunner constructor
  206    @rtype: L{ssh.SshRunner}
  207    @return: an SshRunner instance
  208 
  209    """
  210    return ssh.SshRunner(cluster_name)
  211 
  212 
  213  def _Decompress(data):
  214    """Unpacks data compressed by the RPC client.
  215 
  216    @type data: list or tuple
  217    @param data: Data sent by RPC client
  218    @rtype: str
  219    @return: Decompressed data
  220 
  221    """
  222    assert isinstance(data, (list, tuple))
  223    assert len(data) == 2
  224    (encoding, content) = data
  225    if encoding == constants.RPC_ENCODING_NONE:
  226      return content
  227    elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
  228      return zlib.decompress(base64.b64decode(content))
  229    else:
  230      raise AssertionError("Unknown data encoding")
  231 
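For orientation, the packing side that C{_Decompress} undoes lives in the RPC
client. A minimal sketch of such a packer follows; the helper name and the
4 KiB threshold are assumptions for illustration, not the rpc module's actual
code:

import base64
import zlib

from ganeti import constants

def _CompressForRpc(data, threshold=4096):
  """Sketch: pack 'data' the way _Decompress above expects it."""
  if len(data) < threshold:
    # Small payloads go over the wire unmodified
    return (constants.RPC_ENCODING_NONE, data)
  # Larger payloads are zlib-compressed, then base64-encoded
  return (constants.RPC_ENCODING_ZLIB_BASE64,
          base64.b64encode(zlib.compress(data)))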
  232 
  233  def _CleanDirectory(path, exclude=None):
  234    """Removes all regular files in a directory.
  235 
  236    @type path: str
  237    @param path: the directory to clean
  238    @type exclude: list
  239    @param exclude: list of files to be excluded, defaults
  240        to the empty list
  241 
  242    """
  243    if path not in _ALLOWED_CLEAN_DIRS:
  244      _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
  245            path)
  246 
  247    if not os.path.isdir(path):
  248      return
  249    if exclude is None:
  250      exclude = []
  251    else:
  252      # Normalize excluded paths
  253      exclude = [os.path.normpath(i) for i in exclude]
  254 
  255    for rel_name in utils.ListVisibleFiles(path):
  256      full_name = utils.PathJoin(path, rel_name)
  257      if full_name in exclude:
  258        continue
  259      if os.path.isfile(full_name) and not os.path.islink(full_name):
  260        utils.RemoveFile(full_name)
  261 
  262 
  263  def _BuildUploadFileList():
  264    """Build the list of allowed upload files.
  265 
  266    This is abstracted so that it's built only once at module import time.
  267 
  268    """
  269    allowed_files = set([
  270      pathutils.CLUSTER_CONF_FILE,
  271      pathutils.ETC_HOSTS,
  272      pathutils.SSH_KNOWN_HOSTS_FILE,
  273      pathutils.VNC_PASSWORD_FILE,
  274      pathutils.RAPI_CERT_FILE,
  275      pathutils.SPICE_CERT_FILE,
  276      pathutils.SPICE_CACERT_FILE,
  277      pathutils.RAPI_USERS_FILE,
  278      pathutils.CONFD_HMAC_KEY,
  279      pathutils.CLUSTER_DOMAIN_SECRET_FILE,
  280      ])
  281 
  282    for hv_name in constants.HYPER_TYPES:
  283      hv_class = hypervisor.GetHypervisorClass(hv_name)
  284      allowed_files.update(hv_class.GetAncillaryFiles()[0])
  285 
  286    assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
  287      "Allowed file storage paths should never be uploaded via RPC"
  288 
  289    return frozenset(allowed_files)
  290 
  291 
  292  _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
  293 
  294 
  295  def JobQueuePurge():
  296    """Removes job queue files and archived jobs.
  297 
  298    @rtype: tuple
  299    @return: True, None
  300 
  301    """
  302    _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  303    _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)
  304 
  305 
  306  def GetMasterNodeName():
  307    """Returns the master node name.
  308 
  309    @rtype: string
  310    @return: name of the master node
  311    @raise RPCFail: in case of errors
  312 
  313    """
  314    try:
  315      return _GetConfig().GetMasterNode()
  316    except errors.ConfigurationError, err:
  317      _Fail("Cluster configuration incomplete: %s", err, exc=True)
  318 
  319 
  320  def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  321    """Decorator that runs hooks before and after the decorated function.
  322 
  323    @type hook_opcode: string
  324    @param hook_opcode: opcode of the hook
  325    @type hooks_path: string
  326    @param hooks_path: path of the hooks
  327    @type env_builder_fn: function
  328    @param env_builder_fn: function that returns a dictionary containing the
  329      environment variables for the hooks. Will get all the parameters of the
  330      decorated function.
  331    @raise RPCFail: in case of pre-hook failure
  332 
  333    """
  334    def decorator(fn):
  335      def wrapper(*args, **kwargs):
  336        # Although the hooks run locally, we still have to pass an uuid,
  337        # which will then be ignored in RunLocalHooks.
  338        nodes = ([constants.DUMMY_UUID], [constants.DUMMY_UUID])
  339 
  340        env_fn = compat.partial(env_builder_fn, *args, **kwargs)
  341 
  342        cfg = _GetConfig()
  343        hr = HooksRunner()
  344        hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
  345                                     hr.RunLocalHooks, None, env_fn, None,
  346                                     logging.warning, cfg.GetClusterName(),
  347                                     cfg.GetMasterNode())
  348        hm.RunPhase(constants.HOOKS_PHASE_PRE)
  349        result = fn(*args, **kwargs)
  350        hm.RunPhase(constants.HOOKS_PHASE_POST)
  351 
  352        return result
  353      return wrapper
  354    return decorator
  355 
  356 
  357  def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  358    """Builds environment variables for master IP hooks.
  359 
  360    @type master_params: L{objects.MasterNetworkParameters}
  361    @param master_params: network parameters of the master
  362    @type use_external_mip_script: boolean
  363    @param use_external_mip_script: whether to use an external master IP
  364      address setup script (unused, but necessary per the implementation of the
  365      _RunLocalHooks decorator)
  366 
  367    """
  368    # pylint: disable=W0613
  369    ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  370    env = {
  371      "MASTER_NETDEV": master_params.netdev,
  372      "MASTER_IP": master_params.ip,
  373      "MASTER_NETMASK": str(master_params.netmask),
  374      "CLUSTER_IP_VERSION": str(ver),
  375    }
  376 
  377    return env
  378 
  379 
  380  def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  381    """Execute the master IP address setup script.
  382 
  383    @type master_params: L{objects.MasterNetworkParameters}
  384    @param master_params: network parameters of the master
  385    @type action: string
  386    @param action: action to pass to the script. Must be one of
  387      L{backend._MASTER_START} or L{backend._MASTER_STOP}
  388    @type use_external_mip_script: boolean
  389    @param use_external_mip_script: whether to use an external master IP
  390      address setup script
  391    @raise backend.RPCFail: if there are errors during the execution of the
  392      script
  393 
  394    """
  395    env = _BuildMasterIpEnv(master_params)
  396 
  397    if use_external_mip_script:
  398      setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  399    else:
  400      setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT
  401 
  402    result = utils.RunCmd([setup_script, action], env=env, reset_env=True)
  403 
  404    if result.failed:
  405      _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
  406            (action, result.exit_code, result.output), log=True)
  407 
  408 
  409  @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
  410                 _BuildMasterIpEnv)
  411  def ActivateMasterIp(master_params, use_external_mip_script):
  412    """Activate the IP address of the master daemon.
  413 
  414    @type master_params: L{objects.MasterNetworkParameters}
  415    @param master_params: network parameters of the master
  416    @type use_external_mip_script: boolean
  417    @param use_external_mip_script: whether to use an external master IP
  418      address setup script
  419    @raise RPCFail: in case of errors during the IP startup
  420 
  421    """
  422    _RunMasterSetupScript(master_params, _MASTER_START,
  423                          use_external_mip_script)
  424 
  425 
  426  def StartMasterDaemons(no_voting):
  427    """Activate local node as master node.
  428 
  429    The function will start the master daemons (ganeti-masterd and ganeti-rapi).
  430 
  431    @type no_voting: boolean
  432    @param no_voting: whether to start ganeti-masterd without a node vote
  433      but still non-interactively
  434    @rtype: None
  435 
  436    """
  437 
  438    if no_voting:
  439      daemon_args = "--no-voting --yes-do-it"
  440    else:
  441      daemon_args = ""
  442 
  443    env = {
  444      "EXTRA_LUXID_ARGS": daemon_args,
  445      "EXTRA_WCONFD_ARGS": daemon_args,
  446    }
  447 
  448    result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  449    if result.failed:
  450      msg = "Can't start Ganeti master: %s" % result.output
  451      logging.error(msg)
  452      _Fail(msg)
  453 
  454 
  455  @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
  456                 _BuildMasterIpEnv)
  457  def DeactivateMasterIp(master_params, use_external_mip_script):
  458    """Deactivate the master IP on this node.
  459 
  460    @type master_params: L{objects.MasterNetworkParameters}
  461    @param master_params: network parameters of the master
  462    @type use_external_mip_script: boolean
  463    @param use_external_mip_script: whether to use an external master IP
  464      address setup script
  465    @raise RPCFail: in case of errors during the IP turndown
  466 
  467    """
  468    _RunMasterSetupScript(master_params, _MASTER_STOP,
  469                          use_external_mip_script)
  470 
  471 
  472  def StopMasterDaemons():
  473    """Stop the master daemons on this node.
  474 
  475    Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.
  476 
  477    @rtype: None
  478 
  479    """
  480    # TODO: log and report back to the caller the error failures; we
  481    # need to decide in which case we fail the RPC for this
  482 
  483    result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  484    if result.failed:
  485      logging.error("Could not stop Ganeti master, command %s had exitcode %s"
  486                    " and error %s",
  487                    result.cmd, result.exit_code, result.output)
  488 
  489 
  490  def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  491    """Change the netmask of the master IP.
  492 
  493    @param old_netmask: the old value of the netmask
  494    @param netmask: the new value of the netmask
  495    @param master_ip: the master IP
  496    @param master_netdev: the master network device
  497 
  498    """
  499    if old_netmask == netmask:
  500      return
  501 
  502    if not netutils.IPAddress.Own(master_ip):
  503      _Fail("The master IP address is not up, not attempting to change its"
  504            " netmask")
  505 
  506    result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
  507                           "%s/%s" % (master_ip, netmask),
  508                           "dev", master_netdev, "label",
  509                           "%s:0" % master_netdev])
  510    if result.failed:
  511      _Fail("Could not set the new netmask on the master IP address")
  512 
  513    result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
  514                           "%s/%s" % (master_ip, old_netmask),
  515                           "dev", master_netdev, "label",
  516                           "%s:0" % master_netdev])
  517    if result.failed:
  518      _Fail("Could not bring down the master IP address with the old netmask")
  519 
  520 
  521  def EtcHostsModify(mode, host, ip):
  522    """Modify a host entry in /etc/hosts.
  523 
  524    @param mode: The mode to operate. Either add or remove entry
  525    @param host: The host to operate on
  526    @param ip: The ip associated with the entry
  527 
  528    """
  529    if mode == constants.ETC_HOSTS_ADD:
  530      if not ip:
  531        _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
  532              " present")
  533      utils.AddHostToEtcHosts(host, ip)
  534    elif mode == constants.ETC_HOSTS_REMOVE:
  535      if ip:
  536        _Fail("Mode 'remove' does not allow 'ip' parameter, but"
  537              " parameter is present")
  538      utils.RemoveHostFromEtcHosts(host)
  539    else:
  540      _Fail("Mode not supported")
  541 
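A brief usage sketch of the two supported modes (the host name and address
are illustrative values):

# Add an entry for node3, then remove it again:
EtcHostsModify(constants.ETC_HOSTS_ADD, "node3.example.com", "192.0.2.13")
EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node3.example.com", None)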
  542 
  543  def LeaveCluster(modify_ssh_setup):
  544    """Cleans up and removes the current node.
  545 
  546    This function cleans up and prepares the current node to be removed
  547    from the cluster.
  548 
  549    If processing is successful, then it raises an
  550    L{errors.QuitGanetiException} which is used as a special case to
  551    shutdown the node daemon.
  552 
  553    @param modify_ssh_setup: boolean
  554 
  555    """
  556    _CleanDirectory(pathutils.DATA_DIR)
  557    _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  558    JobQueuePurge()
  559 
  560    if modify_ssh_setup:
  561      try:
  562        priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)
  563 
  564        ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
  565 
  566        utils.RemoveFile(priv_key)
  567        utils.RemoveFile(pub_key)
  568      except errors.OpExecError:
  569        logging.exception("Error while processing ssh files")
  570      except IOError:
  571        logging.exception("At least one SSH file was not accessible.")
  572 
  573    try:
  574      utils.RemoveFile(pathutils.CONFD_HMAC_KEY)
  575      utils.RemoveFile(pathutils.RAPI_CERT_FILE)
  576      utils.RemoveFile(pathutils.SPICE_CERT_FILE)
  577      utils.RemoveFile(pathutils.SPICE_CACERT_FILE)
  578      utils.RemoveFile(pathutils.NODED_CERT_FILE)
  579    except:  # pylint: disable=W0702
  580      logging.exception("Error while removing cluster secrets")
  581 
  582    utils.StopDaemon(constants.CONFD)
  583    utils.StopDaemon(constants.MOND)
  584    utils.StopDaemon(constants.KVMD)
  585 
  586    # Raise a custom exception (handled in ganeti-noded)
  587    raise errors.QuitGanetiException(True, "Shutdown scheduled")
  588 
  589 
  590  def _CheckStorageParams(params, num_params):
  591    """Performs sanity checks for storage parameters.
  592 
  593    @type params: list
  594    @param params: list of storage parameters
  595    @type num_params: int
  596    @param num_params: expected number of parameters
  597 
  598    """
  599    if params is None:
  600      raise errors.ProgrammerError("No storage parameters for storage"
  601                                   " reporting were provided.")
  602    if not isinstance(params, list):
  603      raise errors.ProgrammerError("The storage parameters are not of type"
  604                                   " list: '%s'" % params)
  605    if not len(params) == num_params:
  606      raise errors.ProgrammerError("Did not receive the expected number of"
  607                                   " storage parameters: expected %s,"
  608                                   " received '%s'" % (num_params, len(params)))
  609 
  610 
  611  def _CheckLvmStorageParams(params):
  612    """Performs sanity check for the 'exclusive storage' flag.
  613 
  614    @see: C{_CheckStorageParams}
  615 
  616    """
  617    _CheckStorageParams(params, 1)
  618    excl_stor = params[0]
  619    if not isinstance(params[0], bool):
  620      raise errors.ProgrammerError("Exclusive storage parameter is not"
  621                                   " boolean: '%s'." % excl_stor)
  622    return excl_stor
  623 
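The parameter contract of the LVM variant can be summarized in a short
doctest-style sketch (error text abridged):

# _CheckLvmStorageParams([True])    -> returns True
# _CheckLvmStorageParams([False])   -> returns False
# _CheckLvmStorageParams([])        -> ProgrammerError (wrong parameter count)
# _CheckLvmStorageParams(["yes"])   -> ProgrammerError (flag is not a boolean)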
  624 
  625  def _GetLvmVgSpaceInfo(name, params):
  626    """Wrapper around C{_GetVgInfo} which checks the storage parameters.
  627 
  628    @type name: string
  629    @param name: name of the volume group
  630    @type params: list
  631    @param params: list of storage parameters, which in this case should
  632      contain only one, the exclusive storage flag
  633 
  634    """
  635    excl_stor = _CheckLvmStorageParams(params)
  636    return _GetVgInfo(name, excl_stor)
  637 
  638 
  639  def _GetVgInfo(
  640      name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  641    """Retrieves information about an LVM volume group.
  642 
  643    """
  644    # TODO: GetVGInfo supports returning information for multiple VGs at once
  645    vginfo = info_fn([name], excl_stor)
  646    if vginfo:
  647      vg_free = int(round(vginfo[0][0], 0))
  648      vg_size = int(round(vginfo[0][1], 0))
  649    else:
  650      vg_free = None
  651      vg_size = None
  652 
  653    return {
  654      "type": constants.ST_LVM_VG,
  655      "name": name,
  656      "storage_free": vg_free,
  657      "storage_size": vg_size,
  658    }
  659 
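The C{info_fn} hook exists mainly for testability; a stub standing in for
C{bdev.LogicalVolume.GetVGInfo} only needs to provide the two fields read
above (a sketch, sizes in MiB and all values illustrative):

def _StubVgInfo(names, excl_stor):
  # One (vg_free, vg_size, ...) tuple per requested VG; only the first
  # two fields are used by _GetVgInfo.
  return [(12288.0, 20480.0)]

# _GetVgInfo("xenvg", False, info_fn=_StubVgInfo) would then yield:
#   {"type": constants.ST_LVM_VG, "name": "xenvg",
#    "storage_free": 12288, "storage_size": 20480}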
  660 
  661  def _GetLvmPvSpaceInfo(name, params):
  662    """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.
  663 
  664    @see: C{_GetLvmVgSpaceInfo}
  665 
  666    """
  667    excl_stor = _CheckLvmStorageParams(params)
  668    return _GetVgSpindlesInfo(name, excl_stor)
  669 
  670 
  671  def _GetVgSpindlesInfo(
  672      name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  673    """Retrieves information about spindles in an LVM volume group.
  674 
  675    @type name: string
  676    @param name: VG name
  677    @type excl_stor: bool
  678    @param excl_stor: exclusive storage
  679    @rtype: dict
  680    @return: dictionary with keys "type", "name", "storage_free" and
  681      "storage_size", the latter two being free and total spindles respectively
  682 
  683    """
  684    if excl_stor:
  685      (vg_free, vg_size) = info_fn(name)
  686    else:
  687      vg_free = 0
  688      vg_size = 0
  689    return {
  690      "type": constants.ST_LVM_PV,
  691      "name": name,
  692      "storage_free": vg_free,
  693      "storage_size": vg_size,
  694    }
  695 
  696 
  697  def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  698    """Retrieves node information from a hypervisor.
  699 
  700    The information returned depends on the hypervisor. Common items:
  701 
  702      - vg_size is the size of the configured volume group in MiB
  703      - vg_free is the free size of the volume group in MiB
  704      - memory_dom0 is the memory allocated for domain0 in MiB
  705      - memory_free is the currently available (free) ram in MiB
  706      - memory_total is the total amount of ram in MiB
  707      - hv_version: the hypervisor version, if available
  708 
  709    @type hvparams: dict of string
  710    @param hvparams: the hypervisor's hvparams
  711 
  712    """
  713    return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)
  714 
  715 
  716  def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  717    """Retrieves node information for all hypervisors.
  718 
  719    See C{_GetHvInfo} for information on the output.
  720 
  721    @type hv_specs: list of pairs (string, dict of strings)
  722    @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  723 
  724    """
  725    if hv_specs is None:
  726      return None
  727 
  728    result = []
  729    for hvname, hvparams in hv_specs:
  730      result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
  731    return result
  732 
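The shape of C{hv_specs} is a plain list of (name, hvparams) pairs, for
example (values illustrative, "kvm" being the usual name behind
C{constants.HT_KVM}):

hv_specs = [
  ("kvm", {"kernel_path": "/boot/vmlinuz"}),
  ("xen-pvm", {}),
]
# Returns one node-info dictionary per hypervisor, in order:
infos = _GetHvInfoAll(hv_specs)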
  733 
  734  def _GetNamedNodeInfo(names, fn):
  735    """Calls C{fn} for all names in C{names} and returns a list of results.
  736 
  737    @rtype: None or list
  738 
  739    """
  740    if names is None:
  741      return None
  742    else:
  743      return map(fn, names)
  744 
  745 
  746  def GetNodeInfo(storage_units, hv_specs):
  747    """Gives back a hash with different information about the node.
  748 
  749    @type storage_units: list of tuples (string, string, list)
  750    @param storage_units: List of tuples (storage unit, identifier, parameters)
  751      to ask for disk space information. In case of lvm-vg, the identifier is
  752      the VG name. The parameters can contain additional, storage-type-specific
  753      parameters, for example exclusive storage for lvm storage.
  754    @type hv_specs: list of pairs (string, dict of strings)
  755    @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  756    @rtype: tuple; (string, None/dict, None/dict)
  757    @return: Tuple containing boot ID, volume group information and hypervisor
  758      information
  759 
  760    """
  761    bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  762    storage_info = _GetNamedNodeInfo(
  763      storage_units,
  764      (lambda (storage_type, storage_key, storage_params):
  765          _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
  766    hv_info = _GetHvInfoAll(hv_specs)
  767    return (bootid, storage_info, hv_info)
  768 
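Putting the pieces together, a request for LVM VG and file storage reporting
might look like this sketch (paths and names are illustrative; the parameter
lists follow the per-type contracts checked above):

storage_units = [
  (constants.ST_LVM_VG, "xenvg", [False]),               # [excl_stor flag]
  (constants.ST_FILE, "/srv/ganeti/file-storage", []),   # no parameters
]
hv_specs = [("kvm", {})]
(boot_id, storage_info, hv_info) = GetNodeInfo(storage_units, hv_specs)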
  769 
  770  def _GetFileStorageSpaceInfo(path, params):
  771    """Wrapper around filestorage.GetSpaceInfo.
  772 
  773    The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  774    and ignore the *args parameter to not leak it into the filestorage
  775    module's code.
  776 
  777    @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
  778      parameters.
  779 
  780    """
  781    _CheckStorageParams(params, 0)
  782    return filestorage.GetFileStorageSpaceInfo(path)
  783 
  784 
  785  # FIXME: implement storage reporting for all missing storage types.
  786  _STORAGE_TYPE_INFO_FN = {
  787    constants.ST_BLOCK: None,
  788    constants.ST_DISKLESS: None,
  789    constants.ST_EXT: None,
  790    constants.ST_FILE: _GetFileStorageSpaceInfo,
  791    constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  792    constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  793    constants.ST_SHARED_FILE: None,
  794    constants.ST_GLUSTER: None,
  795    constants.ST_RADOS: None,
  796  }
  797 
  798 
  799  def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  800    """Looks up and applies the correct function to calculate free and total
  801    storage for the given storage type.
  802 
  803    @type storage_type: string
  804    @param storage_type: the storage type for which the storage shall be reported
  805    @type storage_key: string
  806    @param storage_key: identifier of a storage unit, e.g. the volume group name
  807      of an LVM storage unit
  808    @type args: any
  809    @param args: various parameters that can be used for storage reporting. These
  810      parameters and their semantics vary from storage type to storage type and
  811      are just propagated in this function.
  812    @return: the results of the application of the storage space function (see
  813      _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
  814      storage type
  815    @raises NotImplementedError: for storage types which don't support space
  816      reporting yet
  817    """
  818    fn = _STORAGE_TYPE_INFO_FN[storage_type]
  819    if fn is not None:
  820      return fn(storage_key, *args)
  821    else:
  822      raise NotImplementedError
  823 
  824 
  825  def _CheckExclusivePvs(pvi_list):
  826    """Check that PVs are not shared among LVs.
  827 
  828    @type pvi_list: list of L{objects.LvmPvInfo} objects
  829    @param pvi_list: information about the PVs
  830 
  831    @rtype: list of tuples (string, list of strings)
  832    @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])
  833 
  834    """
  835    res = []
  836    for pvi in pvi_list:
  837      if len(pvi.lv_list) > 1:
  838        res.append((pvi.name, pvi.lv_list))
  839    return res
  840 
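A self-contained sketch of the exclusivity check, using a namedtuple as a
stand-in for L{objects.LvmPvInfo} (only the two attributes read above):

import collections

FakePv = collections.namedtuple("FakePv", ["name", "lv_list"])

pvs = [FakePv("/dev/sda1", ["lv1"]),
       FakePv("/dev/sdb1", ["lv2", "lv3"])]
# Only the PV carrying more than one LV is reported:
assert _CheckExclusivePvs(pvs) == [("/dev/sdb1", ["lv2", "lv3"])]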
  841 
  842  def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
  843                         get_hv_fn=hypervisor.GetHypervisor):
  844    """Verifies the hypervisor. Appends the results to the 'results' list.
  845 
  846    @type what: C{dict}
  847    @param what: a dictionary of things to check
  848    @type vm_capable: boolean
  849    @param vm_capable: whether or not this node is vm capable
  850    @type result: dict
  851    @param result: dictionary of verification results; results of the
  852      verifications in this function will be added here
  853    @type all_hvparams: dict of dict of string
  854    @param all_hvparams: dictionary mapping hypervisor names to hvparams
  855    @type get_hv_fn: function
  856    @param get_hv_fn: function to retrieve the hypervisor, to improve testability
  857 
  858    """
  859    if not vm_capable:
  860      return
  861 
  862    if constants.NV_HYPERVISOR in what:
  863      result[constants.NV_HYPERVISOR] = {}
  864      for hv_name in what[constants.NV_HYPERVISOR]:
  865        hvparams = all_hvparams[hv_name]
  866        try:
  867          val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
  868        except errors.HypervisorError, err:
  869          val = "Error while checking hypervisor: %s" % str(err)
  870        result[constants.NV_HYPERVISOR][hv_name] = val
  871 
  872 
  873  def _VerifyHvparams(what, vm_capable, result,
  874                      get_hv_fn=hypervisor.GetHypervisor):
  875    """Verifies the hvparams. Appends the results to the 'results' list.
  876 
  877    @type what: C{dict}
  878    @param what: a dictionary of things to check
  879    @type vm_capable: boolean
  880    @param vm_capable: whether or not this node is vm capable
  881    @type result: dict
  882    @param result: dictionary of verification results; results of the
  883      verifications in this function will be added here
  884    @type get_hv_fn: function
  885    @param get_hv_fn: function to retrieve the hypervisor, to improve testability
  886 
  887    """
  888    if not vm_capable:
  889      return
  890 
  891    if constants.NV_HVPARAMS in what:
  892      result[constants.NV_HVPARAMS] = []
  893      for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
  894        try:
  895          logging.info("Validating hv %s, %s", hv_name, hvparms)
  896          get_hv_fn(hv_name).ValidateParameters(hvparms)
  897        except errors.HypervisorError, err:
  898          result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))
  899 
  900 
  901  def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  902    """Verifies the instance list.
  903 
  904    @type what: C{dict}
  905    @param what: a dictionary of things to check
  906    @type vm_capable: boolean
  907    @param vm_capable: whether or not this node is vm capable
  908    @type result: dict
  909    @param result: dictionary of verification results; results of the
  910      verifications in this function will be added here
  911    @type all_hvparams: dict of dict of string
  912    @param all_hvparams: dictionary mapping hypervisor names to hvparams
  913 
  914    """
  915    if constants.NV_INSTANCELIST in what and vm_capable:
  916      # GetInstanceList can fail
  917      try:
  918        val = GetInstanceList(what[constants.NV_INSTANCELIST],
  919                              all_hvparams=all_hvparams)
  920      except RPCFail, err:
  921        val = str(err)
  922      result[constants.NV_INSTANCELIST] = val
  923 
  924 
  925  def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  926    """Verifies the node info.
  927 
  928    @type what: C{dict}
  929    @param what: a dictionary of things to check
  930    @type vm_capable: boolean
  931    @param vm_capable: whether or not this node is vm capable
  932    @type result: dict
  933    @param result: dictionary of verification results; results of the
  934      verifications in this function will be added here
  935    @type all_hvparams: dict of dict of string
  936    @param all_hvparams: dictionary mapping hypervisor names to hvparams
  937 
  938    """
  939    if constants.NV_HVINFO in what and vm_capable:
  940      hvname = what[constants.NV_HVINFO]
  941      hyper = hypervisor.GetHypervisor(hvname)
  942      hvparams = all_hvparams[hvname]
  943      result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)
  944 
  945 
  946  def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  947    """Verify the existence and validity of the client SSL certificate.
  948 
  949    Also, verify that the client certificate is not self-signed. Self-
  950    signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  951    and should be replaced by client certificates signed by the server
  952    certificate. Hence we output a warning when we encounter a self-signed
  953    one.
  954 
  955    """
  956    create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  957    if not os.path.exists(cert_file):
  958      return (constants.CV_ERROR,
  959              "The client certificate does not exist. Run '%s' to create"
  960              " client certificates for all nodes." % create_cert_cmd)
  961 
  962    (errcode, msg) = utils.VerifyCertificate(cert_file)
  963    if errcode is not None:
  964      return (errcode, msg)
  965 
  966    (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
  967    if errcode is not None:
  968      return (errcode, msg)
  969 
  970    # if everything is fine, we return the digest to be compared to the config
  971    return (None, utils.GetCertificateDigest(cert_filename=cert_file))
  972 
  973 
  974  def _VerifySshSetup(node_status_list, my_name, ssh_key_type,
  975                      ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS):
  976    """Verifies the state of the SSH key files.
  977 
  978    @type node_status_list: list of tuples
  979    @param node_status_list: list of nodes of the cluster associated with a
  980      couple of flags: (uuid, name, is_master_candidate,
  981      is_potential_master_candidate, online)
  982    @type my_name: str
  983    @param my_name: name of this node
  984    @type ssh_key_type: one of L{constants.SSHK_ALL}
  985    @param ssh_key_type: type of key used on nodes
  986    @type ganeti_pub_keys_file: str
  987    @param ganeti_pub_keys_file: filename of the public keys file
  988 
  989    """
  990    if node_status_list is None:
  991      return ["No node list to check against the pub_key_file received."]
  992 
  993    my_status_list = [(my_uuid, name, mc, pot_mc, online) for
  994                      (my_uuid, name, mc, pot_mc, online)
  995                      in node_status_list if name == my_name]
  996    if len(my_status_list) == 0:
  997      return ["Cannot find node information for node '%s'." % my_name]
  998    (my_uuid, _, _, potential_master_candidate, online) = \
  999      my_status_list[0]
 1000 
 1001    result = []
 1002 
 1003    if not os.path.exists(ganeti_pub_keys_file):
 1004      result.append("The public key file '%s' does not exist. Consider running"
 1005                    " 'gnt-cluster renew-crypto --new-ssh-keys"
 1006                    " [--no-ssh-key-check]' to fix this." % ganeti_pub_keys_file)
 1007      return result
 1008 
 1009    pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list]
 1010    offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list
 1011                     if not online]
 1012    pub_keys = ssh.QueryPubKeyFile(None, key_file=ganeti_pub_keys_file)
 1013 
 1014    if potential_master_candidate:
 1015      # Check that the set of potential master candidates matches the
 1016      # public key file
 1017      pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes)
 1018      pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes)
 1019      missing_uuids = set([])
 1020      if pub_uuids_set != pot_mc_uuids_set:
 1021        unknown_uuids = pub_uuids_set - pot_mc_uuids_set
 1022        if unknown_uuids:
 1023          result.append("The following node UUIDs are listed in the public key"
 1024                        " file on node '%s', but are not potential master"
 1025                        " candidates: %s."
 1026                        % (my_name, ", ".join(list(unknown_uuids))))
 1027        missing_uuids = pot_mc_uuids_set - pub_uuids_set
 1028        if missing_uuids:
 1029          result.append("The following node UUIDs of potential master candidates"
 1030                        " are missing in the public key file on node %s: %s."
 1031                        % (my_name, ", ".join(list(missing_uuids))))
 1032 
 1033      (_, key_files) = \
 1034        ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
 1035      (_, node_pub_key_file) = key_files[ssh_key_type]
 1036 
 1037      my_keys = pub_keys[my_uuid]
 1038 
 1039      node_pub_key = utils.ReadFile(node_pub_key_file)
 1040      if node_pub_key.strip() not in my_keys:
 1041        result.append("The SSH key of node %s does not match this node's key"
 1042                      " in the pub key file." % my_name)
 1043      if len(my_keys) != 1:
 1044        result.append("There is more than one key for node %s in the public key"
 1045                      " file." % my_name)
 1046    else:
 1047      if len(pub_keys.keys()) > 0:
 1048        result.append("The public key file of node '%s' is not empty, although"
 1049                      " the node is not a potential master candidate."
 1050                      % my_name)
 1051 
 1052    # Check that all master candidate keys are in the authorized_keys file
 1053    (auth_key_file, _) = \
 1054      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
 1055    for (uuid, name, mc, _, online) in node_status_list:
 1056      if not online:
 1057        continue
 1058      if uuid in missing_uuids:
 1059        continue
 1060      if mc:
 1061        for key in pub_keys[uuid]:
 1062          if not ssh.HasAuthorizedKey(auth_key_file, key):
 1063            result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
 1064                          " not in the 'authorized_keys' file of node '%s'."
 1065                          % (name, uuid, my_name))
 1066      else:
 1067        for key in pub_keys[uuid]:
 1068          if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
 1069            result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
 1070                          " 'authorized_keys' file of node '%s'."
 1071                          % (name, uuid, my_name))
 1072          if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
 1073            result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
 1074                          " in the 'authorized_keys' file of itself."
 1075                          % (my_name, uuid))
 1076 
 1077    return result
 1078 
 1079 
 1080  def _VerifySshClutter(node_status_list, my_name):
 1081    """Verifies that the 'authorized_keys' files are not cluttered up.
 1082 
 1083    @type node_status_list: list of tuples
 1084    @param node_status_list: list of nodes of the cluster associated with a
 1085      couple of flags: (uuid, name, is_master_candidate,
 1086      is_potential_master_candidate, online)
 1087    @type my_name: str
 1088    @param my_name: name of this node
 1089 
 1090    """
 1091    result = []
 1092    (auth_key_file, _) = \
 1093      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
 1094    node_names = [name for (_, name, _, _, _) in node_status_list]
 1095    multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names)
 1096    if multiple_occurrences:
 1097      msg = "There are hosts which have more than one SSH key stored for the" \
 1098            " same user in the 'authorized_keys' file of node %s. This can be" \
 1099            " due to an unsuccessful operation which cluttered up the" \
 1100            " 'authorized_keys' file. We recommend to clean this up manually. " \
 1101            % my_name
 1102      for host, occ in multiple_occurrences.items():
 1103        msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ))
 1104      result.append(msg)
 1105 
 1106    return result
 1107 
 1108 
 1109  def VerifyNode(what, cluster_name, all_hvparams):
 1110    """Verify the status of the local node.
 1111 
 1112    Based on the input L{what} parameter, various checks are done on the
 1113    local node.
 1114 
 1115    If the I{filelist} key is present, this list of
 1116    files is checksummed and the file/checksum pairs are returned.
 1117 
 1118    If the I{nodelist} key is present, we check that we have
 1119    connectivity via ssh with the target nodes (and check the hostname
 1120    report).
 1121 
 1122    If the I{node-net-test} key is present, we check that we have
 1123    connectivity to the given nodes via both primary IP and, if
 1124    applicable, secondary IPs.
 1125 
 1126    @type what: C{dict}
 1127    @param what: a dictionary of things to check:
 1128      - filelist: list of files for which to compute checksums
 1129      - nodelist: list of nodes we should check ssh communication with
 1130      - node-net-test: list of nodes we should check node daemon port
 1131        connectivity with
 1132      - hypervisor: list with hypervisors to run the verify for
 1133    @type cluster_name: string
 1134    @param cluster_name: the cluster's name
 1135    @type all_hvparams: dict of dict of strings
 1136    @param all_hvparams: a dictionary mapping hypervisor names to hvparams
 1137    @rtype: dict
 1138    @return: a dictionary with the same keys as the input dict, and
 1139      values representing the result of the checks
 1140 
 1141    """
 1142    result = {}
 1143    my_name = netutils.Hostname.GetSysName()
 1144    port = netutils.GetDaemonPort(constants.NODED)
 1145    vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])
 1146 
 1147    _VerifyHypervisors(what, vm_capable, result, all_hvparams)
 1148    _VerifyHvparams(what, vm_capable, result)
 1149 
 1150    if constants.NV_FILELIST in what:
 1151      fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
 1152                                                what[constants.NV_FILELIST]))
 1153      result[constants.NV_FILELIST] = \
 1154        dict((vcluster.MakeVirtualPath(key), value)
 1155             for (key, value) in fingerprints.items())
 1156 
 1157    if constants.NV_CLIENT_CERT in what:
 1158      result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()
 1159 
 1160    if constants.NV_SSH_SETUP in what:
 1161      node_status_list, key_type = what[constants.NV_SSH_SETUP]
 1162      result[constants.NV_SSH_SETUP] = \
 1163        _VerifySshSetup(node_status_list, my_name, key_type)
 1164      if constants.NV_SSH_CLUTTER in what:
 1165        result[constants.NV_SSH_CLUTTER] = \
 1166          _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name)
 1167 
 1168    if constants.NV_NODELIST in what:
 1169      (nodes, bynode, mcs) = what[constants.NV_NODELIST]
 1170 
 1171      # Add nodes from other groups (different for each node)
 1172      try:
 1173        nodes.extend(bynode[my_name])
 1174      except KeyError:
 1175        pass
 1176 
 1177      # Use a random order
 1178      random.shuffle(nodes)
 1179 
 1180      # Try to contact all nodes
 1181      val = {}
 1182      ssh_port_map = ssconf.SimpleStore().GetSshPortMap()
 1183      for node in nodes:
 1184        # We only test if master candidates can communicate to other nodes.
 1185        # We cannot test if normal nodes cannot communicate with other nodes,
 1186        # because the administrator might have installed additional SSH keys,
 1187        # over which Ganeti has no power.
 1188        if my_name in mcs:
 1189          success, message = _GetSshRunner(cluster_name). \
 1190                                VerifyNodeHostname(node, ssh_port_map[node])
 1191          if not success:
 1192            val[node] = message
 1193 
 1194      result[constants.NV_NODELIST] = val
 1195 
 1196    if constants.NV_NODENETTEST in what:
 1197      result[constants.NV_NODENETTEST] = tmp = {}
 1198      my_pip = my_sip = None
 1199      for name, pip, sip in what[constants.NV_NODENETTEST]:
 1200        if name == my_name:
 1201          my_pip = pip
 1202          my_sip = sip
 1203          break
 1204      if not my_pip:
 1205        tmp[my_name] = ("Can't find my own primary/secondary IP"
 1206                        " in the node list")
 1207      else:
 1208        for name, pip, sip in what[constants.NV_NODENETTEST]:
 1209          fail = []
 1210          if not netutils.TcpPing(pip, port, source=my_pip):
 1211            fail.append("primary")
 1212          if sip != pip:
 1213            if not netutils.TcpPing(sip, port, source=my_sip):
 1214              fail.append("secondary")
 1215          if fail:
 1216            tmp[name] = ("failure using the %s interface(s)" %
 1217                         " and ".join(fail))
 1218 
 1219    if constants.NV_MASTERIP in what:
 1220      # FIXME: add checks on incoming data structures (here and in the
 1221      # rest of the function)
 1222      master_name, master_ip = what[constants.NV_MASTERIP]
 1223      if master_name == my_name:
 1224        source = constants.IP4_ADDRESS_LOCALHOST
 1225      else:
 1226        source = None
 1227      result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
 1228                                                       source=source)
 1229 
 1230    if constants.NV_USERSCRIPTS in what:
 1231      result[constants.NV_USERSCRIPTS] = \
 1232        [script for script in what[constants.NV_USERSCRIPTS]
 1233         if not utils.IsExecutable(script)]
 1234 
 1235    if constants.NV_OOB_PATHS in what:
 1236      result[constants.NV_OOB_PATHS] = tmp = []
 1237      for path in what[constants.NV_OOB_PATHS]:
 1238        try:
 1239          st = os.stat(path)
 1240        except OSError, err:
 1241          tmp.append("error stating out of band helper: %s" % err)
 1242        else:
 1243          if stat.S_ISREG(st.st_mode):
 1244            if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
 1245              tmp.append(None)
 1246            else:
 1247              tmp.append("out of band helper %s is not executable" % path)
 1248          else:
 1249            tmp.append("out of band helper %s is not a file" % path)
 1250 
 1251    if constants.NV_LVLIST in what and vm_capable:
 1252      try:
 1253        val = GetVolumeList(utils.ListVolumeGroups().keys())
 1254      except RPCFail, err:
 1255        val = str(err)
 1256      result[constants.NV_LVLIST] = val
 1257 
 1258    _VerifyInstanceList(what, vm_capable, result, all_hvparams)
 1259 
 1260    if constants.NV_VGLIST in what and vm_capable:
 1261      result[constants.NV_VGLIST] = utils.ListVolumeGroups()
 1262 
 1263    if constants.NV_PVLIST in what and vm_capable:
 1264      check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
 1265      val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
 1266                                         filter_allocatable=False,
 1267                                         include_lvs=check_exclusive_pvs)
 1268      if check_exclusive_pvs:
 1269        result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
 1270        for pvi in val:
 1271          # Avoid sending useless data on the wire
 1272          pvi.lv_list = []
 1273      result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)
 1274 
 1275    if constants.NV_VERSION in what:
 1276      result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
 1277                                      constants.RELEASE_VERSION)
 1278 
 1279    _VerifyNodeInfo(what, vm_capable, result, all_hvparams)
 1280 
 1281    if constants.NV_DRBDVERSION in what and vm_capable:
 1282      try:
 1283        drbd_version = DRBD8.GetProcInfo().GetVersionString()
 1284      except errors.BlockDeviceError, err:
 1285        logging.warning("Can't get DRBD version", exc_info=True)
 1286        drbd_version = str(err)
 1287      result[constants.NV_DRBDVERSION] = drbd_version
 1288 
 1289    if constants.NV_DRBDLIST in what and vm_capable:
 1290      try:
 1291        used_minors = drbd.DRBD8.GetUsedDevs()
 1292      except errors.BlockDeviceError, err:
 1293        logging.warning("Can't get used minors list", exc_info=True)
 1294        used_minors = str(err)
 1295      result[constants.NV_DRBDLIST] = used_minors
 1296 
 1297    if constants.NV_DRBDHELPER in what and vm_capable:
 1298      status = True
 1299      try:
 1300        payload = drbd.DRBD8.GetUsermodeHelper()
 1301      except errors.BlockDeviceError, err:
 1302        logging.error("Can't get DRBD usermode helper: %s", str(err))
 1303        status = False
 1304        payload = str(err)
 1305      result[constants.NV_DRBDHELPER] = (status, payload)
 1306 
 1307    if constants.NV_NODESETUP in what:
 1308      result[constants.NV_NODESETUP] = tmpr = []
 1309      if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
 1310        tmpr.append("The sysfs filesystem doesn't seem to be mounted"
 1311                    " under /sys, missing required directories /sys/block"
 1312                    " and /sys/class/net")
 1313      if (not os.path.isdir("/proc/sys") or
 1314          not os.path.isfile("/proc/sysrq-trigger")):
 1315        tmpr.append("The procfs filesystem doesn't seem to be mounted"
 1316                    " under /proc, missing required directory /proc/sys and"
 1317                    " the file /proc/sysrq-trigger")
 1318 
 1319    if constants.NV_TIME in what:
 1320      result[constants.NV_TIME] = utils.SplitTime(time.time())
 1321 
 1322    if constants.NV_OSLIST in what and vm_capable:
 1323      result[constants.NV_OSLIST] = DiagnoseOS()
 1324 
 1325    if constants.NV_BRIDGES in what and vm_capable:
 1326      result[constants.NV_BRIDGES] = [bridge
 1327                                      for bridge in what[constants.NV_BRIDGES]
 1328                                      if not utils.BridgeExists(bridge)]
 1329 
 1330    if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
 1331      result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
 1332        filestorage.ComputeWrongFileStoragePaths()
 1333 
 1334    if what.get(constants.NV_FILE_STORAGE_PATH):
 1335      pathresult = filestorage.CheckFileStoragePath(
 1336        what[constants.NV_FILE_STORAGE_PATH])
 1337      if pathresult:
 1338        result[constants.NV_FILE_STORAGE_PATH] = pathresult
 1339 
 1340    if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
 1341      pathresult = filestorage.CheckFileStoragePath(
 1342        what[constants.NV_SHARED_FILE_STORAGE_PATH])
 1343      if pathresult:
 1344        result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult
 1345 
 1346    return result
 1347 
 1348 
 1349  def GetCryptoTokens(token_requests):
 1350    """Perform actions on the node's cryptographic tokens.
 1351 
 1352    Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
 1353    for 'ssl'. Action 'get' returns the digest of the public client ssl
 1354    certificate. Action 'create' creates a new client certificate and private key
 1355    and also returns the digest of the certificate. The third element of a
 1356    token request is a dictionary of optional parameters for the actions; so far
 1357    only the filename is supported.
 1358 
 1359    @type token_requests: list of tuples of (string, string, dict), where the
 1360      first string is in constants.CRYPTO_TYPES, the second in
 1361      constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
 1362      to string.
 1363    @param token_requests: list of requests of cryptographic tokens and actions
 1364      to perform on them. The actions come with a dictionary of options.
 1365    @rtype: list of tuples (string, string)
 1366    @return: list of tuples of the token type and the public crypto token
 1367 
 1368    """
 1369    tokens = []
 1370    for (token_type, action, _) in token_requests:
 1371      if token_type not in constants.CRYPTO_TYPES:
 1372        raise errors.ProgrammerError("Token type '%s' not supported." %
 1373                                     token_type)
 1374      if action not in constants.CRYPTO_ACTIONS:
 1375        raise errors.ProgrammerError("Action '%s' is not supported." %
 1376                                     action)
 1377      if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
 1378        tokens.append((token_type,
 1379                       utils.GetCertificateDigest()))
 1380    return tokens
 1381 
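An illustrative request for the SSL certificate digest; the 'get' action
constant is assumed here to be C{constants.CRYPTO_ACTION_GET} (any member of
C{constants.CRYPTO_ACTIONS} passes the check above):

reqs = [(constants.CRYPTO_TYPE_SSL_DIGEST, constants.CRYPTO_ACTION_GET, {})]
tokens = GetCryptoTokens(reqs)
# -> [(constants.CRYPTO_TYPE_SSL_DIGEST, "<hex digest of the client cert>")]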
 1382 
 1383  def EnsureDaemon(daemon_name, run):
 1384    """Ensures the given daemon is running or stopped.
 1385 
 1386    @type daemon_name: string
 1387    @param daemon_name: name of the daemon (e.g., constants.KVMD)
 1388 
 1389    @type run: bool
 1390    @param run: whether to start or stop the daemon
 1391 
 1392    @rtype: bool
 1393    @return: 'True' if daemon successfully started/stopped,
 1394      'False' otherwise
 1395 
 1396    """
 1397    allowed_daemons = [constants.KVMD]
 1398 
 1399    if daemon_name not in allowed_daemons:
 1400      fn = lambda _: False
 1401    elif run:
 1402      fn = utils.EnsureDaemon
 1403    else:
 1404      fn = utils.StopDaemon
 1405 
 1406    return fn(daemon_name)
 1407 
 1408 
 1409  def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
 1410    (_, noded_cert) = \
 1411      utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file))
 1412    data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert
 1413 
 1414    cluster_name = ssconf_store.GetClusterName()
 1415    data[constants.SSHS_CLUSTER_NAME] = cluster_name
 1416 
 1417 
 1418  def AddNodeSshKey(node_uuid, node_name,
 1419                    potential_master_candidates,
 1420                    to_authorized_keys=False,
 1421                    to_public_keys=False,
 1422                    get_public_keys=False,
 1423                    pub_key_file=pathutils.SSH_PUB_KEYS,
 1424                    ssconf_store=None,
 1425                    noded_cert_file=pathutils.NODED_CERT_FILE,
 1426                    run_cmd_fn=ssh.RunSshCmdWithStdin,
 1427                    ssh_update_debug=False,
 1428                    ssh_update_verbose=False):
 1429    """Distributes a node's public SSH key across the cluster.
 1430 
 1431    Note that this function should only be executed on the master node, which
 1432    then will copy the new node's key to all nodes in the cluster via SSH.
 1433 
 1434    Also note: at least one of the flags C{to_authorized_keys},
 1435    C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
 1436    the function to actually perform any actions.
 1437 
 1438    @type node_uuid: str
 1439    @param node_uuid: the UUID of the node whose key is added
 1440    @type node_name: str
 1441    @param node_name: the name of the node whose key is added
 1442    @type potential_master_candidates: list of str
 1443    @param potential_master_candidates: list of node names of potential master
 1444      candidates; this should match the list of uuids in the public key file
 1445    @type to_authorized_keys: boolean
 1446    @param to_authorized_keys: whether the key should be added to the
 1447      C{authorized_keys} file of all nodes
 1448    @type to_public_keys: boolean
 1449    @param to_public_keys: whether the keys should be added to the public key file
 1450    @type get_public_keys: boolean
 1451    @param get_public_keys: whether the node should add the clusters' public keys
 1452      to its {ganeti_pub_keys} file
 1453 
 1454    """
 1455    node_list = [SshAddNodeInfo(name=node_name, uuid=node_uuid,
 1456                                to_authorized_keys=to_authorized_keys,
 1457                                to_public_keys=to_public_keys,
 1458                                get_public_keys=get_public_keys)]
 1459    return AddNodeSshKeyBulk(node_list,
 1460                             potential_master_candidates,
 1461                             pub_key_file=pub_key_file,
 1462                             ssconf_store=ssconf_store,
 1463                             noded_cert_file=noded_cert_file,
 1464                             run_cmd_fn=run_cmd_fn,
 1465                             ssh_update_debug=ssh_update_debug,
 1466                             ssh_update_verbose=ssh_update_verbose)
 1467 
 1468 
 1469  # Node info named tuple specifically for the use with AddNodeSshKeyBulk
 1470  SshAddNodeInfo = collections.namedtuple(
 1471    "SshAddNodeInfo",
 1472    ["uuid",
 1473     "name",
 1474     "to_authorized_keys",
 1475     "to_public_keys",
 1476     "get_public_keys"])
 1477 
 1478 
 1479  def AddNodeSshKeyBulk(node_list,
 1480                        potential_master_candidates,
 1481                        pub_key_file=pathutils.SSH_PUB_KEYS,
 1482                        ssconf_store=None,
 1483                        noded_cert_file=pathutils.NODED_CERT_FILE,
 1484                        run_cmd_fn=ssh.RunSshCmdWithStdin,
 1485                        ssh_update_debug=False,
 1486                        ssh_update_verbose=False):
 1487    """Distributes a node's public SSH key across the cluster.
 1488 
 1489    Note that this function should only be executed on the master node, which
 1490    then will copy the new node's key to all nodes in the cluster via SSH.
 1491 
 1492    Also note: at least one of the flags C{to_authorized_keys},
 1493    C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
 1494    the function to actually perform any actions.
 1495 
 1496    @type node_list: list of SshAddNodeInfo tuples
 1497    @param node_list: list of tuples containing the necessary node information
 1498      for adding their keys
 1499    @type potential_master_candidates: list of str
 1500    @param potential_master_candidates: list of node names of potential master
 1501      candidates; this should match the list of uuids in the public key file
 1502 
 1503    """
 1504    # whether there are any keys to be added or retrieved at all
 1505    to_authorized_keys = any([node_info.to_authorized_keys for node_info in
 1506                              node_list])
 1507    to_public_keys = any([node_info.to_public_keys for node_info in
 1508                          node_list])
 1509 
 1510    if not ssconf_store:
 1511      ssconf_store = ssconf.SimpleStore()
 1512 
 1513    for node_info in node_list:
 1514      # replacement not necessary for keys that are not supposed to be in the
 1515      # list of public keys
 1516      if not node_info.to_public_keys:
 1517        continue
 1518      # Check and fix sanity of key file
 1519      keys_by_name = ssh.QueryPubKeyFile([node_info.name], key_file=pub_key_file)
 1520      keys_by_uuid = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file)
 1521 
 1522      if (not keys_by_name or node_info.name not in keys_by_name) \
 1523          and (not keys_by_uuid or node_info.uuid not in keys_by_uuid):
 1524        raise errors.SshUpdateError(
 1525          "No keys found for the new node '%s' (UUID %s) in the list of public"
 1526          " SSH keys, neither for the name nor the UUID" %
 1527          (node_info.name, node_info.uuid))
 1528      else:
 1529        if node_info.name in keys_by_name:
 1530          # Replace the name by UUID in the file as the name should only be used
 1531          # temporarily
 1532          ssh.ReplaceNameByUuid(node_info.uuid, node_info.name,
 1533                                error_fn=errors.SshUpdateError,
 1534                                key_file=pub_key_file)
 1535 
 1536    # Retrieve updated map of UUIDs to keys
 1537    keys_by_uuid = ssh.QueryPubKeyFile(
 1538      [node_info.uuid for node_info in node_list], key_file=pub_key_file)
 1539 
 1540    # Update the master node's key files
 1541    (auth_key_file, _) = \
 1542      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
 1543    for node_info in node_list:
 1544      if node_info.to_authorized_keys:
 1545        ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_info.uuid])
 1546 
 1547    base_data = {}
 1548    _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
 1549    cluster_name = base_data[constants.SSHS_CLUSTER_NAME]
 1550 
 1551    ssh_port_map = ssconf_store.GetSshPortMap()
 1552 
 1553    # Update the target nodes themselves
 1554    for node_info in node_list:
 1555      logging.debug("Updating SSH key files of target node '%s'.", node_info.name)
 1556      if node_info.get_public_keys:
 1557        node_data = {}
 1558        _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
 1559        all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
 1560        node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
 1561          (constants.SSHS_OVERRIDE, all_keys)
 1562 
 1563        try:
 1564          backoff = 5  # seconds
 1565          utils.RetryByNumberOfTimes(
 1566            constants.SSHS_MAX_RETRIES, backoff,
 1567            errors.SshUpdateError,
 1568            run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
 1569            ssh_port_map.get(node_info.name), node_data,
 1570            debug=ssh_update_debug, verbose=ssh_update_verbose,
 1571            use_cluster_key=False, ask_key=False, strict_host_check=False)
 1572        except errors.SshUpdateError as e:
 1573          # Clean up the master's public key file if adding key fails
 1574          if node_info.to_public_keys:
 1575            ssh.RemovePublicKey(node_info.uuid)
 1576          raise e
 1577 
 1578    # Update all nodes except master and the target nodes
 1579    keys_by_uuid_auth = ssh.QueryPubKeyFile(
 1580      [node_info.uuid for node_info in node_list
 1581       if node_info.to_authorized_keys],
 1582      key_file=pub_key_file)
 1583    if to_authorized_keys:
 1584      base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
 1585        (constants.SSHS_ADD, keys_by_uuid_auth)
 1586 
 1587    pot_mc_data = base_data.copy()
 1588    keys_by_uuid_pub = ssh.QueryPubKeyFile(
 1589      [node_info.uuid for node_info in node_list
 1590       if node_info.to_public_keys],
 1591      key_file=pub_key_file)
 1592    if to_public_keys:
 1593      pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
 1594        (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid_pub)
 1595 
 1596    all_nodes = ssconf_store.GetNodeList()
 1597    master_node = ssconf_store.GetMasterNode()
 1598    online_nodes = ssconf_store.GetOnlineNodeList()
 1599 
 1600    node_errors = []
 1601    for node in all_nodes:
 1602      if node == master_node:
 1603        logging.debug("Skipping master node '%s'.", master_node)
 1604        continue
 1605      if node not in online_nodes:
 1606        logging.debug("Skipping offline node '%s'.", node)
 1607        continue
 1608      if node in potential_master_candidates:
 1609        logging.debug("Updating SSH key files of node '%s'.", node)
 1610        try:
 1611          backoff = 5  # seconds
 1612          utils.RetryByNumberOfTimes(
 1613            constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError,
 1614            run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
 1615            ssh_port_map.get(node), pot_mc_data,
 1616            debug=ssh_update_debug, verbose=ssh_update_verbose,
 1617            use_cluster_key=False, ask_key=False, strict_host_check=False)
 1618        except errors.SshUpdateError as last_exception:
 1619          error_msg = ("When adding the key of node '%s', updating SSH key"
 1620                       " files of node '%s' failed after %s retries."
 1621                       " Not trying again. Last error was: %s." %
 1622                       (node, node_info.name, constants.SSHS_MAX_RETRIES,
 1623                        last_exception))
 1624          node_errors.append((node, error_msg))
 1625          # We only log the error and don't throw an exception, because
 1626          # one unreachable node shall not abort the entire procedure.
 1627          logging.error(error_msg)
 1628 
 1629      else:
 1630        if to_authorized_keys:
 1631          run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
 1632                     ssh_port_map.get(node), base_data,
 1633                     debug=ssh_update_debug, verbose=ssh_update_verbose,
 1634                     use_cluster_key=False, ask_key=False,
 1635                     strict_host_check=False)
 1636 
 1637    return node_errors
 1638 
1639 1640 # TODO: will be fixed with pending patch series. 1641 # pylint: disable=R0913 1642 -def RemoveNodeSshKey(node_uuid, node_name, 1643 master_candidate_uuids, 1644 potential_master_candidates, 1645 master_uuid=None, 1646 keys_to_remove=None, 1647 from_authorized_keys=False, 1648 from_public_keys=False, 1649 clear_authorized_keys=False, 1650 clear_public_keys=False, 1651 pub_key_file=pathutils.SSH_PUB_KEYS, 1652 ssconf_store=None, 1653 noded_cert_file=pathutils.NODED_CERT_FILE, 1654 readd=False, 1655 run_cmd_fn=ssh.RunSshCmdWithStdin, 1656 ssh_update_debug=False, 1657 ssh_update_verbose=False):
1658 """Removes the node's SSH keys from the key files and distributes those. 1659 1660 Note that at least one of the flags C{from_authorized_keys}, 1661 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1662 has to be set to C{True} for the function to perform any action at all. 1663 Not doing so will trigger an assertion in the function. 1664 1665 @type node_uuid: str 1666 @param node_uuid: UUID of the node whose key is removed 1667 @type node_name: str 1668 @param node_name: name of the node whose key is remove 1669 @type master_candidate_uuids: list of str 1670 @param master_candidate_uuids: list of UUIDs of the current master candidates 1671 @type potential_master_candidates: list of str 1672 @param potential_master_candidates: list of names of potential master 1673 candidates 1674 @type keys_to_remove: dict of str to list of str 1675 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1676 to be removed. This list is supposed to be used only if the keys are not 1677 in the public keys file. This is for example the case when removing a 1678 master node's key. 1679 @type from_authorized_keys: boolean 1680 @param from_authorized_keys: whether or not the key should be removed 1681 from the C{authorized_keys} file 1682 @type from_public_keys: boolean 1683 @param from_public_keys: whether or not the key should be remove from 1684 the C{ganeti_pub_keys} file 1685 @type clear_authorized_keys: boolean 1686 @param clear_authorized_keys: whether or not the C{authorized_keys} file 1687 should be cleared on the node whose keys are removed 1688 @type clear_public_keys: boolean 1689 @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file 1690 @type readd: boolean 1691 @param readd: whether this is called during a readd operation. 1692 @rtype: list of string 1693 @returns: list of feedback messages 1694 1695 """ 1696 node_list = [SshRemoveNodeInfo(uuid=node_uuid, 1697 name=node_name, 1698 from_authorized_keys=from_authorized_keys, 1699 from_public_keys=from_public_keys, 1700 clear_authorized_keys=clear_authorized_keys, 1701 clear_public_keys=clear_public_keys)] 1702 return RemoveNodeSshKeyBulk(node_list, 1703 master_candidate_uuids, 1704 potential_master_candidates, 1705 master_uuid=master_uuid, 1706 keys_to_remove=keys_to_remove, 1707 pub_key_file=pub_key_file, 1708 ssconf_store=ssconf_store, 1709 noded_cert_file=noded_cert_file, 1710 readd=readd, 1711 run_cmd_fn=run_cmd_fn, 1712 ssh_update_debug=ssh_update_debug, 1713 ssh_update_verbose=ssh_update_verbose)
1714 1715 1716 # Node info named tuple specifically for the use with RemoveNodeSshKeyBulk 1717 SshRemoveNodeInfo = collections.namedtuple( 1718 "SshRemoveNodeInfo", 1719 ["uuid", 1720 "name", 1721 "from_authorized_keys", 1722 "from_public_keys", 1723 "clear_authorized_keys", 1724 "clear_public_keys"])
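For illustration, a minimal sketch of how a caller might drive the bulk interface (node name and UUID are made up; L{RemoveNodeSshKey} performs exactly this wrapping for the single-node case):

  # Hypothetical example data; in real use these come from the cluster config.
  nodes_to_remove = [
      SshRemoveNodeInfo(uuid="11111111-2222-3333-4444-555555555555",
                        name="node1.example.com",
                        from_authorized_keys=True,
                        from_public_keys=True,
                        clear_authorized_keys=False,
                        clear_public_keys=False),
  ]
  # Per-node failures are reported as (node, message) pairs instead of
  # raising, so one unreachable node does not abort the whole operation:
  # msgs = RemoveNodeSshKeyBulk(nodes_to_remove, master_candidate_uuids,
  #                             potential_master_candidates)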
1725 1726 1727 -def RemoveNodeSshKeyBulk(node_list, 1728 master_candidate_uuids, 1729 potential_master_candidates, 1730 master_uuid=None, 1731 keys_to_remove=None, 1732 pub_key_file=pathutils.SSH_PUB_KEYS, 1733 ssconf_store=None, 1734 noded_cert_file=pathutils.NODED_CERT_FILE, 1735 readd=False, 1736 run_cmd_fn=ssh.RunSshCmdWithStdin, 1737 ssh_update_debug=False, 1738 ssh_update_verbose=False):
1739 """Removes the node's SSH keys from the key files and distributes those. 1740 1741 Note that at least one of the flags C{from_authorized_keys}, 1742 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1743 of at least one node has to be set to C{True} for the function to perform any 1744 action at all. Not doing so will trigger an assertion in the function. 1745 1746 @type node_list: list of C{SshRemoveNodeInfo}. 1747 @param node_list: list of information about nodes whose keys are being removed 1748 @type master_candidate_uuids: list of str 1749 @param master_candidate_uuids: list of UUIDs of the current master candidates 1750 @type potential_master_candidates: list of str 1751 @param potential_master_candidates: list of names of potential master 1752 candidates 1753 @type keys_to_remove: dict of str to list of str 1754 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1755 to be removed. This list is supposed to be used only if the keys are not 1756 in the public keys file. This is for example the case when removing a 1757 master node's key. 1758 @type readd: boolean 1759 @param readd: whether this is called during a readd operation. 1760 @rtype: list of string 1761 @returns: list of feedback messages 1762 1763 """ 1764 # Non-disruptive error messages, list of (node, msg) pairs 1765 result_msgs = [] 1766 1767 # whether there are any keys to be added or retrieved at all 1768 from_authorized_keys = any([node_info.from_authorized_keys for node_info in 1769 node_list]) 1770 from_public_keys = any([node_info.from_public_keys for node_info in 1771 node_list]) 1772 clear_authorized_keys = any([node_info.clear_authorized_keys for node_info in 1773 node_list]) 1774 clear_public_keys = any([node_info.clear_public_keys for node_info in 1775 node_list]) 1776 1777 # Make sure at least one of these flags is true. 1778 if not (from_authorized_keys or from_public_keys or clear_authorized_keys 1779 or clear_public_keys): 1780 raise errors.SshUpdateError("No removal from any key file was requested.") 1781 1782 if not ssconf_store: 1783 ssconf_store = ssconf.SimpleStore() 1784 1785 master_node = ssconf_store.GetMasterNode() 1786 ssh_port_map = ssconf_store.GetSshPortMap() 1787 1788 all_keys_to_remove = {} 1789 if from_authorized_keys or from_public_keys: 1790 for node_info in node_list: 1791 # Skip nodes that don't actually need any keys to be removed. 1792 if not (node_info.from_authorized_keys or node_info.from_public_keys): 1793 continue 1794 if node_info.name == master_node and not keys_to_remove: 1795 raise errors.SshUpdateError("Cannot remove the master node's keys.") 1796 if keys_to_remove: 1797 keys = keys_to_remove 1798 else: 1799 keys = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file) 1800 if (not keys or node_info.uuid not in keys) and not readd: 1801 raise errors.SshUpdateError("Node '%s' not found in the list of" 1802 " public SSH keys. It seems someone" 1803 " tries to remove a key from outside" 1804 " the cluster!" % node_info.uuid) 1805 # During an upgrade all nodes have the master key. 
In this case we 1806 # should not remove it to avoid accidentally shutting down cluster 1807 # SSH communication 1808 master_keys = None 1809 if master_uuid: 1810 master_keys = ssh.QueryPubKeyFile([master_uuid], 1811 key_file=pub_key_file) 1812 for master_key in master_keys: 1813 if master_key in keys[node_info.uuid]: 1814 keys[node_info.uuid].remove(master_key) 1815 1816 all_keys_to_remove.update(keys) 1817 1818 if all_keys_to_remove: 1819 base_data = {} 1820 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1821 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1822 1823 if from_authorized_keys: 1824 # UUIDs of nodes that are supposed to be removed from the 1825 # authorized_keys files. 1826 nodes_remove_from_authorized_keys = [ 1827 node_info.uuid for node_info in node_list 1828 if node_info.from_authorized_keys] 1829 keys_to_remove_from_authorized_keys = dict([ 1830 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1831 if uuid in nodes_remove_from_authorized_keys]) 1832 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1833 (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys) 1834 (auth_key_file, _) = \ 1835 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, 1836 dircheck=False) 1837 1838 for uuid in nodes_remove_from_authorized_keys: 1839 ssh.RemoveAuthorizedKeys(auth_key_file, 1840 keys_to_remove_from_authorized_keys[uuid]) 1841 1842 pot_mc_data = base_data.copy() 1843 1844 if from_public_keys: 1845 nodes_remove_from_public_keys = [ 1846 node_info.uuid for node_info in node_list 1847 if node_info.from_public_keys] 1848 keys_to_remove_from_public_keys = dict([ 1849 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items() 1850 if uuid in nodes_remove_from_public_keys]) 1851 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1852 (constants.SSHS_REMOVE, keys_to_remove_from_public_keys) 1853 1854 all_nodes = ssconf_store.GetNodeList() 1855 online_nodes = ssconf_store.GetOnlineNodeList() 1856 all_nodes_to_remove = [node_info.name for node_info in node_list] 1857 logging.debug("Removing keys of nodes '%s' from all nodes but itself and" 1858 " master.", ", ".join(all_nodes_to_remove)) 1859 for node in all_nodes: 1860 if node == master_node: 1861 logging.debug("Skipping master node '%s'.", master_node) 1862 continue 1863 if node not in online_nodes: 1864 logging.debug("Skipping offline node '%s'.", node) 1865 continue 1866 if node in all_nodes_to_remove: 1867 logging.debug("Skipping node whose key is removed itself '%s'.", node) 1868 continue 1869 ssh_port = ssh_port_map.get(node) 1870 if not ssh_port: 1871 raise errors.OpExecError("No SSH port information available for" 1872 " node '%s', map: %s." % 1873 (node, ssh_port_map)) 1874 error_msg_final = ("When removing the key of node '%s', updating the" 1875 " SSH key files of node '%s' failed. 
Last error" 1876 " was: %s.") 1877 if node in potential_master_candidates: 1878 logging.debug("Updating key setup of potential master candidate node" 1879 " %s.", node) 1880 try: 1881 backoff = 5 # seconds 1882 utils.RetryByNumberOfTimes( 1883 constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError, 1884 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1885 ssh_port, pot_mc_data, 1886 debug=ssh_update_debug, verbose=ssh_update_verbose, 1887 use_cluster_key=False, ask_key=False, strict_host_check=False) 1888 except errors.SshUpdateError as last_exception: 1889 error_msg = error_msg_final % ( 1890 node_info.name, node, last_exception) 1891 result_msgs.append((node, error_msg)) 1892 logging.error(error_msg) 1893 1894 else: 1895 if from_authorized_keys: 1896 logging.debug("Updating key setup of normal node %s.", node) 1897 try: 1898 backoff = 5 # seconds 1899 utils.RetryByNumberOfTimes( 1900 constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError, 1901 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1902 ssh_port, base_data, 1903 debug=ssh_update_debug, verbose=ssh_update_verbose, 1904 use_cluster_key=False, ask_key=False, strict_host_check=False) 1905 except errors.SshUpdateError as last_exception: 1906 error_msg = error_msg_final % ( 1907 node_info.name, node, last_exception) 1908 result_msgs.append((node, error_msg)) 1909 logging.error(error_msg) 1910 1911 for node_info in node_list: 1912 if node_info.clear_authorized_keys or node_info.from_public_keys or \ 1913 node_info.clear_public_keys: 1914 data = {} 1915 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1916 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1917 ssh_port = ssh_port_map.get(node_info.name) 1918 if not ssh_port: 1919 raise errors.OpExecError("No SSH port information available for" 1920 " node '%s', which is leaving the cluster.") 1921 1922 if node_info.clear_authorized_keys: 1923 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore, 1924 # we have to specify exactly which keys to clear to leave keys untouched 1925 # that were not added by Ganeti. 1926 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids 1927 if uuid != node_info.uuid] 1928 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, 1929 key_file=pub_key_file) 1930 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1931 (constants.SSHS_REMOVE, candidate_keys) 1932 1933 if node_info.clear_public_keys: 1934 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1935 (constants.SSHS_CLEAR, {}) 1936 elif node_info.from_public_keys: 1937 # Since clearing the public keys subsumes removing just a single key, 1938 # we only do it if clear_public_keys is 'False'. 
1939 1940 if all_keys_to_remove: 1941 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1942 (constants.SSHS_REMOVE, all_keys_to_remove) 1943 1944 # If we have no changes to any keyfile, just return 1945 if not (constants.SSHS_SSH_PUBLIC_KEYS in data or 1946 constants.SSHS_SSH_AUTHORIZED_KEYS in data): 1947 return 1948 1949 logging.debug("Updating SSH key setup of target node '%s'.", 1950 node_info.name) 1951 try: 1952 backoff = 5 # seconds 1953 utils.RetryByNumberOfTimes( 1954 constants.SSHS_MAX_RETRIES, backoff, 1955 errors.SshUpdateError, 1956 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE, 1957 ssh_port, data, 1958 debug=ssh_update_debug, verbose=ssh_update_verbose, 1959 use_cluster_key=False, ask_key=False, strict_host_check=False) 1960 except errors.SshUpdateError as last_exception: 1961 result_msgs.append( 1962 (node_info.name, 1963 ("Removing SSH keys from node '%s' failed." 1964 " This can happen when the node is already unreachable." 1965 " Error: %s" % (node_info.name, last_exception)))) 1966 1967 if all_keys_to_remove and from_public_keys: 1968 for node_uuid in nodes_remove_from_public_keys: 1969 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 1970 1971 return result_msgs
1972 # pylint: enable=R0913
1973 1974 1975 -def RemoveSshKeyFromPublicKeyFile(node_name, 1976 pub_key_file=pathutils.SSH_PUB_KEYS, 1977 ssconf_store=None):
1978 """Removes a SSH key from the master's public key file. 1979 1980 This is an operation that is only used to clean up after failed operations 1981 (for example failed hooks before adding a node). To avoid abuse of this 1982 function (and the matching RPC call), we add a safety check to make sure 1983 that only stray keys can be removed that belong to nodes that are not 1984 in the cluster (anymore). 1985 1986 @type node_name: string 1987 @param node_name: the name of the node whose key is removed 1988 1989 """ 1990 if not ssconf_store: 1991 ssconf_store = ssconf.SimpleStore() 1992 1993 node_list = ssconf_store.GetNodeList() 1994 1995 if node_name in node_list: 1996 raise errors.SshUpdateError("Cannot remove key of node '%s'," 1997 " because it still belongs to the cluster." 1998 % node_name) 1999 2000 keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file) 2001 if not keys_by_name or node_name not in keys_by_name: 2002 logging.info("The node '%s' whose key is supposed to be removed does not" 2003 " have an entry in the public key file. Hence, there is" 2004 " nothing left to do.", node_name) 2005 2006 ssh.RemovePublicKey(node_name, key_file=pub_key_file)
2007
2008 2009 -def _GenerateNodeSshKey(node_name, ssh_port_map, ssh_key_type, ssh_key_bits, 2010 ssconf_store=None, 2011 noded_cert_file=pathutils.NODED_CERT_FILE, 2012 run_cmd_fn=ssh.RunSshCmdWithStdin, 2013 suffix="", 2014 ssh_update_debug=False, 2015 ssh_update_verbose=False):
2016 """Generates the root SSH key pair on the node. 2017 2018 @type node_name: str 2019 @param node_name: name of the node whose key is remove 2020 @type ssh_port_map: dict of str to int 2021 @param ssh_port_map: mapping of node names to their SSH port 2022 @type ssh_key_type: One of L{constants.SSHK_ALL} 2023 @param ssh_key_type: the type of SSH key to be generated 2024 @type ssh_key_bits: int 2025 @param ssh_key_bits: the length of the key to be generated 2026 2027 """ 2028 if not ssconf_store: 2029 ssconf_store = ssconf.SimpleStore() 2030 2031 data = {} 2032 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 2033 cluster_name = data[constants.SSHS_CLUSTER_NAME] 2034 data[constants.SSHS_GENERATE] = (ssh_key_type, ssh_key_bits, suffix) 2035 2036 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, 2037 ssh_port_map.get(node_name), data, 2038 debug=ssh_update_debug, verbose=ssh_update_verbose, 2039 use_cluster_key=False, ask_key=False, strict_host_check=False)
2040
2041 2042 -def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
2043 master_node_uuids = [node_uuid for (node_uuid, node_name) 2044 in node_uuid_name_map 2045 if node_name == master_node_name] 2046 if len(master_node_uuids) != 1: 2047 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 2048 " name: '%s', Master UUID: '%s'" % 2049 (master_node_name, master_node_uuids)) 2050 return master_node_uuids[0]
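A small sketch of the lookup semantics with made-up data:

  # node_uuid_name_map is an iterable of (uuid, name) pairs, as built by
  # zip(node_uuids, node_names) in RenewSshKeys below.
  node_uuid_name_map = [("uuid-1", "node1.example.com"),
                        ("uuid-2", "master.example.com")]
  assert _GetMasterNodeUUID(node_uuid_name_map,
                            "master.example.com") == "uuid-2"
  # Zero matches or duplicate matches raise errors.SshUpdateError instead.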
2051
2052 2053 -def _GetOldMasterKeys(master_node_uuid, pub_key_file):
2054 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid], 2055 key_file=pub_key_file) 2056 if not old_master_keys_by_uuid: 2057 raise errors.SshUpdateError("No public key of the master node (UUID '%s')" 2058 " found, not generating a new key." 2059 % master_node_uuid) 2060 return old_master_keys_by_uuid
2061
2062 2063 -def RenewSshKeys(node_uuids, node_names, master_candidate_uuids, 2064 potential_master_candidates, old_key_type, new_key_type, 2065 new_key_bits, 2066 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS, 2067 ssconf_store=None, 2068 noded_cert_file=pathutils.NODED_CERT_FILE, 2069 run_cmd_fn=ssh.RunSshCmdWithStdin, 2070 ssh_update_debug=False, 2071 ssh_update_verbose=False):
2072 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys. 2073 2074 @type node_uuids: list of str 2075 @param node_uuids: list of node UUIDs whose keys should be renewed 2076 @type node_names: list of str 2077 @param node_names: list of node names whose keys should be removed. This list 2078 should match the C{node_uuids} parameter 2079 @type master_candidate_uuids: list of str 2080 @param master_candidate_uuids: list of UUIDs of master candidates or 2081 master node 2082 @type old_key_type: One of L{constants.SSHK_ALL} 2083 @param old_key_type: the type of SSH key already present on nodes 2084 @type new_key_type: One of L{constants.SSHK_ALL} 2085 @param new_key_type: the type of SSH key to be generated 2086 @type new_key_bits: int 2087 @param new_key_bits: the length of the key to be generated 2088 @type ganeti_pub_keys_file: str 2089 @param ganeti_pub_keys_file: file path of the the public key file 2090 @type noded_cert_file: str 2091 @param noded_cert_file: path of the noded SSL certificate file 2092 @type run_cmd_fn: function 2093 @param run_cmd_fn: function to run commands on remote nodes via SSH 2094 @raises ProgrammerError: if node_uuids and node_names don't match; 2095 SshUpdateError if a node's key is missing from the public key file, 2096 if a node's new SSH key could not be fetched from it, if there is 2097 none or more than one entry in the public key list for the master 2098 node. 2099 2100 """ 2101 if not ssconf_store: 2102 ssconf_store = ssconf.SimpleStore() 2103 cluster_name = ssconf_store.GetClusterName() 2104 2105 if not len(node_uuids) == len(node_names): 2106 raise errors.ProgrammerError("List of nodes UUIDs and node names" 2107 " does not match in length.") 2108 2109 old_pub_keyfile = ssh.GetSshPubKeyFilename(old_key_type) 2110 new_pub_keyfile = ssh.GetSshPubKeyFilename(new_key_type) 2111 old_master_key = ssh.ReadLocalSshPubKeys([old_key_type]) 2112 2113 node_uuid_name_map = zip(node_uuids, node_names) 2114 2115 master_node_name = ssconf_store.GetMasterNode() 2116 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name) 2117 ssh_port_map = ssconf_store.GetSshPortMap() 2118 # List of all node errors that happened, but which did not abort the 2119 # procedure as a whole. It is important that this is a list to have a 2120 # somewhat chronological history of events. 2121 all_node_errors = [] 2122 2123 # process non-master nodes 2124 2125 # keys to add in bulk at the end 2126 node_keys_to_add = [] 2127 2128 # list of all nodes 2129 node_list = [] 2130 2131 # list of keys to be removed before generating new keys 2132 node_info_to_remove = [] 2133 2134 for node_uuid, node_name in node_uuid_name_map: 2135 if node_name == master_node_name: 2136 continue 2137 master_candidate = node_uuid in master_candidate_uuids 2138 potential_master_candidate = node_name in potential_master_candidates 2139 node_list.append((node_uuid, node_name, master_candidate, 2140 potential_master_candidate)) 2141 2142 if master_candidate: 2143 logging.debug("Fetching old SSH key from node '%s'.", node_name) 2144 old_pub_key = ssh.ReadRemoteSshPubKey(old_pub_keyfile, 2145 node_name, cluster_name, 2146 ssh_port_map[node_name], 2147 False, # ask_key 2148 False) # key_check 2149 if old_pub_key != old_master_key: 2150 # If we are already in a multi-key setup (that is past Ganeti 2.12), 2151 # we can safely remove the old key of the node. 
Otherwise, we cannot 2152 # remove that node's key, because it is also the master node's key 2153 # and that would terminate all communication from the master to the 2154 # node. 2155 node_info_to_remove.append(SshRemoveNodeInfo( 2156 uuid=node_uuid, 2157 name=node_name, 2158 from_authorized_keys=master_candidate, 2159 from_public_keys=False, 2160 clear_authorized_keys=False, 2161 clear_public_keys=False)) 2162 else: 2163 logging.debug("Old key of node '%s' is the same as the current master" 2164 " key. Not deleting that key on the node.", node_name) 2165 2166 logging.debug("Removing old SSH keys of all master candidates.") 2167 if node_info_to_remove: 2168 node_errors = RemoveNodeSshKeyBulk( 2169 node_info_to_remove, 2170 master_candidate_uuids, 2171 potential_master_candidates, 2172 master_uuid=master_node_uuid, 2173 pub_key_file=ganeti_pub_keys_file, 2174 ssconf_store=ssconf_store, 2175 noded_cert_file=noded_cert_file, 2176 run_cmd_fn=run_cmd_fn, 2177 ssh_update_debug=ssh_update_debug, 2178 ssh_update_verbose=ssh_update_verbose) 2179 if node_errors: 2180 all_node_errors = all_node_errors + node_errors 2181 2182 for (node_uuid, node_name, master_candidate, potential_master_candidate) \ 2183 in node_list: 2184 2185 logging.debug("Generating new SSH key for node '%s'.", node_name) 2186 _GenerateNodeSshKey(node_name, ssh_port_map, new_key_type, new_key_bits, 2187 ssconf_store=ssconf_store, 2188 noded_cert_file=noded_cert_file, 2189 run_cmd_fn=run_cmd_fn, 2190 ssh_update_verbose=ssh_update_verbose, 2191 ssh_update_debug=ssh_update_debug) 2192 2193 try: 2194 logging.debug("Fetching newly created SSH key from node '%s'.", node_name) 2195 pub_key = ssh.ReadRemoteSshPubKey(new_pub_keyfile, 2196 node_name, cluster_name, 2197 ssh_port_map[node_name], 2198 False, # ask_key 2199 False) # key_check 2200 except: 2201 raise errors.SshUpdateError("Could not fetch key of node %s" 2202 " (UUID %s)" % (node_name, node_uuid)) 2203 2204 if potential_master_candidate: 2205 ssh.RemovePublicKey(node_uuid, key_file=ganeti_pub_keys_file) 2206 ssh.AddPublicKey(node_uuid, pub_key, key_file=ganeti_pub_keys_file) 2207 2208 node_info = SshAddNodeInfo(name=node_name, 2209 uuid=node_uuid, 2210 to_authorized_keys=master_candidate, 2211 to_public_keys=potential_master_candidate, 2212 get_public_keys=True) 2213 node_keys_to_add.append(node_info) 2214 2215 node_errors = AddNodeSshKeyBulk( 2216 node_keys_to_add, potential_master_candidates, 2217 pub_key_file=ganeti_pub_keys_file, ssconf_store=ssconf_store, 2218 noded_cert_file=noded_cert_file, 2219 run_cmd_fn=run_cmd_fn, 2220 ssh_update_debug=ssh_update_debug, 2221 ssh_update_verbose=ssh_update_verbose) 2222 if node_errors: 2223 all_node_errors = all_node_errors + node_errors 2224 2225 # Renewing the master node's key 2226 2227 # Preserve the old keys for now 2228 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, 2229 ganeti_pub_keys_file) 2230 2231 # Generate a new master key with a suffix, don't touch the old one for now 2232 logging.debug("Generate new ssh key of master.") 2233 _GenerateNodeSshKey(master_node_name, ssh_port_map, 2234 new_key_type, new_key_bits, 2235 ssconf_store=ssconf_store, 2236 noded_cert_file=noded_cert_file, 2237 run_cmd_fn=run_cmd_fn, 2238 suffix=constants.SSHS_MASTER_SUFFIX, 2239 ssh_update_debug=ssh_update_debug, 2240 ssh_update_verbose=ssh_update_verbose) 2241 # Read newly created master key 2242 new_master_keys = ssh.ReadLocalSshPubKeys( 2243 [new_key_type], suffix=constants.SSHS_MASTER_SUFFIX) 2244 2245 # Replace master key in the 
master node's public key file
2246  ssh.RemovePublicKey(master_node_uuid, key_file=ganeti_pub_keys_file)
2247  for pub_key in new_master_keys:
2248    ssh.AddPublicKey(master_node_uuid, pub_key, key_file=ganeti_pub_keys_file)
2249 
2250  # Add the new master key to all nodes' public and authorized keys
2251  logging.debug("Add new master key to all nodes.")
2252  node_errors = AddNodeSshKey(
2253      master_node_uuid, master_node_name, potential_master_candidates,
2254      to_authorized_keys=True, to_public_keys=True,
2255      get_public_keys=False, pub_key_file=ganeti_pub_keys_file,
2256      ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
2257      run_cmd_fn=run_cmd_fn,
2258      ssh_update_debug=ssh_update_debug,
2259      ssh_update_verbose=ssh_update_verbose)
2260  if node_errors:
2261    all_node_errors = all_node_errors + node_errors
2262 
2263  # Remove the old key file and rename the new key to the non-temporary filename
2264  ssh.ReplaceSshKeys(new_key_type, new_key_type,
2265                     src_key_suffix=constants.SSHS_MASTER_SUFFIX)
2266 
2267  # Remove the old key from the master's own authorized keys
2268  (auth_key_file, _) = \
2269    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2270  ssh.RemoveAuthorizedKeys(auth_key_file,
2271                           old_master_keys_by_uuid[master_node_uuid])
2272 
2273  # Remove the old key from all nodes' authorized keys files
2274  logging.debug("Remove the old master key from all nodes.")
2275  node_errors = RemoveNodeSshKey(
2276      master_node_uuid, master_node_name, master_candidate_uuids,
2277      potential_master_candidates,
2278      keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
2279      from_public_keys=False, clear_authorized_keys=False,
2280      clear_public_keys=False,
2281      pub_key_file=ganeti_pub_keys_file,
2282      ssconf_store=ssconf_store,
2283      noded_cert_file=noded_cert_file,
2284      run_cmd_fn=run_cmd_fn,
2285      ssh_update_debug=ssh_update_debug,
2286      ssh_update_verbose=ssh_update_verbose)
2287  if node_errors:
2288    all_node_errors = all_node_errors + node_errors
2289 
2290  return all_node_errors
2291
2292 2293 -def GetBlockDevSizes(devices):
2294 """Return the size of the given block devices 2295 2296 @type devices: list 2297 @param devices: list of block device nodes to query 2298 @rtype: dict 2299 @return: 2300 dictionary of all block devices under /dev (key). The value is their 2301 size in MiB. 2302 2303 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 2304 2305 """ 2306 DEV_PREFIX = "/dev/" 2307 blockdevs = {} 2308 2309 for devpath in devices: 2310 if not utils.IsBelowDir(DEV_PREFIX, devpath): 2311 continue 2312 2313 try: 2314 st = os.stat(devpath) 2315 except EnvironmentError, err: 2316 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 2317 continue 2318 2319 if stat.S_ISBLK(st.st_mode): 2320 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 2321 if result.failed: 2322 # We don't want to fail, just do not list this device as available 2323 logging.warning("Cannot get size for block device %s", devpath) 2324 continue 2325 2326 size = int(result.stdout) / (1024 * 1024) 2327 blockdevs[devpath] = size 2328 return blockdevs
2329
2330 2331 -def GetVolumeList(vg_names):
2332 """Compute list of logical volumes and their size. 2333 2334 @type vg_names: list 2335 @param vg_names: the volume groups whose LVs we should list, or 2336 empty for all volume groups 2337 @rtype: dict 2338 @return: 2339 dictionary of all partions (key) with value being a tuple of 2340 their size (in MiB), inactive and online status:: 2341 2342 {'xenvg/test1': ('20.06', True, True)} 2343 2344 in case of errors, a string is returned with the error 2345 details. 2346 2347 """ 2348 lvs = {} 2349 sep = "|" 2350 if not vg_names: 2351 vg_names = [] 2352 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2353 "--separator=%s" % sep, 2354 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 2355 if result.failed: 2356 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 2357 2358 for line in result.stdout.splitlines(): 2359 line = line.strip() 2360 match = _LVSLINE_REGEX.match(line) 2361 if not match: 2362 logging.error("Invalid line returned from lvs output: '%s'", line) 2363 continue 2364 vg_name, name, size, attr = match.groups() 2365 inactive = attr[4] == "-" 2366 online = attr[5] == "o" 2367 virtual = attr[0] == "v" 2368 if virtual: 2369 # we don't want to report such volumes as existing, since they 2370 # don't really hold data 2371 continue 2372 lvs[vg_name + "/" + name] = (size, inactive, online) 2373 2374 return lvs
2375
2376 2377 -def ListVolumeGroups():
2378 """List the volume groups and their size. 2379 2380 @rtype: dict 2381 @return: dictionary with keys volume name and values the 2382 size of the volume 2383 2384 """ 2385 return utils.ListVolumeGroups()
2386
2387 2388 -def NodeVolumes():
2389 """List all volumes on this node. 2390 2391 @rtype: list 2392 @return: 2393 A list of dictionaries, each having four keys: 2394 - name: the logical volume name, 2395 - size: the size of the logical volume 2396 - dev: the physical device on which the LV lives 2397 - vg: the volume group to which it belongs 2398 2399 In case of errors, we return an empty list and log the 2400 error. 2401 2402 Note that since a logical volume can live on multiple physical 2403 volumes, the resulting list might include a logical volume 2404 multiple times. 2405 2406 """ 2407 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2408 "--separator=|", 2409 "--options=lv_name,lv_size,devices,vg_name"]) 2410 if result.failed: 2411 _Fail("Failed to list logical volumes, lvs output: %s", 2412 result.output) 2413 2414 def parse_dev(dev): 2415 return dev.split("(")[0]
2416 2417 def handle_dev(dev): 2418 return [parse_dev(x) for x in dev.split(",")] 2419 2420 def map_line(line): 2421 line = [v.strip() for v in line] 2422 return [{"name": line[0], "size": line[1], 2423 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 2424 2425 all_devs = [] 2426 for line in result.stdout.splitlines(): 2427 if line.count("|") >= 3: 2428 all_devs.extend(map_line(line.split("|"))) 2429 else: 2430 logging.warning("Strange line in the output from lvs: '%s'", line) 2431 return all_devs 2432
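A standalone sketch of the same parsing on one made-up line of lvs output; an LV spanning two physical volumes yields one entry per device:

  line = " lv0|1024.00|/dev/sda1(0),/dev/sdb1(0)|xenvg "
  fields = [v.strip() for v in line.split("|")]
  devs = [d.split("(")[0] for d in fields[2].split(",")]
  volumes = [{"name": fields[0], "size": fields[1], "dev": d, "vg": fields[3]}
             for d in devs]
  assert [v["dev"] for v in volumes] == ["/dev/sda1", "/dev/sdb1"]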
2433 2434 -def BridgesExist(bridges_list):
2435 """Check if a list of bridges exist on the current node. 2436 2437 @rtype: boolean 2438 @return: C{True} if all of them exist, C{False} otherwise 2439 2440 """ 2441 missing = [] 2442 for bridge in bridges_list: 2443 if not utils.BridgeExists(bridge): 2444 missing.append(bridge) 2445 2446 if missing: 2447 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2448
2449 2450 -def GetInstanceListForHypervisor(hname, hvparams=None, 2451 get_hv_fn=hypervisor.GetHypervisor):
2452 """Provides a list of instances of the given hypervisor. 2453 2454 @type hname: string 2455 @param hname: name of the hypervisor 2456 @type hvparams: dict of strings 2457 @param hvparams: hypervisor parameters for the given hypervisor 2458 @type get_hv_fn: function 2459 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2460 name; optional parameter to increase testability 2461 2462 @rtype: list 2463 @return: a list of all running instances on the current node 2464 - instance1.example.com 2465 - instance2.example.com 2466 2467 """ 2468 try: 2469 return get_hv_fn(hname).ListInstances(hvparams=hvparams) 2470 except errors.HypervisorError, err: 2471 _Fail("Error enumerating instances (hypervisor %s): %s", 2472 hname, err, exc=True)
2473
2474 2475 -def GetInstanceList(hypervisor_list, all_hvparams=None, 2476 get_hv_fn=hypervisor.GetHypervisor):
2477 """Provides a list of instances. 2478 2479 @type hypervisor_list: list 2480 @param hypervisor_list: the list of hypervisors to query information 2481 @type all_hvparams: dict of dict of strings 2482 @param all_hvparams: a dictionary mapping hypervisor types to respective 2483 cluster-wide hypervisor parameters 2484 @type get_hv_fn: function 2485 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2486 name; optional parameter to increase testability 2487 2488 @rtype: list 2489 @return: a list of all running instances on the current node 2490 - instance1.example.com 2491 - instance2.example.com 2492 2493 """ 2494 results = [] 2495 for hname in hypervisor_list: 2496 hvparams = all_hvparams[hname] 2497 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 2498 get_hv_fn=get_hv_fn)) 2499 return results
2500
2501 2502 -def GetInstanceInfo(instance, hname, hvparams=None):
2503 """Gives back the information about an instance as a dictionary. 2504 2505 @type instance: string 2506 @param instance: the instance name 2507 @type hname: string 2508 @param hname: the hypervisor type of the instance 2509 @type hvparams: dict of strings 2510 @param hvparams: the instance's hvparams 2511 2512 @rtype: dict 2513 @return: dictionary with the following keys: 2514 - memory: memory size of instance (int) 2515 - state: state of instance (HvInstanceState) 2516 - time: cpu time of instance (float) 2517 - vcpus: the number of vcpus (int) 2518 2519 """ 2520 output = {} 2521 2522 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 2523 hvparams=hvparams) 2524 if iinfo is not None: 2525 output["memory"] = iinfo[2] 2526 output["vcpus"] = iinfo[3] 2527 output["state"] = iinfo[4] 2528 output["time"] = iinfo[5] 2529 2530 return output
2531
2532 2533 -def GetInstanceMigratable(instance):
2534 """Computes whether an instance can be migrated. 2535 2536 @type instance: L{objects.Instance} 2537 @param instance: object representing the instance to be checked. 2538 2539 @rtype: tuple 2540 @return: tuple of (result, description) where: 2541 - result: whether the instance can be migrated or not 2542 - description: a description of the issue, if relevant 2543 2544 """ 2545 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2546 iname = instance.name 2547 if iname not in hyper.ListInstances(hvparams=instance.hvparams): 2548 _Fail("Instance %s is not running", iname)
2549
2550 2551 -def GetAllInstancesInfo(hypervisor_list, all_hvparams):
2552 """Gather data about all instances. 2553 2554 This is the equivalent of L{GetInstanceInfo}, except that it 2555 computes data for all instances at once, thus being faster if one 2556 needs data about more than one instance. 2557 2558 @type hypervisor_list: list 2559 @param hypervisor_list: list of hypervisors to query for instance data 2560 @type all_hvparams: dict of dict of strings 2561 @param all_hvparams: mapping of hypervisor names to hvparams 2562 2563 @rtype: dict 2564 @return: dictionary of instance: data, with data having the following keys: 2565 - memory: memory size of instance (int) 2566 - state: xen state of instance (string) 2567 - time: cpu time of instance (float) 2568 - vcpus: the number of vcpus 2569 2570 """ 2571 output = {} 2572 for hname in hypervisor_list: 2573 hvparams = all_hvparams[hname] 2574 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 2575 if iinfo: 2576 for name, _, memory, vcpus, state, times in iinfo: 2577 value = { 2578 "memory": memory, 2579 "vcpus": vcpus, 2580 "state": state, 2581 "time": times, 2582 } 2583 if name in output: 2584 # we only check static parameters, like memory and vcpus, 2585 # and not state and time which can change between the 2586 # invocations of the different hypervisors 2587 for key in "memory", "vcpus": 2588 if value[key] != output[name][key]: 2589 _Fail("Instance %s is running twice" 2590 " with different parameters", name) 2591 output[name] = value 2592 2593 return output
2594
2595 2596 -def GetInstanceConsoleInfo(instance_param_dict, 2597 get_hv_fn=hypervisor.GetHypervisor):
2598 """Gather data about the console access of a set of instances of this node. 2599 2600 This function assumes that the caller already knows which instances are on 2601 this node, by calling a function such as L{GetAllInstancesInfo} or 2602 L{GetInstanceList}. 2603 2604 For every instance, a large amount of configuration data needs to be 2605 provided to the hypervisor interface in order to receive the console 2606 information. Whether this could or should be cut down can be discussed. 2607 The information is provided in a dictionary indexed by instance name, 2608 allowing any number of instance queries to be done. 2609 2610 @type instance_param_dict: dict of string to tuple of dictionaries, where the 2611 dictionaries represent: L{objects.Instance}, L{objects.Node}, 2612 L{objects.NodeGroup}, HvParams, BeParams 2613 @param instance_param_dict: mapping of instance name to parameters necessary 2614 for console information retrieval 2615 2616 @rtype: dict 2617 @return: dictionary of instance: data, with data having the following keys: 2618 - instance: instance name 2619 - kind: console kind 2620 - message: used with kind == CONS_MESSAGE, indicates console to be 2621 unavailable, supplies error message 2622 - host: host to connect to 2623 - port: port to use 2624 - user: user for login 2625 - command: the command, broken into parts as an array 2626 - display: unknown, potentially unused? 2627 2628 """ 2629 2630 output = {} 2631 for inst_name in instance_param_dict: 2632 instance = instance_param_dict[inst_name]["instance"] 2633 pnode = instance_param_dict[inst_name]["node"] 2634 group = instance_param_dict[inst_name]["group"] 2635 hvparams = instance_param_dict[inst_name]["hvParams"] 2636 beparams = instance_param_dict[inst_name]["beParams"] 2637 2638 instance = objects.Instance.FromDict(instance) 2639 pnode = objects.Node.FromDict(pnode) 2640 group = objects.NodeGroup.FromDict(group) 2641 2642 h = get_hv_fn(instance.hypervisor) 2643 output[inst_name] = h.GetInstanceConsole(instance, pnode, group, 2644 hvparams, beparams).ToDict() 2645 2646 return output
2647
2648 2649 -def _InstanceLogName(kind, os_name, instance, component):
2650 """Compute the OS log filename for a given instance and operation. 2651 2652 The instance name and os name are passed in as strings since not all 2653 operations have these as part of an instance object. 2654 2655 @type kind: string 2656 @param kind: the operation type (e.g. add, import, etc.) 2657 @type os_name: string 2658 @param os_name: the os name 2659 @type instance: string 2660 @param instance: the name of the instance being imported/added/etc. 2661 @type component: string or None 2662 @param component: the name of the component of the instance being 2663 transferred 2664 2665 """ 2666 # TODO: Use tempfile.mkstemp to create unique filename 2667 if component: 2668 assert "/" not in component 2669 c_msg = "-%s" % component 2670 else: 2671 c_msg = "" 2672 base = ("%s-%s-%s%s-%s.log" % 2673 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 2674 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2675
2676 2677 -def InstanceOsAdd(instance, reinstall, debug):
2678 """Add an OS to an instance. 2679 2680 @type instance: L{objects.Instance} 2681 @param instance: Instance whose OS is to be installed 2682 @type reinstall: boolean 2683 @param reinstall: whether this is an instance reinstall 2684 @type debug: integer 2685 @param debug: debug level, passed to the OS scripts 2686 @rtype: None 2687 2688 """ 2689 inst_os = OSFromDisk(instance.os) 2690 2691 create_env = OSEnvironment(instance, inst_os, debug) 2692 if reinstall: 2693 create_env["INSTANCE_REINSTALL"] = "1" 2694 2695 logfile = _InstanceLogName("add", instance.os, instance.name, None) 2696 2697 result = utils.RunCmd([inst_os.create_script], env=create_env, 2698 cwd=inst_os.path, output=logfile, reset_env=True) 2699 if result.failed: 2700 logging.error("os create command '%s' returned error: %s, logfile: %s," 2701 " output: %s", result.cmd, result.fail_reason, logfile, 2702 result.output) 2703 lines = [utils.SafeEncode(val) 2704 for val in utils.TailFile(logfile, lines=20)] 2705 _Fail("OS create script failed (%s), last lines in the" 2706 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2707
2708 2709 -def RunRenameInstance(instance, old_name, debug):
2710 """Run the OS rename script for an instance. 2711 2712 @type instance: L{objects.Instance} 2713 @param instance: Instance whose OS is to be installed 2714 @type old_name: string 2715 @param old_name: previous instance name 2716 @type debug: integer 2717 @param debug: debug level, passed to the OS scripts 2718 @rtype: boolean 2719 @return: the success of the operation 2720 2721 """ 2722 inst_os = OSFromDisk(instance.os) 2723 2724 rename_env = OSEnvironment(instance, inst_os, debug) 2725 rename_env["OLD_INSTANCE_NAME"] = old_name 2726 2727 logfile = _InstanceLogName("rename", instance.os, 2728 "%s-%s" % (old_name, instance.name), None) 2729 2730 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 2731 cwd=inst_os.path, output=logfile, reset_env=True) 2732 2733 if result.failed: 2734 logging.error("os create command '%s' returned error: %s output: %s", 2735 result.cmd, result.fail_reason, result.output) 2736 lines = [utils.SafeEncode(val) 2737 for val in utils.TailFile(logfile, lines=20)] 2738 _Fail("OS rename script failed (%s), last lines in the" 2739 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2740
2741 2742 -def _GetBlockDevSymlinkPath(instance_name, idx=None, uuid=None, _dir=None):
2743 """Returns symlink path for block device. 2744 2745 """ 2746 if _dir is None: 2747 _dir = pathutils.DISK_LINKS_DIR 2748 2749 assert idx is not None or uuid is not None 2750 2751 # Using the idx is deprecated. Use the uuid instead if it is available. 2752 if uuid: 2753 ident = uuid 2754 else: 2755 ident = idx 2756 2757 return utils.PathJoin(_dir, 2758 ("%s%s%s" % 2759 (instance_name, constants.DISK_SEPARATOR, ident)))
2760
2761 2762 -def _SymlinkBlockDev(instance_name, device_path, idx=None, uuid=None):
2763 """Set up symlinks to a instance's block device. 2764 2765 This is an auxiliary function run when an instance is start (on the primary 2766 node) or when an instance is migrated (on the target node). 2767 2768 2769 @param instance_name: the name of the target instance 2770 @param device_path: path of the physical block device, on the node 2771 @param idx: the disk index 2772 @param uuid: the disk uuid 2773 @return: absolute path to the disk's symlink 2774 2775 """ 2776 # In case we have only a userspace access URI, device_path is None 2777 if not device_path: 2778 return None 2779 2780 link_name = _GetBlockDevSymlinkPath(instance_name, idx, uuid) 2781 2782 try: 2783 os.symlink(device_path, link_name) 2784 except OSError, err: 2785 if err.errno == errno.EEXIST: 2786 if (not os.path.islink(link_name) or 2787 os.readlink(link_name) != device_path): 2788 os.remove(link_name) 2789 os.symlink(device_path, link_name) 2790 else: 2791 raise 2792 2793 return link_name
2794 
2795 
2796 -def _RemoveBlockDevLinks(instance_name, disks):
2797  """Remove the block device symlinks belonging to the given instance.
2798 
2799  """
2800  def _remove_symlink(link_name):
2801    if os.path.islink(link_name):
2802      try:
2803        os.remove(link_name)
2804      except OSError:
2805        logging.exception("Error while removing symlink %s", link_name)
2806 
2807  for idx, disk in enumerate(disks):
2808    link_name = _GetBlockDevSymlinkPath(instance_name, uuid=disk.uuid)
2809    _remove_symlink(link_name)
2810    # Remove also the deprecated symlink (if any)
2811    link_name = _GetBlockDevSymlinkPath(instance_name, idx=idx)
2812    _remove_symlink(link_name)
2813 
2814 2815 -def _CalculateDeviceURI(instance, disk, device):
2816 """Get the URI for the device. 2817 2818 @type instance: L{objects.Instance} 2819 @param instance: the instance which disk belongs to 2820 @type disk: L{objects.Disk} 2821 @param disk: the target disk object 2822 @type device: L{bdev.BlockDev} 2823 @param device: the corresponding BlockDevice 2824 @rtype: string 2825 @return: the device uri if any else None 2826 2827 """ 2828 access_mode = disk.params.get(constants.LDP_ACCESS, 2829 constants.DISK_KERNELSPACE) 2830 if access_mode == constants.DISK_USERSPACE: 2831 # This can raise errors.BlockDeviceError 2832 return device.GetUserspaceAccessUri(instance.hypervisor) 2833 else: 2834 return None
2835
2836 2837 -def _GatherAndLinkBlockDevs(instance):
2838 """Set up an instance's block device(s). 2839 2840 This is run on the primary node at instance startup. The block 2841 devices must be already assembled. 2842 2843 @type instance: L{objects.Instance} 2844 @param instance: the instance whose disks we should assemble 2845 @rtype: list 2846 @return: list of (disk_object, link_name, drive_uri) 2847 2848 """ 2849 block_devices = [] 2850 for idx, disk in enumerate(instance.disks_info): 2851 device = _RecursiveFindBD(disk) 2852 if device is None: 2853 raise errors.BlockDeviceError("Block device '%s' is not set up." % 2854 str(disk)) 2855 device.Open() 2856 try: 2857 # Create both index-based and uuid-based symlinks 2858 # for backwards compatibility 2859 _SymlinkBlockDev(instance.name, device.dev_path, idx=idx) 2860 link_name = _SymlinkBlockDev(instance.name, device.dev_path, 2861 uuid=disk.uuid) 2862 except OSError, e: 2863 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 2864 e.strerror) 2865 uri = _CalculateDeviceURI(instance, disk, device) 2866 2867 block_devices.append((disk, link_name, uri)) 2868 2869 return block_devices
2870
2871 2872 -def _IsInstanceUserDown(instance_info):
2873 return instance_info and \ 2874 "state" in instance_info and \ 2875 hv_base.HvInstanceState.IsShutdown(instance_info["state"])
2876
2877 2878 -def _GetInstanceInfo(instance):
2879 """Helper function L{GetInstanceInfo}""" 2880 return GetInstanceInfo(instance.name, instance.hypervisor, 2881 hvparams=instance.hvparams)
2882
2883 2884 -def StartInstance(instance, startup_paused, reason, store_reason=True):
2885 """Start an instance. 2886 2887 @type instance: L{objects.Instance} 2888 @param instance: the instance object 2889 @type startup_paused: bool 2890 @param instance: pause instance at startup? 2891 @type reason: list of reasons 2892 @param reason: the reason trail for this startup 2893 @type store_reason: boolean 2894 @param store_reason: whether to store the shutdown reason trail on file 2895 @rtype: None 2896 2897 """ 2898 instance_info = _GetInstanceInfo(instance) 2899 2900 if instance_info and not _IsInstanceUserDown(instance_info): 2901 logging.info("Instance '%s' already running, not starting", instance.name) 2902 return 2903 2904 try: 2905 block_devices = _GatherAndLinkBlockDevs(instance) 2906 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2907 hyper.StartInstance(instance, block_devices, startup_paused) 2908 if store_reason: 2909 _StoreInstReasonTrail(instance.name, reason) 2910 except errors.BlockDeviceError, err: 2911 _Fail("Block device error: %s", err, exc=True) 2912 except errors.HypervisorError, err: 2913 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2914 _Fail("Hypervisor error: %s", err, exc=True)
2915
2916 2917 -def InstanceShutdown(instance, timeout, reason, store_reason=True):
2918 """Shut an instance down. 2919 2920 @note: this functions uses polling with a hardcoded timeout. 2921 2922 @type instance: L{objects.Instance} 2923 @param instance: the instance object 2924 @type timeout: integer 2925 @param timeout: maximum timeout for soft shutdown 2926 @type reason: list of reasons 2927 @param reason: the reason trail for this shutdown 2928 @type store_reason: boolean 2929 @param store_reason: whether to store the shutdown reason trail on file 2930 @rtype: None 2931 2932 """ 2933 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2934 2935 if not _GetInstanceInfo(instance): 2936 logging.info("Instance '%s' not running, doing nothing", instance.name) 2937 return 2938 2939 class _TryShutdown(object): 2940 def __init__(self): 2941 self.tried_once = False
2942 2943 def __call__(self): 2944 if not _GetInstanceInfo(instance): 2945 return 2946 2947 try: 2948 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 2949 if store_reason: 2950 _StoreInstReasonTrail(instance.name, reason) 2951 except errors.HypervisorError, err: 2952 # if the instance is no longer existing, consider this a 2953 # success and go to cleanup 2954 if not _GetInstanceInfo(instance): 2955 return 2956 2957 _Fail("Failed to stop instance '%s': %s", instance.name, err) 2958 2959 self.tried_once = True 2960 2961 raise utils.RetryAgain() 2962 2963 try: 2964 utils.Retry(_TryShutdown(), 5, timeout) 2965 except utils.RetryTimeout: 2966 # the shutdown did not succeed 2967 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name) 2968 2969 try: 2970 hyper.StopInstance(instance, force=True) 2971 except errors.HypervisorError, err: 2972 # only raise an error if the instance still exists, otherwise 2973 # the error could simply be "instance ... unknown"! 2974 if _GetInstanceInfo(instance): 2975 _Fail("Failed to force stop instance '%s': %s", instance.name, err) 2976 2977 time.sleep(1) 2978 2979 if _GetInstanceInfo(instance): 2980 _Fail("Could not shutdown instance '%s' even by destroy", instance.name) 2981 2982 try: 2983 hyper.CleanupInstance(instance.name) 2984 except errors.HypervisorError, err: 2985 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 2986 2987 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2988
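The _TryShutdown callable above relies on the generic retry protocol of utils.Retry: the callable raises utils.RetryAgain to request another attempt every `delay` seconds until `timeout` elapses, at which point utils.RetryTimeout is raised. A minimal standalone sketch of that protocol (the file path is an example):

  class _WaitForFile(object):
    def __init__(self, path):
      self.path = path

    def __call__(self):
      if not os.path.exists(self.path):
        raise utils.RetryAgain()   # ask utils.Retry to call us again

  try:
    utils.Retry(_WaitForFile("/var/run/example.pid"), 5, 60)
  except utils.RetryTimeout:
    logging.error("File did not appear within 60 seconds")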
2989 2990 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2991 """Reboot an instance. 2992 2993 @type instance: L{objects.Instance} 2994 @param instance: the instance object to reboot 2995 @type reboot_type: str 2996 @param reboot_type: the type of reboot, one the following 2997 constants: 2998 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 2999 instance OS, do not recreate the VM 3000 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 3001 restart the VM (at the hypervisor level) 3002 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 3003 not accepted here, since that mode is handled differently, in 3004 cmdlib, and translates into full stop and start of the 3005 instance (instead of a call_instance_reboot RPC) 3006 @type shutdown_timeout: integer 3007 @param shutdown_timeout: maximum timeout for soft shutdown 3008 @type reason: list of reasons 3009 @param reason: the reason trail for this reboot 3010 @rtype: None 3011 3012 """ 3013 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown' 3014 # because those functions simply 'return' on error whereas this one 3015 # raises an exception with '_Fail' 3016 if not _GetInstanceInfo(instance): 3017 _Fail("Cannot reboot instance '%s' that is not running", instance.name) 3018 3019 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3020 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 3021 try: 3022 hyper.RebootInstance(instance) 3023 except errors.HypervisorError, err: 3024 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err) 3025 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 3026 try: 3027 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 3028 result = StartInstance(instance, False, reason, store_reason=False) 3029 _StoreInstReasonTrail(instance.name, reason) 3030 return result 3031 except errors.HypervisorError, err: 3032 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err) 3033 else: 3034 _Fail("Invalid reboot_type received: '%s'", reboot_type)
3035
3036 3037 -def InstanceBalloonMemory(instance, memory):
3038 """Resize an instance's memory. 3039 3040 @type instance: L{objects.Instance} 3041 @param instance: the instance object 3042 @type memory: int 3043 @param memory: new memory amount in MB 3044 @rtype: None 3045 3046 """ 3047 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3048 running = hyper.ListInstances(hvparams=instance.hvparams) 3049 if instance.name not in running: 3050 logging.info("Instance %s is not running, cannot balloon", instance.name) 3051 return 3052 try: 3053 hyper.BalloonInstanceMemory(instance, memory) 3054 except errors.HypervisorError, err: 3055 _Fail("Failed to balloon instance memory: %s", err, exc=True)
3056
3057 3058 -def MigrationInfo(instance):
3059 """Gather information about an instance to be migrated. 3060 3061 @type instance: L{objects.Instance} 3062 @param instance: the instance definition 3063 3064 """ 3065 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3066 try: 3067 info = hyper.MigrationInfo(instance) 3068 except errors.HypervisorError, err: 3069 _Fail("Failed to fetch migration information: %s", err, exc=True) 3070 return info
3071
3072 3073 -def AcceptInstance(instance, info, target):
3074 """Prepare the node to accept an instance. 3075 3076 @type instance: L{objects.Instance} 3077 @param instance: the instance definition 3078 @type info: string/data (opaque) 3079 @param info: migration information, from the source node 3080 @type target: string 3081 @param target: target host (usually ip), on this node 3082 3083 """ 3084 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3085 try: 3086 hyper.AcceptInstance(instance, info, target) 3087 except errors.HypervisorError, err: 3088 _Fail("Failed to accept instance: %s", err, exc=True)
3089
3090 3091 -def FinalizeMigrationDst(instance, info, success):
3092 """Finalize any preparation to accept an instance. 3093 3094 @type instance: L{objects.Instance} 3095 @param instance: the instance definition 3096 @type info: string/data (opaque) 3097 @param info: migration information, from the source node 3098 @type success: boolean 3099 @param success: whether the migration was a success or a failure 3100 3101 """ 3102 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3103 try: 3104 hyper.FinalizeMigrationDst(instance, info, success) 3105 except errors.HypervisorError, err: 3106 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
3107
3108 3109 -def MigrateInstance(cluster_name, instance, target, live):
3110 """Migrates an instance to another node. 3111 3112 @type cluster_name: string 3113 @param cluster_name: name of the cluster 3114 @type instance: L{objects.Instance} 3115 @param instance: the instance definition 3116 @type target: string 3117 @param target: the target node name 3118 @type live: boolean 3119 @param live: whether the migration should be done live or not (the 3120 interpretation of this parameter is left to the hypervisor) 3121 @raise RPCFail: if migration fails for some reason 3122 3123 """ 3124 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3125 3126 try: 3127 hyper.MigrateInstance(cluster_name, instance, target, live) 3128 except errors.HypervisorError, err: 3129 _Fail("Failed to migrate instance: %s", err, exc=True)
3130
3131 3132 -def FinalizeMigrationSource(instance, success, live):
3133 """Finalize the instance migration on the source node. 3134 3135 @type instance: L{objects.Instance} 3136 @param instance: the instance definition of the migrated instance 3137 @type success: bool 3138 @param success: whether the migration succeeded or not 3139 @type live: bool 3140 @param live: whether the user requested a live migration or not 3141 @raise RPCFail: If the execution fails for some reason 3142 3143 """ 3144 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3145 3146 try: 3147 hyper.FinalizeMigrationSource(instance, success, live) 3148 except Exception, err: # pylint: disable=W0703 3149 _Fail("Failed to finalize the migration on the source node: %s", err, 3150 exc=True)
3151
3152 3153 -def GetMigrationStatus(instance):
3154 """Get the migration status 3155 3156 @type instance: L{objects.Instance} 3157 @param instance: the instance that is being migrated 3158 @rtype: L{objects.MigrationStatus} 3159 @return: the status of the current migration (one of 3160 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 3161 progress info that can be retrieved from the hypervisor 3162 @raise RPCFail: If the migration status cannot be retrieved 3163 3164 """ 3165 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3166 try: 3167 return hyper.GetMigrationStatus(instance) 3168 except Exception, err: # pylint: disable=W0703 3169 _Fail("Failed to get migration status: %s", err, exc=True)
3170
3171 3172 -def HotplugDevice(instance, action, dev_type, device, extra, seq):
3173 """Hotplug a device 3174 3175 Hotplug is currently supported only for KVM Hypervisor. 3176 @type instance: L{objects.Instance} 3177 @param instance: the instance to which we hotplug a device 3178 @type action: string 3179 @param action: the hotplug action to perform 3180 @type dev_type: string 3181 @param dev_type: the device type to hotplug 3182 @type device: either L{objects.NIC} or L{objects.Disk} 3183 @param device: the device object to hotplug 3184 @type extra: tuple 3185 @param extra: extra info used for disk hotplug (disk link, drive uri) 3186 @type seq: int 3187 @param seq: the index of the device from master perspective 3188 @raise RPCFail: in case instance does not have KVM hypervisor 3189 3190 """ 3191 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3192 try: 3193 hyper.VerifyHotplugSupport(instance, action, dev_type) 3194 except errors.HotplugError, err: 3195 _Fail("Hotplug is not supported: %s", err) 3196 3197 if action == constants.HOTPLUG_ACTION_ADD: 3198 fn = hyper.HotAddDevice 3199 elif action == constants.HOTPLUG_ACTION_REMOVE: 3200 fn = hyper.HotDelDevice 3201 elif action == constants.HOTPLUG_ACTION_MODIFY: 3202 fn = hyper.HotModDevice 3203 else: 3204 assert action in constants.HOTPLUG_ALL_ACTIONS 3205 3206 return fn(instance, dev_type, device, extra, seq)
3207
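The if/elif ladder above is effectively a dispatch table; the same selection can be written as a dict lookup, sketched here as a stylistic alternative (the helper name is made up; the constants and hypervisor methods are the ones used above):

      def _PickHotplugFn(hyper, action):
        # Map each hotplug action constant to the hypervisor method that
        # implements it; unknown actions fail loudly instead of falling
        # through to an unbound function.
        handlers = {
          constants.HOTPLUG_ACTION_ADD: hyper.HotAddDevice,
          constants.HOTPLUG_ACTION_REMOVE: hyper.HotDelDevice,
          constants.HOTPLUG_ACTION_MODIFY: hyper.HotModDevice,
        }
        try:
          return handlers[action]
        except KeyError:
          raise errors.HotplugError("Unknown hotplug action '%s'" % action)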
3208  
3209  def HotplugSupported(instance):
3210 """Checks if hotplug is generally supported. 3211 3212 """ 3213 hyper = hypervisor.GetHypervisor(instance.hypervisor) 3214 try: 3215 hyper.HotplugSupported(instance) 3216 except errors.HotplugError, err: 3217 _Fail("Hotplug is not supported: %s", err)
3218
3219  
3220  def ModifyInstanceMetadata(metadata):
3221 """Sends instance data to the metadata daemon. 3222 3223 Uses the Luxi transport layer to communicate with the metadata 3224 daemon configuration server. It starts the metadata daemon if it is 3225 not running. 3226 The daemon must be enabled during at configuration time. 3227 3228 @type metadata: dict 3229 @param metadata: instance metadata obtained by calling 3230 L{objects.Instance.ToDict} on an instance object 3231 3232 """ 3233 if not constants.ENABLE_METAD: 3234 raise errors.ProgrammerError("The metadata deamon is disabled, yet" 3235 " ModifyInstanceMetadata has been called") 3236 3237 if not utils.IsDaemonAlive(constants.METAD): 3238 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD]) 3239 if result.failed: 3240 raise errors.HypervisorError("Failed to start metadata daemon") 3241 3242 with contextlib.closing(metad.Client()) as client: 3243 client.UpdateConfig(metadata)
3244
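The contextlib.closing wrapper above guarantees that the client's close() runs whether or not UpdateConfig raises. A self-contained sketch of the pattern with a stand-in client (FakeClient is invented for illustration; the module itself uses metad.Client()):

      import contextlib

      class FakeClient(object):
        # Stand-in for metad.Client(): any object with a close() method
        # works with contextlib.closing, which calls it on both normal
        # and exceptional exit from the "with" block.
        def UpdateConfig(self, data):
          pass

        def close(self):
          pass

      with contextlib.closing(FakeClient()) as client:
        client.UpdateConfig({"name": "inst1.example.com"})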
3245  
3246  def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
3247 """Creates a block device for an instance. 3248 3249 @type disk: L{objects.Disk} 3250 @param disk: the object describing the disk we should create 3251 @type size: int 3252 @param size: the size of the physical underlying device, in MiB 3253 @type owner: str 3254 @param owner: the name of the instance for which disk is created, 3255 used for device cache data 3256 @type on_primary: boolean 3257 @param on_primary: indicates if it is the primary node or not 3258 @type info: string 3259 @param info: string that will be sent to the physical device 3260 creation, used for example to set (LVM) tags on LVs 3261 @type excl_stor: boolean 3262 @param excl_stor: Whether exclusive_storage is active 3263 3264 @return: the new unique_id of the device (this can sometime be 3265 computed only after creation), or None. On secondary nodes, 3266 it's not required to return anything. 3267 3268 """ 3269 # TODO: remove the obsolete "size" argument 3270 # pylint: disable=W0613 3271 clist = [] 3272 if disk.children: 3273 for child in disk.children: 3274 try: 3275 crdev = _RecursiveAssembleBD(child, owner, on_primary) 3276 except errors.BlockDeviceError, err: 3277 _Fail("Can't assemble device %s: %s", child, err) 3278 if on_primary or disk.AssembleOnSecondary(): 3279 # we need the children open in case the device itself has to 3280 # be assembled 3281 try: 3282 # pylint: disable=E1103 3283 crdev.Open() 3284 except errors.BlockDeviceError, err: 3285 _Fail("Can't make child '%s' read-write: %s", child, err) 3286 clist.append(crdev) 3287 3288 try: 3289 device = bdev.Create(disk, clist, excl_stor) 3290 except errors.BlockDeviceError, err: 3291 _Fail("Can't create block device: %s", err) 3292 3293 if on_primary or disk.AssembleOnSecondary(): 3294 try: 3295 device.Assemble() 3296 except errors.BlockDeviceError, err: 3297 _Fail("Can't assemble device after creation, unusual event: %s", err) 3298 if on_primary or disk.OpenOnSecondary(): 3299 try: 3300 device.Open(force=True) 3301 except errors.BlockDeviceError, err: 3302 _Fail("Can't make device r/w after creation, unusual event: %s", err) 3303 DevCacheManager.UpdateCache(device.dev_path, owner, 3304 on_primary, disk.iv_name) 3305 3306 device.SetInfo(info) 3307 3308 return device.unique_id
3309
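The child-handling loop above follows the usual bottom-up pattern for layered block devices (e.g. DRBD on top of LVM): children are assembled and opened first so the parent can be created on top of them. Reduced to a stand-alone sketch with stand-in callables (none of these names are ganeti APIs):

      def _AssembleTree(node, assemble_fn, open_fn):
        # Depth-first: bring up the children, open them so the parent can
        # stack on top of them, then assemble the parent itself.
        ready = [_AssembleTree(child, assemble_fn, open_fn)
                 for child in getattr(node, "children", None) or []]
        for child in ready:
          open_fn(child)
        return assemble_fn(node, ready)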
3310  
3311  def _DumpDevice(source_path, target_path, offset, size, truncate):
3312 """This function images/wipes the device using a local file. 3313 3314 @type source_path: string 3315 @param source_path: path of the image or data source (e.g., "/dev/zero") 3316 3317 @type target_path: string 3318 @param target_path: path of the device to image/wipe 3319 3320 @type offset: int 3321 @param offset: offset in MiB in the output file 3322 3323 @type size: int 3324 @param size: maximum size in MiB to write (data source might be smaller) 3325 3326 @type truncate: bool 3327 @param truncate: whether the file should be truncated 3328 3329 @return: None 3330 @raise RPCFail: in case of failure 3331 3332 """ 3333 # Internal sizes are always in Mebibytes; if the following "dd" command 3334 # should use a different block size the offset and size given to this 3335 # function must be adjusted accordingly before being passed to "dd". 3336 block_size = constants.DD_BLOCK_SIZE 3337 3338 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset, 3339 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path, 3340 "count=%d" % size] 3341 3342 if not truncate: 3343 cmd.append("conv=notrunc") 3344 3345 result = utils.RunCmd(cmd) 3346 3347 if result.failed: 3348 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd, 3349 result.fail_reason, result.output)
3350
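For concreteness, assuming constants.DD_CMD is "dd" and the 1 MiB block size mentioned in the comment above, a hypothetical call _DumpDevice("/dev/zero", "/dev/xvdb", 0, 1024, truncate=False) would build roughly this command (the exact spelling of "bs=" depends on constants.DD_BLOCK_SIZE):

      cmd = ["dd", "if=/dev/zero", "seek=0", "bs=1M", "oflag=direct",
             "of=/dev/xvdb", "count=1024", "conv=notrunc"]
      # shell equivalent:
      #   dd if=/dev/zero seek=0 bs=1M oflag=direct of=/dev/xvdb \
      #      count=1024 conv=notrunc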
3351  
3352  def _DownloadAndDumpDevice(source_url, target_path, size):
3353 """This function images a device using a downloaded image file. 3354 3355 @type source_url: string 3356 @param source_url: URL of image to dump to disk 3357 3358 @type target_path: string 3359 @param target_path: path of the device to image 3360 3361 @type size: int 3362 @param size: maximum size in MiB to write (data source might be smaller) 3363 3364 @rtype: NoneType 3365 @return: None 3366 @raise RPCFail: in case of download or write failures 3367 3368 """ 3369 class DDParams(object): 3370 def __init__(self, current_size, total_size): 3371 self.current_size = current_size 3372 self.total_size = total_size 3373 self.image_size_error = False
3374  
3375    def dd_write(ddparams, out):
3376      if ddparams.current_size < ddparams.total_size:
3377        ddparams.current_size += len(out)
3378        target_file.write(out)
3379      else:
3380        ddparams.image_size_error = True
3381        return -1
3382  
3383    target_file = open(target_path, "r+")
3384    ddparams = DDParams(0, 1024 * 1024 * size)
3385  
3386    curl = pycurl.Curl()
3387    curl.setopt(pycurl.VERBOSE, True)
3388    curl.setopt(pycurl.NOSIGNAL, True)
3389    curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
3390    curl.setopt(pycurl.URL, source_url)
3391    curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))
3392  
3393    try:
3394      curl.perform()
3395    except pycurl.error:
3396      if ddparams.image_size_error:
3397        _Fail("Disk image larger than the disk")
3398      else:
3399        raise
3400  
3401    target_file.close()
3402  
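The write callback above relies on a libcurl convention: if a WRITEFUNCTION returns a value different from the number of bytes it was handed (returning None counts as success), the transfer aborts and perform() raises pycurl.error, which the code then maps to the "image larger than disk" failure. A self-contained sketch of that guard (names are illustrative; this variant is slightly stricter and refuses to overshoot by even a partial chunk):

      import pycurl

      class _SizeGuard(object):
        # Write chunks through to a sink until a byte limit would be
        # exceeded, then abort the curl transfer.
        def __init__(self, limit, sink):
          self.received = 0
          self.limit = limit
          self.sink = sink
          self.overflow = False

        def __call__(self, chunk):
          if self.received + len(chunk) > self.limit:
            self.overflow = True
            return -1  # anything != len(chunk) makes libcurl abort
          self.received += len(chunk)
          self.sink.write(chunk)

      # usage sketch: curl.setopt(pycurl.WRITEFUNCTION, _SizeGuard(limit, f))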
3403  
3404  def BlockdevConvert(src_disk, target_disk):
3405 """Copies data from source block device to target. 3406 3407 This function gets the export and import commands from the source and 3408 target devices respectively, and then concatenates them to a single 3409 command using a pipe ("|"). Finally, executes the unified command that 3410 will transfer the data between the devices during the disk template 3411 conversion operation. 3412 3413 @type src_disk: L{objects.Disk} 3414 @param src_disk: the disk object we want to copy from 3415 @type target_disk: L{objects.Disk} 3416 @param target_disk: the disk object we want to copy to 3417 3418 @rtype: NoneType 3419 @return: None 3420 @raise RPCFail: in case of failure 3421 3422 """ 3423 src_dev = _RecursiveFindBD(src_disk) 3424 if src_dev is None: 3425 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid) 3426 3427 dest_dev = _RecursiveFindBD(target_disk) 3428 if dest_dev is None: 3429 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid) 3430 3431 src_cmd = src_dev.Export() 3432 dest_cmd = dest_dev.Import() 3433 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd), 3434 utils.ShellQuoteArgs(dest_cmd)) 3435 3436 result = utils.RunCmd(command) 3437 if result.failed: 3438 _Fail("Disk conversion command '%s' exited with error: %s; output: %s", 3439 result.cmd, result.fail_reason, result.output)
3440
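For a hypothetical pair of LVM volumes whose Export() and Import() methods yield dd argument vectors, the pipeline assembled above would come out as:

      # Hypothetical Export()/Import() results:
      src_cmd = ["dd", "if=/dev/vg0/disk-src", "bs=1M"]
      dest_cmd = ["dd", "of=/dev/vg0/disk-dst", "bs=1M", "oflag=direct"]
      command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd),
                             utils.ShellQuoteArgs(dest_cmd))
      # -> dd if=/dev/vg0/disk-src bs=1M | dd of=/dev/vg0/disk-dst bs=1M oflag=direct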
3441  
3442  def BlockdevWipe(disk, offset, size):
3443 """Wipes a block device. 3444 3445 @type disk: L{objects.Disk} 3446 @param disk: the disk object we want to wipe 3447 @type offset: int 3448 @param offset: The offset in MiB in the file 3449 @type size: int 3450 @param size: The size in MiB to write 3451 3452 """ 3453 try: 3454 rdev = _RecursiveFindBD(disk) 3455 except errors.BlockDeviceError: 3456 rdev = None 3457 3458 if not rdev: 3459 _Fail("Cannot wipe device %s: device not found", disk.iv_name) 3460 if offset < 0: 3461 _Fail("Negative offset") 3462 if size < 0: 3463 _Fail("Negative size") 3464 if offset > rdev.size: 3465 _Fail("Wipe offset is bigger than device size") 3466 if (