ganeti.backend

Source Code for Module ganeti.backend

1 # 2 # 3 4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc. 5 # All rights reserved. 6 # 7 # Redistribution and use in source and binary forms, with or without 8 # modification, are permitted provided that the following conditions are 9 # met: 10 # 11 # 1. Redistributions of source code must retain the above copyright notice, 12 # this list of conditions and the following disclaimer. 13 # 14 # 2. Redistributions in binary form must reproduce the above copyright 15 # notice, this list of conditions and the following disclaimer in the 16 # documentation and/or other materials provided with the distribution. 17 # 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 31 """Functions used by the node daemon 32 33 @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in 34 the L{UploadFile} function 35 @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted 36 in the L{_CleanDirectory} function 37 38 """ 39 40 # pylint: disable=E1103,C0302 41 42 # E1103: %s %r has no %r member (but some types could not be 43 # inferred), because the _TryOSFromDisk returns either (True, os_obj) 44 # or (False, "string") which confuses pylint 45 46 # C0302: This module has become too big and should be split up 47 48 49 import base64 50 import errno 51 import logging 52 import os 53 import os.path 54 import pycurl 55 import random 56 import re 57 import shutil 58 import signal 59 import socket 60 import stat 61 import tempfile 62 import time 63 import zlib 64 import copy 65 66 from ganeti import errors 67 from ganeti import http 68 from ganeti import utils 69 from ganeti import ssh 70 from ganeti import hypervisor 71 from ganeti.hypervisor import hv_base 72 from ganeti import constants 73 from ganeti.storage import bdev 74 from ganeti.storage import drbd 75 from ganeti.storage import extstorage 76 from ganeti.storage import filestorage 77 from ganeti import objects 78 from ganeti import ssconf 79 from ganeti import serializer 80 from ganeti import netutils 81 from ganeti import runtime 82 from ganeti import compat 83 from ganeti import pathutils 84 from ganeti import vcluster 85 from ganeti import ht 86 from ganeti.storage.base import BlockDev 87 from ganeti.storage.drbd import DRBD8 88 from ganeti import hooksmaster 89 from ganeti.rpc import transport 90 from ganeti.rpc.errors import NoMasterError, TimeoutError 91 92 93 _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id" 94 _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([ 95 pathutils.DATA_DIR, 96 pathutils.JOB_QUEUE_ARCHIVE_DIR, 97 pathutils.QUEUE_DIR, 98 pathutils.CRYPTO_KEYS_DIR, 99 ]) 100 _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60 101 _X509_KEY_FILE = "key" 102 _X509_CERT_FILE = "cert" 103 _IES_STATUS_FILE = "status" 104 _IES_PID_FILE = "pid" 105 _IES_CA_FILE = "ca" 106 107 #: Valid LVS output line regex 108 _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$") 109 110 # Actions for the master setup script 111 _MASTER_START = "start" 112 _MASTER_STOP = "stop" 113 114 #: Maximum file permissions for restricted command directory and executables 115 _RCMD_MAX_MODE = (stat.S_IRWXU | 116 stat.S_IRGRP | stat.S_IXGRP | 117 stat.S_IROTH | stat.S_IXOTH) 118 119 #: Delay before returning an error for restricted commands 120 _RCMD_INVALID_DELAY = 10 121 122 #: How long to wait to acquire lock for restricted commands (shorter than 123 #: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many 124 #: command requests arrive 125 _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8

126 127 128 -class RPCFail(Exception):

129 """Class denoting RPC failure. 130 131 Its argument is the error message. 132 133 """

134

135 136 -def _GetInstReasonFilename(instance_name):

137 """Path of the file containing the reason of the instance status change. 138 139 @type instance_name: string 140 @param instance_name: The name of the instance 141 @rtype: string 142 @return: The path of the file 143 144 """ 145 return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)

146

147 148 -def _StoreInstReasonTrail(instance_name, trail):

149 """Serialize a reason trail related to an instance change of state to file. 150 151 The exact location of the file depends on the name of the instance and on 152 the configuration of the Ganeti cluster defined at deploy time. 153 154 @type instance_name: string 155 @param instance_name: The name of the instance 156 157 @type trail: list of reasons 158 @param trail: reason trail 159 160 @rtype: None 161 162 """ 163 json = serializer.DumpJson(trail) 164 filename = _GetInstReasonFilename(instance_name) 165 utils.WriteFile(filename, data=json)

166

167 168 -def _Fail(msg, *args, **kwargs):

169 """Log an error and the raise an RPCFail exception. 170 171 This exception is then handled specially in the ganeti daemon and 172 turned into a 'failed' return type. As such, this function is a 173 useful shortcut for logging the error and returning it to the master 174 daemon. 175 176 @type msg: string 177 @param msg: the text of the exception 178 @raise RPCFail 179 180 """ 181 if args: 182 msg = msg % args 183 if "log" not in kwargs or kwargs["log"]: # if we should log this error 184 if "exc" in kwargs and kwargs["exc"]: 185 logging.exception(msg) 186 else: 187 logging.error(msg) 188 raise RPCFail(msg)

189

190 191 -def _GetConfig():

192 """Simple wrapper to return a SimpleStore. 193 194 @rtype: L{ssconf.SimpleStore} 195 @return: a SimpleStore instance 196 197 """ 198 return ssconf.SimpleStore()

199

200 201 -def _GetSshRunner(cluster_name):

202 """Simple wrapper to return an SshRunner. 203 204 @type cluster_name: str 205 @param cluster_name: the cluster name, which is needed 206 by the SshRunner constructor 207 @rtype: L{ssh.SshRunner} 208 @return: an SshRunner instance 209 210 """ 211 return ssh.SshRunner(cluster_name)

212

213 214 -def _Decompress(data):

215 """Unpacks data compressed by the RPC client. 216 217 @type data: list or tuple 218 @param data: Data sent by RPC client 219 @rtype: str 220 @return: Decompressed data 221 222 """ 223 assert isinstance(data, (list, tuple)) 224 assert len(data) == 2 225 (encoding, content) = data 226 if encoding == constants.RPC_ENCODING_NONE: 227 return content 228 elif encoding == constants.RPC_ENCODING_ZLIB_BASE64: 229 return zlib.decompress(base64.b64decode(content)) 230 else: 231 raise AssertionError("Unknown data encoding")

232

233 234 -def _CleanDirectory(path, exclude=None):

235 """Removes all regular files in a directory. 236 237 @type path: str 238 @param path: the directory to clean 239 @type exclude: list 240 @param exclude: list of files to be excluded, defaults 241 to the empty list 242 243 """ 244 if path not in _ALLOWED_CLEAN_DIRS: 245 _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'", 246 path) 247 248 if not os.path.isdir(path): 249 return 250 if exclude is None: 251 exclude = [] 252 else: 253 # Normalize excluded paths 254 exclude = [os.path.normpath(i) for i in exclude] 255 256 for rel_name in utils.ListVisibleFiles(path): 257 full_name = utils.PathJoin(path, rel_name) 258 if full_name in exclude: 259 continue 260 if os.path.isfile(full_name) and not os.path.islink(full_name): 261 utils.RemoveFile(full_name)

262

263 264 -def _BuildUploadFileList():

265 """Build the list of allowed upload files. 266 267 This is abstracted so that it's built only once at module import time. 268 269 """ 270 allowed_files = set([ 271 pathutils.CLUSTER_CONF_FILE, 272 pathutils.ETC_HOSTS, 273 pathutils.SSH_KNOWN_HOSTS_FILE, 274 pathutils.VNC_PASSWORD_FILE, 275 pathutils.RAPI_CERT_FILE, 276 pathutils.SPICE_CERT_FILE, 277 pathutils.SPICE_CACERT_FILE, 278 pathutils.RAPI_USERS_FILE, 279 pathutils.CONFD_HMAC_KEY, 280 pathutils.CLUSTER_DOMAIN_SECRET_FILE, 281 ]) 282 283 for hv_name in constants.HYPER_TYPES: 284 hv_class = hypervisor.GetHypervisorClass(hv_name) 285 allowed_files.update(hv_class.GetAncillaryFiles()[0]) 286 287 assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \ 288 "Allowed file storage paths should never be uploaded via RPC" 289 290 return frozenset(allowed_files)

291 292 293 _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()

294 295 296 -def JobQueuePurge():

297 """Removes job queue files and archived jobs. 298 299 @rtype: tuple 300 @return: True, None 301 302 """ 303 _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE]) 304 _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)

305

306 307 -def GetMasterNodeName():

308 """Returns the master node name. 309 310 @rtype: string 311 @return: name of the master node 312 @raise RPCFail: in case of errors 313 314 """ 315 try: 316 return _GetConfig().GetMasterNode() 317 except errors.ConfigurationError, err: 318 _Fail("Cluster configuration incomplete: %s", err, exc=True)

319

320 321 -def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):

322 """Decorator that runs hooks before and after the decorated function. 323 324 @type hook_opcode: string 325 @param hook_opcode: opcode of the hook 326 @type hooks_path: string 327 @param hooks_path: path of the hooks 328 @type env_builder_fn: function 329 @param env_builder_fn: function that returns a dictionary containing the 330 environment variables for the hooks. Will get all the parameters of the 331 decorated function. 332 @raise RPCFail: in case of pre-hook failure 333 334 """ 335 def decorator(fn): 336 def wrapper(*args, **kwargs): 337 _, myself = ssconf.GetMasterAndMyself() 338 nodes = ([myself], [myself]) # these hooks run locally 339 340 env_fn = compat.partial(env_builder_fn, *args, **kwargs) 341 342 cfg = _GetConfig() 343 hr = HooksRunner() 344 hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes, 345 hr.RunLocalHooks, None, env_fn, None, 346 logging.warning, cfg.GetClusterName(), 347 cfg.GetMasterNode()) 348 hm.RunPhase(constants.HOOKS_PHASE_PRE) 349 result = fn(*args, **kwargs) 350 hm.RunPhase(constants.HOOKS_PHASE_POST) 351 352 return result

353 return wrapper 354 return decorator 355

356 357 -def _BuildMasterIpEnv(master_params, use_external_mip_script=None):

358 """Builds environment variables for master IP hooks. 359 360 @type master_params: L{objects.MasterNetworkParameters} 361 @param master_params: network parameters of the master 362 @type use_external_mip_script: boolean 363 @param use_external_mip_script: whether to use an external master IP 364 address setup script (unused, but necessary per the implementation of the 365 _RunLocalHooks decorator) 366 367 """ 368 # pylint: disable=W0613 369 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family) 370 env = { 371 "MASTER_NETDEV": master_params.netdev, 372 "MASTER_IP": master_params.ip, 373 "MASTER_NETMASK": str(master_params.netmask), 374 "CLUSTER_IP_VERSION": str(ver), 375 } 376 377 return env

378

379 380 -def _RunMasterSetupScript(master_params, action, use_external_mip_script):

381 """Execute the master IP address setup script. 382 383 @type master_params: L{objects.MasterNetworkParameters} 384 @param master_params: network parameters of the master 385 @type action: string 386 @param action: action to pass to the script. Must be one of 387 L{backend._MASTER_START} or L{backend._MASTER_STOP} 388 @type use_external_mip_script: boolean 389 @param use_external_mip_script: whether to use an external master IP 390 address setup script 391 @raise backend.RPCFail: if there are errors during the execution of the 392 script 393 394 """ 395 env = _BuildMasterIpEnv(master_params) 396 397 if use_external_mip_script: 398 setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT 399 else: 400 setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT 401 402 result = utils.RunCmd([setup_script, action], env=env, reset_env=True) 403 404 if result.failed: 405 _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" % 406 (action, result.exit_code, result.output), log=True)

407 408 409 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup", 410 _BuildMasterIpEnv)

411 -def ActivateMasterIp(master_params, use_external_mip_script):

412 """Activate the IP address of the master daemon. 413 414 @type master_params: L{objects.MasterNetworkParameters} 415 @param master_params: network parameters of the master 416 @type use_external_mip_script: boolean 417 @param use_external_mip_script: whether to use an external master IP 418 address setup script 419 @raise RPCFail: in case of errors during the IP startup 420 421 """ 422 _RunMasterSetupScript(master_params, _MASTER_START, 423 use_external_mip_script)

424

425 426 -def StartMasterDaemons(no_voting):

427 """Activate local node as master node. 428 429 The function will start the master daemons (ganeti-masterd and ganeti-rapi). 430 431 @type no_voting: boolean 432 @param no_voting: whether to start ganeti-masterd without a node vote 433 but still non-interactively 434 @rtype: None 435 436 """ 437 438 if no_voting: 439 daemon_args = "--no-voting --yes-do-it" 440 else: 441 daemon_args = "" 442 443 env = { 444 "EXTRA_LUXID_ARGS": daemon_args, 445 "EXTRA_WCONFD_ARGS": daemon_args, 446 } 447 448 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env) 449 if result.failed: 450 msg = "Can't start Ganeti master: %s" % result.output 451 logging.error(msg) 452 _Fail(msg)

453 454 455 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown", 456 _BuildMasterIpEnv)

457 -def DeactivateMasterIp(master_params, use_external_mip_script):

458 """Deactivate the master IP on this node. 459 460 @type master_params: L{objects.MasterNetworkParameters} 461 @param master_params: network parameters of the master 462 @type use_external_mip_script: boolean 463 @param use_external_mip_script: whether to use an external master IP 464 address setup script 465 @raise RPCFail: in case of errors during the IP turndown 466 467 """ 468 _RunMasterSetupScript(master_params, _MASTER_STOP, 469 use_external_mip_script)

470

471 472 -def StopMasterDaemons():

473 """Stop the master daemons on this node. 474 475 Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node. 476 477 @rtype: None 478 479 """ 480 # TODO: log and report back to the caller the error failures; we 481 # need to decide in which case we fail the RPC for this 482 483 result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"]) 484 if result.failed: 485 logging.error("Could not stop Ganeti master, command %s had exitcode %s" 486 " and error %s", 487 result.cmd, result.exit_code, result.output)

488

489 490 -def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):

491 """Change the netmask of the master IP. 492 493 @param old_netmask: the old value of the netmask 494 @param netmask: the new value of the netmask 495 @param master_ip: the master IP 496 @param master_netdev: the master network device 497 498 """ 499 if old_netmask == netmask: 500 return 501 502 if not netutils.IPAddress.Own(master_ip): 503 _Fail("The master IP address is not up, not attempting to change its" 504 " netmask") 505 506 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add", 507 "%s/%s" % (master_ip, netmask), 508 "dev", master_netdev, "label", 509 "%s:0" % master_netdev]) 510 if result.failed: 511 _Fail("Could not set the new netmask on the master IP address") 512 513 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del", 514 "%s/%s" % (master_ip, old_netmask), 515 "dev", master_netdev, "label", 516 "%s:0" % master_netdev]) 517 if result.failed: 518 _Fail("Could not bring down the master IP address with the old netmask")

519

520 521 -def EtcHostsModify(mode, host, ip):

522 """Modify a host entry in /etc/hosts. 523 524 @param mode: The mode to operate. Either add or remove entry 525 @param host: The host to operate on 526 @param ip: The ip associated with the entry 527 528 """ 529 if mode == constants.ETC_HOSTS_ADD: 530 if not ip: 531 RPCFail("Mode 'add' needs 'ip' parameter, but parameter not" 532 " present") 533 utils.AddHostToEtcHosts(host, ip) 534 elif mode == constants.ETC_HOSTS_REMOVE: 535 if ip: 536 RPCFail("Mode 'remove' does not allow 'ip' parameter, but" 537 " parameter is present") 538 utils.RemoveHostFromEtcHosts(host) 539 else: 540 RPCFail("Mode not supported")

541

542 543 -def LeaveCluster(modify_ssh_setup):

544 """Cleans up and remove the current node. 545 546 This function cleans up and prepares the current node to be removed 547 from the cluster. 548 549 If processing is successful, then it raises an 550 L{errors.QuitGanetiException} which is used as a special case to 551 shutdown the node daemon. 552 553 @param modify_ssh_setup: boolean 554 555 """ 556 _CleanDirectory(pathutils.DATA_DIR) 557 _CleanDirectory(pathutils.CRYPTO_KEYS_DIR) 558 JobQueuePurge() 559 560 if modify_ssh_setup: 561 try: 562 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER) 563 564 ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key)) 565 566 utils.RemoveFile(priv_key) 567 utils.RemoveFile(pub_key) 568 except errors.OpExecError: 569 logging.exception("Error while processing ssh files") 570 571 try: 572 utils.RemoveFile(pathutils.CONFD_HMAC_KEY) 573 utils.RemoveFile(pathutils.RAPI_CERT_FILE) 574 utils.RemoveFile(pathutils.SPICE_CERT_FILE) 575 utils.RemoveFile(pathutils.SPICE_CACERT_FILE) 576 utils.RemoveFile(pathutils.NODED_CERT_FILE) 577 except: # pylint: disable=W0702 578 logging.exception("Error while removing cluster secrets") 579 580 utils.StopDaemon(constants.CONFD) 581 utils.StopDaemon(constants.MOND) 582 utils.StopDaemon(constants.KVMD) 583 584 # Raise a custom exception (handled in ganeti-noded) 585 raise errors.QuitGanetiException(True, "Shutdown scheduled")

586

587 588 -def _CheckStorageParams(params, num_params):

589 """Performs sanity checks for storage parameters. 590 591 @type params: list 592 @param params: list of storage parameters 593 @type num_params: int 594 @param num_params: expected number of parameters 595 596 """ 597 if params is None: 598 raise errors.ProgrammerError("No storage parameters for storage" 599 " reporting is provided.") 600 if not isinstance(params, list): 601 raise errors.ProgrammerError("The storage parameters are not of type" 602 " list: '%s'" % params) 603 if not len(params) == num_params: 604 raise errors.ProgrammerError("Did not receive the expected number of" 605 "storage parameters: expected %s," 606 " received '%s'" % (num_params, len(params)))

607

608 609 -def _CheckLvmStorageParams(params):

610 """Performs sanity check for the 'exclusive storage' flag. 611 612 @see: C{_CheckStorageParams} 613 614 """ 615 _CheckStorageParams(params, 1) 616 excl_stor = params[0] 617 if not isinstance(params[0], bool): 618 raise errors.ProgrammerError("Exclusive storage parameter is not" 619 " boolean: '%s'." % excl_stor) 620 return excl_stor

621

622 623 -def _GetLvmVgSpaceInfo(name, params):

624 """Wrapper around C{_GetVgInfo} which checks the storage parameters. 625 626 @type name: string 627 @param name: name of the volume group 628 @type params: list 629 @param params: list of storage parameters, which in this case should be 630 containing only one for exclusive storage 631 632 """ 633 excl_stor = _CheckLvmStorageParams(params) 634 return _GetVgInfo(name, excl_stor)

635

636 637 -def _GetVgInfo( 638 name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):

639 """Retrieves information about a LVM volume group. 640 641 """ 642 # TODO: GetVGInfo supports returning information for multiple VGs at once 643 vginfo = info_fn([name], excl_stor) 644 if vginfo: 645 vg_free = int(round(vginfo[0][0], 0)) 646 vg_size = int(round(vginfo[0][1], 0)) 647 else: 648 vg_free = None 649 vg_size = None 650 651 return { 652 "type": constants.ST_LVM_VG, 653 "name": name, 654 "storage_free": vg_free, 655 "storage_size": vg_size, 656 }

657

658 659 -def _GetLvmPvSpaceInfo(name, params):

660 """Wrapper around C{_GetVgSpindlesInfo} with sanity checks. 661 662 @see: C{_GetLvmVgSpaceInfo} 663 664 """ 665 excl_stor = _CheckLvmStorageParams(params) 666 return _GetVgSpindlesInfo(name, excl_stor)

667

668 669 -def _GetVgSpindlesInfo( 670 name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):

671 """Retrieves information about spindles in an LVM volume group. 672 673 @type name: string 674 @param name: VG name 675 @type excl_stor: bool 676 @param excl_stor: exclusive storage 677 @rtype: dict 678 @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name, 679 free spindles, total spindles respectively 680 681 """ 682 if excl_stor: 683 (vg_free, vg_size) = info_fn(name) 684 else: 685 vg_free = 0 686 vg_size = 0 687 return { 688 "type": constants.ST_LVM_PV, 689 "name": name, 690 "storage_free": vg_free, 691 "storage_size": vg_size, 692 }

693

694 695 -def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):

696 """Retrieves node information from a hypervisor. 697 698 The information returned depends on the hypervisor. Common items: 699 700 - vg_size is the size of the configured volume group in MiB 701 - vg_free is the free size of the volume group in MiB 702 - memory_dom0 is the memory allocated for domain0 in MiB 703 - memory_free is the currently available (free) ram in MiB 704 - memory_total is the total number of ram in MiB 705 - hv_version: the hypervisor version, if available 706 707 @type hvparams: dict of string 708 @param hvparams: the hypervisor's hvparams 709 710 """ 711 return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)

712

713 714 -def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):

715 """Retrieves node information for all hypervisors. 716 717 See C{_GetHvInfo} for information on the output. 718 719 @type hv_specs: list of pairs (string, dict of strings) 720 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 721 722 """ 723 if hv_specs is None: 724 return None 725 726 result = [] 727 for hvname, hvparams in hv_specs: 728 result.append(_GetHvInfo(hvname, hvparams, get_hv_fn)) 729 return result

730

731 732 -def _GetNamedNodeInfo(names, fn):

733 """Calls C{fn} for all names in C{names} and returns a dictionary. 734 735 @rtype: None or dict 736 737 """ 738 if names is None: 739 return None 740 else: 741 return map(fn, names)

742

743 744 -def GetNodeInfo(storage_units, hv_specs):

745 """Gives back a hash with different information about the node. 746 747 @type storage_units: list of tuples (string, string, list) 748 @param storage_units: List of tuples (storage unit, identifier, parameters) to 749 ask for disk space information. In case of lvm-vg, the identifier is 750 the VG name. The parameters can contain additional, storage-type-specific 751 parameters, for example exclusive storage for lvm storage. 752 @type hv_specs: list of pairs (string, dict of strings) 753 @param hv_specs: list of pairs of a hypervisor's name and its hvparams 754 @rtype: tuple; (string, None/dict, None/dict) 755 @return: Tuple containing boot ID, volume group information and hypervisor 756 information 757 758 """ 759 bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n") 760 storage_info = _GetNamedNodeInfo( 761 storage_units, 762 (lambda (storage_type, storage_key, storage_params): 763 _ApplyStorageInfoFunction(storage_type, storage_key, storage_params))) 764 hv_info = _GetHvInfoAll(hv_specs) 765 return (bootid, storage_info, hv_info)

766

767 768 -def _GetFileStorageSpaceInfo(path, params):

769 """Wrapper around filestorage.GetSpaceInfo. 770 771 The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo 772 and ignore the *args parameter to not leak it into the filestorage 773 module's code. 774 775 @see: C{filestorage.GetFileStorageSpaceInfo} for description of the 776 parameters. 777 778 """ 779 _CheckStorageParams(params, 0) 780 return filestorage.GetFileStorageSpaceInfo(path)

781 782 783 # FIXME: implement storage reporting for all missing storage types. 784 _STORAGE_TYPE_INFO_FN = { 785 constants.ST_BLOCK: None, 786 constants.ST_DISKLESS: None, 787 constants.ST_EXT: None, 788 constants.ST_FILE: _GetFileStorageSpaceInfo, 789 constants.ST_LVM_PV: _GetLvmPvSpaceInfo, 790 constants.ST_LVM_VG: _GetLvmVgSpaceInfo, 791 constants.ST_SHARED_FILE: None, 792 constants.ST_GLUSTER: None, 793 constants.ST_RADOS: None, 794 }

795 796 797 -def _ApplyStorageInfoFunction(storage_type, storage_key, *args):

798 """Looks up and applies the correct function to calculate free and total 799 storage for the given storage type. 800 801 @type storage_type: string 802 @param storage_type: the storage type for which the storage shall be reported. 803 @type storage_key: string 804 @param storage_key: identifier of a storage unit, e.g. the volume group name 805 of an LVM storage unit 806 @type args: any 807 @param args: various parameters that can be used for storage reporting. These 808 parameters and their semantics vary from storage type to storage type and 809 are just propagated in this function. 810 @return: the results of the application of the storage space function (see 811 _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that 812 storage type 813 @raises NotImplementedError: for storage types who don't support space 814 reporting yet 815 """ 816 fn = _STORAGE_TYPE_INFO_FN[storage_type] 817 if fn is not None: 818 return fn(storage_key, *args) 819 else: 820 raise NotImplementedError

821

822 823 -def _CheckExclusivePvs(pvi_list):

824 """Check that PVs are not shared among LVs 825 826 @type pvi_list: list of L{objects.LvmPvInfo} objects 827 @param pvi_list: information about the PVs 828 829 @rtype: list of tuples (string, list of strings) 830 @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...]) 831 832 """ 833 res = [] 834 for pvi in pvi_list: 835 if len(pvi.lv_list) > 1: 836 res.append((pvi.name, pvi.lv_list)) 837 return res

838

839 840 -def _VerifyHypervisors(what, vm_capable, result, all_hvparams, 841 get_hv_fn=hypervisor.GetHypervisor):

842 """Verifies the hypervisor. Appends the results to the 'results' list. 843 844 @type what: C{dict} 845 @param what: a dictionary of things to check 846 @type vm_capable: boolean 847 @param vm_capable: whether or not this node is vm capable 848 @type result: dict 849 @param result: dictionary of verification results; results of the 850 verifications in this function will be added here 851 @type all_hvparams: dict of dict of string 852 @param all_hvparams: dictionary mapping hypervisor names to hvparams 853 @type get_hv_fn: function 854 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 855 856 """ 857 if not vm_capable: 858 return 859 860 if constants.NV_HYPERVISOR in what: 861 result[constants.NV_HYPERVISOR] = {} 862 for hv_name in what[constants.NV_HYPERVISOR]: 863 hvparams = all_hvparams[hv_name] 864 try: 865 val = get_hv_fn(hv_name).Verify(hvparams=hvparams) 866 except errors.HypervisorError, err: 867 val = "Error while checking hypervisor: %s" % str(err) 868 result[constants.NV_HYPERVISOR][hv_name] = val

869

870 871 -def _VerifyHvparams(what, vm_capable, result, 872 get_hv_fn=hypervisor.GetHypervisor):

873 """Verifies the hvparams. Appends the results to the 'results' list. 874 875 @type what: C{dict} 876 @param what: a dictionary of things to check 877 @type vm_capable: boolean 878 @param vm_capable: whether or not this node is vm capable 879 @type result: dict 880 @param result: dictionary of verification results; results of the 881 verifications in this function will be added here 882 @type get_hv_fn: function 883 @param get_hv_fn: function to retrieve the hypervisor, to improve testability 884 885 """ 886 if not vm_capable: 887 return 888 889 if constants.NV_HVPARAMS in what: 890 result[constants.NV_HVPARAMS] = [] 891 for source, hv_name, hvparms in what[constants.NV_HVPARAMS]: 892 try: 893 logging.info("Validating hv %s, %s", hv_name, hvparms) 894 get_hv_fn(hv_name).ValidateParameters(hvparms) 895 except errors.HypervisorError, err: 896 result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))

897

898 899 -def _VerifyInstanceList(what, vm_capable, result, all_hvparams):

900 """Verifies the instance list. 901 902 @type what: C{dict} 903 @param what: a dictionary of things to check 904 @type vm_capable: boolean 905 @param vm_capable: whether or not this node is vm capable 906 @type result: dict 907 @param result: dictionary of verification results; results of the 908 verifications in this function will be added here 909 @type all_hvparams: dict of dict of string 910 @param all_hvparams: dictionary mapping hypervisor names to hvparams 911 912 """ 913 if constants.NV_INSTANCELIST in what and vm_capable: 914 # GetInstanceList can fail 915 try: 916 val = GetInstanceList(what[constants.NV_INSTANCELIST], 917 all_hvparams=all_hvparams) 918 except RPCFail, err: 919 val = str(err) 920 result[constants.NV_INSTANCELIST] = val

921

922 923 -def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):

924 """Verifies the node info. 925 926 @type what: C{dict} 927 @param what: a dictionary of things to check 928 @type vm_capable: boolean 929 @param vm_capable: whether or not this node is vm capable 930 @type result: dict 931 @param result: dictionary of verification results; results of the 932 verifications in this function will be added here 933 @type all_hvparams: dict of dict of string 934 @param all_hvparams: dictionary mapping hypervisor names to hvparams 935 936 """ 937 if constants.NV_HVINFO in what and vm_capable: 938 hvname = what[constants.NV_HVINFO] 939 hyper = hypervisor.GetHypervisor(hvname) 940 hvparams = all_hvparams[hvname] 941 result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)

942

943 944 -def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):

945 """Verify the existance and validity of the client SSL certificate. 946 947 Also, verify that the client certificate is not self-signed. Self- 948 signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4 949 and should be replaced by client certificates signed by the server 950 certificate. Hence we output a warning when we encounter a self-signed 951 one. 952 953 """ 954 create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates" 955 if not os.path.exists(cert_file): 956 return (constants.CV_ERROR, 957 "The client certificate does not exist. Run '%s' to create" 958 " client certificates for all nodes." % create_cert_cmd) 959 960 (errcode, msg) = utils.VerifyCertificate(cert_file) 961 if errcode is not None: 962 return (errcode, msg) 963 964 (errcode, msg) = utils.IsCertificateSelfSigned(cert_file) 965 if errcode is not None: 966 return (errcode, msg) 967 968 # if everything is fine, we return the digest to be compared to the config 969 return (None, utils.GetCertificateDigest(cert_filename=cert_file))

970

971 972 -def _VerifySshSetup(node_status_list, my_name, 973 pub_key_file=pathutils.SSH_PUB_KEYS):

974 """Verifies the state of the SSH key files. 975 976 @type node_status_list: list of tuples 977 @param node_status_list: list of nodes of the cluster associated with a 978 couple of flags: (uuid, name, is_master_candidate, 979 is_potential_master_candidate, online) 980 @type my_name: str 981 @param my_name: name of this node 982 @type pub_key_file: str 983 @param pub_key_file: filename of the public key file 984 985 """ 986 if node_status_list is None: 987 return ["No node list to check against the pub_key_file received."] 988 989 my_status_list = [(my_uuid, name, mc, pot_mc, online) for 990 (my_uuid, name, mc, pot_mc, online) 991 in node_status_list if name == my_name] 992 if len(my_status_list) == 0: 993 return ["Cannot find node information for node '%s'." % my_name] 994 (my_uuid, _, _, potential_master_candidate, online) = \ 995 my_status_list[0] 996 997 result = [] 998 999 if not os.path.exists(pub_key_file): 1000 result.append("The public key file '%s' does not exist. Consider running" 1001 " 'gnt-cluster renew-crypto --new-ssh-keys" 1002 " [--no-ssh-key-check]' to fix this." % pub_key_file) 1003 return result 1004 1005 pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list] 1006 offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list 1007 if not online] 1008 pub_keys = ssh.QueryPubKeyFile(None) 1009 1010 if potential_master_candidate: 1011 # Check that the set of potential master candidates matches the 1012 # public key file 1013 pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes) 1014 pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes) 1015 missing_uuids = set([]) 1016 if pub_uuids_set != pot_mc_uuids_set: 1017 unknown_uuids = pub_uuids_set - pot_mc_uuids_set 1018 if unknown_uuids: 1019 result.append("The following node UUIDs are listed in the public key" 1020 " file on node '%s', but are not potential master" 1021 " candidates: %s." 1022 % (my_name, ", ".join(list(unknown_uuids)))) 1023 missing_uuids = pot_mc_uuids_set - pub_uuids_set 1024 if missing_uuids: 1025 result.append("The following node UUIDs of potential master candidates" 1026 " are missing in the public key file on node %s: %s." 1027 % (my_name, ", ".join(list(missing_uuids)))) 1028 1029 (_, key_files) = \ 1030 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1031 (_, dsa_pub_key_filename) = key_files[constants.SSHK_DSA] 1032 1033 my_keys = pub_keys[my_uuid] 1034 1035 dsa_pub_key = utils.ReadFile(dsa_pub_key_filename) 1036 if dsa_pub_key.strip() not in my_keys: 1037 result.append("The dsa key of node %s does not match this node's key" 1038 " in the pub key file." % (my_name)) 1039 if len(my_keys) != 1: 1040 result.append("There is more than one key for node %s in the public key" 1041 " file." % my_name) 1042 else: 1043 if len(pub_keys.keys()) > 0: 1044 result.append("The public key file of node '%s' is not empty, although" 1045 " the node is not a potential master candidate." 1046 % my_name) 1047 1048 # Check that all master candidate keys are in the authorized_keys file 1049 (auth_key_file, _) = \ 1050 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1051 for (uuid, name, mc, _, online) in node_status_list: 1052 if not online: 1053 continue 1054 if uuid in missing_uuids: 1055 continue 1056 if mc: 1057 for key in pub_keys[uuid]: 1058 if not ssh.HasAuthorizedKey(auth_key_file, key): 1059 result.append("A SSH key of master candidate '%s' (UUID: '%s') is" 1060 " not in the 'authorized_keys' file of node '%s'." 1061 % (name, uuid, my_name)) 1062 else: 1063 for key in pub_keys[uuid]: 1064 if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key): 1065 result.append("A SSH key of normal node '%s' (UUID: '%s') is in the" 1066 " 'authorized_keys' file of node '%s'." 1067 % (name, uuid, my_name)) 1068 if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key): 1069 result.append("A SSH key of normal node '%s' (UUID: '%s') is not" 1070 " in the 'authorized_keys' file of itself." 1071 % (my_name, uuid)) 1072 1073 return result

1074

1075 1076 -def _VerifySshClutter(node_status_list, my_name):

1077 """Verifies that the 'authorized_keys' files are not cluttered up. 1078 1079 @type node_status_list: list of tuples 1080 @param node_status_list: list of nodes of the cluster associated with a 1081 couple of flags: (uuid, name, is_master_candidate, 1082 is_potential_master_candidate, online) 1083 @type my_name: str 1084 @param my_name: name of this node 1085 1086 """ 1087 result = [] 1088 (auth_key_file, _) = \ 1089 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1090 node_names = [name for (_, name, _, _) in node_status_list] 1091 multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names) 1092 if multiple_occurrences: 1093 msg = "There are hosts which have more than one SSH key stored for the" \ 1094 " same user in the 'authorized_keys' file of node %s. This can be" \ 1095 " due to an unsuccessful operation which cluttered up the" \ 1096 " 'authorized_keys' file. We recommend to clean this up manually. " \ 1097 % my_name 1098 for host, occ in multiple_occurrences.items(): 1099 msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ)) 1100 result.append(msg) 1101 1102 return result

1103

1104 1105 -def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):

1106 """Verify the status of the local node. 1107 1108 Based on the input L{what} parameter, various checks are done on the 1109 local node. 1110 1111 If the I{filelist} key is present, this list of 1112 files is checksummed and the file/checksum pairs are returned. 1113 1114 If the I{nodelist} key is present, we check that we have 1115 connectivity via ssh with the target nodes (and check the hostname 1116 report). 1117 1118 If the I{node-net-test} key is present, we check that we have 1119 connectivity to the given nodes via both primary IP and, if 1120 applicable, secondary IPs. 1121 1122 @type what: C{dict} 1123 @param what: a dictionary of things to check: 1124 - filelist: list of files for which to compute checksums 1125 - nodelist: list of nodes we should check ssh communication with 1126 - node-net-test: list of nodes we should check node daemon port 1127 connectivity with 1128 - hypervisor: list with hypervisors to run the verify for 1129 @type cluster_name: string 1130 @param cluster_name: the cluster's name 1131 @type all_hvparams: dict of dict of strings 1132 @param all_hvparams: a dictionary mapping hypervisor names to hvparams 1133 @type node_groups: a dict of strings 1134 @param node_groups: node _names_ mapped to their group uuids (it's enough to 1135 have only those nodes that are in `what["nodelist"]`) 1136 @type groups_cfg: a dict of dict of strings 1137 @param groups_cfg: a dictionary mapping group uuids to their configuration 1138 @rtype: dict 1139 @return: a dictionary with the same keys as the input dict, and 1140 values representing the result of the checks 1141 1142 """ 1143 result = {} 1144 my_name = netutils.Hostname.GetSysName() 1145 port = netutils.GetDaemonPort(constants.NODED) 1146 vm_capable = my_name not in what.get(constants.NV_NONVMNODES, []) 1147 1148 _VerifyHypervisors(what, vm_capable, result, all_hvparams) 1149 _VerifyHvparams(what, vm_capable, result) 1150 1151 if constants.NV_FILELIST in what: 1152 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath, 1153 what[constants.NV_FILELIST])) 1154 result[constants.NV_FILELIST] = \ 1155 dict((vcluster.MakeVirtualPath(key), value) 1156 for (key, value) in fingerprints.items()) 1157 1158 if constants.NV_CLIENT_CERT in what: 1159 result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate() 1160 1161 if constants.NV_SSH_SETUP in what: 1162 result[constants.NV_SSH_SETUP] = \ 1163 _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name) 1164 if constants.NV_SSH_CLUTTER in what: 1165 result[constants.NV_SSH_CLUTTER] = \ 1166 _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name) 1167 1168 if constants.NV_NODELIST in what: 1169 (nodes, bynode, mcs) = what[constants.NV_NODELIST] 1170 1171 # Add nodes from other groups (different for each node) 1172 try: 1173 nodes.extend(bynode[my_name]) 1174 except KeyError: 1175 pass 1176 1177 # Use a random order 1178 random.shuffle(nodes) 1179 1180 # Try to contact all nodes 1181 val = {} 1182 for node in nodes: 1183 params = groups_cfg.get(node_groups.get(node)) 1184 ssh_port = params["ndparams"].get(constants.ND_SSH_PORT) 1185 logging.debug("Ssh port %s (None = default) for node %s", 1186 str(ssh_port), node) 1187 1188 # We only test if master candidates can communicate to other nodes. 1189 # We cannot test if normal nodes cannot communicate with other nodes, 1190 # because the administrator might have installed additional SSH keys, 1191 # over which Ganeti has no power. 1192 if my_name in mcs: 1193 success, message = _GetSshRunner(cluster_name). \ 1194 VerifyNodeHostname(node, ssh_port) 1195 if not success: 1196 val[node] = message 1197 1198 result[constants.NV_NODELIST] = val 1199 1200 if constants.NV_NODENETTEST in what: 1201 result[constants.NV_NODENETTEST] = tmp = {} 1202 my_pip = my_sip = None 1203 for name, pip, sip in what[constants.NV_NODENETTEST]: 1204 if name == my_name: 1205 my_pip = pip 1206 my_sip = sip 1207 break 1208 if not my_pip: 1209 tmp[my_name] = ("Can't find my own primary/secondary IP" 1210 " in the node list") 1211 else: 1212 for name, pip, sip in what[constants.NV_NODENETTEST]: 1213 fail = [] 1214 if not netutils.TcpPing(pip, port, source=my_pip): 1215 fail.append("primary") 1216 if sip != pip: 1217 if not netutils.TcpPing(sip, port, source=my_sip): 1218 fail.append("secondary") 1219 if fail: 1220 tmp[name] = ("failure using the %s interface(s)" % 1221 " and ".join(fail)) 1222 1223 if constants.NV_MASTERIP in what: 1224 # FIXME: add checks on incoming data structures (here and in the 1225 # rest of the function) 1226 master_name, master_ip = what[constants.NV_MASTERIP] 1227 if master_name == my_name: 1228 source = constants.IP4_ADDRESS_LOCALHOST 1229 else: 1230 source = None 1231 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port, 1232 source=source) 1233 1234 if constants.NV_USERSCRIPTS in what: 1235 result[constants.NV_USERSCRIPTS] = \ 1236 [script for script in what[constants.NV_USERSCRIPTS] 1237 if not utils.IsExecutable(script)] 1238 1239 if constants.NV_OOB_PATHS in what: 1240 result[constants.NV_OOB_PATHS] = tmp = [] 1241 for path in what[constants.NV_OOB_PATHS]: 1242 try: 1243 st = os.stat(path) 1244 except OSError, err: 1245 tmp.append("error stating out of band helper: %s" % err) 1246 else: 1247 if stat.S_ISREG(st.st_mode): 1248 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR: 1249 tmp.append(None) 1250 else: 1251 tmp.append("out of band helper %s is not executable" % path) 1252 else: 1253 tmp.append("out of band helper %s is not a file" % path) 1254 1255 if constants.NV_LVLIST in what and vm_capable: 1256 try: 1257 val = GetVolumeList(utils.ListVolumeGroups().keys()) 1258 except RPCFail, err: 1259 val = str(err) 1260 result[constants.NV_LVLIST] = val 1261 1262 _VerifyInstanceList(what, vm_capable, result, all_hvparams) 1263 1264 if constants.NV_VGLIST in what and vm_capable: 1265 result[constants.NV_VGLIST] = utils.ListVolumeGroups() 1266 1267 if constants.NV_PVLIST in what and vm_capable: 1268 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what 1269 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST], 1270 filter_allocatable=False, 1271 include_lvs=check_exclusive_pvs) 1272 if check_exclusive_pvs: 1273 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val) 1274 for pvi in val: 1275 # Avoid sending useless data on the wire 1276 pvi.lv_list = [] 1277 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val) 1278 1279 if constants.NV_VERSION in what: 1280 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION, 1281 constants.RELEASE_VERSION) 1282 1283 _VerifyNodeInfo(what, vm_capable, result, all_hvparams) 1284 1285 if constants.NV_DRBDVERSION in what and vm_capable: 1286 try: 1287 drbd_version = DRBD8.GetProcInfo().GetVersionString() 1288 except errors.BlockDeviceError, err: 1289 logging.warning("Can't get DRBD version", exc_info=True) 1290 drbd_version = str(err) 1291 result[constants.NV_DRBDVERSION] = drbd_version 1292 1293 if constants.NV_DRBDLIST in what and vm_capable: 1294 try: 1295 used_minors = drbd.DRBD8.GetUsedDevs() 1296 except errors.BlockDeviceError, err: 1297 logging.warning("Can't get used minors list", exc_info=True) 1298 used_minors = str(err) 1299 result[constants.NV_DRBDLIST] = used_minors 1300 1301 if constants.NV_DRBDHELPER in what and vm_capable: 1302 status = True 1303 try: 1304 payload = drbd.DRBD8.GetUsermodeHelper() 1305 except errors.BlockDeviceError, err: 1306 logging.error("Can't get DRBD usermode helper: %s", str(err)) 1307 status = False 1308 payload = str(err) 1309 result[constants.NV_DRBDHELPER] = (status, payload) 1310 1311 if constants.NV_NODESETUP in what: 1312 result[constants.NV_NODESETUP] = tmpr = [] 1313 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"): 1314 tmpr.append("The sysfs filesytem doesn't seem to be mounted" 1315 " under /sys, missing required directories /sys/block" 1316 " and /sys/class/net") 1317 if (not os.path.isdir("/proc/sys") or 1318 not os.path.isfile("/proc/sysrq-trigger")): 1319 tmpr.append("The procfs filesystem doesn't seem to be mounted" 1320 " under /proc, missing required directory /proc/sys and" 1321 " the file /proc/sysrq-trigger") 1322 1323 if constants.NV_TIME in what: 1324 result[constants.NV_TIME] = utils.SplitTime(time.time()) 1325 1326 if constants.NV_OSLIST in what and vm_capable: 1327 result[constants.NV_OSLIST] = DiagnoseOS() 1328 1329 if constants.NV_BRIDGES in what and vm_capable: 1330 result[constants.NV_BRIDGES] = [bridge 1331 for bridge in what[constants.NV_BRIDGES] 1332 if not utils.BridgeExists(bridge)] 1333 1334 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name: 1335 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \ 1336 filestorage.ComputeWrongFileStoragePaths() 1337 1338 if what.get(constants.NV_FILE_STORAGE_PATH): 1339 pathresult = filestorage.CheckFileStoragePath( 1340 what[constants.NV_FILE_STORAGE_PATH]) 1341 if pathresult: 1342 result[constants.NV_FILE_STORAGE_PATH] = pathresult 1343 1344 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH): 1345 pathresult = filestorage.CheckFileStoragePath( 1346 what[constants.NV_SHARED_FILE_STORAGE_PATH]) 1347 if pathresult: 1348 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult 1349 1350 return result

1351

1352 1353 -def GetCryptoTokens(token_requests):

1354 """Perform actions on the node's cryptographic tokens. 1355 1356 Token types can be 'ssl' or 'ssh'. So far only some actions are implemented 1357 for 'ssl'. Action 'get' returns the digest of the public client ssl 1358 certificate. Action 'create' creates a new client certificate and private key 1359 and also returns the digest of the certificate. The third parameter of a 1360 token request are optional parameters for the actions, so far only the 1361 filename is supported. 1362 1363 @type token_requests: list of tuples of (string, string, dict), where the 1364 first string is in constants.CRYPTO_TYPES, the second in 1365 constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string 1366 to string. 1367 @param token_requests: list of requests of cryptographic tokens and actions 1368 to perform on them. The actions come with a dictionary of options. 1369 @rtype: list of tuples (string, string) 1370 @return: list of tuples of the token type and the public crypto token 1371 1372 """ 1373 tokens = [] 1374 for (token_type, action, _) in token_requests: 1375 if token_type not in constants.CRYPTO_TYPES: 1376 raise errors.ProgrammerError("Token type '%s' not supported." % 1377 token_type) 1378 if action not in constants.CRYPTO_ACTIONS: 1379 raise errors.ProgrammerError("Action '%s' is not supported." % 1380 action) 1381 if token_type == constants.CRYPTO_TYPE_SSL_DIGEST: 1382 tokens.append((token_type, 1383 utils.GetCertificateDigest())) 1384 return tokens

1385

1386 1387 -def EnsureDaemon(daemon_name, run):

1388 """Ensures the given daemon is running or stopped. 1389 1390 @type daemon_name: string 1391 @param daemon_name: name of the daemon (e.g., constants.KVMD) 1392 1393 @type run: bool 1394 @param run: whether to start or stop the daemon 1395 1396 @rtype: bool 1397 @return: 'True' if daemon successfully started/stopped, 1398 'False' otherwise 1399 1400 """ 1401 allowed_daemons = [constants.KVMD] 1402 1403 if daemon_name not in allowed_daemons: 1404 fn = lambda _: False 1405 elif run: 1406 fn = utils.EnsureDaemon 1407 else: 1408 fn = utils.StopDaemon 1409 1410 return fn(daemon_name)

1411

1412 1413 -def _InitSshUpdateData(data, noded_cert_file, ssconf_store):

1414 (_, noded_cert) = \ 1415 utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file)) 1416 data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert 1417 1418 cluster_name = ssconf_store.GetClusterName() 1419 data[constants.SSHS_CLUSTER_NAME] = cluster_name

1420

1421 1422 -def AddNodeSshKey(node_uuid, node_name, 1423 potential_master_candidates, 1424 to_authorized_keys=False, 1425 to_public_keys=False, 1426 get_public_keys=False, 1427 pub_key_file=pathutils.SSH_PUB_KEYS, 1428 ssconf_store=None, 1429 noded_cert_file=pathutils.NODED_CERT_FILE, 1430 run_cmd_fn=ssh.RunSshCmdWithStdin):

1431 """Distributes a node's public SSH key across the cluster. 1432 1433 Note that this function should only be executed on the master node, which 1434 then will copy the new node's key to all nodes in the cluster via SSH. 1435 1436 Also note: at least one of the flags C{to_authorized_keys}, 1437 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for 1438 the function to actually perform any actions. 1439 1440 @type node_uuid: str 1441 @param node_uuid: the UUID of the node whose key is added 1442 @type node_name: str 1443 @param node_name: the name of the node whose key is added 1444 @type potential_master_candidates: list of str 1445 @param potential_master_candidates: list of node names of potential master 1446 candidates; this should match the list of uuids in the public key file 1447 @type to_authorized_keys: boolean 1448 @param to_authorized_keys: whether the key should be added to the 1449 C{authorized_keys} file of all nodes 1450 @type to_public_keys: boolean 1451 @param to_public_keys: whether the keys should be added to the public key file 1452 @type get_public_keys: boolean 1453 @param get_public_keys: whether the node should add the clusters' public keys 1454 to its {ganeti_pub_keys} file 1455 1456 """ 1457 # assure that at least one of those flags is true, as the function would 1458 # not do anything otherwise 1459 assert (to_authorized_keys or to_public_keys or get_public_keys) 1460 1461 if not ssconf_store: 1462 ssconf_store = ssconf.SimpleStore() 1463 1464 # Check and fix sanity of key file 1465 keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file) 1466 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1467 1468 if (not keys_by_name or node_name not in keys_by_name) \ 1469 and (not keys_by_uuid or node_uuid not in keys_by_uuid): 1470 raise errors.SshUpdateError( 1471 "No keys found for the new node '%s' (UUID %s) in the list of public" 1472 " SSH keys, neither for the name or the UUID" % (node_name, node_uuid)) 1473 else: 1474 if node_name in keys_by_name: 1475 keys_by_uuid = {} 1476 # Replace the name by UUID in the file as the name should only be used 1477 # temporarily 1478 ssh.ReplaceNameByUuid(node_uuid, node_name, 1479 error_fn=errors.SshUpdateError, 1480 key_file=pub_key_file) 1481 keys_by_uuid[node_uuid] = keys_by_name[node_name] 1482 1483 # Update the master node's key files 1484 if to_authorized_keys: 1485 (auth_key_file, _) = \ 1486 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1487 ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_uuid]) 1488 1489 base_data = {} 1490 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1491 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1492 1493 ssh_port_map = ssconf_store.GetSshPortMap() 1494 1495 # Update the target node itself 1496 logging.debug("Updating SSH key files of target node '%s'.", node_name) 1497 if get_public_keys: 1498 node_data = {} 1499 _InitSshUpdateData(node_data, noded_cert_file, ssconf_store) 1500 all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file) 1501 node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1502 (constants.SSHS_OVERRIDE, all_keys) 1503 1504 try: 1505 utils.RetryByNumberOfTimes( 1506 constants.SSHS_MAX_RETRIES, 1507 errors.SshUpdateError, 1508 run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE, 1509 ssh_port_map.get(node_name), node_data, 1510 debug=False, verbose=False, use_cluster_key=False, 1511 ask_key=False, strict_host_check=False) 1512 except errors.SshUpdateError as e: 1513 # Clean up the master's public key file if adding key fails 1514 if to_public_keys: 1515 ssh.RemovePublicKey(node_uuid) 1516 raise e 1517 1518 # Update all nodes except master and the target node 1519 if to_authorized_keys: 1520 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1521 (constants.SSHS_ADD, keys_by_uuid) 1522 1523 pot_mc_data = copy.deepcopy(base_data) 1524 if to_public_keys: 1525 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1526 (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid) 1527 1528 all_nodes = ssconf_store.GetNodeList() 1529 master_node = ssconf_store.GetMasterNode() 1530 online_nodes = ssconf_store.GetOnlineNodeList() 1531 1532 node_errors = [] 1533 for node in all_nodes: 1534 if node == master_node: 1535 logging.debug("Skipping master node '%s'.", master_node) 1536 continue 1537 if node not in online_nodes: 1538 logging.debug("Skipping offline node '%s'.", node) 1539 continue 1540 if node in potential_master_candidates: 1541 logging.debug("Updating SSH key files of node '%s'.", node) 1542 try: 1543 utils.RetryByNumberOfTimes( 1544 constants.SSHS_MAX_RETRIES, 1545 errors.SshUpdateError, 1546 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1547 ssh_port_map.get(node), pot_mc_data, 1548 debug=False, verbose=False, use_cluster_key=False, 1549 ask_key=False, strict_host_check=False) 1550 except errors.SshUpdateError as last_exception: 1551 error_msg = ("When adding the key of node '%s', updating SSH key" 1552 " files of node '%s' failed after %s retries." 1553 " Not trying again. Last error was: %s." % 1554 (node, node_name, constants.SSHS_MAX_RETRIES, 1555 last_exception)) 1556 node_errors.append((node, error_msg)) 1557 # We only log the error and don't throw an exception, because 1558 # one unreachable node shall not abort the entire procedure. 1559 logging.error(error_msg) 1560 1561 else: 1562 if to_authorized_keys: 1563 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE, 1564 ssh_port_map.get(node), base_data, 1565 debug=False, verbose=False, use_cluster_key=False, 1566 ask_key=False, strict_host_check=False) 1567 1568 return node_errors

1569

1570 1571 -def RemoveNodeSshKey(node_uuid, node_name, 1572 master_candidate_uuids, 1573 potential_master_candidates, 1574 master_uuid=None, 1575 keys_to_remove=None, 1576 from_authorized_keys=False, 1577 from_public_keys=False, 1578 clear_authorized_keys=False, 1579 clear_public_keys=False, 1580 pub_key_file=pathutils.SSH_PUB_KEYS, 1581 ssconf_store=None, 1582 noded_cert_file=pathutils.NODED_CERT_FILE, 1583 readd=False, 1584 run_cmd_fn=ssh.RunSshCmdWithStdin):

1585 """Removes the node's SSH keys from the key files and distributes those. 1586 1587 Note that at least one of the flags C{from_authorized_keys}, 1588 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1589 has to be set to C{True} for the function to perform any action at all. 1590 Not doing so will trigger an assertion in the function. 1591 1592 @type node_uuid: str 1593 @param node_uuid: UUID of the node whose key is removed 1594 @type node_name: str 1595 @param node_name: name of the node whose key is remove 1596 @type master_candidate_uuids: list of str 1597 @param master_candidate_uuids: list of UUIDs of the current master candidates 1598 @type potential_master_candidates: list of str 1599 @param potential_master_candidates: list of names of potential master 1600 candidates 1601 @type keys_to_remove: dict of str to list of str 1602 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1603 to be removed. This list is supposed to be used only if the keys are not 1604 in the public keys file. This is for example the case when removing a 1605 master node's key. 1606 @type from_authorized_keys: boolean 1607 @param from_authorized_keys: whether or not the key should be removed 1608 from the C{authorized_keys} file 1609 @type from_public_keys: boolean 1610 @param from_public_keys: whether or not the key should be remove from 1611 the C{ganeti_pub_keys} file 1612 @type clear_authorized_keys: boolean 1613 @param clear_authorized_keys: whether or not the C{authorized_keys} file 1614 should be cleared on the node whose keys are removed 1615 @type clear_public_keys: boolean 1616 @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file 1617 @type readd: boolean 1618 @param readd: whether this is called during a readd operation. 1619 @rtype: list of string 1620 @returns: list of feedback messages 1621 1622 """ 1623 # Non-disruptive error messages, list of (node, msg) pairs 1624 result_msgs = [] 1625 1626 # Make sure at least one of these flags is true. 1627 if not (from_authorized_keys or from_public_keys or clear_authorized_keys 1628 or clear_public_keys): 1629 raise errors.SshUpdateError("No removal from any key file was requested.") 1630 1631 if not ssconf_store: 1632 ssconf_store = ssconf.SimpleStore() 1633 1634 master_node = ssconf_store.GetMasterNode() 1635 ssh_port_map = ssconf_store.GetSshPortMap() 1636 1637 if from_authorized_keys or from_public_keys: 1638 if keys_to_remove: 1639 keys = keys_to_remove 1640 else: 1641 keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1642 if (not keys or node_uuid not in keys) and not readd: 1643 raise errors.SshUpdateError("Node '%s' not found in the list of public" 1644 " SSH keys. It seems someone tries to" 1645 " remove a key from outside the cluster!" 1646 % node_uuid) 1647 # During an upgrade all nodes have the master key. In this case we 1648 # should not remove it to avoid accidentally shutting down cluster 1649 # SSH communication 1650 master_keys = None 1651 if master_uuid: 1652 master_keys = ssh.QueryPubKeyFile([master_uuid], key_file=pub_key_file) 1653 for master_key in master_keys: 1654 if master_key in keys[node_uuid]: 1655 keys[node_uuid].remove(master_key) 1656 1657 if node_name == master_node and not keys_to_remove: 1658 raise errors.SshUpdateError("Cannot remove the master node's keys.") 1659 1660 if node_uuid in keys: 1661 base_data = {} 1662 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1663 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1664 1665 if from_authorized_keys: 1666 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1667 (constants.SSHS_REMOVE, keys) 1668 (auth_key_file, _) = \ 1669 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, 1670 dircheck=False) 1671 ssh.RemoveAuthorizedKeys(auth_key_file, keys[node_uuid]) 1672 1673 pot_mc_data = copy.deepcopy(base_data) 1674 1675 if from_public_keys: 1676 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1677 (constants.SSHS_REMOVE, keys) 1678 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 1679 1680 all_nodes = ssconf_store.GetNodeList() 1681 online_nodes = ssconf_store.GetOnlineNodeList() 1682 logging.debug("Removing key of node '%s' from all nodes but itself and" 1683 " master.", node_name) 1684 for node in all_nodes: 1685 if node == master_node: 1686 logging.debug("Skipping master node '%s'.", master_node) 1687 continue 1688 if node not in online_nodes: 1689 logging.debug("Skipping offline node '%s'.", node) 1690 continue 1691 ssh_port = ssh_port_map.get(node) 1692 if not ssh_port: 1693 raise errors.OpExecError("No SSH port information available for" 1694 " node '%s', map: %s." % 1695 (node, ssh_port_map)) 1696 error_msg_final = ("When removing the key of node '%s', updating the" 1697 " SSH key files of node '%s' failed. Last error" 1698 " was: %s.") 1699 if node in potential_master_candidates: 1700 logging.debug("Updating key setup of potential master candidate node" 1701 " %s.", node) 1702 try: 1703 utils.RetryByNumberOfTimes( 1704 constants.SSHS_MAX_RETRIES, 1705 errors.SshUpdateError, 1706 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1707 ssh_port, pot_mc_data, 1708 debug=False, verbose=False, use_cluster_key=False, 1709 ask_key=False, strict_host_check=False) 1710 except errors.SshUpdateError as last_exception: 1711 error_msg = error_msg_final % ( 1712 node_name, node, last_exception) 1713 result_msgs.append((node, error_msg)) 1714 logging.error(error_msg) 1715 1716 else: 1717 if from_authorized_keys: 1718 logging.debug("Updating key setup of normal node %s.", node) 1719 try: 1720 utils.RetryByNumberOfTimes( 1721 constants.SSHS_MAX_RETRIES, 1722 errors.SshUpdateError, 1723 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1724 ssh_port, base_data, 1725 debug=False, verbose=False, use_cluster_key=False, 1726 ask_key=False, strict_host_check=False) 1727 except errors.SshUpdateError as last_exception: 1728 error_msg = error_msg_final % ( 1729 node_name, node, last_exception) 1730 result_msgs.append((node, error_msg)) 1731 logging.error(error_msg) 1732 1733 if clear_authorized_keys or from_public_keys or clear_public_keys: 1734 data = {} 1735 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1736 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1737 ssh_port = ssh_port_map.get(node_name) 1738 if not ssh_port: 1739 raise errors.OpExecError("No SSH port information available for" 1740 " node '%s', which is leaving the cluster.") 1741 1742 if clear_authorized_keys: 1743 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore, 1744 # we have to specify exactly which keys to clear to leave keys untouched 1745 # that were not added by Ganeti. 1746 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids 1747 if uuid != node_uuid] 1748 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, 1749 key_file=pub_key_file) 1750 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1751 (constants.SSHS_REMOVE, candidate_keys) 1752 1753 if clear_public_keys: 1754 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1755 (constants.SSHS_CLEAR, {}) 1756 elif from_public_keys: 1757 # Since clearing the public keys subsumes removing just a single key, 1758 # we only do it of clear_public_keys is 'False'. 1759 1760 if keys[node_uuid]: 1761 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1762 (constants.SSHS_REMOVE, keys) 1763 1764 # If we have no changes to any keyfile, just return 1765 if not (constants.SSHS_SSH_PUBLIC_KEYS in data or 1766 constants.SSHS_SSH_AUTHORIZED_KEYS in data): 1767 return 1768 1769 logging.debug("Updating SSH key setup of target node '%s'.", node_name) 1770 try: 1771 utils.RetryByNumberOfTimes( 1772 constants.SSHS_MAX_RETRIES, 1773 errors.SshUpdateError, 1774 run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE, 1775 ssh_port, data, 1776 debug=False, verbose=False, use_cluster_key=False, 1777 ask_key=False, strict_host_check=False) 1778 except errors.SshUpdateError as last_exception: 1779 result_msgs.append( 1780 (node_name, 1781 ("Removing SSH keys from node '%s' failed." 1782 " This can happen when the node is already unreachable." 1783 " Error: %s" % (node_name, last_exception)))) 1784 1785 return result_msgs

1786

1787 1788 -def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, 1789 pub_key_file=pathutils.SSH_PUB_KEYS, 1790 ssconf_store=None, 1791 noded_cert_file=pathutils.NODED_CERT_FILE, 1792 run_cmd_fn=ssh.RunSshCmdWithStdin, 1793 suffix=""):

1794 """Generates the root SSH key pair on the node. 1795 1796 @type node_uuid: str 1797 @param node_uuid: UUID of the node whose key is removed 1798 @type node_name: str 1799 @param node_name: name of the node whose key is remove 1800 @type ssh_port_map: dict of str to int 1801 @param ssh_port_map: mapping of node names to their SSH port 1802 1803 """ 1804 if not ssconf_store: 1805 ssconf_store = ssconf.SimpleStore() 1806 1807 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1808 if not keys_by_uuid or node_uuid not in keys_by_uuid: 1809 raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to" 1810 " be regenerated is not registered in the" 1811 " public keys file." % (node_name, node_uuid)) 1812 1813 data = {} 1814 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1815 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1816 data[constants.SSHS_GENERATE] = {constants.SSHS_SUFFIX: suffix} 1817 1818 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, 1819 ssh_port_map.get(node_name), data, 1820 debug=False, verbose=False, use_cluster_key=False, 1821 ask_key=False, strict_host_check=False)

1822

1823 1824 -def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):

1825 master_node_uuids = [node_uuid for (node_uuid, node_name) 1826 in node_uuid_name_map 1827 if node_name == master_node_name] 1828 if len(master_node_uuids) != 1: 1829 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 1830 " name: '%s', Master UUID: '%s'" % 1831 (master_node_name, master_node_uuids)) 1832 return master_node_uuids[0]

1833

1834 1835 -def _GetOldMasterKeys(master_node_uuid, pub_key_file):

1836 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid], 1837 key_file=pub_key_file) 1838 if not old_master_keys_by_uuid: 1839 raise errors.SshUpdateError("No public key of the master node (UUID '%s')" 1840 " found, not generating a new key." 1841 % master_node_uuid) 1842 return old_master_keys_by_uuid

1843

1844 1845 -def _GetNewMasterKey(root_keyfiles, master_node_uuid):

1846 new_master_keys = [] 1847 for (_, (_, public_key_file)) in root_keyfiles.items(): 1848 public_key_dir = os.path.dirname(public_key_file) 1849 public_key_file_tmp_filename = \ 1850 os.path.splitext(os.path.basename(public_key_file))[0] \ 1851 + constants.SSHS_MASTER_SUFFIX + ".pub" 1852 public_key_path_tmp = os.path.join(public_key_dir, 1853 public_key_file_tmp_filename) 1854 if os.path.exists(public_key_path_tmp): 1855 # for some key types, there might not be any keys 1856 key = utils.ReadFile(public_key_path_tmp) 1857 new_master_keys.append(key) 1858 if not new_master_keys: 1859 raise errors.SshUpdateError("Cannot find any type of temporary SSH key.") 1860 return {master_node_uuid: new_master_keys}

1861

1862 1863 -def _ReplaceMasterKeyOnMaster(root_keyfiles):

1864 number_of_moves = 0 1865 for (_, (private_key_file, public_key_file)) in root_keyfiles.items(): 1866 key_dir = os.path.dirname(public_key_file) 1867 private_key_file_tmp = \ 1868 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX 1869 public_key_file_tmp = private_key_file_tmp + ".pub" 1870 private_key_path_tmp = os.path.join(key_dir, 1871 private_key_file_tmp) 1872 public_key_path_tmp = os.path.join(key_dir, 1873 public_key_file_tmp) 1874 if os.path.exists(public_key_file): 1875 utils.CreateBackup(public_key_file) 1876 utils.RemoveFile(public_key_file) 1877 if os.path.exists(private_key_file): 1878 utils.CreateBackup(private_key_file) 1879 utils.RemoveFile(private_key_file) 1880 if os.path.exists(public_key_path_tmp) and \ 1881 os.path.exists(private_key_path_tmp): 1882 # for some key types, there might not be any keys 1883 shutil.move(public_key_path_tmp, public_key_file) 1884 shutil.move(private_key_path_tmp, private_key_file) 1885 number_of_moves += 1 1886 if not number_of_moves: 1887 raise errors.SshUpdateError("Could not move at least one master SSH key.")

1888

1889 1890 -def RenewSshKeys(node_uuids, node_names, master_candidate_uuids, 1891 potential_master_candidates, 1892 pub_key_file=pathutils.SSH_PUB_KEYS, 1893 ssconf_store=None, 1894 noded_cert_file=pathutils.NODED_CERT_FILE, 1895 run_cmd_fn=ssh.RunSshCmdWithStdin):

1896 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys. 1897 1898 @type node_uuids: list of str 1899 @param node_uuids: list of node UUIDs whose keys should be renewed 1900 @type node_names: list of str 1901 @param node_names: list of node names whose keys should be removed. This list 1902 should match the C{node_uuids} parameter 1903 @type master_candidate_uuids: list of str 1904 @param master_candidate_uuids: list of UUIDs of master candidates or 1905 master node 1906 @type pub_key_file: str 1907 @param pub_key_file: file path of the the public key file 1908 @type noded_cert_file: str 1909 @param noded_cert_file: path of the noded SSL certificate file 1910 @type run_cmd_fn: function 1911 @param run_cmd_fn: function to run commands on remote nodes via SSH 1912 @raises ProgrammerError: if node_uuids and node_names don't match; 1913 SshUpdateError if a node's key is missing from the public key file, 1914 if a node's new SSH key could not be fetched from it, if there is 1915 none or more than one entry in the public key list for the master 1916 node. 1917 1918 """ 1919 if not ssconf_store: 1920 ssconf_store = ssconf.SimpleStore() 1921 cluster_name = ssconf_store.GetClusterName() 1922 1923 if not len(node_uuids) == len(node_names): 1924 raise errors.ProgrammerError("List of nodes UUIDs and node names" 1925 " does not match in length.") 1926 1927 (_, root_keyfiles) = \ 1928 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1929 (_, dsa_pub_keyfile) = root_keyfiles[constants.SSHK_DSA] 1930 old_master_key = utils.ReadFile(dsa_pub_keyfile) 1931 1932 node_uuid_name_map = zip(node_uuids, node_names) 1933 1934 master_node_name = ssconf_store.GetMasterNode() 1935 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name) 1936 ssh_port_map = ssconf_store.GetSshPortMap() 1937 # List of all node errors that happened, but which did not abort the 1938 # procedure as a whole. It is important that this is a list to have a 1939 # somewhat chronological history of events. 1940 all_node_errors = [] 1941 1942 # process non-master nodes 1943 for node_uuid, node_name in node_uuid_name_map: 1944 if node_name == master_node_name: 1945 continue 1946 master_candidate = node_uuid in master_candidate_uuids 1947 potential_master_candidate = node_name in potential_master_candidates 1948 1949 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1950 if not keys_by_uuid: 1951 raise errors.SshUpdateError("No public key of node %s (UUID %s) found," 1952 " not generating a new key." 1953 % (node_name, node_uuid)) 1954 1955 if master_candidate: 1956 logging.debug("Fetching old SSH key from node '%s'.", node_name) 1957 old_pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile, 1958 node_name, cluster_name, 1959 ssh_port_map[node_name], 1960 False, # ask_key 1961 False) # key_check 1962 if old_pub_key != old_master_key: 1963 # If we are already in a multi-key setup (that is past Ganeti 2.12), 1964 # we can safely remove the old key of the node. Otherwise, we cannot 1965 # remove that node's key, because it is also the master node's key 1966 # and that would terminate all communication from the master to the 1967 # node. 1968 logging.debug("Removing SSH key of node '%s'.", node_name) 1969 node_errors = RemoveNodeSshKey( 1970 node_uuid, node_name, master_candidate_uuids, 1971 potential_master_candidates, 1972 master_uuid=master_node_uuid, from_authorized_keys=master_candidate, 1973 from_public_keys=False, clear_authorized_keys=False, 1974 clear_public_keys=False) 1975 if node_errors: 1976 all_node_errors = all_node_errors + node_errors 1977 else: 1978 logging.debug("Old key of node '%s' is the same as the current master" 1979 " key. Not deleting that key on the node.", node_name) 1980 1981 logging.debug("Generating new SSH key for node '%s'.", node_name) 1982 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, 1983 pub_key_file=pub_key_file, 1984 ssconf_store=ssconf_store, 1985 noded_cert_file=noded_cert_file, 1986 run_cmd_fn=run_cmd_fn) 1987 1988 try: 1989 logging.debug("Fetching newly created SSH key from node '%s'.", node_name) 1990 pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile, 1991 node_name, cluster_name, 1992 ssh_port_map[node_name], 1993 False, # ask_key 1994 False) # key_check 1995 except: 1996 raise errors.SshUpdateError("Could not fetch key of node %s" 1997 " (UUID %s)" % (node_name, node_uuid)) 1998 1999 if potential_master_candidate: 2000 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 2001 ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file) 2002 2003 logging.debug("Add ssh key of node '%s'.", node_name) 2004 node_errors = AddNodeSshKey( 2005 node_uuid, node_name, potential_master_candidates, 2006 to_authorized_keys=master_candidate, 2007 to_public_keys=potential_master_candidate, 2008 get_public_keys=True, 2009 pub_key_file=pub_key_file, ssconf_store=ssconf_store, 2010 noded_cert_file=noded_cert_file, 2011 run_cmd_fn=run_cmd_fn) 2012 if node_errors: 2013 all_node_errors = all_node_errors + node_errors 2014 2015 # Renewing the master node's key 2016 2017 # Preserve the old keys for now 2018 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, pub_key_file) 2019 2020 # Generate a new master key with a suffix, don't touch the old one for now 2021 logging.debug("Generate new ssh key of master.") 2022 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map, 2023 pub_key_file=pub_key_file, 2024 ssconf_store=ssconf_store, 2025 noded_cert_file=noded_cert_file, 2026 run_cmd_fn=run_cmd_fn, 2027 suffix=constants.SSHS_MASTER_SUFFIX) 2028 # Read newly created master key 2029 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid) 2030 2031 # Replace master key in the master nodes' public key file 2032 ssh.RemovePublicKey(master_node_uuid, key_file=pub_key_file) 2033 for pub_key in new_master_key_dict[master_node_uuid]: 2034 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=pub_key_file) 2035 2036 # Add new master key to all node's public and authorized keys 2037 logging.debug("Add new master key to all nodes.") 2038 node_errors = AddNodeSshKey( 2039 master_node_uuid, master_node_name, potential_master_candidates, 2040 to_authorized_keys=True, to_public_keys=True, 2041 get_public_keys=False, pub_key_file=pub_key_file, 2042 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, 2043 run_cmd_fn=run_cmd_fn) 2044 if node_errors: 2045 all_node_errors = all_node_errors + node_errors 2046 2047 # Remove the old key file and rename the new key to the non-temporary filename 2048 _ReplaceMasterKeyOnMaster(root_keyfiles) 2049 2050 # Remove old key from authorized keys 2051 (auth_key_file, _) = \ 2052 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 2053 ssh.RemoveAuthorizedKeys(auth_key_file, 2054 old_master_keys_by_uuid[master_node_uuid]) 2055 2056 # Remove the old key from all node's authorized keys file 2057 logging.debug("Remove the old master key from all nodes.") 2058 node_errors = RemoveNodeSshKey( 2059 master_node_uuid, master_node_name, master_candidate_uuids, 2060 potential_master_candidates, 2061 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True, 2062 from_public_keys=False, clear_authorized_keys=False, 2063 clear_public_keys=False) 2064 if node_errors: 2065 all_node_errors = all_node_errors + node_errors 2066 2067 return all_node_errors

2068

2069 2070 -def GetBlockDevSizes(devices):

2071 """Return the size of the given block devices 2072 2073 @type devices: list 2074 @param devices: list of block device nodes to query 2075 @rtype: dict 2076 @return: 2077 dictionary of all block devices under /dev (key). The value is their 2078 size in MiB. 2079 2080 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 2081 2082 """ 2083 DEV_PREFIX = "/dev/" 2084 blockdevs = {} 2085 2086 for devpath in devices: 2087 if not utils.IsBelowDir(DEV_PREFIX, devpath): 2088 continue 2089 2090 try: 2091 st = os.stat(devpath) 2092 except EnvironmentError, err: 2093 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 2094 continue 2095 2096 if stat.S_ISBLK(st.st_mode): 2097 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 2098 if result.failed: 2099 # We don't want to fail, just do not list this device as available 2100 logging.warning("Cannot get size for block device %s", devpath) 2101 continue 2102 2103 size = int(result.stdout) / (1024 * 1024) 2104 blockdevs[devpath] = size 2105 return blockdevs

2106

2107 2108 -def GetVolumeList(vg_names):

2109 """Compute list of logical volumes and their size. 2110 2111 @type vg_names: list 2112 @param vg_names: the volume groups whose LVs we should list, or 2113 empty for all volume groups 2114 @rtype: dict 2115 @return: 2116 dictionary of all partions (key) with value being a tuple of 2117 their size (in MiB), inactive and online status:: 2118 2119 {'xenvg/test1': ('20.06', True, True)} 2120 2121 in case of errors, a string is returned with the error 2122 details. 2123 2124 """ 2125 lvs = {} 2126 sep = "|" 2127 if not vg_names: 2128 vg_names = [] 2129 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2130 "--separator=%s" % sep, 2131 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 2132 if result.failed: 2133 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 2134 2135 for line in result.stdout.splitlines(): 2136 line = line.strip() 2137 match = _LVSLINE_REGEX.match(line) 2138 if not match: 2139 logging.error("Invalid line returned from lvs output: '%s'", line) 2140 continue 2141 vg_name, name, size, attr = match.groups() 2142 inactive = attr[4] == "-" 2143 online = attr[5] == "o" 2144 virtual = attr[0] == "v" 2145 if virtual: 2146 # we don't want to report such volumes as existing, since they 2147 # don't really hold data 2148 continue 2149 lvs[vg_name + "/" + name] = (size, inactive, online) 2150 2151 return lvs

2152

2153 2154 -def ListVolumeGroups():

2155 """List the volume groups and their size. 2156 2157 @rtype: dict 2158 @return: dictionary with keys volume name and values the 2159 size of the volume 2160 2161 """ 2162 return utils.ListVolumeGroups()

2163

2164 2165 -def NodeVolumes():

2166 """List all volumes on this node. 2167 2168 @rtype: list 2169 @return: 2170 A list of dictionaries, each having four keys: 2171 - name: the logical volume name, 2172 - size: the size of the logical volume 2173 - dev: the physical device on which the LV lives 2174 - vg: the volume group to which it belongs 2175 2176 In case of errors, we return an empty list and log the 2177 error. 2178 2179 Note that since a logical volume can live on multiple physical 2180 volumes, the resulting list might include a logical volume 2181 multiple times. 2182 2183 """ 2184 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2185 "--separator=|", 2186 "--options=lv_name,lv_size,devices,vg_name"]) 2187 if result.failed: 2188 _Fail("Failed to list logical volumes, lvs output: %s", 2189 result.output) 2190 2191 def parse_dev(dev): 2192 return dev.split("(")[0]

2193 2194 def handle_dev(dev): 2195 return [parse_dev(x) for x in dev.split(",")] 2196 2197 def map_line(line): 2198 line = [v.strip() for v in line] 2199 return [{"name": line[0], "size": line[1], 2200 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 2201 2202 all_devs = [] 2203 for line in result.stdout.splitlines(): 2204 if line.count("|") >= 3: 2205 all_devs.extend(map_line(line.split("|"))) 2206 else: 2207 logging.warning("Strange line in the output from lvs: '%s'", line) 2208 return all_devs 2209

2210 2211 -def BridgesExist(bridges_list):

2212 """Check if a list of bridges exist on the current node. 2213 2214 @rtype: boolean 2215 @return: C{True} if all of them exist, C{False} otherwise 2216 2217 """ 2218 missing = [] 2219 for bridge in bridges_list: 2220 if not utils.BridgeExists(bridge): 2221 missing.append(bridge) 2222 2223 if missing: 2224 _Fail("Missing bridges %s", utils.CommaJoin(missing))

2225

2226 2227 -def GetInstanceListForHypervisor(hname, hvparams=None, 2228 get_hv_fn=hypervisor.GetHypervisor):

2229 """Provides a list of instances of the given hypervisor. 2230 2231 @type hname: string 2232 @param hname: name of the hypervisor 2233 @type hvparams: dict of strings 2234 @param hvparams: hypervisor parameters for the given hypervisor 2235 @type get_hv_fn: function 2236 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2237 name; optional parameter to increase testability 2238 2239 @rtype: list 2240 @return: a list of all running instances on the current node 2241 - instance1.example.com 2242 - instance2.example.com 2243 2244 """ 2245 try: 2246 return get_hv_fn(hname).ListInstances(hvparams=hvparams) 2247 except errors.HypervisorError, err: 2248 _Fail("Error enumerating instances (hypervisor %s): %s", 2249 hname, err, exc=True)

2250

2251 2252 -def GetInstanceList(hypervisor_list, all_hvparams=None, 2253 get_hv_fn=hypervisor.GetHypervisor):

2254 """Provides a list of instances. 2255 2256 @type hypervisor_list: list 2257 @param hypervisor_list: the list of hypervisors to query information 2258 @type all_hvparams: dict of dict of strings 2259 @param all_hvparams: a dictionary mapping hypervisor types to respective 2260 cluster-wide hypervisor parameters 2261 @type get_hv_fn: function 2262 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2263 name; optional parameter to increase testability 2264 2265 @rtype: list 2266 @return: a list of all running instances on the current node 2267 - instance1.example.com 2268 - instance2.example.com 2269 2270 """ 2271 results = [] 2272 for hname in hypervisor_list: 2273 hvparams = all_hvparams[hname] 2274 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 2275 get_hv_fn=get_hv_fn)) 2276 return results

2277

2278 2279 -def GetInstanceInfo(instance, hname, hvparams=None):

2280 """Gives back the information about an instance as a dictionary. 2281 2282 @type instance: string 2283 @param instance: the instance name 2284 @type hname: string 2285 @param hname: the hypervisor type of the instance 2286 @type hvparams: dict of strings 2287 @param hvparams: the instance's hvparams 2288 2289 @rtype: dict 2290 @return: dictionary with the following keys: 2291 - memory: memory size of instance (int) 2292 - state: state of instance (HvInstanceState) 2293 - time: cpu time of instance (float) 2294 - vcpus: the number of vcpus (int) 2295 2296 """ 2297 output = {} 2298 2299 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 2300 hvparams=hvparams) 2301 if iinfo is not None: 2302 output["memory"] = iinfo[2] 2303 output["vcpus"] = iinfo[3] 2304 output["state"] = iinfo[4] 2305 output["time"] = iinfo[5] 2306 2307 return output

2308

2309 2310 -def GetInstanceMigratable(instance):

2311 """Computes whether an instance can be migrated. 2312 2313 @type instance: L{objects.Instance} 2314 @param instance: object representing the instance to be checked. 2315 2316 @rtype: tuple 2317 @return: tuple of (result, description) where: 2318 - result: whether the instance can be migrated or not 2319 - description: a description of the issue, if relevant 2320 2321 """ 2322 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2323 iname = instance.name 2324 if iname not in hyper.ListInstances(hvparams=instance.hvparams): 2325 _Fail("Instance %s is not running", iname) 2326 2327 for idx in range(len(instance.disks_info)): 2328 link_name = _GetBlockDevSymlinkPath(iname, idx) 2329 if not os.path.islink(link_name): 2330 logging.warning("Instance %s is missing symlink %s for disk %d", 2331 iname, link_name, idx)

2332

2333 2334 -def GetAllInstancesInfo(hypervisor_list, all_hvparams):

2335 """Gather data about all instances. 2336 2337 This is the equivalent of L{GetInstanceInfo}, except that it 2338 computes data for all instances at once, thus being faster if one 2339 needs data about more than one instance. 2340 2341 @type hypervisor_list: list 2342 @param hypervisor_list: list of hypervisors to query for instance data 2343 @type all_hvparams: dict of dict of strings 2344 @param all_hvparams: mapping of hypervisor names to hvparams 2345 2346 @rtype: dict 2347 @return: dictionary of instance: data, with data having the following keys: 2348 - memory: memory size of instance (int) 2349 - state: xen state of instance (string) 2350 - time: cpu time of instance (float) 2351 - vcpus: the number of vcpus 2352 2353 """ 2354 output = {} 2355 for hname in hypervisor_list: 2356 hvparams = all_hvparams[hname] 2357 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 2358 if iinfo: 2359 for name, _, memory, vcpus, state, times in iinfo: 2360 value = { 2361 "memory": memory, 2362 "vcpus": vcpus, 2363 "state": state, 2364 "time": times, 2365 } 2366 if name in output: 2367 # we only check static parameters, like memory and vcpus, 2368 # and not state and time which can change between the 2369 # invocations of the different hypervisors 2370 for key in "memory", "vcpus": 2371 if value[key] != output[name][key]: 2372 _Fail("Instance %s is running twice" 2373 " with different parameters", name) 2374 output[name] = value 2375 2376 return output

2377

2378 2379 -def GetInstanceConsoleInfo(instance_param_dict, 2380 get_hv_fn=hypervisor.GetHypervisor):

2381 """Gather data about the console access of a set of instances of this node. 2382 2383 This function assumes that the caller already knows which instances are on 2384 this node, by calling a function such as L{GetAllInstancesInfo} or 2385 L{GetInstanceList}. 2386 2387 For every instance, a large amount of configuration data needs to be 2388 provided to the hypervisor interface in order to receive the console 2389 information. Whether this could or should be cut down can be discussed. 2390 The information is provided in a dictionary indexed by instance name, 2391 allowing any number of instance queries to be done. 2392 2393 @type instance_param_dict: dict of string to tuple of dictionaries, where the 2394 dictionaries represent: L{objects.Instance}, L{objects.Node}, 2395 L{objects.NodeGroup}, HvParams, BeParams 2396 @param instance_param_dict: mapping of instance name to parameters necessary 2397 for console information retrieval 2398 2399 @rtype: dict 2400 @return: dictionary of instance: data, with data having the following keys: 2401 - instance: instance name 2402 - kind: console kind 2403 - message: used with kind == CONS_MESSAGE, indicates console to be 2404 unavailable, supplies error message 2405 - host: host to connect to 2406 - port: port to use 2407 - user: user for login 2408 - command: the command, broken into parts as an array 2409 - display: unknown, potentially unused? 2410 2411 """ 2412 2413 output = {} 2414 for inst_name in instance_param_dict: 2415 instance = instance_param_dict[inst_name]["instance"] 2416 pnode = instance_param_dict[inst_name]["node"] 2417 group = instance_param_dict[inst_name]["group"] 2418 hvparams = instance_param_dict[inst_name]["hvParams"] 2419 beparams = instance_param_dict[inst_name]["beParams"] 2420 2421 instance = objects.Instance.FromDict(instance) 2422 pnode = objects.Node.FromDict(pnode) 2423 group = objects.NodeGroup.FromDict(group) 2424 2425 h = get_hv_fn(instance.hypervisor) 2426 output[inst_name] = h.GetInstanceConsole(instance, pnode, group, 2427 hvparams, beparams).ToDict() 2428 2429 return output

2430

2431 2432 -def _InstanceLogName(kind, os_name, instance, component):

2433 """Compute the OS log filename for a given instance and operation. 2434 2435 The instance name and os name are passed in as strings since not all 2436 operations have these as part of an instance object. 2437 2438 @type kind: string 2439 @param kind: the operation type (e.g. add, import, etc.) 2440 @type os_name: string 2441 @param os_name: the os name 2442 @type instance: string 2443 @param instance: the name of the instance being imported/added/etc. 2444 @type component: string or None 2445 @param component: the name of the component of the instance being 2446 transferred 2447 2448 """ 2449 # TODO: Use tempfile.mkstemp to create unique filename 2450 if component: 2451 assert "/" not in component 2452 c_msg = "-%s" % component 2453 else: 2454 c_msg = "" 2455 base = ("%s-%s-%s%s-%s.log" % 2456 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 2457 return utils.PathJoin(pathutils.LOG_OS_DIR, base)

2458

2459 2460 -def InstanceOsAdd(instance, reinstall, debug):

2461 """Add an OS to an instance. 2462 2463 @type instance: L{objects.Instance} 2464 @param instance: Instance whose OS is to be installed 2465 @type reinstall: boolean 2466 @param reinstall: whether this is an instance reinstall 2467 @type debug: integer 2468 @param debug: debug level, passed to the OS scripts 2469 @rtype: None 2470 2471 """ 2472 inst_os = OSFromDisk(instance.os) 2473 2474 create_env = OSEnvironment(instance, inst_os, debug) 2475 if reinstall: 2476 create_env["INSTANCE_REINSTALL"] = "1" 2477 2478 logfile = _InstanceLogName("add", instance.os, instance.name, None) 2479 2480 result = utils.RunCmd([inst_os.create_script], env=create_env, 2481 cwd=inst_os.path, output=logfile, reset_env=True) 2482 if result.failed: 2483 logging.error("os create command '%s' returned error: %s, logfile: %s," 2484 " output: %s", result.cmd, result.fail_reason, logfile, 2485 result.output) 2486 lines = [utils.SafeEncode(val) 2487 for val in utils.TailFile(logfile, lines=20)] 2488 _Fail("OS create script failed (%s), last lines in the" 2489 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)

2490

2491 2492 -def RunRenameInstance(instance, old_name, debug):

2493 """Run the OS rename script for an instance. 2494 2495 @type instance: L{objects.Instance} 2496 @param instance: Instance whose OS is to be installed 2497 @type old_name: string 2498 @param old_name: previous instance name 2499 @type debug: integer 2500 @param debug: debug level, passed to the OS scripts 2501 @rtype: boolean 2502 @return: the success of the operation 2503 2504 """ 2505 inst_os = OSFromDisk(instance.os) 2506 2507 rename_env = OSEnvironment(instance, inst_os, debug) 2508 rename_env["OLD_INSTANCE_NAME"] = old_name 2509 2510 logfile = _InstanceLogName("rename", instance.os, 2511 "%s-%s" % (old_name, instance.name), None) 2512 2513 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 2514 cwd=inst_os.path, output=logfile, reset_env=True) 2515 2516 if result.failed: 2517 logging.error("os create command '%s' returned error: %s output: %s", 2518 result.cmd, result.fail_reason, result.output) 2519 lines = [utils.SafeEncode(val) 2520 for val in utils.TailFile(logfile, lines=20)] 2521 _Fail("OS rename script failed (%s), last lines in the" 2522 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)

2523

2524 2525 -def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):

2526 """Returns symlink path for block device. 2527 2528 """ 2529 if _dir is None: 2530 _dir = pathutils.DISK_LINKS_DIR 2531 2532 return utils.PathJoin(_dir, 2533 ("%s%s%s" % 2534 (instance_name, constants.DISK_SEPARATOR, idx)))

2535

2536 2537 -def _SymlinkBlockDev(instance_name, device_path, idx):

2538 """Set up symlinks to a instance's block device. 2539 2540 This is an auxiliary function run when an instance is start (on the primary 2541 node) or when an instance is migrated (on the target node). 2542 2543 2544 @param instance_name: the name of the target instance 2545 @param device_path: path of the physical block device, on the node 2546 @param idx: the disk index 2547 @return: absolute path to the disk's symlink 2548 2549 """ 2550 # In case we have only a userspace access URI, device_path is None 2551 if not device_path: 2552 return None 2553 2554 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2555 try: 2556 os.symlink(device_path, link_name) 2557 except OSError, err: 2558 if err.errno == errno.EEXIST: 2559 if (not os.path.islink(link_name) or 2560 os.readlink(link_name) != device_path): 2561 os.remove(link_name) 2562 os.symlink(device_path, link_name) 2563 else: 2564 raise 2565 2566 return link_name

2567

2568 2569 -def _RemoveBlockDevLinks(instance_name, disks):

2570 """Remove the block device symlinks belonging to the given instance. 2571 2572 """ 2573 for idx, _ in enumerate(disks): 2574 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2575 if os.path.islink(link_name): 2576 try: 2577 os.remove(link_name) 2578 except OSError: 2579 logging.exception("Can't remove symlink '%s'", link_name)

2580

2581 2582 -def _CalculateDeviceURI(instance, disk, device):

2583 """Get the URI for the device. 2584 2585 @type instance: L{objects.Instance} 2586 @param instance: the instance which disk belongs to 2587 @type disk: L{objects.Disk} 2588 @param disk: the target disk object 2589 @type device: L{bdev.BlockDev} 2590 @param device: the corresponding BlockDevice 2591 @rtype: string 2592 @return: the device uri if any else None 2593 2594 """ 2595 access_mode = disk.params.get(constants.LDP_ACCESS, 2596 constants.DISK_KERNELSPACE) 2597 if access_mode == constants.DISK_USERSPACE: 2598 # This can raise errors.BlockDeviceError 2599 return device.GetUserspaceAccessUri(instance.hypervisor) 2600 else: 2601 return None

2602

2603 2604 -def _GatherAndLinkBlockDevs(instance):

2605 """Set up an instance's block device(s). 2606 2607 This is run on the primary node at instance startup. The block 2608 devices must be already assembled. 2609 2610 @type instance: L{objects.Instance} 2611 @param instance: the instance whose disks we should assemble 2612 @rtype: list 2613 @return: list of (disk_object, link_name, drive_uri) 2614 2615 """ 2616 block_devices = [] 2617 for idx, disk in enumerate(instance.disks_info): 2618 device = _RecursiveFindBD(disk) 2619 if device is None: 2620 raise errors.BlockDeviceError("Block device '%s' is not set up." % 2621 str(disk)) 2622 device.Open() 2623 try: 2624 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx) 2625 except OSError, e: 2626 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 2627 e.strerror) 2628 uri = _CalculateDeviceURI(instance, disk, device) 2629 2630 block_devices.append((disk, link_name, uri)) 2631 2632 return block_devices

2633

2634 2635 -def _IsInstanceUserDown(instance_info):

2636 return instance_info and \ 2637 "state" in instance_info and \ 2638 hv_base.HvInstanceState.IsShutdown(instance_info["state"])

2639

2640 2641 -def _GetInstanceInfo(instance):

2642 """Helper function L{GetInstanceInfo}""" 2643 return GetInstanceInfo(instance.name, instance.hypervisor, 2644 hvparams=instance.hvparams)

2645

2646 2647 -def StartInstance(instance, startup_paused, reason, store_reason=True):

2648 """Start an instance. 2649 2650 @type instance: L{objects.Instance} 2651 @param instance: the instance object 2652 @type startup_paused: bool 2653 @param instance: pause instance at startup? 2654 @type reason: list of reasons 2655 @param reason: the reason trail for this startup 2656 @type store_reason: boolean 2657 @param store_reason: whether to store the shutdown reason trail on file 2658 @rtype: None 2659 2660 """ 2661 instance_info = _GetInstanceInfo(instance) 2662 2663 if instance_info and not _IsInstanceUserDown(instance_info): 2664 logging.info("Instance '%s' already running, not starting", instance.name) 2665 return 2666 2667 try: 2668 block_devices = _GatherAndLinkBlockDevs(instance) 2669 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2670 hyper.StartInstance(instance, block_devices, startup_paused) 2671 if store_reason: 2672 _StoreInstReasonTrail(instance.name, reason) 2673 except errors.BlockDeviceError, err: 2674 _Fail("Block device error: %s", err, exc=True) 2675 except errors.HypervisorError, err: 2676 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2677 _Fail("Hypervisor error: %s", err, exc=True)

2678

2679 2680 -def InstanceShutdown(instance, timeout, reason, store_reason=True):

2681 """Shut an instance down. 2682 2683 @note: this functions uses polling with a hardcoded timeout. 2684 2685 @type instance: L{objects.Instance} 2686 @param instance: the instance object 2687 @type timeout: integer 2688 @param timeout: maximum timeout for soft shutdown 2689 @type reason: list of reasons 2690 @param reason: the reason trail for this shutdown 2691 @type store_reason: boolean 2692 @param store_reason: whether to store the shutdown reason trail on file 2693 @rtype: None 2694 2695 """ 2696 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2697 2698 if not _GetInstanceInfo(instance): 2699 logging.info("Instance '%s' not running, doing nothing", instance.name) 2700 return 2701 2702 class _TryShutdown(object): 2703 def __init__(self): 2704 self.tried_once = False

2705 2706 def __call__(self): 2707 if not _GetInstanceInfo(instance): 2708 return 2709 2710 try: 2711 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 2712 if store_reason: 2713 _StoreInstReasonTrail(instance.name, reason) 2714 except errors.HypervisorError, err: 2715 # if the instance is no longer existing, consider this a 2716 # success and go to cleanup 2717 if not _GetInstanceInfo(instance): 2718 return 2719 2720 _Fail("Failed to stop instance '%s': %s", instance.name, err) 2721 2722 self.tried_once = True 2723 2724 raise utils.RetryAgain() 2725 2726 try: 2727 utils.Retry(_TryShutdown(), 5, timeout) 2728 except utils.RetryTimeout: 2729 # the shutdown did not succeed 2730 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name) 2731 2732 try: 2733 hyper.StopInstance(instance, force=True) 2734 except errors.HypervisorError, err: 2735 # only raise an error if the instance still exists, otherwise 2736 # the error could simply be "instance ... unknown"! 2737 if _GetInstanceInfo(instance): 2738 _Fail("Failed to force stop instance '%s': %s", instance.name, err) 2739 2740 time.sleep(1) 2741 2742 if _GetInstanceInfo(instance): 2743 _Fail("Could not shutdown instance '%s' even by destroy", instance.name) 2744 2745 try: 2746 hyper.CleanupInstance(instance.name) 2747 except errors.HypervisorError, err: 2748 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 2749 2750 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2751

2752 2753 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):

2754 """Reboot an instance. 2755 2756 @type instance: L{objects.Instance} 2757 @param instance: the instance object to reboot 2758 @type reboot_type: str 2759 @param reboot_type: the type of reboot, one the following 2760 constants: 2761 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 2762 instance OS, do not recreate the VM 2763 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 2764 restart the VM (at the hypervisor level) 2765 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 2766 not accepted here, since that mode is handled differently, in 2767 cmdlib, and translates into full stop and start of the 2768 instance (instead of a call_instance_reboot RPC) 2769 @type shutdown_timeout: integer 2770 @param shutdown_timeout: maximum timeout for soft shutdown 2771 @type reason: list of reasons 2772 @param reason: the reason trail for this reboot 2773 @rtype: None 2774 2775 """ 2776 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown' 2777 # because those functions simply 'return' on error whereas this one 2778 # raises an exception with '_Fail' 2779 if not _GetInstanceInfo(instance): 2780 _Fail("Cannot reboot instance '%s' that is not running", instance.name) 2781 2782 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2783 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 2784 try: 2785 hyper.RebootInstance(instance) 2786 except errors.HypervisorError, err: 2787 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err) 2788 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 2789 try: 2790 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 2791 result = StartInstance(instance, False, reason, store_reason=False) 2792 _StoreInstReasonTrail(instance.name, reason) 2793 return result 2794 except errors.HypervisorError, err: 2795 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err) 2796 else: 2797 _Fail("Invalid reboot_type received: '%s'", reboot_type)

2798

2799 2800 -def InstanceBalloonMemory(instance, memory):

2801 """Resize an instance's memory. 2802 2803 @type instance: L{objects.Instance} 2804 @param instance: the instance object 2805 @type memory: int 2806 @param memory: new memory amount in MB 2807 @rtype: None 2808 2809 """ 2810 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2811 running = hyper.ListInstances(hvparams=instance.hvparams) 2812 if instance.name not in running: 2813 logging.info("Instance %s is not running, cannot balloon", instance.name) 2814 return 2815 try: 2816 hyper.BalloonInstanceMemory(instance, memory) 2817 except errors.HypervisorError, err: 2818 _Fail("Failed to balloon instance memory: %s", err, exc=True)

2819

2820 2821 -def MigrationInfo(instance):

2822 """Gather information about an instance to be migrated. 2823 2824 @type instance: L{objects.Instance} 2825 @param instance: the instance definition 2826 2827 """ 2828 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2829 try: 2830 info = hyper.MigrationInfo(instance) 2831 except errors.HypervisorError, err: 2832 _Fail("Failed to fetch migration information: %s", err, exc=True) 2833 return info

2834

2835 2836 -def AcceptInstance(instance, info, target):

2837 """Prepare the node to accept an instance. 2838 2839 @type instance: L{objects.Instance} 2840 @param instance: the instance definition 2841 @type info: string/data (opaque) 2842 @param info: migration information, from the source node 2843 @type target: string 2844 @param target: target host (usually ip), on this node 2845 2846 """ 2847 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2848 try: 2849 hyper.AcceptInstance(instance, info, target) 2850 except errors.HypervisorError, err: 2851 _Fail("Failed to accept instance: %s", err, exc=True)

2852

2853 2854 -def FinalizeMigrationDst(instance, info, success):

2855 """Finalize any preparation to accept an instance. 2856 2857 @type instance: L{objects.Instance} 2858 @param instance: the instance definition 2859 @type info: string/data (opaque) 2860 @param info: migration information, from the source node 2861 @type success: boolean 2862 @param success: whether the migration was a success or a failure 2863 2864 """ 2865 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2866 try: 2867 hyper.FinalizeMigrationDst(instance, info, success) 2868 except errors.HypervisorError, err: 2869 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)

2870

2871 2872 -def MigrateInstance(cluster_name, instance, target, live):

2873 """Migrates an instance to another node. 2874 2875 @type cluster_name: string 2876 @param cluster_name: name of the cluster 2877 @type instance: L{objects.Instance} 2878 @param instance: the instance definition 2879 @type target: string 2880 @param target: the target node name 2881 @type live: boolean 2882 @param live: whether the migration should be done live or not (the 2883 interpretation of this parameter is left to the hypervisor) 2884 @raise RPCFail: if migration fails for some reason 2885 2886 """ 2887 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2888 2889 try: 2890 hyper.MigrateInstance(cluster_name, instance, target, live) 2891 except errors.HypervisorError, err: 2892 _Fail("Failed to migrate instance: %s", err, exc=True)

2893

2894 2895 -def FinalizeMigrationSource(instance, success, live):

2896 """Finalize the instance migration on the source node. 2897 2898 @type instance: L{objects.Instance} 2899 @param instance: the instance definition of the migrated instance 2900 @type success: bool 2901 @param success: whether the migration succeeded or not 2902 @type live: bool 2903 @param live: whether the user requested a live migration or not 2904 @raise RPCFail: If the execution fails for some reason 2905 2906 """ 2907 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2908 2909 try: 2910 hyper.FinalizeMigrationSource(instance, success, live) 2911 except Exception, err: # pylint: disable=W0703 2912 _Fail("Failed to finalize the migration on the source node: %s", err, 2913 exc=True)

2914

2915 2916 -def GetMigrationStatus(instance):

2917 """Get the migration status 2918 2919 @type instance: L{objects.Instance} 2920 @param instance: the instance that is being migrated 2921 @rtype: L{objects.MigrationStatus} 2922 @return: the status of the current migration (one of 2923 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 2924 progress info that can be retrieved from the hypervisor 2925 @raise RPCFail: If the migration status cannot be retrieved 2926 2927 """ 2928 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2929 try: 2930 return hyper.GetMigrationStatus(instance) 2931 except Exception, err: # pylint: disable=W0703 2932 _Fail("Failed to get migration status: %s", err, exc=True)

2933

2934 2935 -def HotplugDevice(instance, action, dev_type, device, extra, seq):

2936 """Hotplug a device 2937 2938 Hotplug is currently supported only for KVM Hypervisor. 2939 @type instance: L{objects.Instance} 2940 @param instance: the instance to which we hotplug a device 2941 @type action: string 2942 @param action: the hotplug action to perform 2943 @type dev_type: string 2944 @param dev_type: the device type to hotplug 2945 @type device: either L{objects.NIC} or L{objects.Disk} 2946 @param device: the device object to hotplug 2947 @type extra: tuple 2948 @param extra: extra info used for disk hotplug (disk link, drive uri) 2949 @type seq: int 2950 @param seq: the index of the device from master perspective 2951 @raise RPCFail: in case instance does not have KVM hypervisor 2952 2953 """ 2954 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2955 try: 2956 hyper.VerifyHotplugSupport(instance, action, dev_type) 2957 except errors.HotplugError, err: 2958 _Fail("Hotplug is not supported: %s", err) 2959 2960 if action == constants.HOTPLUG_ACTION_ADD: 2961 fn = hyper.HotAddDevice 2962 elif action == constants.HOTPLUG_ACTION_REMOVE: 2963 fn = hyper.HotDelDevice 2964 elif action == constants.HOTPLUG_ACTION_MODIFY: 2965 fn = hyper.HotModDevice 2966 else: 2967 assert action in constants.HOTPLUG_ALL_ACTIONS 2968 2969 return fn(instance, dev_type, device, extra, seq)

2970

2971 2972 -def HotplugSupported(instance):

2973 """Checks if hotplug is generally supported. 2974 2975 """ 2976 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2977 try: 2978 hyper.HotplugSupported(instance) 2979 except errors.HotplugError, err: 2980 _Fail("Hotplug is not supported: %s", err)

2981

2982 2983 -def ModifyInstanceMetadata(metadata):

2984 """Sends instance data to the metadata daemon. 2985 2986 Uses the Luxi transport layer to communicate with the metadata 2987 daemon configuration server. It starts the metadata daemon if it is 2988 not running. 2989 The daemon must be enabled during at configuration time. 2990 2991 @type metadata: dict 2992 @param metadata: instance metadata obtained by calling 2993 L{objects.Instance.ToDict} on an instance object 2994 2995 """ 2996 if not constants.ENABLE_METAD: 2997 raise errors.ProgrammerError("The metadata deamon is disabled, yet" 2998 " ModifyInstanceMetadata has been called") 2999 3000 if not utils.IsDaemonAlive(constants.METAD): 3001 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD]) 3002 if result.failed: 3003 raise errors.HypervisorError("Failed to start metadata daemon") 3004 3005 def _Connect(): 3006 return transport.Transport(pathutils.SOCKET_DIR + "/ganeti-metad", 3007 allow_non_master=True)

3008 3009 retries = 5 3010 3011 while True: 3012 try: 3013 trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO) 3014 break 3015 except utils.RetryTimeout: 3016 raise TimeoutError("Connection to metadata daemon timed out") 3017 except (socket.error, NoMasterError), err: 3018 if retries == 0: 3019 logging.error("Failed to connect to the metadata daemon", 3020 exc_info=True) 3021 raise TimeoutError("Failed to connect to metadata daemon: %s" % err) 3022 else: 3023 retries -= 1 3024 3025 data = serializer.DumpJson(metadata, 3026 private_encoder=serializer.EncodeWithPrivateFields) 3027 3028 trans.Send(data) 3029 trans.Close() 3030

3031 3032 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):

3033 """Creates a block device for an instance. 3034 3035 @type disk: L{objects.Disk} 3036 @param disk: the object describing the disk we should create 3037 @type size: int 3038 @param size: the size of the physical underlying device, in MiB 3039 @type owner: str 3040 @param owner: the name of the instance for which disk is created, 3041 used for device cache data 3042 @type on_primary: boolean 3043 @param on_primary: indicates if it is the primary node or not 3044 @type info: string 3045 @param info: string that will be sent to the physical device 3046 creation, used for example to set (LVM) tags on LVs 3047 @type excl_stor: boolean 3048 @param excl_stor: Whether exclusive_storage is active 3049 3050 @return: the new unique_id of the device (this can sometime be 3051 computed only after creation), or None. On secondary nodes, 3052 it's not required to return anything. 3053 3054 """ 3055 # TODO: remove the obsolete "size" argument 3056 # pylint: disable=W0613 3057 clist = [] 3058 if disk.children: 3059 for child in disk.children: 3060 try: 3061 crdev = _RecursiveAssembleBD(child, owner, on_primary) 3062 except errors.BlockDeviceError, err: 3063 _Fail("Can't assemble device %s: %s", child, err) 3064 if on_primary or disk.AssembleOnSecondary(): 3065 # we need the children open in case the device itself has to 3066 # be assembled 3067 try: 3068 # pylint: disable=E1103 3069 crdev.Open() 3070 except errors.BlockDeviceError, err: 3071 _Fail("Can't make child '%s' read-write: %s", child, err) 3072 clist.append(crdev) 3073 3074 try: 3075 device = bdev.Create(disk, clist, excl_stor) 3076 except errors.BlockDeviceError, err: 3077 _Fail("Can't create block device: %s", err) 3078 3079 if on_primary or disk.AssembleOnSecondary(): 3080 try: 3081 device.Assemble() 3082 except errors.BlockDeviceError, err: 3083 _Fail("Can't assemble device after creation, unusual event: %s", err) 3084 if on_primary or disk.OpenOnSecondary(): 3085 try: 3086 device.Open(force=True) 3087 except errors.BlockDeviceError, err: 3088 _Fail("Can't make device r/w after creation, unusual event: %s", err) 3089 DevCacheManager.UpdateCache(device.dev_path, owner, 3090 on_primary, disk.iv_name) 3091 3092 device.SetInfo(info) 3093 3094 return device.unique_id

3095

3096 3097 -def _DumpDevice(source_path, target_path, offset, size, truncate):

3098 """This function images/wipes the device using a local file. 3099 3100 @type source_path: string 3101 @param source_path: path of the image or data source (e.g., "/dev/zero") 3102 3103 @type target_path: string 3104 @param target_path: path of the device to image/wipe 3105 3106 @type offset: int 3107 @param offset: offset in MiB in the output file 3108 3109 @type size: int 3110 @param size: maximum size in MiB to write (data source might be smaller) 3111 3112 @type truncate: bool 3113 @param truncate: whether the file should be truncated 3114 3115 @return: None 3116 @raise RPCFail: in case of failure 3117 3118 """ 3119 # Internal sizes are always in Mebibytes; if the following "dd" command 3120 # should use a different block size the offset and size given to this 3121 # function must be adjusted accordingly before being passed to "dd". 3122 block_size = constants.DD_BLOCK_SIZE 3123 3124 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset, 3125 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path, 3126 "count=%d" % size] 3127 3128 if not truncate: 3129 cmd.append("conv=notrunc") 3130 3131 result = utils.RunCmd(cmd) 3132 3133 if result.failed: 3134 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd, 3135 result.fail_reason, result.output)

3136

3137 3138 -def _DownloadAndDumpDevice(source_url, target_path, size):

3139 """This function images a device using a downloaded image file. 3140 3141 @type source_url: string 3142 @param source_url: URL of image to dump to disk 3143 3144 @type target_path: string 3145 @param target_path: path of the device to image 3146 3147 @type size: int 3148 @param size: maximum size in MiB to write (data source might be smaller) 3149 3150 @rtype: NoneType 3151 @return: None 3152 @raise RPCFail: in case of download or write failures 3153 3154 """ 3155 class DDParams(object): 3156 def __init__(self, current_size, total_size): 3157 self.current_size = current_size 3158 self.total_size = total_size 3159 self.image_size_error = False

3160 3161 def dd_write(ddparams, out): 3162 if ddparams.current_size < ddparams.total_size: 3163 ddparams.current_size += len(out) 3164 target_file.write(out) 3165 else: 3166 ddparams.image_size_error = True 3167 return -1 3168 3169 target_file = open(target_path, "r+") 3170 ddparams = DDParams(0, 1024 * 1024 * size) 3171 3172 curl = pycurl.Curl() 3173 curl.setopt(pycurl.VERBOSE, True) 3174 curl.setopt(pycurl.NOSIGNAL, True) 3175 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION) 3176 curl.setopt(pycurl.URL, source_url) 3177 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out)) 3178 3179 try: 3180 curl.perform() 3181 except pycurl.error: 3182 if ddparams.image_size_error: 3183 _Fail("Disk image larger than the disk") 3184 else: 3185 raise 3186 3187 target_file.close() 3188

3189 3190 -def BlockdevConvert(src_disk, target_disk):

3191 """Copies data from source block device to target. 3192 3193 This function gets the export and import commands from the source and 3194 target devices respectively, and then concatenates them to a single 3195 command using a pipe ("|"). Finally, executes the unified command that 3196 will transfer the data between the devices during the disk template 3197 conversion operation. 3198 3199 @type src_disk: L{objects.Disk} 3200 @param src_disk: the disk object we want to copy from 3201 @type target_disk: L{objects.Disk} 3202 @param target_disk: the disk object we want to copy to 3203 3204 @rtype: NoneType 3205 @return: None 3206 @raise RPCFail: in case of failure 3207 3208 """ 3209 src_dev = _RecursiveFindBD(src_disk) 3210 if src_dev is None: 3211 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid) 3212 3213 dest_dev = _RecursiveFindBD(target_disk) 3214 if dest_dev is None: 3215 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid) 3216 3217 src_cmd = src_dev.Export() 3218 dest_cmd = dest_dev.Import() 3219 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd), 3220 utils.ShellQuoteArgs(dest_cmd)) 3221 3222 result = utils.RunCmd(command) 3223 if result.failed: 3224 _Fail("Disk conversion command '%s' exited with error: %s; output: %s", 3225 result.cmd, result.fail_reason, result.output)

3226

3227 3228 -def BlockdevWipe(disk, offset, size):

3229 """Wipes a block device. 3230 3231 @type disk: L{objects.Disk} 3232 @param disk: the disk object we want to wipe 3233 @type offset: int 3234 @param offset: The offset in MiB in the file 3235 @type size: int 3236 @param size: The size in MiB to write 3237 3238 """ 3239 try: 3240 rdev = _RecursiveFindBD(disk) 3241 except errors.BlockDeviceError: 3242 rdev = None 3243 3244 if not rdev: 3245 _Fail("Cannot wipe device %s: device not found", disk.iv_name) 3246 if offset < 0: 3247 _Fail("Negative offset") 3248 if size < 0: 3249 _Fail("Negative size") 3250 if offset > rdev.size: 3251 _Fail("Wipe offset is bigger than device size") 3252 if (offset + size) > rdev.size: 3253 _Fail("Wipe offset and size are bigger than device size") 3254 3255 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)

3256

3257 3258 -def BlockdevImage(disk, image, size):

3259 """Images a block device either by dumping a local file or 3260 downloading a URL. 3261 3262 @type disk: L{objects.Disk} 3263 @param disk: the disk object we want to image 3264 3265 @type image: string 3266 @param image: file path to the disk image be dumped 3267 3268 @type size: int 3269 @param size: The size in MiB to write 3270 3271 @rtype: NoneType 3272 @return: None 3273 @raise RPCFail: in case of failure 3274 3275 """ 3276 if not (utils.IsUrl(image) or os.path.exists(image)): 3277 _Fail("Image '%s' not found", image) 3278 3279 try: 3280 rdev = _RecursiveFindBD(disk) 3281 except errors.BlockDeviceError: 3282 rdev = None 3283 3284 if not rdev: 3285 _Fail("Cannot image device %s: device not found", disk.iv_name) 3286 if size < 0: 3287 _Fail("Negative size") 3288 if size > rdev.size: 3289 _Fail("Image size is bigger than device size") 3290 3291 if utils.IsUrl(image): 3292 _DownloadAndDumpDevice(image, rdev.dev_path, size) 3293 else: 3294 _DumpDevice(image, rdev.dev_path, 0, size, False)

3295

3296 3297 -def BlockdevPauseResumeSync(disks, pause):

3298 """Pause or resume the sync of the block device. 3299 3300 @type disks: list of L{objects.Disk} 3301 @param disks: the disks object we want to pause/resume 3302 @type pause: bool 3303 @param pause: Wheater to pause or resume 3304 3305 """ 3306 success = [] 3307 for disk in disks: 3308 try: 3309 rdev = _RecursiveFindBD(disk) 3310 except errors.BlockDeviceError: 3311 rdev = None 3312 3313 if not rdev: 3314 success.append((False, ("Cannot change sync for device %s:" 3315 " device not found" % disk.iv_name))) 3316 continue 3317 3318 result = rdev.PauseResumeSync(pause) 3319 3320 if result: 3321 success.append((result, None)) 3322 else: 3323 if pause: 3324 msg = "Pause" 3325 else: 3326 msg = "Resume" 3327 success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) 3328 3329 return success

3330

3331 3332 -def BlockdevRemove(disk):

3333 """Remove a block device. 3334 3335 @note: This is intended to be called recursively. 3336 3337 @type disk: L{objects.Disk} 3338 @param disk: the disk object we should remove 3339 @rtype: boolean 3340 @return: the success of the operation 3341 3342 """ 3343 msgs = [] 3344 try: 3345 rdev = _RecursiveFindBD(disk) 3346 except errors.BlockDeviceError, err: 3347 # probably can't attach 3348 logging.info("Can't attach to device %s in remove", disk) 3349 rdev = None 3350 if rdev is not None: 3351 r_path = rdev.dev_path 3352 3353 def _TryRemove(): 3354 try: 3355 rdev.Remove() 3356 return [] 3357 except errors.BlockDeviceError, err: 3358 return [str(err)]

3359 3360 msgs.extend(utils.SimpleRetry([], _TryRemove, 3361 constants.DISK_REMOVE_RETRY_INTERVAL, 3362 constants.DISK_REMOVE_RETRY_TIMEOUT)) 3363 3364 if not msgs: 3365 DevCacheManager.RemoveCache(r_path) 3366 3367 if disk.children: 3368 for child in disk.children: 3369 try: 3370 BlockdevRemove(child) 3371 except RPCFail, err: 3372 msgs.append(str(err)) 3373 3374 if msgs: 3375 _Fail("; ".join(msgs)) 3376

3377 3378 -def _RecursiveAssembleBD(disk, owner, as_primary):

3379 """Activate a block device for an instance. 3380 3381 This is run on the primary and secondary nodes for an instance. 3382 3383 @note: this function is called recursively. 3384 3385 @type disk: L{objects.Disk} 3386 @param disk: the disk we try to assemble 3387 @type owner: str 3388 @param owner: the name of the instance which owns the disk 3389 @type as_primary: boolean 3390 @param as_primary: if we should make the block device 3391 read/write 3392 3393 @return: the assembled device or None (in case no device 3394 was assembled) 3395 @raise errors.BlockDeviceError: in case there is an error 3396 during the activation of the children or the device 3397 itself 3398 3399 """ 3400 children = [] 3401 if disk.children: 3402 mcn = disk.ChildrenNeeded() 3403 if mcn == -1: 3404 mcn = 0 # max number of Nones allowed 3405 else: 3406 mcn = len(disk.children) - mcn # max number of Nones 3407 for chld_disk in disk.children: 3408 try: 3409 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) 3410 except errors.BlockDeviceError, err: 3411 if children.count(None) >= mcn: 3412 raise 3413 cdev = None 3414 logging.error("Error in child activation (but continuing): %s", 3415 str(err)) 3416 children.append(cdev) 3417 3418 if as_primary or disk.AssembleOnSecondary(): 3419 r_dev = bdev.Assemble(disk, children) 3420 result = r_dev 3421 if as_primary or disk.OpenOnSecondary(): 3422 r_dev.Open() 3423 DevCacheManager.UpdateCache(r_dev.dev_path, owner, 3424 as_primary, disk.iv_name) 3425 3426 else: 3427 result = True 3428 return result

3429

3430 3431 -def BlockdevAssemble(disk, instance, as_primary, idx):

3432 """Activate a block device for an instance. 3433 3434 This is a wrapper over _RecursiveAssembleBD. 3435 3436 @rtype: str or boolean 3437 @return: a tuple with the C{/dev/...} path and the created symlink 3438 for primary nodes, and (C{True}, C{True}) for secondary nodes 3439 3440 """ 3441 try: 3442 result = _RecursiveAssembleBD(disk, instance.name, as_primary) 3443 if isinstance(result, BlockDev): 3444 # pylint: disable=E1103 3445 dev_path = result.dev_path 3446 link_name = None 3447 uri = None 3448 if as_primary: 3449 link_name = _SymlinkBlockDev(instance.name, dev_path, idx) 3450 uri = _CalculateDeviceURI(instance, disk, result) 3451 elif result: 3452 return result, result 3453 else: 3454 _Fail("Unexpected result from _RecursiveAssembleBD") 3455 except errors.BlockDeviceError, err: 3456 _Fail("Error while assembling disk: %s", err, exc=True) 3457 except OSError, err: 3458 _Fail("Error while symlinking disk: %s", err, exc=True) 3459 3460 return dev_path, link_name, uri

3461

3462 3463 -def BlockdevShutdown(disk):

3464 """Shut down a block device. 3465 3466 First, if the device is assembled (Attach() is successful), then 3467 the device is shutdown. Then the children of the device are 3468 shutdown. 3469 3470 This function is called recursively. Note that we don't cache the 3471 children or such, as oppossed to assemble, shutdown of different 3472 devices doesn't require that the upper device was active. 3473 3474 @type disk: L{objects.Disk} 3475 @param disk: the description of the disk we should 3476 shutdown 3477 @rtype: None 3478 3479 """ 3480 msgs = [] 3481 r_dev = _RecursiveFindBD(disk) 3482 if r_dev is not None: 3483 r_path = r_dev.dev_path 3484 try: 3485 r_dev.Shutdown() 3486 DevCacheManager.RemoveCache(r_path) 3487 except errors.BlockDeviceError, err: 3488 msgs.append(str(err)) 3489 3490 if disk.children: 3491 for child in disk.children: 3492 try: 3493 BlockdevShutdown(child) 3494 except RPCFail, err: 3495 msgs.append(str(err)) 3496 3497 if msgs: 3498 _Fail("; ".join(msgs))

3499

3500 3501 -def BlockdevAddchildren(parent_cdev, new_cdevs):

3502 """Extend a mirrored block device. 3503 3504 @type parent_cdev: L{objects.Disk} 3505 @param parent_cdev: the disk to which we should add children 3506 @type new_cdevs: list of L{objects.Disk} 3507 @param new_cdevs: the list of children which we should add 3508 @rtype: None 3509 3510 """ 3511 parent_bdev = _RecursiveFindBD(parent_cdev) 3512 if parent_bdev is None: 3513 _Fail("Can't find parent device '%s' in add children", parent_cdev) 3514 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] 3515 if new_bdevs.count(None) > 0: 3516 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs) 3517 parent_bdev.AddChildren(new_bdevs)

3518

3519 3520 -def BlockdevRemovechildren(parent_cdev, new_cdevs):

3521 """Shrink a mirrored block device. 3522 3523 @type parent_cdev: L{objects.Disk} 3524 @param parent_cdev: the disk from which we should remove children 3525 @type new_cdevs: list of L{objects.Disk} 3526 @param new_cdevs: the list of children which we should remove 3527 @rtype: None 3528 3529 """ 3530 parent_bdev = _RecursiveFindBD(parent_cdev) 3531 if parent_bdev is None: 3532 _Fail("Can't find parent device '%s' in remove children", parent_cdev) 3533 devs = [] 3534 for disk in new_cdevs: 3535 rpath = disk.StaticDevPath() 3536 if rpath is None: 3537 bd = _RecursiveFindBD(disk) 3538 if bd is None: 3539 _Fail("Can't find device %s while removing children", disk) 3540 else: 3541 devs.append(bd.dev_path) 3542 else: 3543 if not utils.IsNormAbsPath(rpath): 3544 _Fail("Strange path returned from StaticDevPath: '%s'", rpath) 3545 devs.append(rpath) 3546 parent_bdev.RemoveChildren(devs)

3547

3548 3549 -def BlockdevGetmirrorstatus(disks):

3550 """Get the mirroring status of a list of devices. 3551 3552 @type disks: list of L{objects.Disk} 3553 @param disks: the list of disks which we should query 3554 @rtype: disk 3555 @return: List of L{objects.BlockDevStatus}, one for each disk 3556 @raise errors.BlockDeviceError: if any of the disks cannot be 3557 found 3558 3559 """ 3560 stats = [] 3561 for dsk in disks: 3562 rbd = _RecursiveFindBD(dsk) 3563 if rbd is None: 3564 _Fail("Can't find device %s", dsk) 3565 3566 stats.append(rbd.CombinedSyncStatus()) 3567 3568 return stats

3569

3570 3571 -def BlockdevGetmirrorstatusMulti(disks):

3572 """Get the mirroring status of a list of devices. 3573 3574 @type disks: list of L{objects.Disk} 3575 @param disks: the list of disks which we should query 3576 @rtype: disk 3577 @return: List of tuples, (bool, status), one for each disk; bool denotes 3578 success/failure, status is L{objects.BlockDevStatus} on success, string 3579 otherwise 3580 3581 """ 3582 result = [] 3583 for disk in disks: 3584 try: 3585 rbd = _RecursiveFindBD(disk) 3586 if rbd is None: 3587 result.append((False, "Can't find device %s" % disk)) 3588 continue 3589 3590 status = rbd.CombinedSyncStatus() 3591 except errors.BlockDeviceError, err: 3592 logging.exception("Error while getting disk status") 3593 result.append((False, str(err))) 3594 else: 3595 result.append((True, status)) 3596 3597 assert len(disks) == len(result) 3598 3599 return result

3600

3601 3602 -def _RecursiveFindBD(disk):

3603 """Check if a device is activated. 3604 3605 If so, return information about the real device. 3606 3607 @type disk: L{objects.Disk} 3608 @param disk: the disk object we need to find 3609 3610 @return: None if the device can't be found, 3611 otherwise the device instance 3612 3613 """ 3614 children = [] 3615 if disk.children: 3616 for chdisk in disk.children: 3617 children.append(_RecursiveFindBD(chdisk)) 3618 3619 return bdev.FindDevice(disk, children)

3620

3621 3622 -def _OpenRealBD(disk):

3623 """Opens the underlying block device of a disk. 3624 3625 @type disk: L{objects.Disk} 3626 @param disk: the disk object we want to open 3627 3628 """ 3629 real_disk = _RecursiveFindBD(disk) 3630 if real_disk is None: 3631 _Fail("Block device '%s' is not set up", disk) 3632 3633 real_disk.Open() 3634 3635 return real_disk

3636

3637 3638 -def BlockdevFind(disk):

3639 """Check if a device is activated. 3640 3641 If it is, return information about the real device. 3642 3643 @type disk: L{objects.Disk} 3644 @param disk: the disk to find 3645 @rtype: None or objects.BlockDevStatus 3646 @return: None if the disk cannot be found, otherwise a the current 3647 information 3648 3649 """ 3650 try: 3651 rbd = _RecursiveFindBD(disk) 3652 except errors.BlockDeviceError, err: 3653 _Fail("Failed to find device: %s", err, exc=True) 3654 3655 if rbd is None: 3656 return None 3657 3658 return rbd.GetSyncStatus()

3659

3660 3661 -def BlockdevGetdimensions(disks):

3662 """Computes the size of the given disks. 3663 3664 If a disk is not found, returns None instead. 3665 3666 @type disks: list of L{objects.Disk} 3667 @param disks: the list of disk to compute the size for 3668 @rtype: list 3669 @return: list with elements None if the disk cannot be found, 3670 otherwise the pair (size, spindles), where spindles is None if the 3671 device doesn't support that 3672 3673 """ 3674 result = [] 3675 for cf in disks: 3676 try: 3677 rbd = _RecursiveFindBD(cf) 3678 except errors.BlockDeviceError: 3679 result.append(None) 3680 continue 3681 if rbd is None: 3682 result.append(None) 3683 else: 3684 result.append(rbd.GetActualDimensions()) 3685 return result

3686

3687 3688 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):

3689 """Write a file to the filesystem. 3690 3691 This allows the master to overwrite(!) a file. It will only perform 3692 the operation if the file belongs to a list of configuration files. 3693 3694 @type file_name: str 3695 @param file_name: the target file name 3696 @type data: str 3697 @param data: the new contents of the file 3698 @type mode: int 3699 @param mode: the mode to give the file (can be None) 3700 @type uid: string 3701 @param uid: the owner of the file 3702 @type gid: string 3703 @param gid: the group of the file 3704 @type atime: float 3705 @param atime: the atime to set on the file (can be None) 3706 @type mtime: float 3707 @param mtime: the mtime to set on the file (can be None) 3708 @rtype: None 3709 3710 """ 3711 file_name = vcluster.LocalizeVirtualPath(file_name) 3712 3713 if not os.path.isabs(file_name): 3714 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) 3715 3716 if file_name not in _ALLOWED_UPLOAD_FILES: 3717 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'", 3718 file_name) 3719 3720 raw_data = _Decompress(data) 3721 3722 if not (isinstance(uid, basestring) and isinstance(gid, basestring)): 3723 _Fail("Invalid username/groupname type") 3724 3725 getents = runtime.GetEnts() 3726 uid = getents.LookupUser(uid) 3727 gid = getents.LookupGroup(gid) 3728 3729 utils.SafeWriteFile(file_name, None, 3730 data=raw_data, mode=mode, uid=uid, gid=gid, 3731 atime=atime, mtime=mtime)

3732

3733 3734 -def RunOob(oob_program, command, node, timeout):

3735 """Executes oob_program with given command on given node. 3736 3737 @param oob_program: The path to the executable oob_program 3738 @param command: The command to invoke on oob_program 3739 @param node: The node given as an argument to the program 3740 @param timeout: Timeout after which we kill the oob program 3741 3742 @return: stdout 3743 @raise RPCFail: If execution fails for some reason 3744 3745 """ 3746 result = utils.RunCmd([oob_program, command, node], timeout=timeout) 3747 3748 if result.failed: 3749 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, 3750 result.fail_reason, result.output) 3751 3752 return result.stdout

3753

3754 3755 -def _OSOndiskAPIVersion(os_dir):

3756 """Compute and return the API version of a given OS. 3757 3758 This function will try to read the API version of the OS residing in 3759 the 'os_dir' directory. 3760 3761 @type os_dir: str 3762 @param os_dir: the directory in which we should look for the OS 3763 @rtype: tuple 3764 @return: tuple (status, data) with status denoting the validity and 3765 data holding either the valid versions or an error message 3766 3767 """ 3768 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE) 3769 3770 try: 3771 st = os.stat(api_file) 3772 except EnvironmentError, err: 3773 return False, ("Required file '%s' not found under path %s: %s" % 3774 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) 3775 3776 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3777 return False, ("File '%s' in %s is not a regular file" % 3778 (constants.OS_API_FILE, os_dir)) 3779 3780 try: 3781 api_versions = utils.ReadFile(api_file).splitlines() 3782 except EnvironmentError, err: 3783 return False, ("Error while reading the API version file at %s: %s" % 3784 (api_file, utils.ErrnoOrStr(err))) 3785 3786 try: 3787 api_versions = [int(version.strip()) for version in api_versions] 3788 except (TypeError, ValueError), err: 3789 return False, ("API version(s) can't be converted to integer: %s" % 3790 str(err)) 3791 3792 return True, api_versions

3793

3794 3795 -def DiagnoseOS(top_dirs=None):

3796 """Compute the validity for all OSes. 3797 3798 @type top_dirs: list 3799 @param top_dirs: the list of directories in which to 3800 search (if not given defaults to 3801 L{pathutils.OS_SEARCH_PATH}) 3802 @rtype: list of L{objects.OS} 3803 @return: a list of tuples (name, path, status, diagnose, variants, 3804 parameters, api_version) for all (potential) OSes under all 3805 search paths, where: 3806 - name is the (potential) OS name 3807 - path is the full path to the OS 3808 - status True/False is the validity of the OS 3809 - diagnose is the error message for an invalid OS, otherwise empty 3810 - variants is a list of supported OS variants, if any 3811 - parameters is a list of (name, help) parameters, if any 3812 - api_version is a list of support OS API versions 3813 3814 """ 3815 if top_dirs is None: 3816 top_dirs = pathutils.OS_SEARCH_PATH 3817 3818 result = [] 3819 for dir_name in top_dirs: 3820 if os.path.isdir(dir_name): 3821 try: 3822 f_names = utils.ListVisibleFiles(dir_name) 3823 except EnvironmentError, err: 3824 logging.exception("Can't list the OS directory %s: %s", dir_name, err) 3825 break 3826 for name in f_names: 3827 os_path = utils.PathJoin(dir_name, name) 3828 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name) 3829 if status: 3830 diagnose = "" 3831 variants = os_inst.supported_variants 3832 parameters = os_inst.supported_parameters 3833 api_versions = os_inst.api_versions 3834 trusted = False if os_inst.create_script_untrusted else True 3835 else: 3836 diagnose = os_inst 3837 variants = parameters = api_versions = [] 3838 trusted = True 3839 result.append((name, os_path, status, diagnose, variants, 3840 parameters, api_versions, trusted)) 3841 3842 return result

3843

3844 3845 -def _TryOSFromDisk(name, base_dir=None):

3846 """Create an OS instance from disk. 3847 3848 This function will return an OS instance if the given name is a 3849 valid OS name. 3850 3851 @type base_dir: string 3852 @keyword base_dir: Base directory containing OS installations. 3853 Defaults to a search in all the OS_SEARCH_PATH dirs. 3854 @rtype: tuple 3855 @return: success and either the OS instance if we find a valid one, 3856 or error message 3857 3858 """ 3859 if base_dir is None: 3860 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) 3861 else: 3862 os_dir = utils.FindFile(name, [base_dir], os.path.isdir) 3863 3864 if os_dir is None: 3865 return False, "Directory for OS %s not found in search path" % name 3866 3867 status, api_versions = _OSOndiskAPIVersion(os_dir) 3868 if not status: 3869 # push the error up 3870 return status, api_versions 3871 3872 if not constants.OS_API_VERSIONS.intersection(api_versions): 3873 return False, ("API version mismatch for path '%s': found %s, want %s." % 3874 (os_dir, api_versions, constants.OS_API_VERSIONS)) 3875 3876 # OS Files dictionary, we will populate it with the absolute path 3877 # names; if the value is True, then it is a required file, otherwise 3878 # an optional one 3879 os_files = dict.fromkeys(constants.OS_SCRIPTS, True) 3880 3881 os_files[constants.OS_SCRIPT_CREATE] = False 3882 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False 3883 3884 if max(api_versions) >= constants.OS_API_V15: 3885 os_files[constants.OS_VARIANTS_FILE] = False 3886 3887 if max(api_versions) >= constants.OS_API_V20: 3888 os_files[constants.OS_PARAMETERS_FILE] = True 3889 else: 3890 del os_files[constants.OS_SCRIPT_VERIFY] 3891 3892 for (filename, required) in os_files.items(): 3893 os_files[filename] = utils.PathJoin(os_dir, filename) 3894 3895 try: 3896 st = os.stat(os_files[filename]) 3897 except EnvironmentError, err: 3898 if err.errno == errno.ENOENT and not required: 3899 del os_files[filename] 3900 continue 3901 return False, ("File '%s' under path '%s' is missing (%s)" % 3902 (filename, os_dir, utils.ErrnoOrStr(err))) 3903 3904 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3905 return False, ("File '%s' under path '%s' is not a regular file" % 3906 (filename, os_dir)) 3907 3908 if filename in constants.OS_SCRIPTS: 3909 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: 3910 return False, ("File '%s' under path '%s' is not executable" % 3911 (filename, os_dir)) 3912 3913 if not constants.OS_SCRIPT_CREATE in os_files and \ 3914 not constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files: 3915 return False, ("A create script (trusted or untrusted) under path '%s'" 3916 " must exist" % os_dir) 3917 3918 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None) 3919 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED, 3920 None) 3921 3922 variants = [] 3923 if constants.OS_VARIANTS_FILE in os_files: 3924 variants_file = os_files[constants.OS_VARIANTS_FILE] 3925 try: 3926 variants = \ 3927 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) 3928 except EnvironmentError, err: 3929 # we accept missing files, but not other errors 3930 if err.errno != errno.ENOENT: 3931 return False, ("Error while reading the OS variants file at %s: %s" % 3932 (variants_file, utils.ErrnoOrStr(err))) 3933 3934 parameters = [] 3935 if constants.OS_PARAMETERS_FILE in os_files: 3936 parameters_file = os_files[constants.OS_PARAMETERS_FILE] 3937 try: 3938 parameters = utils.ReadFile(parameters_file).splitlines() 3939 except EnvironmentError, err: 3940 return False, ("Error while reading the OS parameters file at %s: %s" % 3941 (parameters_file, utils.ErrnoOrStr(err))) 3942 parameters = [v.split(None, 1) for v in parameters] 3943 3944 os_obj = objects.OS(name=name, path=os_dir, 3945 create_script=create_script, 3946 create_script_untrusted=create_script_untrusted, 3947 export_script=os_files[constants.OS_SCRIPT_EXPORT], 3948 import_script=os_files[constants.OS_SCRIPT_IMPORT], 3949 rename_script=os_files[constants.OS_SCRIPT_RENAME], 3950 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, 3951 None), 3952 supported_variants=variants, 3953 supported_parameters=parameters, 3954 api_versions=api_versions) 3955 return True, os_obj

3956

3957 3958 -def OSFromDisk(name, base_dir=None):

3959 """Create an OS instance from disk. 3960 3961 This function will return an OS instance if the given name is a 3962 valid OS name. Otherwise, it will raise an appropriate 3963 L{RPCFail} exception, detailing why this is not a valid OS. 3964 3965 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise 3966 an exception but returns true/false status data. 3967 3968 @type base_dir: string 3969 @keyword base_dir: Base directory containing OS installations. 3970 Defaults to a search in all the OS_SEARCH_PATH dirs. 3971 @rtype: L{objects.OS} 3972 @return: the OS instance if we find a valid one 3973 @raise RPCFail: if we don't find a valid OS 3974 3975 """ 3976 name_only = objects.OS.GetName(name) 3977 status, payload = _TryOSFromDisk(name_only, base_dir) 3978 3979 if not status: 3980 _Fail(payload) 3981 3982 return payload

3983

3984 3985 -def OSCoreEnv(os_name, inst_os, os_params, debug=0):

3986 """Calculate the basic environment for an os script. 3987 3988 @type os_name: str 3989 @param os_name: full operating system name (including variant) 3990 @type inst_os: L{objects.OS} 3991 @param inst_os: operating system for which the environment is being built 3992 @type os_params: dict 3993 @param os_params: the OS parameters 3994 @type debug: integer 3995 @param debug: debug level (0 or 1, for OS Api 10) 3996 @rtype: dict 3997 @return: dict of environment variables 3998 @raise errors.BlockDeviceError: if the block device 3999 cannot be found 4000 4001 """ 4002 result = {} 4003 api_version = \ 4004 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) 4005 result["OS_API_VERSION"] = "%d" % api_version 4006 result["OS_NAME"] = inst_os.name 4007 result["DEBUG_LEVEL"] = "%d" % debug 4008 4009 # OS variants 4010 if api_version >= constants.OS_API_V15 and inst_os.supported_variants: 4011 variant = objects.OS.GetVariant(os_name) 4012 if not variant: 4013 variant = inst_os.supported_variants[0] 4014 else: 4015 variant = "" 4016 result["OS_VARIANT"] = variant 4017 4018 # OS params 4019 for pname, pvalue in os_params.items(): 4020 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue 4021 4022 # Set a default path otherwise programs called by OS scripts (or 4023 # even hooks called from OS scripts) might break, and we don't want 4024 # to have each script require setting a PATH variable 4025 result["PATH"] = constants.HOOKS_PATH 4026 4027 return result

4028

4029 4030 -def OSEnvironment(instance, inst_os, debug=0):

4031 """Calculate the environment for an os script. 4032 4033 @type instance: L{objects.Instance} 4034 @param instance: target instance for the os script run 4035 @type inst_os: L{objects.OS} 4036 @param inst_os: operating system for which the environment is being built 4037 @type debug: integer 4038 @param debug: debug level (0 or 1, for OS Api 10) 4039 @rtype: dict 4040 @return: dict of environment variables 4041 @raise errors.BlockDeviceError: if the block device 4042 cannot be found 4043 4044 """ 4045 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams, 4046 instance.osparams_private.Unprivate()), debug=debug) 4047 4048 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: 4049 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) 4050 4051 result["HYPERVISOR"] = instance.hypervisor 4052 result["DISK_COUNT"] = "%d" % len(instance.disks_info) 4053 result["NIC_COUNT"] = "%d" % len(instance.nics) 4054 result["INSTANCE_SECONDARY_NODES"] = \ 4055 ("%s" % " ".join(instance.secondary_nodes)) 4056 4057 # Disks 4058 for idx, disk in enumerate(instance.disks_info): 4059 real_disk = _OpenRealBD(disk) 4060 uri = _CalculateDeviceURI(instance, disk, real_disk) 4061 result["DISK_%d_ACCESS" % idx] = disk.mode 4062 result["DISK_%d_UUID" % idx] = disk.uuid 4063 if real_disk.dev_path: 4064 result["DISK_%d_PATH" % idx] = real_disk.dev_path 4065 if uri: 4066 result["DISK_%d_URI" % idx] = uri 4067 if disk.name: 4068 result["DISK_%d_NAME" % idx] = disk.name 4069 if constants.HV_DISK_TYPE in instance.hvparams: 4070 result["DISK_%d_FRONTEND_TYPE" % idx] = \ 4071 instance.hvparams[constants.HV_DISK_TYPE] 4072 if disk.dev_type in constants.DTS_BLOCK: 4073 result["DISK_%d_BACKEND_TYPE" % idx] = "block" 4074 elif disk.dev_type in constants.DTS_FILEBASED: 4075 result["DISK_%d_BACKEND_TYPE" % idx] = \ 4076 "file:%s" % disk.logical_id[0] 4077 4078 # NICs 4079 for idx, nic in enumerate(instance.nics): 4080 result["NIC_%d_MAC" % idx] = nic.mac 4081 result["NIC_%d_UUID" % idx] = nic.uuid 4082 if nic.name: 4083 result["NIC_%d_NAME" % idx] = nic.name 4084 if nic.ip: 4085 result["NIC_%d_IP" % idx] = nic.ip 4086 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] 4087 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 4088 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] 4089 if nic.nicparams[constants.NIC_LINK]: 4090 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] 4091 if nic.netinfo: 4092 nobj = objects.Network.FromDict(nic.netinfo) 4093 result.update(nobj.HooksDict("NIC_%d_" % idx)) 4094 if constants.HV_NIC_TYPE in instance.hvparams: 4095 result["NIC_%d_FRONTEND_TYPE" % idx] = \ 4096 instance.hvparams[constants.HV_NIC_TYPE] 4097 4098 # HV/BE params 4099 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: 4100 for key, value in source.items(): 4101 result["INSTANCE_%s_%s" % (kind, key)] = str(value) 4102 4103 return result

4104

4105 4106 -def DiagnoseExtStorage(top_dirs=None):

4107 """Compute the validity for all ExtStorage Providers. 4108 4109 @type top_dirs: list 4110 @param top_dirs: the list of directories in which to 4111 search (if not given defaults to 4112 L{pathutils.ES_SEARCH_PATH}) 4113 @rtype: list of L{objects.ExtStorage} 4114 @return: a list of tuples (name, path, status, diagnose, parameters) 4115 for all (potential) ExtStorage Providers under all 4116 search paths, where: 4117 - name is the (potential) ExtStorage Provider 4118 - path is the full path to the ExtStorage Provider 4119 - status True/False is the validity of the ExtStorage Provider 4120 - diagnose is the error message for an invalid ExtStorage Provider, 4121 otherwise empty 4122 - parameters is a list of (name, help) parameters, if any 4123 4124 """ 4125 if top_dirs is None: 4126 top_dirs = pathutils.ES_SEARCH_PATH 4127 4128 result = [] 4129 for dir_name in top_dirs: 4130 if os.path.isdir(dir_name): 4131 try: 4132 f_names = utils.ListVisibleFiles(dir_name) 4133 except EnvironmentError, err: 4134 logging.exception("Can't list the ExtStorage directory %s: %s", 4135 dir_name, err) 4136 break 4137 for name in f_names: 4138 es_path = utils.PathJoin(dir_name, name) 4139 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name) 4140 if status: 4141 diagnose = "" 4142 parameters = es_inst.supported_parameters 4143 else: 4144 diagnose = es_inst 4145 parameters = [] 4146 result.append((name, es_path, status, diagnose, parameters)) 4147 4148 return result

4149

4150 4151 -def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):

4152 """Grow a stack of block devices. 4153 4154 This function is called recursively, with the childrens being the 4155 first ones to resize. 4156 4157 @type disk: L{objects.Disk} 4158 @param disk: the disk to be grown 4159 @type amount: integer 4160 @param amount: the amount (in mebibytes) to grow with 4161 @type dryrun: boolean 4162 @param dryrun: whether to execute the operation in simulation mode 4163 only, without actually increasing the size 4164 @param backingstore: whether to execute the operation on backing storage 4165 only, or on "logical" storage only; e.g. DRBD is logical storage, 4166 whereas LVM, file, RBD are backing storage 4167 @rtype: (status, result) 4168 @type excl_stor: boolean 4169 @param excl_stor: Whether exclusive_storage is active 4170 @return: a tuple with the status of the operation (True/False), and 4171 the errors message if status is False 4172 4173 """ 4174 r_dev = _RecursiveFindBD(disk) 4175 if r_dev is None: 4176 _Fail("Cannot find block device %s", disk) 4177 4178 try: 4179 r_dev.Grow(amount, dryrun, backingstore, excl_stor) 4180 except errors.BlockDeviceError, err: 4181 _Fail("Failed to grow block device: %s", err, exc=True)

4182

4183 4184 -def BlockdevSnapshot(disk, snap_name, snap_size):

4185 """Create a snapshot copy of a block device. 4186 4187 This function is called recursively, and the snapshot is actually created 4188 just for the leaf lvm backend device. 4189 4190 @type disk: L{objects.Disk} 4191 @param disk: the disk to be snapshotted 4192 @type snap_name: string 4193 @param snap_name: the name of the snapshot 4194 @type snap_size: int 4195 @param snap_size: the size of the snapshot 4196 @rtype: string 4197 @return: snapshot disk ID as (vg, lv) 4198 4199 """ 4200 def _DiskSnapshot(disk, snap_name=None, snap_size=None): 4201 r_dev = _RecursiveFindBD(disk) 4202 if r_dev is not None: 4203 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size) 4204 else: 4205 _Fail("Cannot find block device %s", disk)

4206 4207 if disk.SupportsSnapshots(): 4208 if disk.dev_type == constants.DT_DRBD8: 4209 if not disk.children: 4210 _Fail("DRBD device '%s' without backing storage cannot be snapshotted", 4211 disk.unique_id) 4212 return BlockdevSnapshot(disk.children[0], snap_name, snap_size) 4213 else: 4214 return _DiskSnapshot(disk, snap_name, snap_size) 4215 else: 4216 _Fail("Cannot snapshot block device '%s' of type '%s'", 4217 disk.logical_id, disk.dev_type) 4218

4219 4220 -def BlockdevSetInfo(disk, info):

4221 """Sets 'metadata' information on block devices. 4222 4223 This function sets 'info' metadata on block devices. Initial 4224 information is set at device creation; this function should be used 4225 for example after renames. 4226 4227 @type disk: L{objects.Disk} 4228 @param disk: the disk to be grown 4229 @type info: string 4230 @param info: new 'info' metadata 4231 @rtype: (status, result) 4232 @return: a tuple with the status of the operation (True/False), and 4233 the errors message if status is False 4234 4235 """ 4236 r_dev = _RecursiveFindBD(disk) 4237 if r_dev is None: 4238 _Fail("Cannot find block device %s", disk) 4239 4240 try: 4241 r_dev.SetInfo(info) 4242 except errors.BlockDeviceError, err: 4243 _Fail("Failed to set information on block device: %s", err, exc=True)

4244

4245 4246 -def FinalizeExport(instance, snap_disks):

4247 """Write out the export configuration information. 4248 4249 @type instance: L{objects.Instance} 4250 @param instance: the instance which we export, used for 4251 saving configuration 4252 @type snap_disks: list of L{objects.Disk} 4253 @param snap_disks: list of snapshot block devices, which 4254 will be used to get the actual name of the dump file 4255 4256 @rtype: None 4257 4258 """ 4259 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") 4260 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) 4261 disk_template = utils.GetDiskTemplate(snap_disks) 4262 4263 config = objects.SerializableConfigParser() 4264 4265 config.add_section(constants.INISECT_EXP) 4266 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION)) 4267 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) 4268 config.set(constants.INISECT_EXP, "source", instance.primary_node) 4269 config.set(constants.INISECT_EXP, "os", instance.os) 4270 config.set(constants.INISECT_EXP, "compression", "none") 4271 4272 config.add_section(constants.INISECT_INS) 4273 config.set(constants.INISECT_INS, "name", instance.name) 4274 config.set(constants.INISECT_INS, "maxmem", "%d" % 4275 instance.beparams[constants.BE_MAXMEM]) 4276 config.set(constants.INISECT_INS, "minmem", "%d" % 4277 instance.beparams[constants.BE_MINMEM]) 4278 # "memory" is deprecated, but useful for exporting to old ganeti versions 4279 config.set(constants.INISECT_INS, "memory", "%d" % 4280 instance.beparams[constants.BE_MAXMEM]) 4281 config.set(constants.INISECT_INS, "vcpus", "%d" % 4282 instance.beparams[constants.BE_VCPUS]) 4283 config.set(constants.INISECT_INS, "disk_template", disk_template) 4284 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) 4285 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) 4286 4287 nic_total = 0 4288 for nic_count, nic in enumerate(instance.nics): 4289 nic_total += 1 4290 config.set(constants.INISECT_INS, "nic%d_mac" % 4291 nic_count, "%s" % nic.mac) 4292 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) 4293 config.set(constants.INISECT_INS, "nic%d_network" % nic_count, 4294 "%s" % nic.network) 4295 config.set(constants.INISECT_INS, "nic%d_name" % nic_count, 4296 "%s" % nic.name) 4297 for param in constants.NICS_PARAMETER_TYPES: 4298 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), 4299 "%s" % nic.nicparams.get(param, None)) 4300 # TODO: redundant: on load can read nics until it doesn't exist 4301 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) 4302 4303 disk_total = 0 4304 for disk_count, disk in enumerate(snap_disks): 4305 if disk: 4306 disk_total += 1 4307 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, 4308 ("%s" % disk.iv_name)) 4309 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, 4310 ("%s" % disk.uuid)) 4311 config.set(constants.INISECT_INS, "disk%d_size" % disk_count, 4312 ("%d" % disk.size)) 4313 config.set(constants.INISECT_INS, "disk%d_name" % disk_count, 4314 "%s" % disk.name) 4315 4316 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) 4317 4318 # New-style hypervisor/backend parameters 4319 4320 config.add_section(constants.INISECT_HYP) 4321 for name, value in instance.hvparams.items(): 4322 if name not in constants.HVC_GLOBALS: 4323 config.set(constants.INISECT_HYP, name, str(value)) 4324 4325 config.add_section(constants.INISECT_BEP) 4326 for name, value in instance.beparams.items(): 4327 config.set(constants.INISECT_BEP, name, str(value)) 4328 4329 config.add_section(constants.INISECT_OSP) 4330 for name, value in instance.osparams.items(): 4331 config.set(constants.INISECT_OSP, name, str(value)) 4332 4333 config.add_section(constants.INISECT_OSP_PRIVATE) 4334 for name, value in instance.osparams_private.items(): 4335 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get())) 4336 4337 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), 4338 data=config.Dumps()) 4339 shutil.rmtree(finaldestdir, ignore_errors=True) 4340 shutil.move(destdir, finaldestdir)

4341

4342 4343 -def ExportInfo(dest):

4344 """Get export configuration information. 4345 4346 @type dest: str 4347 @param dest: directory containing the export 4348 4349 @rtype: L{objects.SerializableConfigParser} 4350 @return: a serializable config file containing the 4351 export info 4352 4353 """ 4354 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) 4355 4356 config = objects.SerializableConfigParser() 4357 config.read(cff) 4358 4359 if (not config.has_section(constants.INISECT_EXP) or 4360 not config.has_section(constants.INISECT_INS)): 4361 _Fail("Export info file doesn't have the required fields") 4362 4363 return config.Dumps()

4364

4365 4366 -def ListExports():

4367 """Return a list of exports currently available on this machine. 4368 4369 @rtype: list 4370 @return: list of the exports 4371 4372 """ 4373 if os.path.isdir(pathutils.EXPORT_DIR): 4374 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) 4375 else: 4376 _Fail("No exports directory")

4377

4378 4379 -def RemoveExport(export):

4380 """Remove an existing export from the node. 4381 4382 @type export: str 4383 @param export: the name of the export to remove 4384 @rtype: None 4385 4386 """ 4387 target = utils.PathJoin(pathutils.EXPORT_DIR, export) 4388 4389 try: 4390 shutil.rmtree(target) 4391 except EnvironmentError, err: 4392 _Fail("Error while removing the export: %s", err, exc=True)

4393

4394 4395 -def BlockdevRename(devlist):

4396 """Rename a list of block devices. 4397 4398 @type devlist: list of tuples 4399 @param devlist: list of tuples of the form (disk, new_unique_id); disk is 4400 an L{objects.Disk} object describing the current disk, and new 4401 unique_id is the name we rename it to 4402 @rtype: boolean 4403 @return: True if all renames succeeded, False otherwise 4404 4405 """ 4406 msgs = [] 4407 result = True 4408 for disk, unique_id in devlist: 4409 dev = _RecursiveFindBD(disk) 4410 if dev is None: 4411 msgs.append("Can't find device %s in rename" % str(disk)) 4412 result = False 4413 continue 4414 try: 4415 old_rpath = dev.dev_path 4416 dev.Rename(unique_id) 4417 new_rpath = dev.dev_path 4418 if old_rpath != new_rpath: 4419 DevCacheManager.RemoveCache(old_rpath) 4420 # FIXME: we should add the new cache information here, like: 4421 # DevCacheManager.UpdateCache(new_rpath, owner, ...) 4422 # but we don't have the owner here - maybe parse from existing 4423 # cache? for now, we only lose lvm data when we rename, which 4424 # is less critical than DRBD or MD 4425 except errors.BlockDeviceError, err: 4426 msgs.append("Can't rename device '%s' to '%s': %s" % 4427 (dev, unique_id, err)) 4428 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) 4429 result = False 4430 if not result: 4431 _Fail("; ".join(msgs))

4432

4433 4434 -def _TransformFileStorageDir(fs_dir):

4435 """Checks whether given file_storage_dir is valid. 4436 4437 Checks wheter the given fs_dir is within the cluster-wide default 4438 file_storage_dir or the shared_file_storage_dir, which are stored in 4439 SimpleStore. Only paths under those directories are allowed. 4440 4441 @type fs_dir: str 4442 @param fs_dir: the path to check 4443 4444 @return: the normalized path if valid, None otherwise 4445 4446 """ 4447 filestorage.CheckFileStoragePath(fs_dir) 4448 4449 return os.path.normpath(fs_dir)

4450

4451 4452 -def CreateFileStorageDir(file_storage_dir):

4453 """Create file storage directory. 4454 4455 @type file_storage_dir: str 4456 @param file_storage_dir: directory to create 4457 4458 @rtype: tuple 4459 @return: tuple with first element a boolean indicating wheter dir 4460 creation was successful or not 4461 4462 """ 4463 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4464 if os.path.exists(file_storage_dir): 4465 if not os.path.isdir(file_storage_dir): 4466 _Fail("Specified storage dir '%s' is not a directory", 4467 file_storage_dir) 4468 else: 4469 try: 4470 os.makedirs(file_storage_dir, 0750) 4471 except OSError, err: 4472 _Fail("Cannot create file storage directory '%s': %s", 4473 file_storage_dir, err, exc=True)

4474

4475 4476 -def RemoveFileStorageDir(file_storage_dir):

4477 """Remove file storage directory. 4478 4479 Remove it only if it's empty. If not log an error and return. 4480 4481 @type file_storage_dir: str 4482 @param file_storage_dir: the directory we should cleanup 4483 @rtype: tuple (success,) 4484 @return: tuple of one element, C{success}, denoting 4485 whether the operation was successful 4486 4487 """ 4488 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4489 if os.path.exists(file_storage_dir): 4490 if not os.path.isdir(file_storage_dir): 4491 _Fail("Specified Storage directory '%s' is not a directory", 4492 file_storage_dir) 4493 # deletes dir only if empty, otherwise we want to fail the rpc call 4494 try: 4495 os.rmdir(file_storage_dir) 4496 except OSError, err: 4497 _Fail("Cannot remove file storage directory '%s': %s", 4498 file_storage_dir, err)

4499

4500 4501 -def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):

4502 """Rename the file storage directory. 4503 4504 @type old_file_storage_dir: str 4505 @param old_file_storage_dir: the current path 4506 @type new_file_storage_dir: str 4507 @param new_file_storage_dir: the name we should rename to 4508 @rtype: tuple (success,) 4509 @return: tuple of one element, C{success}, denoting 4510 whether the operation was successful 4511 4512 """ 4513 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) 4514 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) 4515 if not os.path.exists(new_file_storage_dir): 4516 if os.path.isdir(old_file_storage_dir): 4517 try: 4518 os.rename(old_file_storage_dir, new_file_storage_dir) 4519 except OSError, err: 4520 _Fail("Cannot rename '%s' to '%s': %s", 4521 old_file_storage_dir, new_file_storage_dir, err) 4522 else: 4523 _Fail("Specified storage dir '%s' is not a directory", 4524 old_file_storage_dir) 4525 else: 4526 if os.path.exists(old_file_storage_dir): 4527 _Fail("Cannot rename '%s' to '%s': both locations exist", 4528 old_file_storage_dir, new_file_storage_dir)

4529

4530 4531 -def _EnsureJobQueueFile(file_name):

4532 """Checks whether the given filename is in the queue directory. 4533 4534 @type file_name: str 4535 @param file_name: the file name we should check 4536 @rtype: None 4537 @raises RPCFail: if the file is not valid 4538 4539 """ 4540 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): 4541 _Fail("Passed job queue file '%s' does not belong to" 4542 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)

4543

4544 4545 -def JobQueueUpdate(file_name, content):

4546 """Updates a file in the queue directory. 4547 4548 This is just a wrapper over L{utils.io.WriteFile}, with proper 4549 checking. 4550 4551 @type file_name: str 4552 @param file_name: the job file name 4553 @type content: str 4554 @param content: the new job contents 4555 @rtype: boolean 4556 @return: the success of the operation 4557 4558 """ 4559 file_name = vcluster.LocalizeVirtualPath(file_name) 4560 4561 _EnsureJobQueueFile(file_name) 4562 getents = runtime.GetEnts() 4563 4564 # Write and replace the file atomically 4565 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, 4566 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)

4567

4568 4569 -def JobQueueRename(old, new):

4570 """Renames a job queue file. 4571 4572 This is just a wrapper over os.rename with proper checking. 4573 4574 @type old: str 4575 @param old: the old (actual) file name 4576 @type new: str 4577 @param new: the desired file name 4578 @rtype: tuple 4579 @return: the success of the operation and payload 4580 4581 """ 4582 old = vcluster.LocalizeVirtualPath(old) 4583 new = vcluster.LocalizeVirtualPath(new) 4584 4585 _EnsureJobQueueFile(old) 4586 _EnsureJobQueueFile(new) 4587 4588 getents = runtime.GetEnts() 4589 4590 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, 4591 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)

4592

4593 4594 -def BlockdevClose(instance_name, disks):

4595 """Closes the given block devices. 4596 4597 This means they will be switched to secondary mode (in case of 4598 DRBD). 4599 4600 @param instance_name: if the argument is not empty, the symlinks 4601 of this instance will be removed 4602 @type disks: list of L{objects.Disk} 4603 @param disks: the list of disks to be closed 4604 @rtype: tuple (success, message) 4605 @return: a tuple of success and message, where success 4606 indicates the succes of the operation, and message 4607 which will contain the error details in case we 4608 failed 4609 4610 """ 4611 bdevs = [] 4612 for cf in disks: 4613 rd = _RecursiveFindBD(cf) 4614 if rd is None: 4615 _Fail("Can't find device %s", cf) 4616 bdevs.append(rd) 4617 4618 msg = [] 4619 for rd in bdevs: 4620 try: 4621 rd.Close() 4622 except errors.BlockDeviceError, err: 4623 msg.append(str(err)) 4624 if msg: 4625 _Fail("Can't close devices: %s", ",".join(msg)) 4626 else: 4627 if instance_name: 4628 _RemoveBlockDevLinks(instance_name, disks)

4629

4630 4631 -def BlockdevOpen(instance_name, disks, exclusive):

4632 """Opens the given block devices. 4633 4634 """ 4635 bdevs = [] 4636 for cf in disks: 4637 rd = _RecursiveFindBD(cf) 4638 if rd is None: 4639 _Fail("Can't find device %s", cf) 4640 bdevs.append(rd) 4641 4642 msg = [] 4643 for idx, rd in enumerate(bdevs): 4644 try: 4645 rd.Open(exclusive=exclusive) 4646 _SymlinkBlockDev(instance_name, rd.dev_path, idx) 4647 except errors.BlockDeviceError, err: 4648 msg.append(str(err)) 4649 4650 if msg: 4651 _Fail("Can't open devices: %s", ",".join(msg))

4652

4653 4654 -def ValidateHVParams(hvname, hvparams):

4655 """Validates the given hypervisor parameters. 4656 4657 @type hvname: string 4658 @param hvname: the hypervisor name 4659 @type hvparams: dict 4660 @param hvparams: the hypervisor parameters to be validated 4661 @rtype: None 4662 4663 """ 4664 try: 4665 hv_type = hypervisor.GetHypervisor(hvname) 4666 hv_type.ValidateParameters(hvparams) 4667 except errors.HypervisorError, err: 4668 _Fail(str(err), log=False)

4669

4670 4671 -def _CheckOSPList(os_obj, parameters):

4672 """Check whether a list of parameters is supported by the OS. 4673 4674 @type os_obj: L{objects.OS} 4675 @param os_obj: OS object to check 4676 @type parameters: list 4677 @param parameters: the list of parameters to check 4678 4679 """ 4680 supported = [v[0] for v in os_obj.supported_parameters] 4681 delta = frozenset(parameters).difference(supported) 4682 if delta: 4683 _Fail("The following parameters are not supported" 4684 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))

4685

4686 4687 -def _CheckOSVariant(os_obj, name):

4688 """Check whether an OS name conforms to the os variants specification. 4689 4690 @type os_obj: L{objects.OS} 4691 @param os_obj: OS object to check 4692 4693 @type name: string 4694 @param name: OS name passed by the user, to check for validity 4695 4696 @rtype: NoneType 4697 @return: None 4698 @raise RPCFail: if OS variant is not valid 4699 4700 """ 4701 variant = objects.OS.GetVariant(name) 4702 4703 if not os_obj.supported_variants: 4704 if variant: 4705 _Fail("OS '%s' does not support variants ('%s' passed)" % 4706 (os_obj.name, variant)) 4707 else: 4708 return 4709 4710 if not variant: 4711 _Fail("OS name '%s' must include a variant" % name) 4712 4713 if variant not in os_obj.supported_variants: 4714 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))

4715

4716 4717 -def ValidateOS(required, osname, checks, osparams, force_variant):

4718 """Validate the given OS parameters. 4719 4720 @type required: boolean 4721 @param required: whether absence of the OS should translate into 4722 failure or not 4723 @type osname: string 4724 @param osname: the OS to be validated 4725 @type checks: list 4726 @param checks: list of the checks to run (currently only 'parameters') 4727 @type osparams: dict 4728 @param osparams: dictionary with OS parameters, some of which may be 4729 private. 4730 @rtype: boolean 4731 @return: True if the validation passed, or False if the OS was not 4732 found and L{required} was false 4733 4734 """ 4735 if not constants.OS_VALIDATE_CALLS.issuperset(checks): 4736 _Fail("Unknown checks required for OS %s: %s", osname, 4737 set(checks).difference(constants.OS_VALIDATE_CALLS)) 4738 4739 name_only = objects.OS.GetName(osname) 4740 status, tbv = _TryOSFromDisk(name_only, None) 4741 4742 if not status: 4743 if required: 4744 _Fail(tbv) 4745 else: 4746 return False 4747 4748 if not force_variant: 4749 _CheckOSVariant(tbv, osname) 4750 4751 if max(tbv.api_versions) < constants.OS_API_V20: 4752 return True 4753 4754 if constants.OS_VALIDATE_PARAMETERS in checks: 4755 _CheckOSPList(tbv, osparams.keys()) 4756 4757 validate_env = OSCoreEnv(osname, tbv, osparams) 4758 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, 4759 cwd=tbv.path, reset_env=True) 4760 if result.failed: 4761 logging.error("os validate command '%s' returned error: %s output: %s", 4762 result.cmd, result.fail_reason, result.output) 4763 _Fail("OS validation script failed (%s), output: %s", 4764 result.fail_reason, result.output, log=False) 4765 4766 return True

4767

4768 4769 -def ExportOS(instance, override_env):

4770 """Creates a GZIPed tarball with an OS definition and environment. 4771 4772 The archive contains a file with the environment variables needed by 4773 the OS scripts. 4774 4775 @type instance: L{objects.Instance} 4776 @param instance: instance for which the OS definition is exported 4777 4778 @type override_env: dict of string to string 4779 @param override_env: if supplied, it overrides the environment on a 4780 key-by-key basis that is part of the archive 4781 4782 @rtype: string 4783 @return: filepath of the archive 4784 4785 """ 4786 assert instance 4787 assert instance.os 4788 4789 temp_dir = tempfile.mkdtemp() 4790 inst_os = OSFromDisk(instance.os) 4791 4792 result = utils.RunCmd(["ln", "-s", inst_os.path, 4793 utils.PathJoin(temp_dir, "os")]) 4794 if result.failed: 4795 _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'", 4796 inst_os, temp_dir, result.fail_reason, result.output) 4797 4798 env = OSEnvironment(instance, inst_os) 4799 env.update(override_env) 4800 4801 with open(utils.PathJoin(temp_dir, "environment"), "w") as f: 4802 for var in env: 4803 f.write(var + "=" + env[var] + "\n") 4804 4805 (fd, os_package) = tempfile.mkstemp(suffix=".tgz") 4806 os.close(fd) 4807 4808 result = utils.RunCmd(["tar", "--dereference", "-czv", 4809 "-f", os_package, 4810 "-C", temp_dir, 4811 "."]) 4812 if result.failed: 4813 _Fail("Failed to create OS archive '%s': %s, output '%s'", 4814 os_package, result.fail_reason, result.output) 4815 4816 result = utils.RunCmd(["rm", "-rf", temp_dir]) 4817 if result.failed: 4818 _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'", 4819 inst_os, temp_dir, result.fail_reason, result.output) 4820 4821 return os_package

4822

4823 4824 -def DemoteFromMC():

4825 """Demotes the current node from master candidate role. 4826 4827 """ 4828 # try to ensure we're not the master by mistake 4829 master, myself = ssconf.GetMasterAndMyself() 4830 if master == myself: 4831 _Fail("ssconf status shows I'm the master node, will not demote") 4832 4833 result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) 4834 if not result.failed: 4835 _Fail("The master daemon is running, will not demote") 4836 4837 try: 4838 if os.path.isfile(pathutils.CLUSTER_CONF_FILE): 4839 utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) 4840 except EnvironmentError, err: 4841 if err.errno != errno.ENOENT: 4842 _Fail("Error while backing up cluster file: %s", err, exc=True) 4843 4844 utils.RemoveFile(pathutils.CLUSTER_CONF_FILE)

4845

4846 4847 -def _GetX509Filenames(cryptodir, name):

4848 """Returns the full paths for the private key and certificate. 4849 4850 """ 4851 return (utils.PathJoin(cryptodir, name), 4852 utils.PathJoin(cryptodir, name, _X509_KEY_FILE), 4853 utils.PathJoin(cryptodir, name, _X509_CERT_FILE))

4854

4855 4856 -def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):

4857 """Creates a new X509 certificate for SSL/TLS. 4858 4859 @type validity: int 4860 @param validity: Validity in seconds 4861 @rtype: tuple; (string, string) 4862 @return: Certificate name and public part 4863 4864 """ 4865 serial_no = int(time.time()) 4866 (key_pem, cert_pem) = \ 4867 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), 4868 min(validity, _MAX_SSL_CERT_VALIDITY), 4869 serial_no) 4870 4871 cert_dir = tempfile.mkdtemp(dir=cryptodir, 4872 prefix="x509-%s-" % utils.TimestampForFilename()) 4873 try: 4874 name = os.path.basename(cert_dir) 4875 assert len(name) > 5 4876 4877 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 4878 4879 utils.WriteFile(key_file, mode=0400, data=key_pem) 4880 utils.WriteFile(cert_file, mode=0400, data=cert_pem) 4881 4882 # Never return private key as it shouldn't leave the node 4883 return (name, cert_pem) 4884 except Exception: 4885 shutil.rmtree(cert_dir, ignore_errors=True) 4886 raise

4887

4888 4889 -def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):

4890 """Removes a X509 certificate. 4891 4892 @type name: string 4893 @param name: Certificate name 4894 4895 """ 4896 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 4897 4898 utils.RemoveFile(key_file) 4899 utils.RemoveFile(cert_file) 4900 4901 try: 4902 os.rmdir(cert_dir) 4903 except EnvironmentError, err: 4904 _Fail("Cannot remove certificate directory '%s': %s", 4905 cert_dir, err)

4906

4907 4908 -def _GetImportExportIoCommand(instance, mode, ieio, ieargs):

4909 """Returns the command for the requested input/output. 4910 4911 @type instance: L{objects.Instance} 4912 @param instance: The instance object 4913 @param mode: Import/export mode 4914 @param ieio: Input/output type 4915 @param ieargs: Input/output arguments 4916 4917 """ 4918 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) 4919 4920 env = None 4921 prefix = None 4922 suffix = None 4923 exp_size = None 4924 4925 if ieio == constants.IEIO_FILE: 4926 (filename, ) = ieargs 4927 4928 if not utils.IsNormAbsPath(filename): 4929 _Fail("Path '%s' is not normalized or absolute", filename) 4930 4931 real_filename = os.path.realpath(filename) 4932 directory = os.path.dirname(real_filename) 4933 4934 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): 4935 _Fail("File '%s' is not under exports directory '%s': %s", 4936 filename, pathutils.EXPORT_DIR, real_filename) 4937 4938 # Create directory 4939 utils.Makedirs(directory, mode=0750) 4940 4941 quoted_filename = utils.ShellQuote(filename) 4942 4943 if mode == constants.IEM_IMPORT: 4944 suffix = "> %s" % quoted_filename 4945 elif mode == constants.IEM_EXPORT: 4946 suffix = "< %s" % quoted_filename 4947 4948 # Retrieve file size 4949 try: 4950 st = os.stat(filename) 4951 except EnvironmentError, err: 4952 logging.error("Can't stat(2) %s: %s", filename, err) 4953 else: 4954 exp_size = utils.BytesToMebibyte(st.st_size) 4955 4956 elif ieio == constants.IEIO_RAW_DISK: 4957 (disk, ) = ieargs 4958 real_disk = _OpenRealBD(disk) 4959 4960 if mode == constants.IEM_IMPORT: 4961 suffix = "| %s" % utils.ShellQuoteArgs(real_disk.Import()) 4962 4963 elif mode == constants.IEM_EXPORT: 4964 prefix = "%s |" % utils.ShellQuoteArgs(real_disk.Export()) 4965 exp_size = disk.size 4966 4967 elif ieio == constants.IEIO_SCRIPT: 4968 (disk, disk_index, ) = ieargs 4969 4970 assert isinstance(disk_index, (int, long)) 4971 4972 inst_os = OSFromDisk(instance.os) 4973 env = OSEnvironment(instance, inst_os) 4974 4975 if mode == constants.IEM_IMPORT: 4976 disk_path_var = "DISK_%d_PATH" % disk_index 4977 if disk_path_var in env: 4978 env["IMPORT_DEVICE"] = env[disk_path_var] 4979 env["IMPORT_DISK_PATH"] = env[disk_path_var] 4980 4981 disk_uri_var = "DISK_%d_URI" % disk_index 4982 if disk_uri_var in env: 4983 env["IMPORT_DISK_URI"] = env[disk_uri_var] 4984 4985 env["IMPORT_INDEX"] = str(disk_index) 4986 script = inst_os.import_script 4987 4988 elif mode == constants.IEM_EXPORT: 4989 disk_path_var = "DISK_%d_PATH" % disk_index 4990 if disk_path_var in env: 4991 env["EXPORT_DEVICE"] = env[disk_path_var] 4992 env["EXPORT_DISK_PATH"] = env[disk_path_var] 4993 4994 disk_uri_var = "DISK_%d_URI" % disk_index 4995 if disk_uri_var in env: 4996 env["EXPORT_DISK_URI"] = env[disk_uri_var] 4997 4998 env["EXPORT_INDEX"] = str(disk_index) 4999 script = inst_os.export_script 5000 5001 # TODO: Pass special environment only to script 5002 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) 5003 5004 if mode == constants.IEM_IMPORT: 5005 suffix = "| %s" % script_cmd 5006 5007 elif mode == constants.IEM_EXPORT: 5008 prefix = "%s |" % script_cmd 5009 5010 # Let script predict size 5011 exp_size = constants.IE_CUSTOM_SIZE 5012 5013 else: 5014 _Fail("Invalid %s I/O mode %r", mode, ieio) 5015 5016 return (env, prefix, suffix, exp_size)

5017

5018 5019 -def _CreateImportExportStatusDir(prefix):

5020 """Creates status directory for import/export. 5021 5022 """ 5023 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, 5024 prefix=("%s-%s-" % 5025 (prefix, utils.TimestampForFilename())))

5026

5027 5028 -def StartImportExportDaemon(mode, opts, host, port, instance, component, 5029 ieio, ieioargs):

5030 """Starts an import or export daemon. 5031 5032 @param mode: Import/output mode 5033 @type opts: L{objects.ImportExportOptions} 5034 @param opts: Daemon options 5035 @type host: string 5036 @param host: Remote host for export (None for import) 5037 @type port: int 5038 @param port: Remote port for export (None for import) 5039 @type instance: L{objects.Instance} 5040 @param instance: Instance object 5041 @type component: string 5042 @param component: which part of the instance is transferred now, 5043 e.g. 'disk/0' 5044 @param ieio: Input/output type 5045 @param ieioargs: Input/output arguments 5046 5047 """ 5048 5049 # Use Import/Export over socat. 5050 # 5051 # Export() gives a command that produces a flat stream. 5052 # Import() gives a command that reads a flat stream to a disk template. 5053 if mode == constants.IEM_IMPORT: 5054 prefix = "import" 5055 5056 if not (host is None and port is None): 5057 _Fail("Can not specify host or port on import") 5058 5059 elif mode == constants.IEM_EXPORT: 5060 prefix = "export" 5061 5062 if host is None or port is None: 5063 _Fail("Host and port must be specified for an export") 5064 5065 else: 5066 _Fail("Invalid mode %r", mode) 5067 5068 if (opts.key_name is None) ^ (opts.ca_pem is None): 5069 _Fail("Cluster certificate can only be used for both key and CA") 5070 5071 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ 5072 _GetImportExportIoCommand(instance, mode, ieio, ieioargs) 5073 5074 if opts.key_name is None: 5075 # Use server.pem 5076 key_path = pathutils.NODED_CERT_FILE 5077 cert_path = pathutils.NODED_CERT_FILE 5078 assert opts.ca_pem is None 5079 else: 5080 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, 5081 opts.key_name) 5082 assert opts.ca_pem is not None 5083 5084 for i in [key_path, cert_path]: 5085 if not os.path.exists(i): 5086 _Fail("File '%s' does not exist" % i) 5087 5088 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) 5089 try: 5090 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) 5091 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) 5092 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) 5093 5094 if opts.ca_pem is None: 5095 # Use server.pem 5096 ca = utils.ReadFile(pathutils.NODED_CERT_FILE) 5097 else: 5098 ca = opts.ca_pem 5099 5100 # Write CA file 5101 utils.WriteFile(ca_file, data=ca, mode=0400) 5102 5103 cmd = [ 5104 pathutils.IMPORT_EXPORT_DAEMON, 5105 status_file, mode, 5106 "--key=%s" % key_path, 5107 "--cert=%s" % cert_path, 5108 "--ca=%s" % ca_file, 5109 ] 5110 5111 if host: 5112 cmd.append("--host=%s" % host) 5113 5114 if port: 5115 cmd.append("--port=%s" % port) 5116 5117 if opts.ipv6: 5118 cmd.append("--ipv6") 5119 else: 5120 cmd.append("--ipv4") 5121 5122 if opts.compress: 5123 cmd.append("--compress=%s" % opts.compress) 5124 5125 if opts.magic: 5126 cmd.append("--magic=%s" % opts.magic) 5127 5128 if exp_size is not None: 5129 cmd.append("--expected-size=%s" % exp_size) 5130 5131 if cmd_prefix: 5132 cmd.append("--cmd-prefix=%s" % cmd_prefix) 5133 5134 if cmd_suffix: 5135 cmd.append("--cmd-suffix=%s" % cmd_suffix) 5136 5137 if mode == constants.IEM_EXPORT: 5138 # Retry connection a few times when connecting to remote peer 5139 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) 5140 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) 5141 elif opts.connect_timeout is not None: 5142 assert mode == constants.IEM_IMPORT 5143 # Overall timeout for establishing connection while listening 5144 cmd.append("--connect-timeout=%s" % opts.connect_timeout) 5145 5146 logfile = _InstanceLogName(prefix, instance.os, instance.name, component) 5147 5148 # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has 5149 # support for receiving a file descriptor for output 5150 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, 5151 output=logfile) 5152 5153 # The import/export name is simply the status directory name 5154 return os.path.basename(status_dir) 5155 5156 except Exception: 5157 shutil.rmtree(status_dir, ignore_errors=True) 5158 raise

5159

5160 5161 -def GetImportExportStatus(names):

5162 """Returns import/export daemon status. 5163 5164 @type names: sequence 5165 @param names: List of names 5166 @rtype: List of dicts 5167 @return: Returns a list of the state of each named import/export or None if a 5168 status couldn't be read 5169 5170 """ 5171 result = [] 5172 5173 for name in names: 5174 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, 5175 _IES_STATUS_FILE) 5176 5177 try: 5178 data = utils.ReadFile(status_file) 5179 except EnvironmentError, err: 5180 if err.errno != errno.ENOENT: 5181 raise 5182 data = None 5183 5184 if not data: 5185 result.append(None) 5186 continue 5187 5188 result.append(serializer.LoadJson(data)) 5189 5190 return result

5191

5192 5193 -def AbortImportExport(name):

5194 """Sends SIGTERM to a running import/export daemon. 5195 5196 """ 5197 logging.info("Abort import/export %s", name) 5198 5199 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5200 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5201 5202 if pid: 5203 logging.info("Import/export %s is running with PID %s, sending SIGTERM", 5204 name, pid) 5205 utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)

5206

5207 5208 -def CleanupImportExport(name):

5209 """Cleanup after an import or export. 5210 5211 If the import/export daemon is still running it's killed. Afterwards the 5212 whole status directory is removed. 5213 5214 """ 5215 logging.info("Finalizing import/export %s", name) 5216 5217 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5218 5219 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5220 5221 if pid: 5222 logging.info("Import/export %s is still running with PID %s", 5223 name, pid) 5224 utils.KillProcess(pid, waitpid=False) 5225 5226 shutil.rmtree(status_dir, ignore_errors=True)

5227

5228 5229 -def _FindDisks(disks):

5230 """Finds attached L{BlockDev}s for the given disks. 5231 5232 @type disks: list of L{objects.Disk} 5233 @param disks: the disk objects we need to find 5234 5235 @return: list of L{BlockDev} objects or C{None} if a given disk 5236 was not found or was no attached. 5237 5238 """ 5239 bdevs = [] 5240 5241 for disk in disks: 5242 rd = _RecursiveFindBD(disk) 5243 if rd is None: 5244 _Fail("Can't find device %s", disk) 5245 bdevs.append(rd) 5246 return bdevs

5247

5248 5249 -def DrbdDisconnectNet(disks):

5250 """Disconnects the network on a list of drbd devices. 5251 5252 """ 5253 bdevs = _FindDisks(disks) 5254 5255 # disconnect disks 5256 for rd in bdevs: 5257 try: 5258 rd.DisconnectNet() 5259 except errors.BlockDeviceError, err: 5260 _Fail("Can't change network configuration to standalone mode: %s", 5261 err, exc=True)

5262

5263 5264 -def DrbdAttachNet(disks, multimaster):

5265 """Attaches the network on a list of drbd devices. 5266 5267 """ 5268 bdevs = _FindDisks(disks) 5269 5270 # reconnect disks, switch to new master configuration and if 5271 # needed primary mode 5272 for rd in bdevs: 5273 try: 5274 rd.AttachNet(multimaster) 5275 except errors.BlockDeviceError, err: 5276 _Fail("Can't change network configuration: %s", err) 5277 5278 # wait until the disks are connected; we need to retry the re-attach 5279 # if the device becomes standalone, as this might happen if the one 5280 # node disconnects and reconnects in a different mode before the 5281 # other node reconnects; in this case, one or both of the nodes will 5282 # decide it has wrong configuration and switch to standalone 5283 5284 def _Attach(): 5285 all_connected = True 5286 5287 for rd in bdevs: 5288 stats = rd.GetProcStatus() 5289 5290 if multimaster: 5291 # In the multimaster case we have to wait explicitly until 5292 # the resource is Connected and UpToDate/UpToDate, because 5293 # we promote *both nodes* to primary directly afterwards. 5294 # Being in resync is not enough, since there is a race during which we 5295 # may promote a node with an Outdated disk to primary, effectively 5296 # tearing down the connection. 5297 all_connected = (all_connected and 5298 stats.is_connected and 5299 stats.is_disk_uptodate and 5300 stats.peer_disk_uptodate) 5301 else: 5302 all_connected = (all_connected and 5303 (stats.is_connected or stats.is_in_resync)) 5304 5305 if stats.is_standalone: 5306 # peer had different config info and this node became 5307 # standalone, even though this should not happen with the 5308 # new staged way of changing disk configs 5309 try: 5310 rd.AttachNet(multimaster) 5311 except errors.BlockDeviceError, err: 5312 _Fail("Can't change network configuration: %s", err) 5313 5314 if not all_connected: 5315 raise utils.RetryAgain()

5316 5317 try: 5318 # Start with a delay of 100 miliseconds and go up to 5 seconds 5319 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60) 5320 except utils.RetryTimeout: 5321 _Fail("Timeout in disk reconnecting") 5322

5323 5324 -def DrbdWaitSync(disks):

5325 """Wait until DRBDs have synchronized. 5326 5327 """ 5328 def _helper(rd): 5329 stats = rd.GetProcStatus() 5330 if not (stats.is_connected or stats.is_in_resync): 5331 raise utils.RetryAgain() 5332 return stats

5333 5334 bdevs = _FindDisks(disks) 5335 5336 min_resync = 100 5337 alldone = True 5338 for rd in bdevs: 5339 try: 5340 # poll each second for 15 seconds 5341 stats = utils.Retry(_helper, 1, 15, args=[rd]) 5342 except utils.RetryTimeout: 5343 stats = rd.GetProcStatus() 5344 # last check 5345 if not (stats.is_connected or stats.is_in_resync): 5346 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats) 5347 alldone = alldone and (not stats.is_in_resync) 5348 if stats.sync_percent is not None: 5349 min_resync = min(min_resync, stats.sync_percent) 5350 5351 return (alldone, min_resync) 5352

5353 5354 -def DrbdNeedsActivation(disks):

5355 """Checks which of the passed disks needs activation and returns their UUIDs. 5356 5357 """ 5358 faulty_disks = [] 5359 5360 for disk in disks: 5361 rd = _RecursiveFindBD(disk) 5362 if rd is None: 5363 faulty_disks.append(disk) 5364 continue 5365 5366 stats = rd.GetProcStatus() 5367 if stats.is_standalone or stats.is_diskless: 5368 faulty_disks.append(disk) 5369 5370 return [disk.uuid for disk in faulty_disks]

5371

5372 5373 -def GetDrbdUsermodeHelper():

5374 """Returns DRBD usermode helper currently configured. 5375 5376 """ 5377 try: 5378 return drbd.DRBD8.GetUsermodeHelper() 5379 except errors.BlockDeviceError, err: 5380 _Fail(str(err))

5381

5382 5383 -def PowercycleNode(hypervisor_type, hvparams=None):

5384 """Hard-powercycle the node. 5385 5386 Because we need to return first, and schedule the powercycle in the 5387 background, we won't be able to report failures nicely. 5388 5389 """ 5390 hyper = hypervisor.GetHypervisor(hypervisor_type) 5391 try: 5392 pid = os.fork() 5393 except OSError: 5394 # if we can't fork, we'll pretend that we're in the child process 5395 pid = 0 5396 if pid > 0: 5397 return "Reboot scheduled in 5 seconds" 5398 # ensure the child is running on ram 5399 try: 5400 utils.Mlockall() 5401 except Exception: # pylint: disable=W0703 5402 pass 5403 time.sleep(5) 5404 hyper.PowercycleNode(hvparams=hvparams)

5405

5406 5407 -def _VerifyRestrictedCmdName(cmd):

5408 """Verifies a restricted command name. 5409 5410 @type cmd: string 5411 @param cmd: Command name 5412 @rtype: tuple; (boolean, string or None) 5413 @return: The tuple's first element is the status; if C{False}, the second 5414 element is an error message string, otherwise it's C{None} 5415 5416 """ 5417 if not cmd.strip(): 5418 return (False, "Missing command name") 5419 5420 if os.path.basename(cmd) != cmd: 5421 return (False, "Invalid command name") 5422 5423 if not constants.EXT_PLUGIN_MASK.match(cmd): 5424 return (False, "Command name contains forbidden characters") 5425 5426 return (True, None)

5427

5428 5429 -def _CommonRestrictedCmdCheck(path, owner):

5430 """Common checks for restricted command file system directories and files. 5431 5432 @type path: string 5433 @param path: Path to check 5434 @param owner: C{None} or tuple containing UID and GID 5435 @rtype: tuple; (boolean, string or C{os.stat} result) 5436 @return: The tuple's first element is the status; if C{False}, the second 5437 element is an error message string, otherwise it's the result of C{os.stat} 5438 5439 """ 5440 if owner is None: 5441 # Default to root as owner 5442 owner = (0, 0) 5443 5444 try: 5445 st = os.stat(path) 5446 except EnvironmentError, err: 5447 return (False, "Can't stat(2) '%s': %s" % (path, err)) 5448 5449 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): 5450 return (False, "Permissions on '%s' are too permissive" % path) 5451 5452 if (st.st_uid, st.st_gid) != owner: 5453 (owner_uid, owner_gid) = owner 5454 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) 5455 5456 return (True, st)

5457

5458 5459 -def _VerifyRestrictedCmdDirectory(path, _owner=None):

5460 """Verifies restricted command directory. 5461 5462 @type path: string 5463 @param path: Path to check 5464 @rtype: tuple; (boolean, string or None) 5465 @return: The tuple's first element is the status; if C{False}, the second 5466 element is an error message string, otherwise it's C{None} 5467 5468 """ 5469 (status, value) = _CommonRestrictedCmdCheck(path, _owner) 5470 5471 if not status: 5472 return (False, value) 5473 5474 if not stat.S_ISDIR(value.st_mode): 5475 return (False, "Path '%s' is not a directory" % path) 5476 5477 return (True, None)

5478

5479 5480 -def _VerifyRestrictedCmd(path, cmd, _owner=None):

5481 """Verifies a whole restricted command and returns its executable filename. 5482 5483 @type path: string 5484 @param path: Directory containing restricted commands 5485 @type cmd: string 5486 @param cmd: Command name 5487 @rtype: tuple; (boolean, string) 5488 @return: The tuple's first element is the status; if C{False}, the second 5489 element is an error message string, otherwise the second element is the 5490 absolute path to the executable 5491 5492 """ 5493 executable = utils.PathJoin(path, cmd) 5494 5495 (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) 5496 5497 if not status: 5498 return (False, msg) 5499 5500 if not utils.IsExecutable(executable): 5501 return (False, "access(2) thinks '%s' can't be executed" % executable) 5502 5503 return (True, executable)

5504

5505 5506 -def _PrepareRestrictedCmd(path, cmd, 5507 _verify_dir=_VerifyRestrictedCmdDirectory, 5508 _verify_name=_VerifyRestrictedCmdName, 5509 _verify_cmd=_VerifyRestrictedCmd):

5510 """Performs a number of tests on a restricted command. 5511 5512 @type path: string 5513 @param path: Directory containing restricted commands 5514 @type cmd: string 5515 @param cmd: Command name 5516 @return: Same as L{_VerifyRestrictedCmd} 5517 5518 """ 5519 # Verify the directory first 5520 (status, msg) = _verify_dir(path) 5521 if status: 5522 # Check command if everything was alright 5523 (status, msg) = _verify_name(cmd) 5524 5525 if not status: 5526 return (False, msg) 5527 5528 # Check actual executable 5529 return _verify_cmd(path, cmd)

5530

5531 5532 -def RunRestrictedCmd(cmd, 5533 _lock_timeout=_RCMD_LOCK_TIMEOUT, 5534 _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, 5535 _path=pathutils.RESTRICTED_COMMANDS_DIR, 5536 _sleep_fn=time.sleep, 5537 _prepare_fn=_PrepareRestrictedCmd, 5538 _runcmd_fn=utils.RunCmd, 5539 _enabled=constants.ENABLE_RESTRICTED_COMMANDS):

5540 """Executes a restricted command after performing strict tests. 5541 5542 @type cmd: string 5543 @param cmd: Command name 5544 @rtype: string 5545 @return: Command output 5546 @raise RPCFail: In case of an error 5547 5548 """ 5549 logging.info("Preparing to run restricted command '%s'", cmd) 5550 5551 if not _enabled: 5552 _Fail("Restricted commands disabled at configure time") 5553 5554 lock = None 5555 try: 5556 cmdresult = None 5557 try: 5558 lock = utils.FileLock.Open(_lock_file) 5559 lock.Exclusive(blocking=True, timeout=_lock_timeout) 5560 5561 (status, value) = _prepare_fn(_path, cmd) 5562 5563 if status: 5564 cmdresult = _runcmd_fn([value], env={}, reset_env=True, 5565 postfork_fn=lambda _: lock.Unlock()) 5566 else: 5567 logging.error(value) 5568 except Exception: # pylint: disable=W0703 5569 # Keep original error in log 5570 logging.exception("Caught exception") 5571 5572 if cmdresult is None: 5573 logging.info("Sleeping for %0.1f seconds before returning", 5574 _RCMD_INVALID_DELAY) 5575 _sleep_fn(_RCMD_INVALID_DELAY) 5576 5577 # Do not include original error message in returned error 5578 _Fail("Executing command '%s' failed" % cmd) 5579 elif cmdresult.failed or cmdresult.fail_reason: 5580 _Fail("Restricted command '%s' failed: %s; output: %s", 5581 cmd, cmdresult.fail_reason, cmdresult.output) 5582 else: 5583 return cmdresult.output 5584 finally: 5585 if lock is not None: 5586 # Release lock at last 5587 lock.Close() 5588 lock = None

5589

5590 5591 -def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):

5592 """Creates or removes the watcher pause file. 5593 5594 @type until: None or number 5595 @param until: Unix timestamp saying until when the watcher shouldn't run 5596 5597 """ 5598 if until is None: 5599 logging.info("Received request to no longer pause watcher") 5600 utils.RemoveFile(_filename) 5601 else: 5602 logging.info("Received request to pause watcher until %s", until) 5603 5604 if not ht.TNumber(until): 5605 _Fail("Duration must be numeric") 5606 5607 utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)

5608

5609 5610 -def ConfigureOVS(ovs_name, ovs_link):

5611 """Creates a OpenvSwitch on the node. 5612 5613 This function sets up a OpenvSwitch on the node with given name nad 5614 connects it via a given eth device. 5615 5616 @type ovs_name: string 5617 @param ovs_name: Name of the OpenvSwitch to create. 5618 @type ovs_link: None or string 5619 @param ovs_link: Ethernet device for outside connection (can be missing) 5620 5621 """ 5622 # Initialize the OpenvSwitch 5623 result = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name]) 5624 if result.failed: 5625 _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'" 5626 % (result.exit_code, result.output), log=True) 5627 5628 # And connect it to a physical interface, if given 5629 if ovs_link: 5630 result = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link]) 5631 if result.failed: 5632 _Fail("Failed to connect openvswitch to interface %s. Script return" 5633 " value: %s, output: '%s'" % (ovs_link, result.exit_code, 5634 result.output), log=True)

5635

5636 5637 -def GetFileInfo(file_path):

5638 """ Checks if a file exists and returns information related to it. 5639 5640 Currently returned information: 5641 - file size: int, size in bytes 5642 5643 @type file_path: string 5644 @param file_path: Name of file to examine. 5645 5646 @rtype: tuple of bool, dict 5647 @return: Whether the file exists, and a dictionary of information about the 5648 file gathered by os.stat. 5649 5650 """ 5651 try: 5652 stat_info = os.stat(file_path) 5653 values_dict = { 5654 constants.STAT_SIZE: stat_info.st_size, 5655 } 5656 return True, values_dict 5657 except IOError: 5658 return False, {}

5659

5660 5661 -class HooksRunner(object):

5662 """Hook runner. 5663 5664 This class is instantiated on the node side (ganeti-noded) and not 5665 on the master side. 5666 5667 """

5668 - def __init__(self, hooks_base_dir=None):

5669 """Constructor for hooks runner. 5670 5671 @type hooks_base_dir: str or None 5672 @param hooks_base_dir: if not None, this overrides the 5673 L{pathutils.HOOKS_BASE_DIR} (useful for unittests) 5674 5675 """ 5676 if hooks_base_dir is None: 5677 hooks_base_dir = pathutils.HOOKS_BASE_DIR 5678 # yeah, _BASE_DIR is not valid for attributes, we use it like a 5679 # constant 5680 self._BASE_DIR = hooks_base_dir # pylint: disable=C0103

5681

5682 - def RunLocalHooks(self, node_list, hpath, phase, env):

5683 """Check that the hooks will be run only locally and then run them. 5684 5685 """ 5686 assert len(node_list) == 1 5687 node = node_list[0] 5688 _, myself = ssconf.GetMasterAndMyself() 5689 assert node == myself 5690 5691 results = self.RunHooks(hpath, phase, env) 5692 5693 # Return values in the form expected by HooksMaster 5694 return {node: (None, False, results)}

5695

5696 - def RunHooks(self, hpath, phase, env):

5697 """Run the scripts in the hooks directory. 5698 5699 @type hpath: str 5700 @param hpath: the path to the hooks directory which 5701 holds the scripts 5702 @type phase: str 5703 @param phase: either L{constants.HOOKS_PHASE_PRE} or 5704 L{constants.HOOKS_PHASE_POST} 5705 @type env: dict 5706 @param env: dictionary with the environment for the hook 5707 @rtype: list 5708 @return: list of 3-element tuples: 5709 - script path 5710 - script result, either L{constants.HKR_SUCCESS} or 5711 L{constants.HKR_FAIL} 5712 - output of the script 5713 5714 @raise errors.ProgrammerError: for invalid input 5715 parameters 5716 5717 """ 5718 if phase == constants.HOOKS_PHASE_PRE: 5719 suffix = "pre" 5720 elif phase == constants.HOOKS_PHASE_POST: 5721 suffix = "post" 5722 else: 5723 _Fail("Unknown hooks phase '%s'", phase) 5724 5725 subdir = "%s-%s.d" % (hpath, suffix) 5726 dir_name = utils.PathJoin(self._BASE_DIR, subdir) 5727 5728 results = [] 5729 5730 if not os.path.isdir(dir_name): 5731 # for non-existing/non-dirs, we simply exit instead of logging a 5732 # warning at every operation 5733 return results 5734 5735 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) 5736 5737 for (relname, relstatus, runresult) in runparts_results: 5738 if relstatus == constants.RUNPARTS_SKIP: 5739 rrval = constants.HKR_SKIP 5740 output = "" 5741 elif relstatus == constants.RUNPARTS_ERR: 5742 rrval = constants.HKR_FAIL 5743 output = "Hook script execution error: %s" % runresult 5744 elif relstatus == constants.RUNPARTS_RUN: 5745 if runresult.failed: 5746 rrval = constants.HKR_FAIL 5747 else: 5748 rrval = constants.HKR_SUCCESS 5749 output = utils.SafeEncode(runresult.output.strip()) 5750 results.append(("%s/%s" % (subdir, relname), rrval, output)) 5751 5752 return results

5753

5754 5755 -class IAllocatorRunner(object):

5756 """IAllocator runner. 5757 5758 This class is instantiated on the node side (ganeti-noded) and not on 5759 the master side. 5760 5761 """ 5762 @staticmethod

5763 - def Run(name, idata, ial_params):

5764 """Run an iallocator script. 5765 5766 @type name: str 5767 @param name: the iallocator script name 5768 @type idata: str 5769 @param idata: the allocator input data 5770 @type ial_params: list 5771 @param ial_params: the iallocator parameters 5772 5773 @rtype: tuple 5774 @return: two element tuple of: 5775 - status 5776 - either error message or stdout of allocator (for success) 5777 5778 """ 5779 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH, 5780 os.path.isfile) 5781 if alloc_script is None: 5782 _Fail("iallocator module '%s' not found in the search path", name) 5783 5784 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.") 5785 try: 5786 os.write(fd, idata) 5787 os.close(fd) 5788 result = utils.RunCmd([alloc_script, fin_name] + ial_params) 5789 if result.failed: 5790 _Fail("iallocator module '%s' failed: %s, output '%s'", 5791 name, result.fail_reason, result.output) 5792 finally: 5793 os.unlink(fin_name) 5794 5795 return result.stdout

5796

5797 5798 -class DevCacheManager(object):

5799 """Simple class for managing a cache of block device information. 5800 5801 """ 5802 _DEV_PREFIX = "/dev/" 5803 _ROOT_DIR = pathutils.BDEV_CACHE_DIR 5804 5805 @classmethod

5806 - def _ConvertPath(cls, dev_path):

5807 """Converts a /dev/name path to the cache file name. 5808 5809 This replaces slashes with underscores and strips the /dev 5810 prefix. It then returns the full path to the cache file. 5811 5812 @type dev_path: str 5813 @param dev_path: the C{/dev/} path name 5814 @rtype: str 5815 @return: the converted path name 5816 5817 """ 5818 if dev_path.startswith(cls._DEV_PREFIX): 5819 dev_path = dev_path[len(cls._DEV_PREFIX):] 5820 dev_path = dev_path.replace("/", "_") 5821 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path) 5822 return fpath

5823 5824 @classmethod

5825 - def UpdateCache(cls, dev_path, owner, on_primary, iv_name):

5826 """Updates the cache information for a given device. 5827 5828 @type dev_path: str 5829 @param dev_path: the pathname of the device 5830 @type owner: str 5831 @param owner: the owner (instance name) of the device 5832 @type on_primary: bool 5833 @param on_primary: whether this is the primary 5834 node nor not 5835 @type iv_name: str 5836 @param iv_name: the instance-visible name of the 5837 device, as in objects.Disk.iv_name 5838 5839 @rtype: None 5840 5841 """ 5842 if dev_path is None: 5843 logging.error("DevCacheManager.UpdateCache got a None dev_path") 5844 return 5845 fpath = cls._ConvertPath(dev_path) 5846 if on_primary: 5847 state = "primary" 5848 else: 5849 state = "secondary" 5850 if iv_name is None: 5851 iv_name = "not_visible" 5852 fdata = "%s %s %s\n" % (str(owner), state, iv_name) 5853 try: 5854 utils.WriteFile(fpath, data=fdata) 5855 except EnvironmentError, err: 5856 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

5857 5858 @classmethod

5859 - def RemoveCache(cls, dev_path):

5860 """Remove data for a dev_path. 5861 5862 This is just a wrapper over L{utils.io.RemoveFile} with a converted 5863 path name and logging. 5864 5865 @type dev_path: str 5866 @param dev_path: the pathname of the device 5867 5868 @rtype: None 5869 5870 """ 5871 if dev_path is None: 5872 logging.error("DevCacheManager.RemoveCache got a None dev_path") 5873 return 5874 fpath = cls._ConvertPath(dev_path) 5875 try: 5876 utils.RemoveFile(fpath) 5877 except EnvironmentError, err: 5878 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)

5879