Package ganeti :: Module backend

Source Code for Module ganeti.backend

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Functions used by the node daemon

@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
     the L{UploadFile} function
@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
     in the L{_CleanDirectory} function

"""

# pylint: disable=E1103,C0302

# E1103: %s %r has no %r member (but some types could not be
# inferred), because the _TryOSFromDisk returns either (True, os_obj)
# or (False, "string") which confuses pylint

# C0302: This module has become too big and should be split up


import base64
import errno
import logging
import os
import os.path
import pycurl
import random
import re
import shutil
import signal
import socket
import stat
import tempfile
import time
import zlib
import copy

from ganeti import errors
from ganeti import http
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti.hypervisor import hv_base
from ganeti import constants
from ganeti.storage import bdev
from ganeti.storage import drbd
from ganeti.storage import extstorage
from ganeti.storage import filestorage
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
from ganeti.storage.base import BlockDev
from ganeti.storage.drbd import DRBD8
from ganeti import hooksmaster
from ganeti.rpc import transport
from ganeti.rpc.errors import NoMasterError, TimeoutError


_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

#: Valid LVS output line regex
_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")
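

# Illustrative sketch (not part of the original module): how the regex above
# splits one "|"-separated line of "lvs" output into VG name, LV name, size
# and attributes. The sample line is hypothetical.
def _ExampleParseLvsLine():
  sample = "  xenvg|instance1.disk0|10240.00|-wi-ao----"
  match = _LVSLINE_REGEX.match(sample)
  if match is None:
    return None
  (vg_name, lv_name, size, attr) = match.groups()
  return (vg_name, lv_name, float(size), attr)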


# Actions for the master setup script
_MASTER_START = "start"
_MASTER_STOP = "stop"

#: Maximum file permissions for restricted command directory and executables
_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

#: Delay before returning an error for restricted commands
_RCMD_INVALID_DELAY = 10

#: How long to wait to acquire lock for restricted commands (shorter than
#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many
#: command requests arrive
_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8


class RPCFail(Exception):
  """Class denoting RPC failure.

  Its argument is the error message.

  """


def _GetInstReasonFilename(instance_name):
  """Path of the file containing the reason of the instance status change.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)


def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  json = serializer.DumpJson(trail)
  filename = _GetInstReasonFilename(instance_name)
  utils.WriteFile(filename, data=json)


def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args
  if "log" not in kwargs or kwargs["log"]:  # if we should log this error
    if "exc" in kwargs and kwargs["exc"]:
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)
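

# Illustrative sketch (not part of the original module): _Fail both logs and
# raises, so RPC entry points call it and let the node daemon turn the
# resulting RPCFail into a 'failed' RPC answer. The instance name below is
# hypothetical.
def _ExampleFail():
  try:
    _Fail("Instance '%s' not found", "instance1.example.com")
  except RPCFail, err:
    return str(err)  # the already-formatted error message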


def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()


def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)


def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")
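

# Illustrative sketch (not part of the original module): building the
# (encoding, content) pairs that _Decompress accepts, using the same two
# encodings the RPC client uses.
def _ExampleCompressRoundtrip(data):
  packed = (constants.RPC_ENCODING_ZLIB_BASE64,
            base64.b64encode(zlib.compress(data)))
  assert _Decompress(packed) == data
  # Small payloads are typically sent without compression
  assert _Decompress((constants.RPC_ENCODING_NONE, data)) == data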


def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)
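

# Illustrative sketch (not part of the original module): _CleanDirectory
# refuses paths outside _ALLOWED_CLEAN_DIRS, so a wrong path raises RPCFail
# instead of deleting arbitrary files.
def _ExampleCleanQueueDir():
  # Allowed: clean the queue directory but keep the queue lock file
  _CleanDirectory(pathutils.QUEUE_DIR,
                  exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  try:
    _CleanDirectory("/tmp")  # not in the whitelist
  except RPCFail:
    pass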


def _BuildUploadFileList():
  """Build the list of allowed upload files.

  This is abstracted so that it's built only once at module import time.

  """
  allowed_files = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    hv_class = hypervisor.GetHypervisorClass(hv_name)
    allowed_files.update(hv_class.GetAncillaryFiles()[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed_files)


_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()


def JobQueuePurge():
  """Removes job queue files and archived jobs.

  @rtype: tuple
  @return: True, None

  """
  _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)


def GetMasterNodeName():
  """Returns the master node name.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: in case of errors

  """
  try:
    return _GetConfig().GetMasterNode()
  except errors.ConfigurationError, err:
    _Fail("Cluster configuration incomplete: %s", err, exc=True)


def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
    environment variables for the hooks. Will get all the parameters of the
    decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  def decorator(fn):
    def wrapper(*args, **kwargs):
      _, myself = ssconf.GetMasterAndMyself()
      nodes = ([myself], [myself])  # these hooks run locally

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hr = HooksRunner()
      hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
                                   hr.RunLocalHooks, None, env_fn, None,
                                   logging.warning, cfg.GetClusterName(),
                                   cfg.GetMasterNode())
      hm.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hm.RunPhase(constants.HOOKS_PHASE_POST)

      return result
    return wrapper
  return decorator
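

# Illustrative sketch (not part of the original module): wrapping a node RPC
# entry point so local pre- and post-hooks run around it. The hooks directory
# name, env builder and wrapped function are hypothetical; ActivateMasterIp
# below is a real usage of this decorator.
def _ExampleEnvBuilder(name):
  # Receives the same arguments as the decorated function
  return {"EXAMPLE_NAME": name}


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "example-hooks",
               _ExampleEnvBuilder)
def _ExampleHookedAction(name):
  # Runs between the HOOKS_PHASE_PRE and HOOKS_PHASE_POST phases
  return name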


def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Builds environment variables for master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script (unused, but necessary per the implementation of
    the L{RunLocalHooks} decorator)

  """
  # pylint: disable=W0613
  ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  env = {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ver),
    }

  return env


def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Execute the master IP address setup script.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
    L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
    script

  """
  env = _BuildMasterIpEnv(master_params)

  if use_external_mip_script:
    setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  result = utils.RunCmd([setup_script, action], env=env, reset_env=True)

  if result.failed:
    _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
          (action, result.exit_code, result.output), log=True)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Activate the IP address of the master daemon.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  _RunMasterSetupScript(master_params, _MASTER_START,
                        use_external_mip_script)


def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  The function will start the master daemons (ganeti-masterd and ganeti-rapi).

  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
      but still non-interactively
  @rtype: None

  """

  if no_voting:
    daemon_args = "--no-voting --yes-do-it"
  else:
    daemon_args = ""

  env = {
    "EXTRA_LUXID_ARGS": daemon_args,
    "EXTRA_WCONFD_ARGS": daemon_args,
    }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    msg = "Can't start Ganeti master: %s" % result.output
    logging.error(msg)
    _Fail(msg)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Deactivate the master IP on this node.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  _RunMasterSetupScript(master_params, _MASTER_STOP,
                        use_external_mip_script)


def StopMasterDaemons():
  """Stop the master daemons on this node.

  Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  if result.failed:
    logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                  " and error %s",
                  result.cmd, result.exit_code, result.output)


def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
                         "%s/%s" % (master_ip, netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not set the new netmask on the master IP address")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
                         "%s/%s" % (master_ip, old_netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not bring down the master IP address with the old netmask")


def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry

  """
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but parameter is"
            " present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")
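

# Illustrative sketch (not part of the original module): adding and then
# removing an /etc/hosts entry through this RPC entry point. Host name and
# address are hypothetical.
def _ExampleEtcHosts():
  EtcHostsModify(constants.ETC_HOSTS_ADD, "node1.example.com", "192.0.2.10")
  # Mode 'remove' must not be given an IP
  EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node1.example.com", None)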


def LeaveCluster(modify_ssh_setup):
  """Cleans up and removes the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  @param modify_ssh_setup: boolean

  """
  _CleanDirectory(pathutils.DATA_DIR)
  _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  try:
    utils.RemoveFile(pathutils.CONFD_HMAC_KEY)
    utils.RemoveFile(pathutils.RAPI_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CACERT_FILE)
    utils.RemoveFile(pathutils.NODED_CERT_FILE)
  except:  # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  utils.StopDaemon(constants.CONFD)
  utils.StopDaemon(constants.MOND)
  utils.StopDaemon(constants.KVMD)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")


def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting is provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if not len(params) == num_params:
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))


def _CheckLvmStorageParams(params):
  """Performs sanity check for the 'exclusive storage' flag.

  @see: C{_CheckStorageParams}

  """
  _CheckStorageParams(params, 1)
  excl_stor = params[0]
  if not isinstance(params[0], bool):
    raise errors.ProgrammerError("Exclusive storage parameter is not"
                                 " boolean: '%s'." % excl_stor)
  return excl_stor


def _GetLvmVgSpaceInfo(name, params):
  """Wrapper around C{_GetVgInfo} which checks the storage parameters.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should
    contain only one: the exclusive storage flag

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgInfo(name, excl_stor)


def _GetVgInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about a LVM volume group.

  """
  # TODO: GetVGInfo supports returning information for multiple VGs at once
  vginfo = info_fn([name], excl_stor)
  if vginfo:
    vg_free = int(round(vginfo[0][0], 0))
    vg_size = int(round(vginfo[0][1], 0))
  else:
    vg_free = None
    vg_size = None

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }
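

# Illustrative sketch (not part of the original module): the info_fn
# parameter of _GetVgInfo exists so tests can inject a stub instead of
# querying LVM; the stub below fakes one VG with 1 GiB free of 10 GiB total.
def _ExampleStubVgInfo():
  def stub_info_fn(names, excl_stor):
    # _GetVgInfo only reads positions [0][0] (free) and [0][1] (size)
    return [(1024, 10240, "xenvg")]
  return _GetVgInfo("xenvg", False, info_fn=stub_info_fn)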


def _GetLvmPvSpaceInfo(name, params):
  """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.

  @see: C{_GetLvmVgSpaceInfo}

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgSpindlesInfo(name, excl_stor)


def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @rtype: dict
  @return: dictionary with keys "type", "name", "storage_free" and
    "storage_size" for the storage type, VG name, free spindles and total
    spindles respectively

  """
  if excl_stor:
    (vg_free, vg_size) = info_fn(name)
  else:
    vg_free = 0
    vg_size = 0
  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }


def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The information returned depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) RAM in MiB
    - memory_total is the total amount of RAM in MiB
    - hv_version: the hypervisor version, if available

  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams

  """
  return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)


def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams

  """
  if hv_specs is None:
    return None

  result = []
  for hvname, hvparams in hv_specs:
    result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
  return result


def _GetNamedNodeInfo(names, fn):
  """Calls C{fn} for all names in C{names} and returns a list of results.

  @rtype: None or list

  """
  if names is None:
    return None
  else:
    return map(fn, names)


def GetNodeInfo(storage_units, hv_specs):
  """Gives back a hash with different information about the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters)
    to ask for disk space information. In case of lvm-vg, the identifier is
    the VG name. The parameters can contain additional, storage-type-specific
    parameters, for example exclusive storage for lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/list, None/list)
  @return: Tuple containing boot ID, volume group information and hypervisor
    information

  """
  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  storage_info = _GetNamedNodeInfo(
    storage_units,
    (lambda (storage_type, storage_key, storage_params):
     _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
  hv_info = _GetHvInfoAll(hv_specs)
  return (bootid, storage_info, hv_info)
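

# Illustrative sketch (not part of the original module): asking for LVM VG
# space on "xenvg" (exclusive storage off) plus node data from the xen-pvm
# hypervisor; the VG name and the empty hvparams are hypothetical.
def _ExampleGetNodeInfo():
  storage_units = [(constants.ST_LVM_VG, "xenvg", [False])]
  hv_specs = [(constants.HT_XEN_PVM, {})]
  return GetNodeInfo(storage_units, hv_specs)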


def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetSpaceInfo.

  The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  and ignore the *args parameter to not leak it into the filestorage
  module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
    parameters.

  """
  _CheckStorageParams(params, 0)
  return filestorage.GetFileStorageSpaceInfo(path)


# FIXME: implement storage reporting for all missing storage types.
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
  }


def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Looks up and applies the correct function to calculate free and total
  storage for the given storage type.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be
    reported.
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group name
    of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting.
    These parameters and their semantics vary from storage type to storage
    type and are just propagated in this function.
  @return: the results of the application of the storage space function (see
    _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
    storage type
  @raises NotImplementedError: for storage types that don't support space
    reporting yet

  """
  fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if fn is not None:
    return fn(storage_key, *args)
  else:
    raise NotImplementedError
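

# Illustrative sketch (not part of the original module): the dispatch table
# above routes each storage type to its reporting function; types mapped to
# None raise NotImplementedError. The VG name is hypothetical.
def _ExampleStorageDispatch():
  vg_info = _ApplyStorageInfoFunction(constants.ST_LVM_VG, "xenvg", [False])
  try:
    _ApplyStorageInfoFunction(constants.ST_DISKLESS, "ignored")
  except NotImplementedError:
    pass
  return vg_info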


def _CheckExclusivePvs(pvi_list):
  """Check that PVs are not shared among LVs.

  @type pvi_list: list of L{objects.LvmPvInfo} objects
  @param pvi_list: information about the PVs

  @rtype: list of tuples (string, list of strings)
  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])

  """
  res = []
  for pvi in pvi_list:
    if len(pvi.lv_list) > 1:
      res.append((pvi.name, pvi.lv_list))
  return res
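

# Illustrative sketch (not part of the original module): a PV carrying two
# LVs violates exclusive storage and is reported. Assumes that
# L{objects.LvmPvInfo} accepts its fields as keyword arguments; the device
# and LV names are hypothetical.
def _ExampleExclusivePvs():
  shared_pv = objects.LvmPvInfo(name="/dev/sda2",
                                lv_list=["disk0", "disk1"])
  # Returns [("/dev/sda2", ["disk0", "disk1"])]
  return _CheckExclusivePvs([shared_pv])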


def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisor. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
    testability

  """
  if not vm_capable:
    return

  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      hvparams = all_hvparams[hv_name]
      try:
        val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
      except errors.HypervisorError, err:
        val = "Error while checking hypervisor: %s" % str(err)
      result[constants.NV_HYPERVISOR][hv_name] = val


def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
    testability

  """
  if not vm_capable:
    return

  if constants.NV_HVPARAMS in what:
    result[constants.NV_HVPARAMS] = []
    for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
      try:
        logging.info("Validating hv %s, %s", hv_name, hvparms)
        get_hv_fn(hv_name).ValidateParameters(hvparms)
      except errors.HypervisorError, err:
        result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))


def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST in what and vm_capable:
    # GetInstanceList can fail
    try:
      val = GetInstanceList(what[constants.NV_INSTANCELIST],
                            all_hvparams=all_hvparams)
    except RPCFail, err:
      val = str(err)
    result[constants.NV_INSTANCELIST] = val


def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO in what and vm_capable:
    hvname = what[constants.NV_HVINFO]
    hyper = hypervisor.GetHypervisor(hvname)
    hvparams = all_hvparams[hvname]
    result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)


def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  Also, verify that the client certificate is not self-signed. Self-
  signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  and should be replaced by client certificates signed by the server
  certificate. Hence we output a warning when we encounter a self-signed
  one.

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  (errcode, msg) = utils.VerifyCertificate(cert_file)
  if errcode is not None:
    return (errcode, msg)

  (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
  if errcode is not None:
    return (errcode, msg)

  # if everything is fine, we return the digest to be compared to the config
  return (None, utils.GetCertificateDigest(cert_filename=cert_file))
971 972 -def _VerifySshSetup(node_status_list, my_name, 973 pub_key_file=pathutils.SSH_PUB_KEYS):
974 """Verifies the state of the SSH key files. 975 976 @type node_status_list: list of tuples 977 @param node_status_list: list of nodes of the cluster associated with a 978 couple of flags: (uuid, name, is_master_candidate, 979 is_potential_master_candidate, online) 980 @type my_name: str 981 @param my_name: name of this node 982 @type pub_key_file: str 983 @param pub_key_file: filename of the public key file 984 985 """ 986 if node_status_list is None: 987 return ["No node list to check against the pub_key_file received."] 988 989 my_status_list = [(my_uuid, name, mc, pot_mc, online) for 990 (my_uuid, name, mc, pot_mc, online) 991 in node_status_list if name == my_name] 992 if len(my_status_list) == 0: 993 return ["Cannot find node information for node '%s'." % my_name] 994 (my_uuid, _, _, potential_master_candidate, online) = \ 995 my_status_list[0] 996 997 result = [] 998 999 if not os.path.exists(pub_key_file): 1000 result.append("The public key file '%s' does not exist. Consider running" 1001 " 'gnt-cluster renew-crypto --new-ssh-keys" 1002 " [--no-ssh-key-check]' to fix this." % pub_key_file) 1003 return result 1004 1005 pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list] 1006 offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list 1007 if not online] 1008 pub_keys = ssh.QueryPubKeyFile(None) 1009 1010 if potential_master_candidate: 1011 # Check that the set of potential master candidates matches the 1012 # public key file 1013 pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes) 1014 pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes) 1015 missing_uuids = set([]) 1016 if pub_uuids_set != pot_mc_uuids_set: 1017 unknown_uuids = pub_uuids_set - pot_mc_uuids_set 1018 if unknown_uuids: 1019 result.append("The following node UUIDs are listed in the public key" 1020 " file on node '%s', but are not potential master" 1021 " candidates: %s." 1022 % (my_name, ", ".join(list(unknown_uuids)))) 1023 missing_uuids = pot_mc_uuids_set - pub_uuids_set 1024 if missing_uuids: 1025 result.append("The following node UUIDs of potential master candidates" 1026 " are missing in the public key file on node %s: %s." 1027 % (my_name, ", ".join(list(missing_uuids)))) 1028 1029 (_, key_files) = \ 1030 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1031 (_, dsa_pub_key_filename) = key_files[constants.SSHK_DSA] 1032 1033 my_keys = pub_keys[my_uuid] 1034 1035 dsa_pub_key = utils.ReadFile(dsa_pub_key_filename) 1036 if dsa_pub_key.strip() not in my_keys: 1037 result.append("The dsa key of node %s does not match this node's key" 1038 " in the pub key file." % (my_name)) 1039 if len(my_keys) != 1: 1040 result.append("There is more than one key for node %s in the public key" 1041 " file." % my_name) 1042 else: 1043 if len(pub_keys.keys()) > 0: 1044 result.append("The public key file of node '%s' is not empty, although" 1045 " the node is not a potential master candidate." 1046 % my_name) 1047 1048 # Check that all master candidate keys are in the authorized_keys file 1049 (auth_key_file, _) = \ 1050 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1051 for (uuid, name, mc, _, online) in node_status_list: 1052 if not online: 1053 continue 1054 if uuid in missing_uuids: 1055 continue 1056 if mc: 1057 for key in pub_keys[uuid]: 1058 if not ssh.HasAuthorizedKey(auth_key_file, key): 1059 result.append("A SSH key of master candidate '%s' (UUID: '%s') is" 1060 " not in the 'authorized_keys' file of node '%s'." 
1061 % (name, uuid, my_name)) 1062 else: 1063 for key in pub_keys[uuid]: 1064 if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key): 1065 result.append("A SSH key of normal node '%s' (UUID: '%s') is in the" 1066 " 'authorized_keys' file of node '%s'." 1067 % (name, uuid, my_name)) 1068 if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key): 1069 result.append("A SSH key of normal node '%s' (UUID: '%s') is not" 1070 " in the 'authorized_keys' file of itself." 1071 % (my_name, uuid)) 1072 1073 return result
1074
1075 1076 -def _VerifySshClutter(node_status_list, my_name):
1077 """Verifies that the 'authorized_keys' files are not cluttered up. 1078 1079 @type node_status_list: list of tuples 1080 @param node_status_list: list of nodes of the cluster associated with a 1081 couple of flags: (uuid, name, is_master_candidate, 1082 is_potential_master_candidate, online) 1083 @type my_name: str 1084 @param my_name: name of this node 1085 1086 """ 1087 result = [] 1088 (auth_key_file, _) = \ 1089 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1090 node_names = [name for (_, name, _, _) in node_status_list] 1091 multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names) 1092 if multiple_occurrences: 1093 msg = "There are hosts which have more than one SSH key stored for the" \ 1094 " same user in the 'authorized_keys' file of node %s. This can be" \ 1095 " due to an unsuccessful operation which cluttered up the" \ 1096 " 'authorized_keys' file. We recommend to clean this up manually. " \ 1097 % my_name 1098 for host, occ in multiple_occurrences.items(): 1099 msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ)) 1100 result.append(msg) 1101 1102 return result
1103


def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @type node_groups: a dict of strings
  @param node_groups: node _names_ mapped to their group uuids (it's enough to
      have only those nodes that are in `what["nodelist"]`)
  @type groups_cfg: a dict of dict of strings
  @param groups_cfg: a dictionary mapping group uuids to their configuration
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])

  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_CLIENT_CERT in what:
    result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()

  if constants.NV_SSH_SETUP in what:
    result[constants.NV_SSH_SETUP] = \
      _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name)
    if constants.NV_SSH_CLUTTER in what:
      result[constants.NV_SSH_CLUTTER] = \
        _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name)

  if constants.NV_NODELIST in what:
    (nodes, bynode, mcs) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes
    val = {}
    for node in nodes:
      params = groups_cfg.get(node_groups.get(node))
      ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
      logging.debug("Ssh port %s (None = default) for node %s",
                    str(ssh_port), node)

      # We only test if master candidates can communicate to other nodes.
      # We cannot test if normal nodes cannot communicate with other nodes,
      # because the administrator might have installed additional SSH keys,
      # over which Ganeti has no power.
      if my_name in mcs:
        success, message = \
          _GetSshRunner(cluster_name).VerifyNodeHostname(node, ssh_port)
        if not success:
          val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of the function)
    master_name, master_ip = what[constants.NV_MASTERIP]
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stating out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesystem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result


def GetCryptoTokens(token_requests):
  """Perform actions on the node's cryptographic tokens.

  Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
  for 'ssl'. Action 'get' returns the digest of the public client ssl
  certificate. Action 'create' creates a new client certificate and private
  key and also returns the digest of the certificate. The third parameter of
  a token request is a dictionary of optional parameters for the actions; so
  far only the filename is supported.

  @type token_requests: list of tuples of (string, string, dict), where the
    first string is in constants.CRYPTO_TYPES, the second in
    constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
    to string.
  @param token_requests: list of requests of cryptographic tokens and actions
    to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token

  """
  tokens = []
  for (token_type, action, _) in token_requests:
    if token_type not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   token_type)
    if action not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   action)
    if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
      tokens.append((token_type,
                     utils.GetCertificateDigest()))
  return tokens
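

# Illustrative sketch (not part of the original module): requesting the
# digest of this node's client SSL certificate. Assumes
# constants.CRYPTO_TYPE_SSL_DIGEST and constants.CRYPTO_ACTION_GET name the
# token type and the 'get' action.
def _ExampleGetSslDigest():
  requests = [(constants.CRYPTO_TYPE_SSL_DIGEST,
               constants.CRYPTO_ACTION_GET, {})]
  # Returns [(constants.CRYPTO_TYPE_SSL_DIGEST, <digest string>)]
  return GetCryptoTokens(requests)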


def EnsureDaemon(daemon_name, run):
  """Ensures the given daemon is running or stopped.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: 'True' if daemon successfully started/stopped,
    'False' otherwise

  """
  allowed_daemons = [constants.KVMD]

  if daemon_name not in allowed_daemons:
    fn = lambda _: False
  elif run:
    fn = utils.EnsureDaemon
  else:
    fn = utils.StopDaemon

  return fn(daemon_name)
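

# Illustrative sketch (not part of the original module): only whitelisted
# daemons may be toggled; any other daemon name reports failure instead of
# acting.
def _ExampleEnsureKvmd():
  started = EnsureDaemon(constants.KVMD, True)   # start the KVM daemon
  stopped = EnsureDaemon(constants.KVMD, False)  # stop it again
  refused = EnsureDaemon(constants.NODED, True)  # not whitelisted -> False
  return (started, stopped, refused)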


def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
  """Fills in the common fields of an SSH update request.

  The node daemon certificate and the cluster name are read and stored in
  C{data} under the respective C{constants.SSHS_*} keys.

  """
  (_, noded_cert) = \
    utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file))
  data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert

  cluster_name = ssconf_store.GetClusterName()
  data[constants.SSHS_CLUSTER_NAME] = cluster_name


def AddNodeSshKey(node_uuid, node_name,
                  potential_master_candidates,
                  to_authorized_keys=False,
                  to_public_keys=False,
                  get_public_keys=False,
                  pub_key_file=pathutils.SSH_PUB_KEYS,
                  ssconf_store=None,
                  noded_cert_file=pathutils.NODED_CERT_FILE,
                  run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_uuid: str
  @param node_uuid: the UUID of the node whose key is added
  @type node_name: str
  @param node_name: the name of the node whose key is added
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file
  @type to_authorized_keys: boolean
  @param to_authorized_keys: whether the key should be added to the
    C{authorized_keys} file of all nodes
  @type to_public_keys: boolean
  @param to_public_keys: whether the keys should be added to the public key
    file
  @type get_public_keys: boolean
  @param get_public_keys: whether the node should add the cluster's public
    keys to its C{ganeti_pub_keys} file

  """
  # ensure that at least one of those flags is true, as the function would
  # not do anything otherwise
  assert (to_authorized_keys or to_public_keys or get_public_keys)

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  # Check and fix sanity of key file
  keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file)
  keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)

  if (not keys_by_name or node_name not in keys_by_name) \
      and (not keys_by_uuid or node_uuid not in keys_by_uuid):
    raise errors.SshUpdateError(
      "No keys found for the new node '%s' (UUID %s) in the list of public"
      " SSH keys, neither for the name or the UUID" % (node_name, node_uuid))
  else:
    if node_name in keys_by_name:
      keys_by_uuid = {}
      # Replace the name by UUID in the file as the name should only be used
      # temporarily
      ssh.ReplaceNameByUuid(node_uuid, node_name,
                            error_fn=errors.SshUpdateError,
                            key_file=pub_key_file)
      keys_by_uuid[node_uuid] = keys_by_name[node_name]

  # Update the master node's key files
  if to_authorized_keys:
    (auth_key_file, _) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                          dircheck=False)
    ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_uuid])

  base_data = {}
  _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
  cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

  ssh_port_map = ssconf_store.GetSshPortMap()

  # Update the target node itself
  logging.debug("Updating SSH key files of target node '%s'.", node_name)
  if get_public_keys:
    node_data = {}
    _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
    all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
    node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_OVERRIDE, all_keys)

    try:
      utils.RetryByNumberOfTimes(
        constants.SSHS_MAX_RETRIES,
        errors.SshUpdateError,
        run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE,
        ssh_port_map.get(node_name), node_data,
        debug=False, verbose=False, use_cluster_key=False,
        ask_key=False, strict_host_check=False)
    except errors.SshUpdateError as e:
      # Clean up the master's public key file if adding key fails
      if to_public_keys:
        ssh.RemovePublicKey(node_uuid)
      raise e

  # Update all nodes except master and the target node
  if to_authorized_keys:
    base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
      (constants.SSHS_ADD, keys_by_uuid)

  pot_mc_data = copy.deepcopy(base_data)
  if to_public_keys:
    pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid)

  all_nodes = ssconf_store.GetNodeList()
  master_node = ssconf_store.GetMasterNode()
  online_nodes = ssconf_store.GetOnlineNodeList()

  node_errors = []
  for node in all_nodes:
    if node == master_node:
      logging.debug("Skipping master node '%s'.", master_node)
      continue
    if node not in online_nodes:
      logging.debug("Skipping offline node '%s'.", node)
      continue
    if node in potential_master_candidates:
      logging.debug("Updating SSH key files of node '%s'.", node)
      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
          ssh_port_map.get(node), pot_mc_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as last_exception:
        error_msg = ("When adding the key of node '%s', updating SSH key"
                     " files of node '%s' failed after %s retries."
                     " Not trying again. Last error was: %s." %
                     (node_name, node, constants.SSHS_MAX_RETRIES,
                      last_exception))
        node_errors.append((node, error_msg))
        # We only log the error and don't throw an exception, because
        # one unreachable node shall not abort the entire procedure.
        logging.error(error_msg)
    else:
      if to_authorized_keys:
        run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                   ssh_port_map.get(node), base_data,
                   debug=False, verbose=False, use_cluster_key=False,
                   ask_key=False, strict_host_check=False)

  return node_errors
1570 1571 -def RemoveNodeSshKey(node_uuid, node_name, 1572 master_candidate_uuids, 1573 potential_master_candidates, 1574 master_uuid=None, 1575 keys_to_remove=None, 1576 from_authorized_keys=False, 1577 from_public_keys=False, 1578 clear_authorized_keys=False, 1579 clear_public_keys=False, 1580 pub_key_file=pathutils.SSH_PUB_KEYS, 1581 ssconf_store=None, 1582 noded_cert_file=pathutils.NODED_CERT_FILE, 1583 readd=False, 1584 run_cmd_fn=ssh.RunSshCmdWithStdin):
1585 """Removes the node's SSH keys from the key files and distributes those. 1586 1587 Note that at least one of the flags C{from_authorized_keys}, 1588 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys} 1589 has to be set to C{True} for the function to perform any action at all. 1590 Not doing so will trigger an assertion in the function. 1591 1592 @type node_uuid: str 1593 @param node_uuid: UUID of the node whose key is removed 1594 @type node_name: str 1595 @param node_name: name of the node whose key is remove 1596 @type master_candidate_uuids: list of str 1597 @param master_candidate_uuids: list of UUIDs of the current master candidates 1598 @type potential_master_candidates: list of str 1599 @param potential_master_candidates: list of names of potential master 1600 candidates 1601 @type keys_to_remove: dict of str to list of str 1602 @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys 1603 to be removed. This list is supposed to be used only if the keys are not 1604 in the public keys file. This is for example the case when removing a 1605 master node's key. 1606 @type from_authorized_keys: boolean 1607 @param from_authorized_keys: whether or not the key should be removed 1608 from the C{authorized_keys} file 1609 @type from_public_keys: boolean 1610 @param from_public_keys: whether or not the key should be remove from 1611 the C{ganeti_pub_keys} file 1612 @type clear_authorized_keys: boolean 1613 @param clear_authorized_keys: whether or not the C{authorized_keys} file 1614 should be cleared on the node whose keys are removed 1615 @type clear_public_keys: boolean 1616 @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file 1617 @type readd: boolean 1618 @param readd: whether this is called during a readd operation. 1619 @rtype: list of string 1620 @returns: list of feedback messages 1621 1622 """ 1623 # Non-disruptive error messages, list of (node, msg) pairs 1624 result_msgs = [] 1625 1626 # Make sure at least one of these flags is true. 1627 if not (from_authorized_keys or from_public_keys or clear_authorized_keys 1628 or clear_public_keys): 1629 raise errors.SshUpdateError("No removal from any key file was requested.") 1630 1631 if not ssconf_store: 1632 ssconf_store = ssconf.SimpleStore() 1633 1634 master_node = ssconf_store.GetMasterNode() 1635 ssh_port_map = ssconf_store.GetSshPortMap() 1636 1637 if from_authorized_keys or from_public_keys: 1638 if keys_to_remove: 1639 keys = keys_to_remove 1640 else: 1641 keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1642 if (not keys or node_uuid not in keys) and not readd: 1643 raise errors.SshUpdateError("Node '%s' not found in the list of public" 1644 " SSH keys. It seems someone tries to" 1645 " remove a key from outside the cluster!" 1646 % node_uuid) 1647 # During an upgrade all nodes have the master key. 
In this case we 1648 # should not remove it to avoid accidentally shutting down cluster 1649 # SSH communication 1650 master_keys = None 1651 if master_uuid: 1652 master_keys = ssh.QueryPubKeyFile([master_uuid], key_file=pub_key_file) 1653 for master_key in master_keys: 1654 if master_key in keys[node_uuid]: 1655 keys[node_uuid].remove(master_key) 1656 1657 if node_name == master_node and not keys_to_remove: 1658 raise errors.SshUpdateError("Cannot remove the master node's keys.") 1659 1660 if node_uuid in keys: 1661 base_data = {} 1662 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store) 1663 cluster_name = base_data[constants.SSHS_CLUSTER_NAME] 1664 1665 if from_authorized_keys: 1666 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1667 (constants.SSHS_REMOVE, keys) 1668 (auth_key_file, _) = \ 1669 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, 1670 dircheck=False) 1671 ssh.RemoveAuthorizedKeys(auth_key_file, keys[node_uuid]) 1672 1673 pot_mc_data = copy.deepcopy(base_data) 1674 1675 if from_public_keys: 1676 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1677 (constants.SSHS_REMOVE, keys) 1678 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 1679 1680 all_nodes = ssconf_store.GetNodeList() 1681 online_nodes = ssconf_store.GetOnlineNodeList() 1682 logging.debug("Removing key of node '%s' from all nodes but itself and" 1683 " master.", node_name) 1684 for node in all_nodes: 1685 if node == master_node: 1686 logging.debug("Skipping master node '%s'.", master_node) 1687 continue 1688 if node not in online_nodes: 1689 logging.debug("Skipping offline node '%s'.", node) 1690 continue 1691 ssh_port = ssh_port_map.get(node) 1692 if not ssh_port: 1693 raise errors.OpExecError("No SSH port information available for" 1694 " node '%s', map: %s." % 1695 (node, ssh_port_map)) 1696 error_msg_final = ("When removing the key of node '%s', updating the" 1697 " SSH key files of node '%s' failed. 
Last error" 1698 " was: %s.") 1699 if node in potential_master_candidates: 1700 logging.debug("Updating key setup of potential master candidate node" 1701 " %s.", node) 1702 try: 1703 utils.RetryByNumberOfTimes( 1704 constants.SSHS_MAX_RETRIES, 1705 errors.SshUpdateError, 1706 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1707 ssh_port, pot_mc_data, 1708 debug=False, verbose=False, use_cluster_key=False, 1709 ask_key=False, strict_host_check=False) 1710 except errors.SshUpdateError as last_exception: 1711 error_msg = error_msg_final % ( 1712 node_name, node, last_exception) 1713 result_msgs.append((node, error_msg)) 1714 logging.error(error_msg) 1715 1716 else: 1717 if from_authorized_keys: 1718 logging.debug("Updating key setup of normal node %s.", node) 1719 try: 1720 utils.RetryByNumberOfTimes( 1721 constants.SSHS_MAX_RETRIES, 1722 errors.SshUpdateError, 1723 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE, 1724 ssh_port, base_data, 1725 debug=False, verbose=False, use_cluster_key=False, 1726 ask_key=False, strict_host_check=False) 1727 except errors.SshUpdateError as last_exception: 1728 error_msg = error_msg_final % ( 1729 node_name, node, last_exception) 1730 result_msgs.append((node, error_msg)) 1731 logging.error(error_msg) 1732 1733 if clear_authorized_keys or from_public_keys or clear_public_keys: 1734 data = {} 1735 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1736 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1737 ssh_port = ssh_port_map.get(node_name) 1738 if not ssh_port: 1739 raise errors.OpExecError("No SSH port information available for" 1740 " node '%s', which is leaving the cluster.") 1741 1742 if clear_authorized_keys: 1743 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore, 1744 # we have to specify exactly which keys to clear to leave keys untouched 1745 # that were not added by Ganeti. 1746 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids 1747 if uuid != node_uuid] 1748 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids, 1749 key_file=pub_key_file) 1750 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \ 1751 (constants.SSHS_REMOVE, candidate_keys) 1752 1753 if clear_public_keys: 1754 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1755 (constants.SSHS_CLEAR, {}) 1756 elif from_public_keys: 1757 # Since clearing the public keys subsumes removing just a single key, 1758 # we only do it of clear_public_keys is 'False'. 1759 1760 if keys[node_uuid]: 1761 data[constants.SSHS_SSH_PUBLIC_KEYS] = \ 1762 (constants.SSHS_REMOVE, keys) 1763 1764 # If we have no changes to any keyfile, just return 1765 if not (constants.SSHS_SSH_PUBLIC_KEYS in data or 1766 constants.SSHS_SSH_AUTHORIZED_KEYS in data): 1767 return 1768 1769 logging.debug("Updating SSH key setup of target node '%s'.", node_name) 1770 try: 1771 utils.RetryByNumberOfTimes( 1772 constants.SSHS_MAX_RETRIES, 1773 errors.SshUpdateError, 1774 run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE, 1775 ssh_port, data, 1776 debug=False, verbose=False, use_cluster_key=False, 1777 ask_key=False, strict_host_check=False) 1778 except errors.SshUpdateError as last_exception: 1779 result_msgs.append( 1780 (node_name, 1781 ("Removing SSH keys from node '%s' failed." 1782 " This can happen when the node is already unreachable." 1783 " Error: %s" % (node_name, last_exception)))) 1784 1785 return result_msgs
1786
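Editor's note: a corresponding hedged sketch for L{RemoveNodeSshKey} above; at least one of the four C{from_*}/C{clear_*} flags must be C{True}, otherwise an C{SshUpdateError} is raised. All concrete values are assumptions:

  # Illustrative only -- fully revoke a departing node's key material.
  msgs = RemoveNodeSshKey(
    "0db7a686-example-uuid", "node3.example.com",
    ["mc-uuid-1", "mc-uuid-2"],    # master_candidate_uuids (hypothetical)
    ["node1.example.com"],         # potential_master_candidates
    from_authorized_keys=True,     # revoke login access cluster-wide
    from_public_keys=True,         # drop the key from ganeti_pub_keys
    clear_authorized_keys=True,    # strip Ganeti-managed keys on the node
    clear_public_keys=True)        # wipe the node's own ganeti_pub_keys
  for node, msg in msgs:           # again only non-fatal messages
    logging.warning("%s: %s", node, msg)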
1787 1788 -def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, 1789 pub_key_file=pathutils.SSH_PUB_KEYS, 1790 ssconf_store=None, 1791 noded_cert_file=pathutils.NODED_CERT_FILE, 1792 run_cmd_fn=ssh.RunSshCmdWithStdin, 1793 suffix=""):
1794 """Generates the root SSH key pair on the node. 1795 1796 @type node_uuid: str 1797 @param node_uuid: UUID of the node whose key is removed 1798 @type node_name: str 1799 @param node_name: name of the node whose key is remove 1800 @type ssh_port_map: dict of str to int 1801 @param ssh_port_map: mapping of node names to their SSH port 1802 1803 """ 1804 if not ssconf_store: 1805 ssconf_store = ssconf.SimpleStore() 1806 1807 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1808 if not keys_by_uuid or node_uuid not in keys_by_uuid: 1809 raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to" 1810 " be regenerated is not registered in the" 1811 " public keys file." % (node_name, node_uuid)) 1812 1813 data = {} 1814 _InitSshUpdateData(data, noded_cert_file, ssconf_store) 1815 cluster_name = data[constants.SSHS_CLUSTER_NAME] 1816 data[constants.SSHS_GENERATE] = {constants.SSHS_SUFFIX: suffix} 1817 1818 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE, 1819 ssh_port_map.get(node_name), data, 1820 debug=False, verbose=False, use_cluster_key=False, 1821 ask_key=False, strict_host_check=False)
1822
1823 1824 -def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
1825 master_node_uuids = [node_uuid for (node_uuid, node_name) 1826 in node_uuid_name_map 1827 if node_name == master_node_name] 1828 if len(master_node_uuids) != 1: 1829 raise errors.SshUpdateError("No (unique) master UUID found. Master node" 1830 " name: '%s', Master UUID: '%s'" % 1831 (master_node_name, master_node_uuids)) 1832 return master_node_uuids[0]
1833
1834 1835 -def _GetOldMasterKeys(master_node_uuid, pub_key_file):
1836 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid], 1837 key_file=pub_key_file) 1838 if not old_master_keys_by_uuid: 1839 raise errors.SshUpdateError("No public key of the master node (UUID '%s')" 1840 " found, not generating a new key." 1841 % master_node_uuid) 1842 return old_master_keys_by_uuid
1843
1844 1845 -def _GetNewMasterKey(root_keyfiles, master_node_uuid):
1846 new_master_keys = [] 1847 for (_, (_, public_key_file)) in root_keyfiles.items(): 1848 public_key_dir = os.path.dirname(public_key_file) 1849 public_key_file_tmp_filename = \ 1850 os.path.splitext(os.path.basename(public_key_file))[0] \ 1851 + constants.SSHS_MASTER_SUFFIX + ".pub" 1852 public_key_path_tmp = os.path.join(public_key_dir, 1853 public_key_file_tmp_filename) 1854 if os.path.exists(public_key_path_tmp): 1855 # for some key types, there might not be any keys 1856 key = utils.ReadFile(public_key_path_tmp) 1857 new_master_keys.append(key) 1858 if not new_master_keys: 1859 raise errors.SshUpdateError("Cannot find any type of temporary SSH key.") 1860 return {master_node_uuid: new_master_keys}
1861
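Editor's note: the temporary-filename arithmetic in L{_GetNewMasterKey} above can be hard to read; a small standalone sketch, where the key path is an assumption and "_master_tmp" merely stands in for whatever C{constants.SSHS_MASTER_SUFFIX} is defined as:

  import os.path

  public_key_file = "/root/.ssh/id_rsa.pub"  # assumed key location
  suffix = "_master_tmp"                     # stand-in for SSHS_MASTER_SUFFIX
  tmp = os.path.splitext(os.path.basename(public_key_file))[0] + suffix + ".pub"
  print(os.path.join(os.path.dirname(public_key_file), tmp))
  # -> /root/.ssh/id_rsa_master_tmp.pub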
1862 1863 -def _ReplaceMasterKeyOnMaster(root_keyfiles):
1864 number_of_moves = 0 1865 for (_, (private_key_file, public_key_file)) in root_keyfiles.items(): 1866 key_dir = os.path.dirname(public_key_file) 1867 private_key_file_tmp = \ 1868 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX 1869 public_key_file_tmp = private_key_file_tmp + ".pub" 1870 private_key_path_tmp = os.path.join(key_dir, 1871 private_key_file_tmp) 1872 public_key_path_tmp = os.path.join(key_dir, 1873 public_key_file_tmp) 1874 if os.path.exists(public_key_file): 1875 utils.CreateBackup(public_key_file) 1876 utils.RemoveFile(public_key_file) 1877 if os.path.exists(private_key_file): 1878 utils.CreateBackup(private_key_file) 1879 utils.RemoveFile(private_key_file) 1880 if os.path.exists(public_key_path_tmp) and \ 1881 os.path.exists(private_key_path_tmp): 1882 # for some key types, there might not be any keys 1883 shutil.move(public_key_path_tmp, public_key_file) 1884 shutil.move(private_key_path_tmp, private_key_file) 1885 number_of_moves += 1 1886 if not number_of_moves: 1887 raise errors.SshUpdateError("Could not move at least one master SSH key.")
1888
1889 1890 -def RenewSshKeys(node_uuids, node_names, master_candidate_uuids, 1891 potential_master_candidates, 1892 pub_key_file=pathutils.SSH_PUB_KEYS, 1893 ssconf_store=None, 1894 noded_cert_file=pathutils.NODED_CERT_FILE, 1895 run_cmd_fn=ssh.RunSshCmdWithStdin):
1896 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys. 1897 1898 @type node_uuids: list of str 1899 @param node_uuids: list of node UUIDs whose keys should be renewed 1900 @type node_names: list of str 1901 @param node_names: list of node names whose keys should be removed. This list 1902 should match the C{node_uuids} parameter 1903 @type master_candidate_uuids: list of str 1904 @param master_candidate_uuids: list of UUIDs of master candidates or 1905 master node 1906 @type pub_key_file: str 1907 @param pub_key_file: file path of the the public key file 1908 @type noded_cert_file: str 1909 @param noded_cert_file: path of the noded SSL certificate file 1910 @type run_cmd_fn: function 1911 @param run_cmd_fn: function to run commands on remote nodes via SSH 1912 @raises ProgrammerError: if node_uuids and node_names don't match; 1913 SshUpdateError if a node's key is missing from the public key file, 1914 if a node's new SSH key could not be fetched from it, if there is 1915 none or more than one entry in the public key list for the master 1916 node. 1917 1918 """ 1919 if not ssconf_store: 1920 ssconf_store = ssconf.SimpleStore() 1921 cluster_name = ssconf_store.GetClusterName() 1922 1923 if not len(node_uuids) == len(node_names): 1924 raise errors.ProgrammerError("List of nodes UUIDs and node names" 1925 " does not match in length.") 1926 1927 (_, root_keyfiles) = \ 1928 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False) 1929 (_, dsa_pub_keyfile) = root_keyfiles[constants.SSHK_DSA] 1930 old_master_key = utils.ReadFile(dsa_pub_keyfile) 1931 1932 node_uuid_name_map = zip(node_uuids, node_names) 1933 1934 master_node_name = ssconf_store.GetMasterNode() 1935 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name) 1936 ssh_port_map = ssconf_store.GetSshPortMap() 1937 # List of all node errors that happened, but which did not abort the 1938 # procedure as a whole. It is important that this is a list to have a 1939 # somewhat chronological history of events. 1940 all_node_errors = [] 1941 1942 # process non-master nodes 1943 for node_uuid, node_name in node_uuid_name_map: 1944 if node_name == master_node_name: 1945 continue 1946 master_candidate = node_uuid in master_candidate_uuids 1947 potential_master_candidate = node_name in potential_master_candidates 1948 1949 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file) 1950 if not keys_by_uuid: 1951 raise errors.SshUpdateError("No public key of node %s (UUID %s) found," 1952 " not generating a new key." 1953 % (node_name, node_uuid)) 1954 1955 if master_candidate: 1956 logging.debug("Fetching old SSH key from node '%s'.", node_name) 1957 old_pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile, 1958 node_name, cluster_name, 1959 ssh_port_map[node_name], 1960 False, # ask_key 1961 False) # key_check 1962 if old_pub_key != old_master_key: 1963 # If we are already in a multi-key setup (that is past Ganeti 2.12), 1964 # we can safely remove the old key of the node. Otherwise, we cannot 1965 # remove that node's key, because it is also the master node's key 1966 # and that would terminate all communication from the master to the 1967 # node. 
1968 logging.debug("Removing SSH key of node '%s'.", node_name) 1969 node_errors = RemoveNodeSshKey( 1970 node_uuid, node_name, master_candidate_uuids, 1971 potential_master_candidates, 1972 master_uuid=master_node_uuid, from_authorized_keys=master_candidate, 1973 from_public_keys=False, clear_authorized_keys=False, 1974 clear_public_keys=False) 1975 if node_errors: 1976 all_node_errors = all_node_errors + node_errors 1977 else: 1978 logging.debug("Old key of node '%s' is the same as the current master" 1979 " key. Not deleting that key on the node.", node_name) 1980 1981 logging.debug("Generating new SSH key for node '%s'.", node_name) 1982 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, 1983 pub_key_file=pub_key_file, 1984 ssconf_store=ssconf_store, 1985 noded_cert_file=noded_cert_file, 1986 run_cmd_fn=run_cmd_fn) 1987 1988 try: 1989 logging.debug("Fetching newly created SSH key from node '%s'.", node_name) 1990 pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile, 1991 node_name, cluster_name, 1992 ssh_port_map[node_name], 1993 False, # ask_key 1994 False) # key_check 1995 except: 1996 raise errors.SshUpdateError("Could not fetch key of node %s" 1997 " (UUID %s)" % (node_name, node_uuid)) 1998 1999 if potential_master_candidate: 2000 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file) 2001 ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file) 2002 2003 logging.debug("Add ssh key of node '%s'.", node_name) 2004 node_errors = AddNodeSshKey( 2005 node_uuid, node_name, potential_master_candidates, 2006 to_authorized_keys=master_candidate, 2007 to_public_keys=potential_master_candidate, 2008 get_public_keys=True, 2009 pub_key_file=pub_key_file, ssconf_store=ssconf_store, 2010 noded_cert_file=noded_cert_file, 2011 run_cmd_fn=run_cmd_fn) 2012 if node_errors: 2013 all_node_errors = all_node_errors + node_errors 2014 2015 # Renewing the master node's key 2016 2017 # Preserve the old keys for now 2018 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, pub_key_file) 2019 2020 # Generate a new master key with a suffix, don't touch the old one for now 2021 logging.debug("Generate new ssh key of master.") 2022 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map, 2023 pub_key_file=pub_key_file, 2024 ssconf_store=ssconf_store, 2025 noded_cert_file=noded_cert_file, 2026 run_cmd_fn=run_cmd_fn, 2027 suffix=constants.SSHS_MASTER_SUFFIX) 2028 # Read newly created master key 2029 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid) 2030 2031 # Replace master key in the master nodes' public key file 2032 ssh.RemovePublicKey(master_node_uuid, key_file=pub_key_file) 2033 for pub_key in new_master_key_dict[master_node_uuid]: 2034 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=pub_key_file) 2035 2036 # Add new master key to all node's public and authorized keys 2037 logging.debug("Add new master key to all nodes.") 2038 node_errors = AddNodeSshKey( 2039 master_node_uuid, master_node_name, potential_master_candidates, 2040 to_authorized_keys=True, to_public_keys=True, 2041 get_public_keys=False, pub_key_file=pub_key_file, 2042 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file, 2043 run_cmd_fn=run_cmd_fn) 2044 if node_errors: 2045 all_node_errors = all_node_errors + node_errors 2046 2047 # Remove the old key file and rename the new key to the non-temporary filename 2048 _ReplaceMasterKeyOnMaster(root_keyfiles) 2049 2050 # Remove old key from authorized keys 2051 (auth_key_file, _) = \ 2052 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, 
mkdir=False, dircheck=False) 2053 ssh.RemoveAuthorizedKeys(auth_key_file, 2054 old_master_keys_by_uuid[master_node_uuid]) 2055 2056 # Remove the old key from all node's authorized keys file 2057 logging.debug("Remove the old master key from all nodes.") 2058 node_errors = RemoveNodeSshKey( 2059 master_node_uuid, master_node_name, master_candidate_uuids, 2060 potential_master_candidates, 2061 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True, 2062 from_public_keys=False, clear_authorized_keys=False, 2063 clear_public_keys=False) 2064 if node_errors: 2065 all_node_errors = all_node_errors + node_errors 2066 2067 return all_node_errors
2068
2069 2070 -def GetBlockDevSizes(devices):
2071 """Return the size of the given block devices 2072 2073 @type devices: list 2074 @param devices: list of block device nodes to query 2075 @rtype: dict 2076 @return: 2077 dictionary of all block devices under /dev (key). The value is their 2078 size in MiB. 2079 2080 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124} 2081 2082 """ 2083 DEV_PREFIX = "/dev/" 2084 blockdevs = {} 2085 2086 for devpath in devices: 2087 if not utils.IsBelowDir(DEV_PREFIX, devpath): 2088 continue 2089 2090 try: 2091 st = os.stat(devpath) 2092 except EnvironmentError, err: 2093 logging.warning("Error stat()'ing device %s: %s", devpath, str(err)) 2094 continue 2095 2096 if stat.S_ISBLK(st.st_mode): 2097 result = utils.RunCmd(["blockdev", "--getsize64", devpath]) 2098 if result.failed: 2099 # We don't want to fail, just do not list this device as available 2100 logging.warning("Cannot get size for block device %s", devpath) 2101 continue 2102 2103 size = int(result.stdout) / (1024 * 1024) 2104 blockdevs[devpath] = size 2105 return blockdevs
2106
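Editor's note: an illustrative call to L{GetBlockDevSizes} (paths and sizes are assumptions). Anything outside C{/dev} or not a block device is silently skipped, as is any device whose size probe fails:

  sizes = GetBlockDevSizes(["/dev/sda",
                            "/dev/disk/by-uuid/1234-abcd",
                            "/etc/hosts"])  # ignored: not below /dev
  # -> e.g. {"/dev/sda": 476940, "/dev/disk/by-uuid/1234-abcd": 10240}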
2107 2108 -def GetVolumeList(vg_names):
2109 """Compute list of logical volumes and their size. 2110 2111 @type vg_names: list 2112 @param vg_names: the volume groups whose LVs we should list, or 2113 empty for all volume groups 2114 @rtype: dict 2115 @return: 2116 dictionary of all partions (key) with value being a tuple of 2117 their size (in MiB), inactive and online status:: 2118 2119 {'xenvg/test1': ('20.06', True, True)} 2120 2121 in case of errors, a string is returned with the error 2122 details. 2123 2124 """ 2125 lvs = {} 2126 sep = "|" 2127 if not vg_names: 2128 vg_names = [] 2129 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2130 "--separator=%s" % sep, 2131 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names) 2132 if result.failed: 2133 _Fail("Failed to list logical volumes, lvs output: %s", result.output) 2134 2135 for line in result.stdout.splitlines(): 2136 line = line.strip() 2137 match = _LVSLINE_REGEX.match(line) 2138 if not match: 2139 logging.error("Invalid line returned from lvs output: '%s'", line) 2140 continue 2141 vg_name, name, size, attr = match.groups() 2142 inactive = attr[4] == "-" 2143 online = attr[5] == "o" 2144 virtual = attr[0] == "v" 2145 if virtual: 2146 # we don't want to report such volumes as existing, since they 2147 # don't really hold data 2148 continue 2149 lvs[vg_name + "/" + name] = (size, inactive, online) 2150 2151 return lvs
2152
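Editor's note: C{_LVSLINE_REGEX} is defined earlier in the module; to make the attribute-bit logic above concrete, a self-contained sketch with an equivalent (assumed) pattern and a sample C{lvs} line:

  import re

  lvs_re = re.compile(r"^\s*([^|]+)\|([^|]+)\|([^|]+)\|([^|]+)$")  # assumed shape
  line = "xenvg|test1|20.06|-wi-ao----"  # sample "lvs" output line
  vg_name, name, size, attr = lvs_re.match(line).groups()
  inactive = attr[4] == "-"  # 5th lv_attr character: active state
  online = attr[5] == "o"    # 6th lv_attr character: device open
  print({vg_name + "/" + name: (size, inactive, online)})
  # -> {'xenvg/test1': ('20.06', False, True)}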
2153 2154 -def ListVolumeGroups():
2155 """List the volume groups and their size. 2156 2157 @rtype: dict 2158 @return: dictionary with keys volume name and values the 2159 size of the volume 2160 2161 """ 2162 return utils.ListVolumeGroups()
2163
2164 2165 -def NodeVolumes():
2166 """List all volumes on this node. 2167 2168 @rtype: list 2169 @return: 2170 A list of dictionaries, each having four keys: 2171 - name: the logical volume name, 2172 - size: the size of the logical volume 2173 - dev: the physical device on which the LV lives 2174 - vg: the volume group to which it belongs 2175 2176 In case of errors, we return an empty list and log the 2177 error. 2178 2179 Note that since a logical volume can live on multiple physical 2180 volumes, the resulting list might include a logical volume 2181 multiple times. 2182 2183 """ 2184 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix", 2185 "--separator=|", 2186 "--options=lv_name,lv_size,devices,vg_name"]) 2187 if result.failed: 2188 _Fail("Failed to list logical volumes, lvs output: %s", 2189 result.output) 2190 2191 def parse_dev(dev): 2192 return dev.split("(")[0]
2193 2194 def handle_dev(dev): 2195 return [parse_dev(x) for x in dev.split(",")] 2196 2197 def map_line(line): 2198 line = [v.strip() for v in line] 2199 return [{"name": line[0], "size": line[1], 2200 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])] 2201 2202 all_devs = [] 2203 for line in result.stdout.splitlines(): 2204 if line.count("|") >= 3: 2205 all_devs.extend(map_line(line.split("|"))) 2206 else: 2207 logging.warning("Strange line in the output from lvs: '%s'", line) 2208 return all_devs 2209
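Editor's note: the nested helpers above reduce the C{lvs} "devices" field, e.g. C{"/dev/sda5(0),/dev/sdb1(0)"}, to plain device nodes; because an LV can span several physical volumes, one line may yield several entries. A worked sample (values are assumptions):

  sample = "test1| 20.06|/dev/sda5(0),/dev/sdb1(0)|xenvg"
  fields = [v.strip() for v in sample.split("|")]
  devs = [d.split("(")[0] for d in fields[2].split(",")]  # parse_dev/handle_dev
  print([{"name": fields[0], "size": fields[1], "dev": d, "vg": fields[3]}
         for d in devs])
  # -> two dicts: one for /dev/sda5 and one for /dev/sdb1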
2210 2211 -def BridgesExist(bridges_list):
2212 """Check if a list of bridges exist on the current node. 2213 2214 @rtype: boolean 2215 @return: C{True} if all of them exist, C{False} otherwise 2216 2217 """ 2218 missing = [] 2219 for bridge in bridges_list: 2220 if not utils.BridgeExists(bridge): 2221 missing.append(bridge) 2222 2223 if missing: 2224 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2225
2226 2227 -def GetInstanceListForHypervisor(hname, hvparams=None, 2228 get_hv_fn=hypervisor.GetHypervisor):
2229 """Provides a list of instances of the given hypervisor. 2230 2231 @type hname: string 2232 @param hname: name of the hypervisor 2233 @type hvparams: dict of strings 2234 @param hvparams: hypervisor parameters for the given hypervisor 2235 @type get_hv_fn: function 2236 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2237 name; optional parameter to increase testability 2238 2239 @rtype: list 2240 @return: a list of all running instances on the current node 2241 - instance1.example.com 2242 - instance2.example.com 2243 2244 """ 2245 try: 2246 return get_hv_fn(hname).ListInstances(hvparams=hvparams) 2247 except errors.HypervisorError, err: 2248 _Fail("Error enumerating instances (hypervisor %s): %s", 2249 hname, err, exc=True)
2250
2251 2252 -def GetInstanceList(hypervisor_list, all_hvparams=None, 2253 get_hv_fn=hypervisor.GetHypervisor):
2254 """Provides a list of instances. 2255 2256 @type hypervisor_list: list 2257 @param hypervisor_list: the list of hypervisors to query information 2258 @type all_hvparams: dict of dict of strings 2259 @param all_hvparams: a dictionary mapping hypervisor types to respective 2260 cluster-wide hypervisor parameters 2261 @type get_hv_fn: function 2262 @param get_hv_fn: function that returns a hypervisor for the given hypervisor 2263 name; optional parameter to increase testability 2264 2265 @rtype: list 2266 @return: a list of all running instances on the current node 2267 - instance1.example.com 2268 - instance2.example.com 2269 2270 """ 2271 results = [] 2272 for hname in hypervisor_list: 2273 hvparams = all_hvparams[hname] 2274 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams, 2275 get_hv_fn=get_hv_fn)) 2276 return results
2277
2278 2279 -def GetInstanceInfo(instance, hname, hvparams=None):
2280 """Gives back the information about an instance as a dictionary. 2281 2282 @type instance: string 2283 @param instance: the instance name 2284 @type hname: string 2285 @param hname: the hypervisor type of the instance 2286 @type hvparams: dict of strings 2287 @param hvparams: the instance's hvparams 2288 2289 @rtype: dict 2290 @return: dictionary with the following keys: 2291 - memory: memory size of instance (int) 2292 - state: state of instance (HvInstanceState) 2293 - time: cpu time of instance (float) 2294 - vcpus: the number of vcpus (int) 2295 2296 """ 2297 output = {} 2298 2299 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance, 2300 hvparams=hvparams) 2301 if iinfo is not None: 2302 output["memory"] = iinfo[2] 2303 output["vcpus"] = iinfo[3] 2304 output["state"] = iinfo[4] 2305 output["time"] = iinfo[5] 2306 2307 return output
2308
2309 2310 -def GetInstanceMigratable(instance):
2311 """Computes whether an instance can be migrated. 2312 2313 @type instance: L{objects.Instance} 2314 @param instance: object representing the instance to be checked. 2315 2316 @rtype: tuple 2317 @return: tuple of (result, description) where: 2318 - result: whether the instance can be migrated or not 2319 - description: a description of the issue, if relevant 2320 2321 """ 2322 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2323 iname = instance.name 2324 if iname not in hyper.ListInstances(hvparams=instance.hvparams): 2325 _Fail("Instance %s is not running", iname) 2326 2327 for idx in range(len(instance.disks_info)): 2328 link_name = _GetBlockDevSymlinkPath(iname, idx) 2329 if not os.path.islink(link_name): 2330 logging.warning("Instance %s is missing symlink %s for disk %d", 2331 iname, link_name, idx)
2332
2333 2334 -def GetAllInstancesInfo(hypervisor_list, all_hvparams):
2335 """Gather data about all instances. 2336 2337 This is the equivalent of L{GetInstanceInfo}, except that it 2338 computes data for all instances at once, thus being faster if one 2339 needs data about more than one instance. 2340 2341 @type hypervisor_list: list 2342 @param hypervisor_list: list of hypervisors to query for instance data 2343 @type all_hvparams: dict of dict of strings 2344 @param all_hvparams: mapping of hypervisor names to hvparams 2345 2346 @rtype: dict 2347 @return: dictionary of instance: data, with data having the following keys: 2348 - memory: memory size of instance (int) 2349 - state: xen state of instance (string) 2350 - time: cpu time of instance (float) 2351 - vcpus: the number of vcpus 2352 2353 """ 2354 output = {} 2355 for hname in hypervisor_list: 2356 hvparams = all_hvparams[hname] 2357 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams) 2358 if iinfo: 2359 for name, _, memory, vcpus, state, times in iinfo: 2360 value = { 2361 "memory": memory, 2362 "vcpus": vcpus, 2363 "state": state, 2364 "time": times, 2365 } 2366 if name in output: 2367 # we only check static parameters, like memory and vcpus, 2368 # and not state and time which can change between the 2369 # invocations of the different hypervisors 2370 for key in "memory", "vcpus": 2371 if value[key] != output[name][key]: 2372 _Fail("Instance %s is running twice" 2373 " with different parameters", name) 2374 output[name] = value 2375 2376 return output
2377
2378 2379 -def GetInstanceConsoleInfo(instance_param_dict, 2380 get_hv_fn=hypervisor.GetHypervisor):
2381 """Gather data about the console access of a set of instances of this node. 2382 2383 This function assumes that the caller already knows which instances are on 2384 this node, by calling a function such as L{GetAllInstancesInfo} or 2385 L{GetInstanceList}. 2386 2387 For every instance, a large amount of configuration data needs to be 2388 provided to the hypervisor interface in order to receive the console 2389 information. Whether this could or should be cut down can be discussed. 2390 The information is provided in a dictionary indexed by instance name, 2391 allowing any number of instance queries to be done. 2392 2393 @type instance_param_dict: dict of string to tuple of dictionaries, where the 2394 dictionaries represent: L{objects.Instance}, L{objects.Node}, 2395 L{objects.NodeGroup}, HvParams, BeParams 2396 @param instance_param_dict: mapping of instance name to parameters necessary 2397 for console information retrieval 2398 2399 @rtype: dict 2400 @return: dictionary of instance: data, with data having the following keys: 2401 - instance: instance name 2402 - kind: console kind 2403 - message: used with kind == CONS_MESSAGE, indicates console to be 2404 unavailable, supplies error message 2405 - host: host to connect to 2406 - port: port to use 2407 - user: user for login 2408 - command: the command, broken into parts as an array 2409 - display: unknown, potentially unused? 2410 2411 """ 2412 2413 output = {} 2414 for inst_name in instance_param_dict: 2415 instance = instance_param_dict[inst_name]["instance"] 2416 pnode = instance_param_dict[inst_name]["node"] 2417 group = instance_param_dict[inst_name]["group"] 2418 hvparams = instance_param_dict[inst_name]["hvParams"] 2419 beparams = instance_param_dict[inst_name]["beParams"] 2420 2421 instance = objects.Instance.FromDict(instance) 2422 pnode = objects.Node.FromDict(pnode) 2423 group = objects.NodeGroup.FromDict(group) 2424 2425 h = get_hv_fn(instance.hypervisor) 2426 output[inst_name] = h.GetInstanceConsole(instance, pnode, group, 2427 hvparams, beparams).ToDict() 2428 2429 return output
2430
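Editor's note: the expected shape of C{instance_param_dict}, reconstructed from the lookups in the function body (the placeholder dicts are assumptions; in practice they hold the C{ToDict()} forms that are deserialized again above):

  # Illustrative input for GetInstanceConsoleInfo:
  instance_param_dict = {
    "inst1.example.com": {
      "instance": {},  # objects.Instance.ToDict() output goes here
      "node": {},      # objects.Node.ToDict() output
      "group": {},     # objects.NodeGroup.ToDict() output
      "hvParams": {},  # plain hypervisor parameter dict
      "beParams": {},  # plain backend parameter dict
    },
  }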
2431 2432 -def _InstanceLogName(kind, os_name, instance, component):
2433 """Compute the OS log filename for a given instance and operation. 2434 2435 The instance name and os name are passed in as strings since not all 2436 operations have these as part of an instance object. 2437 2438 @type kind: string 2439 @param kind: the operation type (e.g. add, import, etc.) 2440 @type os_name: string 2441 @param os_name: the os name 2442 @type instance: string 2443 @param instance: the name of the instance being imported/added/etc. 2444 @type component: string or None 2445 @param component: the name of the component of the instance being 2446 transferred 2447 2448 """ 2449 # TODO: Use tempfile.mkstemp to create unique filename 2450 if component: 2451 assert "/" not in component 2452 c_msg = "-%s" % component 2453 else: 2454 c_msg = "" 2455 base = ("%s-%s-%s%s-%s.log" % 2456 (kind, os_name, instance, c_msg, utils.TimestampForFilename())) 2457 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2458
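Editor's note: an example of the log path L{_InstanceLogName} produces; the directory and timestamp shown are assumptions, the real values come from C{pathutils.LOG_OS_DIR} and C{utils.TimestampForFilename}:

  logfile = _InstanceLogName("import", "debootstrap",
                             "inst1.example.com", "disk0")
  # -> e.g. <LOG_OS_DIR>/import-debootstrap-inst1.example.com-disk0-<timestamp>.log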
2459 2460 -def InstanceOsAdd(instance, reinstall, debug):
2461 """Add an OS to an instance. 2462 2463 @type instance: L{objects.Instance} 2464 @param instance: Instance whose OS is to be installed 2465 @type reinstall: boolean 2466 @param reinstall: whether this is an instance reinstall 2467 @type debug: integer 2468 @param debug: debug level, passed to the OS scripts 2469 @rtype: None 2470 2471 """ 2472 inst_os = OSFromDisk(instance.os) 2473 2474 create_env = OSEnvironment(instance, inst_os, debug) 2475 if reinstall: 2476 create_env["INSTANCE_REINSTALL"] = "1" 2477 2478 logfile = _InstanceLogName("add", instance.os, instance.name, None) 2479 2480 result = utils.RunCmd([inst_os.create_script], env=create_env, 2481 cwd=inst_os.path, output=logfile, reset_env=True) 2482 if result.failed: 2483 logging.error("os create command '%s' returned error: %s, logfile: %s," 2484 " output: %s", result.cmd, result.fail_reason, logfile, 2485 result.output) 2486 lines = [utils.SafeEncode(val) 2487 for val in utils.TailFile(logfile, lines=20)] 2488 _Fail("OS create script failed (%s), last lines in the" 2489 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2490
2491 2492 -def RunRenameInstance(instance, old_name, debug):
2493 """Run the OS rename script for an instance. 2494 2495 @type instance: L{objects.Instance} 2496 @param instance: Instance whose OS is to be installed 2497 @type old_name: string 2498 @param old_name: previous instance name 2499 @type debug: integer 2500 @param debug: debug level, passed to the OS scripts 2501 @rtype: boolean 2502 @return: the success of the operation 2503 2504 """ 2505 inst_os = OSFromDisk(instance.os) 2506 2507 rename_env = OSEnvironment(instance, inst_os, debug) 2508 rename_env["OLD_INSTANCE_NAME"] = old_name 2509 2510 logfile = _InstanceLogName("rename", instance.os, 2511 "%s-%s" % (old_name, instance.name), None) 2512 2513 result = utils.RunCmd([inst_os.rename_script], env=rename_env, 2514 cwd=inst_os.path, output=logfile, reset_env=True) 2515 2516 if result.failed: 2517 logging.error("os create command '%s' returned error: %s output: %s", 2518 result.cmd, result.fail_reason, result.output) 2519 lines = [utils.SafeEncode(val) 2520 for val in utils.TailFile(logfile, lines=20)] 2521 _Fail("OS rename script failed (%s), last lines in the" 2522 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2523
2524 2525 -def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
2526 """Returns symlink path for block device. 2527 2528 """ 2529 if _dir is None: 2530 _dir = pathutils.DISK_LINKS_DIR 2531 2532 return utils.PathJoin(_dir, 2533 ("%s%s%s" % 2534 (instance_name, constants.DISK_SEPARATOR, idx)))
2535
2536 2537 -def _SymlinkBlockDev(instance_name, device_path, idx):
2538 """Set up symlinks to a instance's block device. 2539 2540 This is an auxiliary function run when an instance is start (on the primary 2541 node) or when an instance is migrated (on the target node). 2542 2543 2544 @param instance_name: the name of the target instance 2545 @param device_path: path of the physical block device, on the node 2546 @param idx: the disk index 2547 @return: absolute path to the disk's symlink 2548 2549 """ 2550 link_name = _GetBlockDevSymlinkPath(instance_name, idx) 2551 try: 2552 os.symlink(device_path, link_name) 2553 except OSError, err: 2554 if err.errno == errno.EEXIST: 2555 if (not os.path.islink(link_name) or 2556 os.readlink(link_name) != device_path): 2557 os.remove(link_name) 2558 os.symlink(device_path, link_name) 2559 else: 2560 raise 2561 2562 return link_name
2563 2576
2577 2578 -def _CalculateDeviceURI(instance, disk, device):
2579 """Get the URI for the device. 2580 2581 @type instance: L{objects.Instance} 2582 @param instance: the instance which disk belongs to 2583 @type disk: L{objects.Disk} 2584 @param disk: the target disk object 2585 @type device: L{bdev.BlockDev} 2586 @param device: the corresponding BlockDevice 2587 @rtype: string 2588 @return: the device uri if any else None 2589 2590 """ 2591 access_mode = disk.params.get(constants.LDP_ACCESS, 2592 constants.DISK_KERNELSPACE) 2593 if access_mode == constants.DISK_USERSPACE: 2594 # This can raise errors.BlockDeviceError 2595 return device.GetUserspaceAccessUri(instance.hypervisor) 2596 else: 2597 return None
2598
2599 2600 -def _GatherAndLinkBlockDevs(instance):
2601 """Set up an instance's block device(s). 2602 2603 This is run on the primary node at instance startup. The block 2604 devices must be already assembled. 2605 2606 @type instance: L{objects.Instance} 2607 @param instance: the instance whose disks we should assemble 2608 @rtype: list 2609 @return: list of (disk_object, link_name, drive_uri) 2610 2611 """ 2612 block_devices = [] 2613 for idx, disk in enumerate(instance.disks_info): 2614 device = _RecursiveFindBD(disk) 2615 if device is None: 2616 raise errors.BlockDeviceError("Block device '%s' is not set up." % 2617 str(disk)) 2618 device.Open() 2619 try: 2620 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx) 2621 except OSError, e: 2622 raise errors.BlockDeviceError("Cannot create block device symlink: %s" % 2623 e.strerror) 2624 uri = _CalculateDeviceURI(instance, disk, device) 2625 2626 block_devices.append((disk, link_name, uri)) 2627 2628 return block_devices
2629
2630 2631 -def _IsInstanceUserDown(instance_info):
2632 return instance_info and \ 2633 "state" in instance_info and \ 2634 hv_base.HvInstanceState.IsShutdown(instance_info["state"])
2635
2636 2637 -def _GetInstanceInfo(instance):
2638 """Helper function L{GetInstanceInfo}""" 2639 return GetInstanceInfo(instance.name, instance.hypervisor, 2640 hvparams=instance.hvparams)
2641
2642 2643 -def StartInstance(instance, startup_paused, reason, store_reason=True):
2644 """Start an instance. 2645 2646 @type instance: L{objects.Instance} 2647 @param instance: the instance object 2648 @type startup_paused: bool 2649 @param instance: pause instance at startup? 2650 @type reason: list of reasons 2651 @param reason: the reason trail for this startup 2652 @type store_reason: boolean 2653 @param store_reason: whether to store the shutdown reason trail on file 2654 @rtype: None 2655 2656 """ 2657 instance_info = _GetInstanceInfo(instance) 2658 2659 if instance_info and not _IsInstanceUserDown(instance_info): 2660 logging.info("Instance '%s' already running, not starting", instance.name) 2661 return 2662 2663 try: 2664 block_devices = _GatherAndLinkBlockDevs(instance) 2665 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2666 hyper.StartInstance(instance, block_devices, startup_paused) 2667 if store_reason: 2668 _StoreInstReasonTrail(instance.name, reason) 2669 except errors.BlockDeviceError, err: 2670 _Fail("Block device error: %s", err, exc=True) 2671 except errors.HypervisorError, err: 2672 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2673 _Fail("Hypervisor error: %s", err, exc=True)
2674
2675 2676 -def InstanceShutdown(instance, timeout, reason, store_reason=True):
2677 """Shut an instance down. 2678 2679 @note: this functions uses polling with a hardcoded timeout. 2680 2681 @type instance: L{objects.Instance} 2682 @param instance: the instance object 2683 @type timeout: integer 2684 @param timeout: maximum timeout for soft shutdown 2685 @type reason: list of reasons 2686 @param reason: the reason trail for this shutdown 2687 @type store_reason: boolean 2688 @param store_reason: whether to store the shutdown reason trail on file 2689 @rtype: None 2690 2691 """ 2692 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2693 2694 if not _GetInstanceInfo(instance): 2695 logging.info("Instance '%s' not running, doing nothing", instance.name) 2696 return 2697 2698 class _TryShutdown(object): 2699 def __init__(self): 2700 self.tried_once = False
2701 2702 def __call__(self): 2703 if not _GetInstanceInfo(instance): 2704 return 2705 2706 try: 2707 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout) 2708 if store_reason: 2709 _StoreInstReasonTrail(instance.name, reason) 2710 except errors.HypervisorError, err: 2711 # if the instance is no longer existing, consider this a 2712 # success and go to cleanup 2713 if not _GetInstanceInfo(instance): 2714 return 2715 2716 _Fail("Failed to stop instance '%s': %s", instance.name, err) 2717 2718 self.tried_once = True 2719 2720 raise utils.RetryAgain() 2721 2722 try: 2723 utils.Retry(_TryShutdown(), 5, timeout) 2724 except utils.RetryTimeout: 2725 # the shutdown did not succeed 2726 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name) 2727 2728 try: 2729 hyper.StopInstance(instance, force=True) 2730 except errors.HypervisorError, err: 2731 # only raise an error if the instance still exists, otherwise 2732 # the error could simply be "instance ... unknown"! 2733 if _GetInstanceInfo(instance): 2734 _Fail("Failed to force stop instance '%s': %s", instance.name, err) 2735 2736 time.sleep(1) 2737 2738 if _GetInstanceInfo(instance): 2739 _Fail("Could not shutdown instance '%s' even by destroy", instance.name) 2740 2741 try: 2742 hyper.CleanupInstance(instance.name) 2743 except errors.HypervisorError, err: 2744 logging.warning("Failed to execute post-shutdown cleanup step: %s", err) 2745 2746 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2747
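Editor's note: the shutdown loop above follows the protocol of C{utils.Retry}: the callable returns normally to end the loop and raises C{utils.RetryAgain} to be invoked again until the timeout expires. A minimal sketch of that pattern, where C{attempt_soft_stop} is a hypothetical stand-in for the hypervisor call:

  class _TryOnce(object):
    """Callable retried by utils.Retry every five seconds."""
    def __init__(self):
      self.tried_once = False

    def __call__(self):
      if attempt_soft_stop(retry=self.tried_once):  # hypothetical helper
        return                    # success ends the retry loop
      self.tried_once = True
      raise utils.RetryAgain()    # ask utils.Retry to call us again

  utils.Retry(_TryOnce(), 5, timeout)  # may raise utils.RetryTimeout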
2748 2749 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2750 """Reboot an instance. 2751 2752 @type instance: L{objects.Instance} 2753 @param instance: the instance object to reboot 2754 @type reboot_type: str 2755 @param reboot_type: the type of reboot, one the following 2756 constants: 2757 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the 2758 instance OS, do not recreate the VM 2759 - L{constants.INSTANCE_REBOOT_HARD}: tear down and 2760 restart the VM (at the hypervisor level) 2761 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is 2762 not accepted here, since that mode is handled differently, in 2763 cmdlib, and translates into full stop and start of the 2764 instance (instead of a call_instance_reboot RPC) 2765 @type shutdown_timeout: integer 2766 @param shutdown_timeout: maximum timeout for soft shutdown 2767 @type reason: list of reasons 2768 @param reason: the reason trail for this reboot 2769 @rtype: None 2770 2771 """ 2772 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown' 2773 # because those functions simply 'return' on error whereas this one 2774 # raises an exception with '_Fail' 2775 if not _GetInstanceInfo(instance): 2776 _Fail("Cannot reboot instance '%s' that is not running", instance.name) 2777 2778 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2779 if reboot_type == constants.INSTANCE_REBOOT_SOFT: 2780 try: 2781 hyper.RebootInstance(instance) 2782 except errors.HypervisorError, err: 2783 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err) 2784 elif reboot_type == constants.INSTANCE_REBOOT_HARD: 2785 try: 2786 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False) 2787 result = StartInstance(instance, False, reason, store_reason=False) 2788 _StoreInstReasonTrail(instance.name, reason) 2789 return result 2790 except errors.HypervisorError, err: 2791 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err) 2792 else: 2793 _Fail("Invalid reboot_type received: '%s'", reboot_type)
2794
2795 2796 -def InstanceBalloonMemory(instance, memory):
2797 """Resize an instance's memory. 2798 2799 @type instance: L{objects.Instance} 2800 @param instance: the instance object 2801 @type memory: int 2802 @param memory: new memory amount in MB 2803 @rtype: None 2804 2805 """ 2806 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2807 running = hyper.ListInstances(hvparams=instance.hvparams) 2808 if instance.name not in running: 2809 logging.info("Instance %s is not running, cannot balloon", instance.name) 2810 return 2811 try: 2812 hyper.BalloonInstanceMemory(instance, memory) 2813 except errors.HypervisorError, err: 2814 _Fail("Failed to balloon instance memory: %s", err, exc=True)
2815
2816 2817 -def MigrationInfo(instance):
2818 """Gather information about an instance to be migrated. 2819 2820 @type instance: L{objects.Instance} 2821 @param instance: the instance definition 2822 2823 """ 2824 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2825 try: 2826 info = hyper.MigrationInfo(instance) 2827 except errors.HypervisorError, err: 2828 _Fail("Failed to fetch migration information: %s", err, exc=True) 2829 return info
2830
2831 2832 -def AcceptInstance(instance, info, target):
2833 """Prepare the node to accept an instance. 2834 2835 @type instance: L{objects.Instance} 2836 @param instance: the instance definition 2837 @type info: string/data (opaque) 2838 @param info: migration information, from the source node 2839 @type target: string 2840 @param target: target host (usually ip), on this node 2841 2842 """ 2843 # TODO: why is this required only for DTS_EXT_MIRROR? 2844 if instance.disk_template in constants.DTS_EXT_MIRROR: 2845 # Create the symlinks, as the disks are not active 2846 # in any way 2847 try: 2848 _GatherAndLinkBlockDevs(instance) 2849 except errors.BlockDeviceError, err: 2850 _Fail("Block device error: %s", err, exc=True) 2851 2852 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2853 try: 2854 hyper.AcceptInstance(instance, info, target) 2855 except errors.HypervisorError, err: 2856 if instance.disk_template in constants.DTS_EXT_MIRROR: 2857 _RemoveBlockDevLinks(instance.name, instance.disks_info) 2858 _Fail("Failed to accept instance: %s", err, exc=True)
2859
2860 2861 -def FinalizeMigrationDst(instance, info, success):
2862 """Finalize any preparation to accept an instance. 2863 2864 @type instance: L{objects.Instance} 2865 @param instance: the instance definition 2866 @type info: string/data (opaque) 2867 @param info: migration information, from the source node 2868 @type success: boolean 2869 @param success: whether the migration was a success or a failure 2870 2871 """ 2872 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2873 try: 2874 hyper.FinalizeMigrationDst(instance, info, success) 2875 except errors.HypervisorError, err: 2876 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
2877
2878 2879 -def MigrateInstance(cluster_name, instance, target, live):
2880 """Migrates an instance to another node. 2881 2882 @type cluster_name: string 2883 @param cluster_name: name of the cluster 2884 @type instance: L{objects.Instance} 2885 @param instance: the instance definition 2886 @type target: string 2887 @param target: the target node name 2888 @type live: boolean 2889 @param live: whether the migration should be done live or not (the 2890 interpretation of this parameter is left to the hypervisor) 2891 @raise RPCFail: if migration fails for some reason 2892 2893 """ 2894 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2895 2896 try: 2897 hyper.MigrateInstance(cluster_name, instance, target, live) 2898 except errors.HypervisorError, err: 2899 _Fail("Failed to migrate instance: %s", err, exc=True)
2900
2901 2902 -def FinalizeMigrationSource(instance, success, live):
2903 """Finalize the instance migration on the source node. 2904 2905 @type instance: L{objects.Instance} 2906 @param instance: the instance definition of the migrated instance 2907 @type success: bool 2908 @param success: whether the migration succeeded or not 2909 @type live: bool 2910 @param live: whether the user requested a live migration or not 2911 @raise RPCFail: If the execution fails for some reason 2912 2913 """ 2914 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2915 2916 try: 2917 hyper.FinalizeMigrationSource(instance, success, live) 2918 except Exception, err: # pylint: disable=W0703 2919 _Fail("Failed to finalize the migration on the source node: %s", err, 2920 exc=True)
2921
2922 2923 -def GetMigrationStatus(instance):
2924 """Get the migration status 2925 2926 @type instance: L{objects.Instance} 2927 @param instance: the instance that is being migrated 2928 @rtype: L{objects.MigrationStatus} 2929 @return: the status of the current migration (one of 2930 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional 2931 progress info that can be retrieved from the hypervisor 2932 @raise RPCFail: If the migration status cannot be retrieved 2933 2934 """ 2935 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2936 try: 2937 return hyper.GetMigrationStatus(instance) 2938 except Exception, err: # pylint: disable=W0703 2939 _Fail("Failed to get migration status: %s", err, exc=True)
2940
2941 2942 -def HotplugDevice(instance, action, dev_type, device, extra, seq):
2943 """Hotplug a device 2944 2945 Hotplug is currently supported only for KVM Hypervisor. 2946 @type instance: L{objects.Instance} 2947 @param instance: the instance to which we hotplug a device 2948 @type action: string 2949 @param action: the hotplug action to perform 2950 @type dev_type: string 2951 @param dev_type: the device type to hotplug 2952 @type device: either L{objects.NIC} or L{objects.Disk} 2953 @param device: the device object to hotplug 2954 @type extra: tuple 2955 @param extra: extra info used for disk hotplug (disk link, drive uri) 2956 @type seq: int 2957 @param seq: the index of the device from master perspective 2958 @raise RPCFail: in case instance does not have KVM hypervisor 2959 2960 """ 2961 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2962 try: 2963 hyper.VerifyHotplugSupport(instance, action, dev_type) 2964 except errors.HotplugError, err: 2965 _Fail("Hotplug is not supported: %s", err) 2966 2967 if action == constants.HOTPLUG_ACTION_ADD: 2968 fn = hyper.HotAddDevice 2969 elif action == constants.HOTPLUG_ACTION_REMOVE: 2970 fn = hyper.HotDelDevice 2971 elif action == constants.HOTPLUG_ACTION_MODIFY: 2972 fn = hyper.HotModDevice 2973 else: 2974 assert action in constants.HOTPLUG_ALL_ACTIONS 2975 2976 return fn(instance, dev_type, device, extra, seq)
2977
2978 2979 -def HotplugSupported(instance):
2980 """Checks if hotplug is generally supported. 2981 2982 """ 2983 hyper = hypervisor.GetHypervisor(instance.hypervisor) 2984 try: 2985 hyper.HotplugSupported(instance) 2986 except errors.HotplugError, err: 2987 _Fail("Hotplug is not supported: %s", err)
2988
2989 2990 -def ModifyInstanceMetadata(metadata):
2991 """Sends instance data to the metadata daemon. 2992 2993 Uses the Luxi transport layer to communicate with the metadata 2994 daemon configuration server. It starts the metadata daemon if it is 2995 not running. 2996 The daemon must be enabled during at configuration time. 2997 2998 @type metadata: dict 2999 @param metadata: instance metadata obtained by calling 3000 L{objects.Instance.ToDict} on an instance object 3001 3002 """ 3003 if not constants.ENABLE_METAD: 3004 raise errors.ProgrammerError("The metadata deamon is disabled, yet" 3005 " ModifyInstanceMetadata has been called") 3006 3007 if not utils.IsDaemonAlive(constants.METAD): 3008 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD]) 3009 if result.failed: 3010 raise errors.HypervisorError("Failed to start metadata daemon") 3011 3012 def _Connect(): 3013 return transport.Transport(pathutils.SOCKET_DIR + "/ganeti-metad", 3014 allow_non_master=True)
3015 3016 retries = 5 3017 3018 while True: 3019 try: 3020 trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO) 3021 break 3022 except utils.RetryTimeout: 3023 raise TimeoutError("Connection to metadata daemon timed out") 3024 except (socket.error, NoMasterError), err: 3025 if retries == 0: 3026 logging.error("Failed to connect to the metadata daemon", 3027 exc_info=True) 3028 raise TimeoutError("Failed to connect to metadata daemon: %s" % err) 3029 else: 3030 retries -= 1 3031 3032 data = serializer.DumpJson(metadata, 3033 private_encoder=serializer.EncodeWithPrivateFields) 3034 3035 trans.Send(data) 3036 trans.Close() 3037
3038 3039 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
3040 """Creates a block device for an instance. 3041 3042 @type disk: L{objects.Disk} 3043 @param disk: the object describing the disk we should create 3044 @type size: int 3045 @param size: the size of the physical underlying device, in MiB 3046 @type owner: str 3047 @param owner: the name of the instance for which disk is created, 3048 used for device cache data 3049 @type on_primary: boolean 3050 @param on_primary: indicates if it is the primary node or not 3051 @type info: string 3052 @param info: string that will be sent to the physical device 3053 creation, used for example to set (LVM) tags on LVs 3054 @type excl_stor: boolean 3055 @param excl_stor: Whether exclusive_storage is active 3056 3057 @return: the new unique_id of the device (this can sometime be 3058 computed only after creation), or None. On secondary nodes, 3059 it's not required to return anything. 3060 3061 """ 3062 # TODO: remove the obsolete "size" argument 3063 # pylint: disable=W0613 3064 clist = [] 3065 if disk.children: 3066 for child in disk.children: 3067 try: 3068 crdev = _RecursiveAssembleBD(child, owner, on_primary) 3069 except errors.BlockDeviceError, err: 3070 _Fail("Can't assemble device %s: %s", child, err) 3071 if on_primary or disk.AssembleOnSecondary(): 3072 # we need the children open in case the device itself has to 3073 # be assembled 3074 try: 3075 # pylint: disable=E1103 3076 crdev.Open() 3077 except errors.BlockDeviceError, err: 3078 _Fail("Can't make child '%s' read-write: %s", child, err) 3079 clist.append(crdev) 3080 3081 try: 3082 device = bdev.Create(disk, clist, excl_stor) 3083 except errors.BlockDeviceError, err: 3084 _Fail("Can't create block device: %s", err) 3085 3086 if on_primary or disk.AssembleOnSecondary(): 3087 try: 3088 device.Assemble() 3089 except errors.BlockDeviceError, err: 3090 _Fail("Can't assemble device after creation, unusual event: %s", err) 3091 if on_primary or disk.OpenOnSecondary(): 3092 try: 3093 device.Open(force=True) 3094 except errors.BlockDeviceError, err: 3095 _Fail("Can't make device r/w after creation, unusual event: %s", err) 3096 DevCacheManager.UpdateCache(device.dev_path, owner, 3097 on_primary, disk.iv_name) 3098 3099 device.SetInfo(info) 3100 3101 return device.unique_id
3102
3103 3104 -def _DumpDevice(source_path, target_path, offset, size, truncate):
3105 """This function images/wipes the device using a local file. 3106 3107 @type source_path: string 3108 @param source_path: path of the image or data source (e.g., "/dev/zero") 3109 3110 @type target_path: string 3111 @param target_path: path of the device to image/wipe 3112 3113 @type offset: int 3114 @param offset: offset in MiB in the output file 3115 3116 @type size: int 3117 @param size: maximum size in MiB to write (data source might be smaller) 3118 3119 @type truncate: bool 3120 @param truncate: whether the file should be truncated 3121 3122 @return: None 3123 @raise RPCFail: in case of failure 3124 3125 """ 3126 # Internal sizes are always in Mebibytes; if the following "dd" command 3127 # should use a different block size the offset and size given to this 3128 # function must be adjusted accordingly before being passed to "dd". 3129 block_size = constants.DD_BLOCK_SIZE 3130 3131 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset, 3132 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path, 3133 "count=%d" % size] 3134 3135 if not truncate: 3136 cmd.append("conv=notrunc") 3137 3138 result = utils.RunCmd(cmd) 3139 3140 if result.failed: 3141 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd, 3142 result.fail_reason, result.output)
3143
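Editor's note: a worked example of the command list built above; C{truncate=False} appends C{conv=notrunc}, and the C{"1M"} block size is an assumption about C{constants.DD_BLOCK_SIZE}, chosen because the MiB-based C{seek}/C{count} arithmetic only holds for a one-mebibyte block size:

  # Hypothetical call: write 1024 MiB of zeros starting 512 MiB into /dev/xvdb.
  _DumpDevice("/dev/zero", "/dev/xvdb", 512, 1024, False)
  # runs roughly:
  #   dd if=/dev/zero seek=512 bs=1M oflag=direct of=/dev/xvdb \
  #      count=1024 conv=notrunc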
3144 3145 -def _DownloadAndDumpDevice(source_url, target_path, size):
3146 """This function images a device using a downloaded image file. 3147 3148 @type source_url: string 3149 @param source_url: URL of image to dump to disk 3150 3151 @type target_path: string 3152 @param target_path: path of the device to image 3153 3154 @type size: int 3155 @param size: maximum size in MiB to write (data source might be smaller) 3156 3157 @rtype: NoneType 3158 @return: None 3159 @raise RPCFail: in case of download or write failures 3160 3161 """ 3162 class DDParams(object): 3163 def __init__(self, current_size, total_size): 3164 self.current_size = current_size 3165 self.total_size = total_size 3166 self.image_size_error = False
3167 3168 def dd_write(ddparams, out): 3169 if ddparams.current_size < ddparams.total_size: 3170 ddparams.current_size += len(out) 3171 target_file.write(out) 3172 else: 3173 ddparams.image_size_error = True 3174 return -1 3175 3176 target_file = open(target_path, "r+") 3177 ddparams = DDParams(0, 1024 * 1024 * size) 3178 3179 curl = pycurl.Curl() 3180 curl.setopt(pycurl.VERBOSE, True) 3181 curl.setopt(pycurl.NOSIGNAL, True) 3182 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION) 3183 curl.setopt(pycurl.URL, source_url) 3184 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out)) 3185 3186 try: 3187 curl.perform() 3188 except pycurl.error: 3189 if ddparams.image_size_error: 3190 _Fail("Disk image larger than the disk") 3191 else: 3192 raise 3193 3194 target_file.close() 3195
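Editor's note: returning a value other than C{len(out)} from a pycurl C{WRITEFUNCTION} aborts the transfer, which is how the size guard above converts an oversized image into a C{pycurl.error}. A slightly stricter standalone variant that refuses to overshoot even by one chunk (names are assumptions):

  def bounded_write(ddparams, out, target_file):
    """WRITEFUNCTION body: write at most ddparams.total_size bytes."""
    if ddparams.current_size + len(out) > ddparams.total_size:
      ddparams.image_size_error = True
      return -1                 # abort the curl transfer
    ddparams.current_size += len(out)
    target_file.write(out)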
3196 3197 -def BlockdevConvert(src_disk, target_disk):
3198 """Copies data from source block device to target. 3199 3200 This function gets the export and import commands from the source and 3201 target devices respectively, and then concatenates them to a single 3202 command using a pipe ("|"). Finally, executes the unified command that 3203 will transfer the data between the devices during the disk template 3204 conversion operation. 3205 3206 @type src_disk: L{objects.Disk} 3207 @param src_disk: the disk object we want to copy from 3208 @type target_disk: L{objects.Disk} 3209 @param target_disk: the disk object we want to copy to 3210 3211 @rtype: NoneType 3212 @return: None 3213 @raise RPCFail: in case of failure 3214 3215 """ 3216 src_dev = _RecursiveFindBD(src_disk) 3217 if src_dev is None: 3218 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid) 3219 3220 dest_dev = _RecursiveFindBD(target_disk) 3221 if dest_dev is None: 3222 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid) 3223 3224 src_cmd = src_dev.Export() 3225 dest_cmd = dest_dev.Import() 3226 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd), 3227 utils.ShellQuoteArgs(dest_cmd)) 3228 3229 result = utils.RunCmd(command) 3230 if result.failed: 3231 _Fail("Disk conversion command '%s' exited with error: %s; output: %s", 3232 result.cmd, result.fail_reason, result.output)
3233
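Editor's note: schematically, the pipeline that L{BlockdevConvert} executes, with C{dd} command lines standing in for whatever the source and target C{bdev} classes actually return from C{Export()}/C{Import()} (the real commands depend on the disk types involved):

  src_cmd = ["dd", "if=/dev/xenvg/src.data", "bs=1M"]   # stand-in Export()
  dest_cmd = ["dd", "of=/dev/xenvg/dst.data", "bs=1M"]  # stand-in Import()
  command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd),
                         utils.ShellQuoteArgs(dest_cmd))
  # -> "dd if=/dev/xenvg/src.data bs=1M | dd of=/dev/xenvg/dst.data bs=1M"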
3234 3235 -def BlockdevWipe(disk, offset, size):
3236 """Wipes a block device. 3237 3238 @type disk: L{objects.Disk} 3239 @param disk: the disk object we want to wipe 3240 @type offset: int 3241 @param offset: The offset in MiB in the file 3242 @type size: int 3243 @param size: The size in MiB to write 3244 3245 """ 3246 try: 3247 rdev = _RecursiveFindBD(disk) 3248 except errors.BlockDeviceError: 3249 rdev = None 3250 3251 if not rdev: 3252 _Fail("Cannot wipe device %s: device not found", disk.iv_name) 3253 if offset < 0: 3254 _Fail("Negative offset") 3255 if size < 0: 3256 _Fail("Negative size") 3257 if offset > rdev.size: 3258 _Fail("Wipe offset is bigger than device size") 3259 if (offset + size) > rdev.size: 3260 _Fail("Wipe offset and size are bigger than device size") 3261 3262 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
3263
3264 3265 -def BlockdevImage(disk, image, size):
3266 """Images a block device either by dumping a local file or 3267 downloading a URL. 3268 3269 @type disk: L{objects.Disk} 3270 @param disk: the disk object we want to image 3271 3272 @type image: string 3273 @param image: file path to the disk image be dumped 3274 3275 @type size: int 3276 @param size: The size in MiB to write 3277 3278 @rtype: NoneType 3279 @return: None 3280 @raise RPCFail: in case of failure 3281 3282 """ 3283 if not (utils.IsUrl(image) or os.path.exists(image)): 3284 _Fail("Image '%s' not found", image) 3285 3286 try: 3287 rdev = _RecursiveFindBD(disk) 3288 except errors.BlockDeviceError: 3289 rdev = None 3290 3291 if not rdev: 3292 _Fail("Cannot image device %s: device not found", disk.iv_name) 3293 if size < 0: 3294 _Fail("Negative size") 3295 if size > rdev.size: 3296 _Fail("Image size is bigger than device size") 3297 3298 if utils.IsUrl(image): 3299 _DownloadAndDumpDevice(image, rdev.dev_path, size) 3300 else: 3301 _DumpDevice(image, rdev.dev_path, 0, size, False)
3302
3303 3304 -def BlockdevPauseResumeSync(disks, pause):
3305 """Pause or resume the sync of the block device. 3306 3307 @type disks: list of L{objects.Disk} 3308 @param disks: the disk objects we want to pause/resume 3309 @type pause: bool 3310 @param pause: Whether to pause or resume 3311 3312 """ 3313 success = [] 3314 for disk in disks: 3315 try: 3316 rdev = _RecursiveFindBD(disk) 3317 except errors.BlockDeviceError: 3318 rdev = None 3319 3320 if not rdev: 3321 success.append((False, ("Cannot change sync for device %s:" 3322 " device not found" % disk.iv_name))) 3323 continue 3324 3325 result = rdev.PauseResumeSync(pause) 3326 3327 if result: 3328 success.append((result, None)) 3329 else: 3330 if pause: 3331 msg = "Pause" 3332 else: 3333 msg = "Resume" 3334 success.append((result, "%s for device %s failed" % (msg, disk.iv_name))) 3335 3336 return success
3337
3338 3339 -def BlockdevRemove(disk):
3340 """Remove a block device. 3341 3342 @note: This is intended to be called recursively. 3343 3344 @type disk: L{objects.Disk} 3345 @param disk: the disk object we should remove 3346 @rtype: boolean 3347 @return: the success of the operation 3348 3349 """ 3350 msgs = [] 3351 try: 3352 rdev = _RecursiveFindBD(disk) 3353 except errors.BlockDeviceError, err: 3354 # probably can't attach 3355 logging.info("Can't attach to device %s in remove", disk) 3356 rdev = None 3357 if rdev is not None: 3358 r_path = rdev.dev_path 3359 3360 def _TryRemove(): 3361 try: 3362 rdev.Remove() 3363 return [] 3364 except errors.BlockDeviceError, err: 3365 return [str(err)]
3366 3367 msgs.extend(utils.SimpleRetry([], _TryRemove, 3368 constants.DISK_REMOVE_RETRY_INTERVAL, 3369 constants.DISK_REMOVE_RETRY_TIMEOUT)) 3370 3371 if not msgs: 3372 DevCacheManager.RemoveCache(r_path) 3373 3374 if disk.children: 3375 for child in disk.children: 3376 try: 3377 BlockdevRemove(child) 3378 except RPCFail, err: 3379 msgs.append(str(err)) 3380 3381 if msgs: 3382 _Fail("; ".join(msgs)) 3383
3384 3385 -def _RecursiveAssembleBD(disk, owner, as_primary):
3386 """Activate a block device for an instance. 3387 3388 This is run on the primary and secondary nodes for an instance. 3389 3390 @note: this function is called recursively. 3391 3392 @type disk: L{objects.Disk} 3393 @param disk: the disk we try to assemble 3394 @type owner: str 3395 @param owner: the name of the instance which owns the disk 3396 @type as_primary: boolean 3397 @param as_primary: if we should make the block device 3398 read/write 3399 3400 @return: the assembled device or None (in case no device 3401 was assembled) 3402 @raise errors.BlockDeviceError: in case there is an error 3403 during the activation of the children or the device 3404 itself 3405 3406 """ 3407 children = [] 3408 if disk.children: 3409 mcn = disk.ChildrenNeeded() 3410 if mcn == -1: 3411 mcn = 0 # max number of Nones allowed 3412 else: 3413 mcn = len(disk.children) - mcn # max number of Nones 3414 for chld_disk in disk.children: 3415 try: 3416 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary) 3417 except errors.BlockDeviceError, err: 3418 if children.count(None) >= mcn: 3419 raise 3420 cdev = None 3421 logging.error("Error in child activation (but continuing): %s", 3422 str(err)) 3423 children.append(cdev) 3424 3425 if as_primary or disk.AssembleOnSecondary(): 3426 r_dev = bdev.Assemble(disk, children) 3427 result = r_dev 3428 if as_primary or disk.OpenOnSecondary(): 3429 r_dev.Open() 3430 DevCacheManager.UpdateCache(r_dev.dev_path, owner, 3431 as_primary, disk.iv_name) 3432 3433 else: 3434 result = True 3435 return result
3436
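The "max number of Nones" (mcn) bookkeeping above tolerates as many failed children as the device can spare. A worked sketch with hypothetical numbers:

  children_total = 2     # len(disk.children), e.g. a two-way mirror
  children_needed = 1    # what disk.ChildrenNeeded() might return (assumed)

  if children_needed == -1:
      max_nones = 0      # special case: every child must assemble
  else:
      max_nones = children_total - children_needed  # here: 1 child may fail

  # During assembly, a child whose activation raises BlockDeviceError is
  # recorded as None; once children.count(None) would exceed max_nones,
  # the exception is re-raised instead of being logged and swallowed.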
3437 3438 -def BlockdevAssemble(disk, instance, as_primary, idx):
3439 """Activate a block device for an instance. 3440 3441 This is a wrapper over _RecursiveAssembleBD. 3442 3443 @rtype: tuple 3444 @return: a tuple with the C{/dev/...} path, the created symlink and the 3445 device URI for primary nodes, and (C{True}, C{True}) for secondary nodes 3446 3447 """ 3448 try: 3449 result = _RecursiveAssembleBD(disk, instance.name, as_primary) 3450 if isinstance(result, BlockDev): 3451 # pylint: disable=E1103 3452 dev_path = result.dev_path 3453 link_name = None 3454 uri = None 3455 if as_primary: 3456 link_name = _SymlinkBlockDev(instance.name, dev_path, idx) 3457 uri = _CalculateDeviceURI(instance, disk, result) 3458 elif result: 3459 return result, result 3460 else: 3461 _Fail("Unexpected result from _RecursiveAssembleBD") 3462 except errors.BlockDeviceError, err: 3463 _Fail("Error while assembling disk: %s", err, exc=True) 3464 except OSError, err: 3465 _Fail("Error while symlinking disk: %s", err, exc=True) 3466 3467 return dev_path, link_name, uri
3468
3469 3470 -def BlockdevShutdown(disk):
3471 """Shut down a block device. 3472 3473 First, if the device is assembled (Attach() is successful), then 3474 the device is shut down. Then the children of the device are 3475 shut down. 3476 3477 This function is called recursively. Note that we don't cache the 3478 children as we do on assemble: shutting down different 3479 devices doesn't require that the upper device be active. 3480 3481 @type disk: L{objects.Disk} 3482 @param disk: the description of the disk we should 3483 shutdown 3484 @rtype: None 3485 3486 """ 3487 msgs = [] 3488 r_dev = _RecursiveFindBD(disk) 3489 if r_dev is not None: 3490 r_path = r_dev.dev_path 3491 try: 3492 r_dev.Shutdown() 3493 DevCacheManager.RemoveCache(r_path) 3494 except errors.BlockDeviceError, err: 3495 msgs.append(str(err)) 3496 3497 if disk.children: 3498 for child in disk.children: 3499 try: 3500 BlockdevShutdown(child) 3501 except RPCFail, err: 3502 msgs.append(str(err)) 3503 3504 if msgs: 3505 _Fail("; ".join(msgs))
3506
3507 3508 -def BlockdevAddchildren(parent_cdev, new_cdevs):
3509 """Extend a mirrored block device. 3510 3511 @type parent_cdev: L{objects.Disk} 3512 @param parent_cdev: the disk to which we should add children 3513 @type new_cdevs: list of L{objects.Disk} 3514 @param new_cdevs: the list of children which we should add 3515 @rtype: None 3516 3517 """ 3518 parent_bdev = _RecursiveFindBD(parent_cdev) 3519 if parent_bdev is None: 3520 _Fail("Can't find parent device '%s' in add children", parent_cdev) 3521 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs] 3522 if new_bdevs.count(None) > 0: 3523 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs) 3524 parent_bdev.AddChildren(new_bdevs)
3525
3526 3527 -def BlockdevRemovechildren(parent_cdev, new_cdevs):
3528 """Shrink a mirrored block device. 3529 3530 @type parent_cdev: L{objects.Disk} 3531 @param parent_cdev: the disk from which we should remove children 3532 @type new_cdevs: list of L{objects.Disk} 3533 @param new_cdevs: the list of children which we should remove 3534 @rtype: None 3535 3536 """ 3537 parent_bdev = _RecursiveFindBD(parent_cdev) 3538 if parent_bdev is None: 3539 _Fail("Can't find parent device '%s' in remove children", parent_cdev) 3540 devs = [] 3541 for disk in new_cdevs: 3542 rpath = disk.StaticDevPath() 3543 if rpath is None: 3544 bd = _RecursiveFindBD(disk) 3545 if bd is None: 3546 _Fail("Can't find device %s while removing children", disk) 3547 else: 3548 devs.append(bd.dev_path) 3549 else: 3550 if not utils.IsNormAbsPath(rpath): 3551 _Fail("Strange path returned from StaticDevPath: '%s'", rpath) 3552 devs.append(rpath) 3553 parent_bdev.RemoveChildren(devs)
3554
3555 3556 -def BlockdevGetmirrorstatus(disks):
3557 """Get the mirroring status of a list of devices. 3558 3559 @type disks: list of L{objects.Disk} 3560 @param disks: the list of disks which we should query 3561 @rtype: list 3562 @return: List of L{objects.BlockDevStatus}, one for each disk 3563 @raise errors.BlockDeviceError: if any of the disks cannot be 3564 found 3565 3566 """ 3567 stats = [] 3568 for dsk in disks: 3569 rbd = _RecursiveFindBD(dsk) 3570 if rbd is None: 3571 _Fail("Can't find device %s", dsk) 3572 3573 stats.append(rbd.CombinedSyncStatus()) 3574 3575 return stats
3576
3577 3578 -def BlockdevGetmirrorstatusMulti(disks):
3579 """Get the mirroring status of a list of devices. 3580 3581 @type disks: list of L{objects.Disk} 3582 @param disks: the list of disks which we should query 3583 @rtype: list 3584 @return: List of tuples, (bool, status), one for each disk; bool denotes 3585 success/failure, status is L{objects.BlockDevStatus} on success, string 3586 otherwise 3587 3588 """ 3589 result = [] 3590 for disk in disks: 3591 try: 3592 rbd = _RecursiveFindBD(disk) 3593 if rbd is None: 3594 result.append((False, "Can't find device %s" % disk)) 3595 continue 3596 3597 status = rbd.CombinedSyncStatus() 3598 except errors.BlockDeviceError, err: 3599 logging.exception("Error while getting disk status") 3600 result.append((False, str(err))) 3601 else: 3602 result.append((True, status)) 3603 3604 assert len(disks) == len(result) 3605 3606 return result
3607
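A hypothetical consumer of the per-disk result tuples returned above; pairing each input disk with its (success, payload) entry is safe since the lengths are asserted equal:

  results = BlockdevGetmirrorstatusMulti(disks)  # disks: list of objects.Disk
  for disk, (success, payload) in zip(disks, results):
      if success:
          # payload is an objects.BlockDevStatus; inspect e.g. its sync
          # progress or degradation flags as needed.
          status = payload
      else:
          # payload is the error string produced above
          logging.warning("Disk %s: %s", disk.iv_name, payload)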
3608 3609 -def _RecursiveFindBD(disk):
3610 """Check if a device is activated. 3611 3612 If so, return information about the real device. 3613 3614 @type disk: L{objects.Disk} 3615 @param disk: the disk object we need to find 3616 3617 @return: None if the device can't be found, 3618 otherwise the device instance 3619 3620 """ 3621 children = [] 3622 if disk.children: 3623 for chdisk in disk.children: 3624 children.append(_RecursiveFindBD(chdisk)) 3625 3626 return bdev.FindDevice(disk, children)
3627
3628 3629 -def _OpenRealBD(disk):
3630 """Opens the underlying block device of a disk. 3631 3632 @type disk: L{objects.Disk} 3633 @param disk: the disk object we want to open 3634 3635 """ 3636 real_disk = _RecursiveFindBD(disk) 3637 if real_disk is None: 3638 _Fail("Block device '%s' is not set up", disk) 3639 3640 real_disk.Open() 3641 3642 return real_disk
3643
3644 3645 -def BlockdevFind(disk):
3646 """Check if a device is activated. 3647 3648 If it is, return information about the real device. 3649 3650 @type disk: L{objects.Disk} 3651 @param disk: the disk to find 3652 @rtype: None or objects.BlockDevStatus 3653 @return: None if the disk cannot be found, otherwise the current 3654 sync status information 3655 3656 """ 3657 try: 3658 rbd = _RecursiveFindBD(disk) 3659 except errors.BlockDeviceError, err: 3660 _Fail("Failed to find device: %s", err, exc=True) 3661 3662 if rbd is None: 3663 return None 3664 3665 return rbd.GetSyncStatus()
3666
3667 3668 -def BlockdevGetdimensions(disks):
3669 """Computes the size of the given disks. 3670 3671 If a disk is not found, returns None instead. 3672 3673 @type disks: list of L{objects.Disk} 3674 @param disks: the list of disks to compute the size for 3675 @rtype: list 3676 @return: list with elements None if the disk cannot be found, 3677 otherwise the pair (size, spindles), where spindles is None if the 3678 device doesn't support that 3679 3680 """ 3681 result = [] 3682 for cf in disks: 3683 try: 3684 rbd = _RecursiveFindBD(cf) 3685 except errors.BlockDeviceError: 3686 result.append(None) 3687 continue 3688 if rbd is None: 3689 result.append(None) 3690 else: 3691 result.append(rbd.GetActualDimensions()) 3692 return result
3693
3694 3695 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
3696 """Write a file to the filesystem. 3697 3698 This allows the master to overwrite(!) a file. It will only perform 3699 the operation if the file belongs to a list of configuration files. 3700 3701 @type file_name: str 3702 @param file_name: the target file name 3703 @type data: str 3704 @param data: the new contents of the file 3705 @type mode: int 3706 @param mode: the mode to give the file (can be None) 3707 @type uid: string 3708 @param uid: the owner of the file 3709 @type gid: string 3710 @param gid: the group of the file 3711 @type atime: float 3712 @param atime: the atime to set on the file (can be None) 3713 @type mtime: float 3714 @param mtime: the mtime to set on the file (can be None) 3715 @rtype: None 3716 3717 """ 3718 file_name = vcluster.LocalizeVirtualPath(file_name) 3719 3720 if not os.path.isabs(file_name): 3721 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name) 3722 3723 if file_name not in _ALLOWED_UPLOAD_FILES: 3724 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'", 3725 file_name) 3726 3727 raw_data = _Decompress(data) 3728 3729 if not (isinstance(uid, basestring) and isinstance(gid, basestring)): 3730 _Fail("Invalid username/groupname type") 3731 3732 getents = runtime.GetEnts() 3733 uid = getents.LookupUser(uid) 3734 gid = getents.LookupGroup(gid) 3735 3736 utils.SafeWriteFile(file_name, None, 3737 data=raw_data, mode=mode, uid=uid, gid=gid, 3738 atime=atime, mtime=mtime)
3739
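A hedged sketch of a direct call; in reality this function is reached through the node daemon's RPC layer, and the target path must be one of the entries in _ALLOWED_UPLOAD_FILES. The path below and the shape of the compressed payload container are assumptions:

  import time

  now = time.time()
  data = (0, "cluster.example.com\n")  # (encoding, payload) pair; exact
                                       # container shape is an assumption,
                                       # see _Decompress
  UploadFile("/var/lib/ganeti/ssconf_cluster_name",  # assumed to be allowed
             data,
             mode=0644,        # file permissions
             uid="root",       # user *name*, resolved via runtime.GetEnts()
             gid="root",       # group name, likewise resolved
             atime=now, mtime=now)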
3740 3741 -def RunOob(oob_program, command, node, timeout):
3742 """Executes oob_program with given command on given node. 3743 3744 @param oob_program: The path to the executable oob_program 3745 @param command: The command to invoke on oob_program 3746 @param node: The node given as an argument to the program 3747 @param timeout: Timeout after which we kill the oob program 3748 3749 @return: stdout 3750 @raise RPCFail: If execution fails for some reason 3751 3752 """ 3753 result = utils.RunCmd([oob_program, command, node], timeout=timeout) 3754 3755 if result.failed: 3756 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd, 3757 result.fail_reason, result.output) 3758 3759 return result.stdout
3760
3761 3762 -def _OSOndiskAPIVersion(os_dir):
3763 """Compute and return the API version of a given OS. 3764 3765 This function will try to read the API version of the OS residing in 3766 the 'os_dir' directory. 3767 3768 @type os_dir: str 3769 @param os_dir: the directory in which we should look for the OS 3770 @rtype: tuple 3771 @return: tuple (status, data) with status denoting the validity and 3772 data holding either the valid versions or an error message 3773 3774 """ 3775 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE) 3776 3777 try: 3778 st = os.stat(api_file) 3779 except EnvironmentError, err: 3780 return False, ("Required file '%s' not found under path %s: %s" % 3781 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err))) 3782 3783 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3784 return False, ("File '%s' in %s is not a regular file" % 3785 (constants.OS_API_FILE, os_dir)) 3786 3787 try: 3788 api_versions = utils.ReadFile(api_file).splitlines() 3789 except EnvironmentError, err: 3790 return False, ("Error while reading the API version file at %s: %s" % 3791 (api_file, utils.ErrnoOrStr(err))) 3792 3793 try: 3794 api_versions = [int(version.strip()) for version in api_versions] 3795 except (TypeError, ValueError), err: 3796 return False, ("API version(s) can't be converted to integer: %s" % 3797 str(err)) 3798 3799 return True, api_versions
3800
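For reference, the API version file read above is simply a list of integers, one per line; the parsing step reduces to:

  raw = "20\n15\n"  # hypothetical file contents (two supported API versions)
  api_versions = [int(version.strip()) for version in raw.splitlines()]
  assert api_versions == [20, 15]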
3801 3802 -def DiagnoseOS(top_dirs=None):
3803 """Compute the validity for all OSes. 3804 3805 @type top_dirs: list 3806 @param top_dirs: the list of directories in which to 3807 search (if not given defaults to 3808 L{pathutils.OS_SEARCH_PATH}) 3809 @rtype: list of L{objects.OS} 3810 @return: a list of tuples (name, path, status, diagnose, variants, 3811 parameters, api_versions, trusted) for all (potential) OSes under all 3812 search paths, where: 3813 - name is the (potential) OS name 3814 - path is the full path to the OS 3815 - status True/False is the validity of the OS 3816 - diagnose is the error message for an invalid OS, otherwise empty 3817 - variants is a list of supported OS variants, if any 3818 - parameters is a list of (name, help) parameters, if any 3819 - api_versions is a list of supported OS API versions - trusted is False if the OS provides an untrusted create script 3820 3821 """ 3822 if top_dirs is None: 3823 top_dirs = pathutils.OS_SEARCH_PATH 3824 3825 result = [] 3826 for dir_name in top_dirs: 3827 if os.path.isdir(dir_name): 3828 try: 3829 f_names = utils.ListVisibleFiles(dir_name) 3830 except EnvironmentError, err: 3831 logging.exception("Can't list the OS directory %s: %s", dir_name, err) 3832 break 3833 for name in f_names: 3834 os_path = utils.PathJoin(dir_name, name) 3835 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name) 3836 if status: 3837 diagnose = "" 3838 variants = os_inst.supported_variants 3839 parameters = os_inst.supported_parameters 3840 api_versions = os_inst.api_versions 3841 trusted = False if os_inst.create_script_untrusted else True 3842 else: 3843 diagnose = os_inst 3844 variants = parameters = api_versions = [] 3845 trusted = True 3846 result.append((name, os_path, status, diagnose, variants, 3847 parameters, api_versions, trusted)) 3848 3849 return result
3850
3851 3852 -def _TryOSFromDisk(name, base_dir=None):
3853 """Create an OS instance from disk. 3854 3855 This function will return an OS instance if the given name is a 3856 valid OS name. 3857 3858 @type base_dir: string 3859 @keyword base_dir: Base directory containing OS installations. 3860 Defaults to a search in all the OS_SEARCH_PATH dirs. 3861 @rtype: tuple 3862 @return: success and either the OS instance if we find a valid one, 3863 or error message 3864 3865 """ 3866 if base_dir is None: 3867 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir) 3868 else: 3869 os_dir = utils.FindFile(name, [base_dir], os.path.isdir) 3870 3871 if os_dir is None: 3872 return False, "Directory for OS %s not found in search path" % name 3873 3874 status, api_versions = _OSOndiskAPIVersion(os_dir) 3875 if not status: 3876 # push the error up 3877 return status, api_versions 3878 3879 if not constants.OS_API_VERSIONS.intersection(api_versions): 3880 return False, ("API version mismatch for path '%s': found %s, want %s." % 3881 (os_dir, api_versions, constants.OS_API_VERSIONS)) 3882 3883 # OS Files dictionary, we will populate it with the absolute path 3884 # names; if the value is True, then it is a required file, otherwise 3885 # an optional one 3886 os_files = dict.fromkeys(constants.OS_SCRIPTS, True) 3887 3888 os_files[constants.OS_SCRIPT_CREATE] = False 3889 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False 3890 3891 if max(api_versions) >= constants.OS_API_V15: 3892 os_files[constants.OS_VARIANTS_FILE] = False 3893 3894 if max(api_versions) >= constants.OS_API_V20: 3895 os_files[constants.OS_PARAMETERS_FILE] = True 3896 else: 3897 del os_files[constants.OS_SCRIPT_VERIFY] 3898 3899 for (filename, required) in os_files.items(): 3900 os_files[filename] = utils.PathJoin(os_dir, filename) 3901 3902 try: 3903 st = os.stat(os_files[filename]) 3904 except EnvironmentError, err: 3905 if err.errno == errno.ENOENT and not required: 3906 del os_files[filename] 3907 continue 3908 return False, ("File '%s' under path '%s' is missing (%s)" % 3909 (filename, os_dir, utils.ErrnoOrStr(err))) 3910 3911 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)): 3912 return False, ("File '%s' under path '%s' is not a regular file" % 3913 (filename, os_dir)) 3914 3915 if filename in constants.OS_SCRIPTS: 3916 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR: 3917 return False, ("File '%s' under path '%s' is not executable" % 3918 (filename, os_dir)) 3919 3920 if not constants.OS_SCRIPT_CREATE in os_files and \ 3921 not constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files: 3922 return False, ("A create script (trusted or untrusted) under path '%s'" 3923 " must exist" % os_dir) 3924 3925 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None) 3926 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED, 3927 None) 3928 3929 variants = [] 3930 if constants.OS_VARIANTS_FILE in os_files: 3931 variants_file = os_files[constants.OS_VARIANTS_FILE] 3932 try: 3933 variants = \ 3934 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file)) 3935 except EnvironmentError, err: 3936 # we accept missing files, but not other errors 3937 if err.errno != errno.ENOENT: 3938 return False, ("Error while reading the OS variants file at %s: %s" % 3939 (variants_file, utils.ErrnoOrStr(err))) 3940 3941 parameters = [] 3942 if constants.OS_PARAMETERS_FILE in os_files: 3943 parameters_file = os_files[constants.OS_PARAMETERS_FILE] 3944 try: 3945 parameters = utils.ReadFile(parameters_file).splitlines() 3946 except EnvironmentError, err: 3947 return 
False, ("Error while reading the OS parameters file at %s: %s" % 3948 (parameters_file, utils.ErrnoOrStr(err))) 3949 parameters = [v.split(None, 1) for v in parameters] 3950 3951 os_obj = objects.OS(name=name, path=os_dir, 3952 create_script=create_script, 3953 create_script_untrusted=create_script_untrusted, 3954 export_script=os_files[constants.OS_SCRIPT_EXPORT], 3955 import_script=os_files[constants.OS_SCRIPT_IMPORT], 3956 rename_script=os_files[constants.OS_SCRIPT_RENAME], 3957 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY, 3958 None), 3959 supported_variants=variants, 3960 supported_parameters=parameters, 3961 api_versions=api_versions) 3962 return True, os_obj
3963
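The file-requirement bookkeeping in _TryOSFromDisk can be summarised as follows (True marks a required file); this sketch just restates the rules above for an OS advertising API versions 15 and 20:

  os_files = dict.fromkeys(constants.OS_SCRIPTS, True)  # base scripts: required

  # Exactly one of the two create scripts must exist, so both start optional:
  os_files[constants.OS_SCRIPT_CREATE] = False
  os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False

  os_files[constants.OS_VARIANTS_FILE] = False    # API >= 15: optional
  os_files[constants.OS_PARAMETERS_FILE] = True   # API >= 20: required
  # (for API < 20 the verify script is dropped from the requirements instead)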
3964 3965 -def OSFromDisk(name, base_dir=None):
3966 """Create an OS instance from disk. 3967 3968 This function will return an OS instance if the given name is a 3969 valid OS name. Otherwise, it will raise an appropriate 3970 L{RPCFail} exception, detailing why this is not a valid OS. 3971 3972 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise 3973 an exception but returns true/false status data. 3974 3975 @type base_dir: string 3976 @keyword base_dir: Base directory containing OS installations. 3977 Defaults to a search in all the OS_SEARCH_PATH dirs. 3978 @rtype: L{objects.OS} 3979 @return: the OS instance if we find a valid one 3980 @raise RPCFail: if we don't find a valid OS 3981 3982 """ 3983 name_only = objects.OS.GetName(name) 3984 status, payload = _TryOSFromDisk(name_only, base_dir) 3985 3986 if not status: 3987 _Fail(payload) 3988 3989 return payload
3990
3991 3992 -def OSCoreEnv(os_name, inst_os, os_params, debug=0):
3993 """Calculate the basic environment for an os script. 3994 3995 @type os_name: str 3996 @param os_name: full operating system name (including variant) 3997 @type inst_os: L{objects.OS} 3998 @param inst_os: operating system for which the environment is being built 3999 @type os_params: dict 4000 @param os_params: the OS parameters 4001 @type debug: integer 4002 @param debug: debug level (0 or 1, for OS Api 10) 4003 @rtype: dict 4004 @return: dict of environment variables 4005 @raise errors.BlockDeviceError: if the block device 4006 cannot be found 4007 4008 """ 4009 result = {} 4010 api_version = \ 4011 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions)) 4012 result["OS_API_VERSION"] = "%d" % api_version 4013 result["OS_NAME"] = inst_os.name 4014 result["DEBUG_LEVEL"] = "%d" % debug 4015 4016 # OS variants 4017 if api_version >= constants.OS_API_V15 and inst_os.supported_variants: 4018 variant = objects.OS.GetVariant(os_name) 4019 if not variant: 4020 variant = inst_os.supported_variants[0] 4021 else: 4022 variant = "" 4023 result["OS_VARIANT"] = variant 4024 4025 # OS params 4026 for pname, pvalue in os_params.items(): 4027 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue 4028 4029 # Set a default path otherwise programs called by OS scripts (or 4030 # even hooks called from OS scripts) might break, and we don't want 4031 # to have each script require setting a PATH variable 4032 result["PATH"] = constants.HOOKS_PATH 4033 4034 return result
4035
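The OS-parameter loop above maps each parameter to an OSP_-prefixed environment variable; a tiny worked example with hypothetical parameters:

  os_params = {"dhcp-timeout": "30", "root-size": "10G"}  # hypothetical
  env = dict(("OSP_%s" % name.upper().replace("-", "_"), value)
             for name, value in os_params.items())
  assert env == {"OSP_DHCP_TIMEOUT": "30", "OSP_ROOT_SIZE": "10G"}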
4036 4037 -def OSEnvironment(instance, inst_os, debug=0):
4038 """Calculate the environment for an os script. 4039 4040 @type instance: L{objects.Instance} 4041 @param instance: target instance for the os script run 4042 @type inst_os: L{objects.OS} 4043 @param inst_os: operating system for which the environment is being built 4044 @type debug: integer 4045 @param debug: debug level (0 or 1, for OS Api 10) 4046 @rtype: dict 4047 @return: dict of environment variables 4048 @raise errors.BlockDeviceError: if the block device 4049 cannot be found 4050 4051 """ 4052 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams, 4053 instance.osparams_private.Unprivate()), debug=debug) 4054 4055 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]: 4056 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr)) 4057 4058 result["HYPERVISOR"] = instance.hypervisor 4059 result["DISK_COUNT"] = "%d" % len(instance.disks_info) 4060 result["NIC_COUNT"] = "%d" % len(instance.nics) 4061 result["INSTANCE_SECONDARY_NODES"] = \ 4062 ("%s" % " ".join(instance.secondary_nodes)) 4063 4064 # Disks 4065 for idx, disk in enumerate(instance.disks_info): 4066 real_disk = _OpenRealBD(disk) 4067 result["DISK_%d_PATH" % idx] = real_disk.dev_path 4068 result["DISK_%d_ACCESS" % idx] = disk.mode 4069 result["DISK_%d_UUID" % idx] = disk.uuid 4070 if disk.name: 4071 result["DISK_%d_NAME" % idx] = disk.name 4072 if constants.HV_DISK_TYPE in instance.hvparams: 4073 result["DISK_%d_FRONTEND_TYPE" % idx] = \ 4074 instance.hvparams[constants.HV_DISK_TYPE] 4075 if disk.dev_type in constants.DTS_BLOCK: 4076 result["DISK_%d_BACKEND_TYPE" % idx] = "block" 4077 elif disk.dev_type in constants.DTS_FILEBASED: 4078 result["DISK_%d_BACKEND_TYPE" % idx] = \ 4079 "file:%s" % disk.logical_id[0] 4080 4081 # NICs 4082 for idx, nic in enumerate(instance.nics): 4083 result["NIC_%d_MAC" % idx] = nic.mac 4084 result["NIC_%d_UUID" % idx] = nic.uuid 4085 if nic.name: 4086 result["NIC_%d_NAME" % idx] = nic.name 4087 if nic.ip: 4088 result["NIC_%d_IP" % idx] = nic.ip 4089 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE] 4090 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED: 4091 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK] 4092 if nic.nicparams[constants.NIC_LINK]: 4093 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK] 4094 if nic.netinfo: 4095 nobj = objects.Network.FromDict(nic.netinfo) 4096 result.update(nobj.HooksDict("NIC_%d_" % idx)) 4097 if constants.HV_NIC_TYPE in instance.hvparams: 4098 result["NIC_%d_FRONTEND_TYPE" % idx] = \ 4099 instance.hvparams[constants.HV_NIC_TYPE] 4100 4101 # HV/BE params 4102 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]: 4103 for key, value in source.items(): 4104 result["INSTANCE_%s_%s" % (kind, key)] = str(value) 4105 4106 return result
4107
4108 4109 -def DiagnoseExtStorage(top_dirs=None):
4110 """Compute the validity for all ExtStorage Providers. 4111 4112 @type top_dirs: list 4113 @param top_dirs: the list of directories in which to 4114 search (if not given defaults to 4115 L{pathutils.ES_SEARCH_PATH}) 4116 @rtype: list of L{objects.ExtStorage} 4117 @return: a list of tuples (name, path, status, diagnose, parameters) 4118 for all (potential) ExtStorage Providers under all 4119 search paths, where: 4120 - name is the (potential) ExtStorage Provider 4121 - path is the full path to the ExtStorage Provider 4122 - status True/False is the validity of the ExtStorage Provider 4123 - diagnose is the error message for an invalid ExtStorage Provider, 4124 otherwise empty 4125 - parameters is a list of (name, help) parameters, if any 4126 4127 """ 4128 if top_dirs is None: 4129 top_dirs = pathutils.ES_SEARCH_PATH 4130 4131 result = [] 4132 for dir_name in top_dirs: 4133 if os.path.isdir(dir_name): 4134 try: 4135 f_names = utils.ListVisibleFiles(dir_name) 4136 except EnvironmentError, err: 4137 logging.exception("Can't list the ExtStorage directory %s: %s", 4138 dir_name, err) 4139 break 4140 for name in f_names: 4141 es_path = utils.PathJoin(dir_name, name) 4142 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name) 4143 if status: 4144 diagnose = "" 4145 parameters = es_inst.supported_parameters 4146 else: 4147 diagnose = es_inst 4148 parameters = [] 4149 result.append((name, es_path, status, diagnose, parameters)) 4150 4151 return result
4152
4153 4154 -def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
4155 """Grow a stack of block devices. 4156 4157 This function is called recursively, with the children being the 4158 first ones to resize. 4159 4160 @type disk: L{objects.Disk} 4161 @param disk: the disk to be grown 4162 @type amount: integer 4163 @param amount: the amount (in mebibytes) to grow with 4164 @type dryrun: boolean 4165 @param dryrun: whether to execute the operation in simulation mode 4166 only, without actually increasing the size 4167 @param backingstore: whether to execute the operation on backing storage 4168 only, or on "logical" storage only; e.g. DRBD is logical storage, 4169 whereas LVM, file, RBD are backing storage 4170 @rtype: (status, result) 4171 @type excl_stor: boolean 4172 @param excl_stor: Whether exclusive_storage is active 4173 @return: a tuple with the status of the operation (True/False), and 4174 the error message if status is False 4175 4176 """ 4177 r_dev = _RecursiveFindBD(disk) 4178 if r_dev is None: 4179 _Fail("Cannot find block device %s", disk) 4180 4181 try: 4182 r_dev.Grow(amount, dryrun, backingstore, excl_stor) 4183 except errors.BlockDeviceError, err: 4184 _Fail("Failed to grow block device: %s", err, exc=True)
4185
4186 4187 -def BlockdevSnapshot(disk, snap_name, snap_size):
4188 """Create a snapshot copy of a block device. 4189 4190 This function is called recursively, and the snapshot is actually created 4191 just for the leaf lvm backend device. 4192 4193 @type disk: L{objects.Disk} 4194 @param disk: the disk to be snapshotted 4195 @type snap_name: string 4196 @param snap_name: the name of the snapshot 4197 @type snap_size: int 4198 @param snap_size: the size of the snapshot 4199 @rtype: string 4200 @return: snapshot disk ID as (vg, lv) 4201 4202 """ 4203 def _DiskSnapshot(disk, snap_name=None, snap_size=None): 4204 r_dev = _RecursiveFindBD(disk) 4205 if r_dev is not None: 4206 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size) 4207 else: 4208 _Fail("Cannot find block device %s", disk)
4209 4210 if disk.dev_type == constants.DT_DRBD8: 4211 if not disk.children: 4212 _Fail("DRBD device '%s' without backing storage cannot be snapshotted", 4213 disk.unique_id) 4214 return BlockdevSnapshot(disk.children[0], snap_name, snap_size) 4215 elif disk.dev_type == constants.DT_PLAIN: 4216 return _DiskSnapshot(disk, snap_name, snap_size) 4217 elif disk.dev_type == constants.DT_EXT: 4218 return _DiskSnapshot(disk, snap_name, snap_size) 4219 else: 4220 _Fail("Cannot snapshot block device '%s' of type '%s'", 4221 disk.logical_id, disk.dev_type) 4222
4223 4224 -def BlockdevSetInfo(disk, info):
4225 """Sets 'metadata' information on block devices. 4226 4227 This function sets 'info' metadata on block devices. Initial 4228 information is set at device creation; this function should be used 4229 for example after renames. 4230 4231 @type disk: L{objects.Disk} 4232 @param disk: the disk whose metadata is to be set 4233 @type info: string 4234 @param info: new 'info' metadata 4235 @rtype: (status, result) 4236 @return: a tuple with the status of the operation (True/False), and 4237 the error message if status is False 4238 4239 """ 4240 r_dev = _RecursiveFindBD(disk) 4241 if r_dev is None: 4242 _Fail("Cannot find block device %s", disk) 4243 4244 try: 4245 r_dev.SetInfo(info) 4246 except errors.BlockDeviceError, err: 4247 _Fail("Failed to set information on block device: %s", err, exc=True)
4248
4249 4250 -def FinalizeExport(instance, snap_disks):
4251 """Write out the export configuration information. 4252 4253 @type instance: L{objects.Instance} 4254 @param instance: the instance which we export, used for 4255 saving configuration 4256 @type snap_disks: list of L{objects.Disk} 4257 @param snap_disks: list of snapshot block devices, which 4258 will be used to get the actual name of the dump file 4259 4260 @rtype: None 4261 4262 """ 4263 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new") 4264 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name) 4265 4266 config = objects.SerializableConfigParser() 4267 4268 config.add_section(constants.INISECT_EXP) 4269 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION)) 4270 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time())) 4271 config.set(constants.INISECT_EXP, "source", instance.primary_node) 4272 config.set(constants.INISECT_EXP, "os", instance.os) 4273 config.set(constants.INISECT_EXP, "compression", "none") 4274 4275 config.add_section(constants.INISECT_INS) 4276 config.set(constants.INISECT_INS, "name", instance.name) 4277 config.set(constants.INISECT_INS, "maxmem", "%d" % 4278 instance.beparams[constants.BE_MAXMEM]) 4279 config.set(constants.INISECT_INS, "minmem", "%d" % 4280 instance.beparams[constants.BE_MINMEM]) 4281 # "memory" is deprecated, but useful for exporting to old ganeti versions 4282 config.set(constants.INISECT_INS, "memory", "%d" % 4283 instance.beparams[constants.BE_MAXMEM]) 4284 config.set(constants.INISECT_INS, "vcpus", "%d" % 4285 instance.beparams[constants.BE_VCPUS]) 4286 config.set(constants.INISECT_INS, "disk_template", instance.disk_template) 4287 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor) 4288 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags())) 4289 4290 nic_total = 0 4291 for nic_count, nic in enumerate(instance.nics): 4292 nic_total += 1 4293 config.set(constants.INISECT_INS, "nic%d_mac" % 4294 nic_count, "%s" % nic.mac) 4295 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip) 4296 config.set(constants.INISECT_INS, "nic%d_network" % nic_count, 4297 "%s" % nic.network) 4298 config.set(constants.INISECT_INS, "nic%d_name" % nic_count, 4299 "%s" % nic.name) 4300 for param in constants.NICS_PARAMETER_TYPES: 4301 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param), 4302 "%s" % nic.nicparams.get(param, None)) 4303 # TODO: redundant: on load can read nics until it doesn't exist 4304 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total) 4305 4306 disk_total = 0 4307 for disk_count, disk in enumerate(snap_disks): 4308 if disk: 4309 disk_total += 1 4310 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count, 4311 ("%s" % disk.iv_name)) 4312 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count, 4313 ("%s" % disk.logical_id[1])) 4314 config.set(constants.INISECT_INS, "disk%d_size" % disk_count, 4315 ("%d" % disk.size)) 4316 config.set(constants.INISECT_INS, "disk%d_name" % disk_count, 4317 "%s" % disk.name) 4318 4319 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total) 4320 4321 # New-style hypervisor/backend parameters 4322 4323 config.add_section(constants.INISECT_HYP) 4324 for name, value in instance.hvparams.items(): 4325 if name not in constants.HVC_GLOBALS: 4326 config.set(constants.INISECT_HYP, name, str(value)) 4327 4328 config.add_section(constants.INISECT_BEP) 4329 for name, value in instance.beparams.items(): 4330 config.set(constants.INISECT_BEP, name, str(value)) 
4331 4332 config.add_section(constants.INISECT_OSP) 4333 for name, value in instance.osparams.items(): 4334 config.set(constants.INISECT_OSP, name, str(value)) 4335 4336 config.add_section(constants.INISECT_OSP_PRIVATE) 4337 for name, value in instance.osparams_private.items(): 4338 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get())) 4339 4340 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE), 4341 data=config.Dumps()) 4342 shutil.rmtree(finaldestdir, ignore_errors=True) 4343 shutil.move(destdir, finaldestdir)
4344
4345 4346 -def ExportInfo(dest):
4347 """Get export configuration information. 4348 4349 @type dest: str 4350 @param dest: directory containing the export 4351 4352 @rtype: L{objects.SerializableConfigParser} 4353 @return: a serializable config file containing the 4354 export info 4355 4356 """ 4357 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE) 4358 4359 config = objects.SerializableConfigParser() 4360 config.read(cff) 4361 4362 if (not config.has_section(constants.INISECT_EXP) or 4363 not config.has_section(constants.INISECT_INS)): 4364 _Fail("Export info file doesn't have the required fields") 4365 4366 return config.Dumps()
4367
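A minimal stand-alone reader for the same file, using the stdlib parser instead of objects.SerializableConfigParser; the export path and the section names ('export' and 'instance', standing in for FinalizeExport's INISECT constants) are assumptions:

  import ConfigParser  # 'configparser' on Python 3

  parser = ConfigParser.SafeConfigParser()
  parser.read("/srv/ganeti/export/inst1.example.com/config.ini")  # assumed path

  version = parser.getint("export", "version")  # constants.INISECT_EXP (assumed)
  name = parser.get("instance", "name")         # constants.INISECT_INS (assumed)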
4368 4369 -def ListExports():
4370 """Return a list of exports currently available on this machine. 4371 4372 @rtype: list 4373 @return: list of the exports 4374 4375 """ 4376 if os.path.isdir(pathutils.EXPORT_DIR): 4377 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR)) 4378 else: 4379 _Fail("No exports directory")
4380
4381 4382 -def RemoveExport(export):
4383 """Remove an existing export from the node. 4384 4385 @type export: str 4386 @param export: the name of the export to remove 4387 @rtype: None 4388 4389 """ 4390 target = utils.PathJoin(pathutils.EXPORT_DIR, export) 4391 4392 try: 4393 shutil.rmtree(target) 4394 except EnvironmentError, err: 4395 _Fail("Error while removing the export: %s", err, exc=True)
4396
4397 4398 -def BlockdevRename(devlist):
4399 """Rename a list of block devices. 4400 4401 @type devlist: list of tuples 4402 @param devlist: list of tuples of the form (disk, new_unique_id); disk is 4403 an L{objects.Disk} object describing the current disk, and new 4404 unique_id is the name we rename it to 4405 @rtype: boolean 4406 @return: True if all renames succeeded, False otherwise 4407 4408 """ 4409 msgs = [] 4410 result = True 4411 for disk, unique_id in devlist: 4412 dev = _RecursiveFindBD(disk) 4413 if dev is None: 4414 msgs.append("Can't find device %s in rename" % str(disk)) 4415 result = False 4416 continue 4417 try: 4418 old_rpath = dev.dev_path 4419 dev.Rename(unique_id) 4420 new_rpath = dev.dev_path 4421 if old_rpath != new_rpath: 4422 DevCacheManager.RemoveCache(old_rpath) 4423 # FIXME: we should add the new cache information here, like: 4424 # DevCacheManager.UpdateCache(new_rpath, owner, ...) 4425 # but we don't have the owner here - maybe parse from existing 4426 # cache? for now, we only lose lvm data when we rename, which 4427 # is less critical than DRBD or MD 4428 except errors.BlockDeviceError, err: 4429 msgs.append("Can't rename device '%s' to '%s': %s" % 4430 (dev, unique_id, err)) 4431 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id) 4432 result = False 4433 if not result: 4434 _Fail("; ".join(msgs))
4435
4436 4437 -def _TransformFileStorageDir(fs_dir):
4438 """Checks whether the given file_storage_dir is valid. 4439 4440 Checks whether the given fs_dir is within the cluster-wide default 4441 file_storage_dir or the shared_file_storage_dir, which are stored in 4442 SimpleStore. Only paths under those directories are allowed. 4443 4444 @type fs_dir: str 4445 @param fs_dir: the path to check 4446 4447 @return: the normalized path if valid, None otherwise 4448 4449 """ 4450 filestorage.CheckFileStoragePath(fs_dir) 4451 4452 return os.path.normpath(fs_dir)
4453
4454 4455 -def CreateFileStorageDir(file_storage_dir):
4456 """Create file storage directory. 4457 4458 @type file_storage_dir: str 4459 @param file_storage_dir: directory to create 4460 4461 @rtype: tuple 4462 @return: tuple with first element a boolean indicating whether dir 4463 creation was successful or not 4464 4465 """ 4466 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4467 if os.path.exists(file_storage_dir): 4468 if not os.path.isdir(file_storage_dir): 4469 _Fail("Specified storage dir '%s' is not a directory", 4470 file_storage_dir) 4471 else: 4472 try: 4473 os.makedirs(file_storage_dir, 0750) 4474 except OSError, err: 4475 _Fail("Cannot create file storage directory '%s': %s", 4476 file_storage_dir, err, exc=True)
4477
4478 4479 -def RemoveFileStorageDir(file_storage_dir):
4480 """Remove file storage directory. 4481 4482 Remove it only if it's empty. If not, log an error and return. 4483 4484 @type file_storage_dir: str 4485 @param file_storage_dir: the directory we should clean up 4486 @rtype: tuple (success,) 4487 @return: tuple of one element, C{success}, denoting 4488 whether the operation was successful 4489 4490 """ 4491 file_storage_dir = _TransformFileStorageDir(file_storage_dir) 4492 if os.path.exists(file_storage_dir): 4493 if not os.path.isdir(file_storage_dir): 4494 _Fail("Specified Storage directory '%s' is not a directory", 4495 file_storage_dir) 4496 # deletes dir only if empty, otherwise we want to fail the rpc call 4497 try: 4498 os.rmdir(file_storage_dir) 4499 except OSError, err: 4500 _Fail("Cannot remove file storage directory '%s': %s", 4501 file_storage_dir, err)
4502
4503 4504 -def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
4505 """Rename the file storage directory. 4506 4507 @type old_file_storage_dir: str 4508 @param old_file_storage_dir: the current path 4509 @type new_file_storage_dir: str 4510 @param new_file_storage_dir: the name we should rename to 4511 @rtype: tuple (success,) 4512 @return: tuple of one element, C{success}, denoting 4513 whether the operation was successful 4514 4515 """ 4516 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir) 4517 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir) 4518 if not os.path.exists(new_file_storage_dir): 4519 if os.path.isdir(old_file_storage_dir): 4520 try: 4521 os.rename(old_file_storage_dir, new_file_storage_dir) 4522 except OSError, err: 4523 _Fail("Cannot rename '%s' to '%s': %s", 4524 old_file_storage_dir, new_file_storage_dir, err) 4525 else: 4526 _Fail("Specified storage dir '%s' is not a directory", 4527 old_file_storage_dir) 4528 else: 4529 if os.path.exists(old_file_storage_dir): 4530 _Fail("Cannot rename '%s' to '%s': both locations exist", 4531 old_file_storage_dir, new_file_storage_dir)
4532
4533 4534 -def _EnsureJobQueueFile(file_name):
4535 """Checks whether the given filename is in the queue directory. 4536 4537 @type file_name: str 4538 @param file_name: the file name we should check 4539 @rtype: None 4540 @raises RPCFail: if the file is not valid 4541 4542 """ 4543 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name): 4544 _Fail("Passed job queue file '%s' does not belong to" 4545 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
4546
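An illustrative stand-in for the utils.IsBelowDir check used above, assuming its usual "path is strictly inside root" semantics (the real helper lives in ganeti.utils):

  import os.path

  def is_below_dir(root, path):
      # Append a separator so "/queue-x" does not pass for root "/queue"
      root = os.path.join(root, "")
      return os.path.commonprefix([root, path]) == root

  assert is_below_dir("/var/lib/ganeti/queue", "/var/lib/ganeti/queue/job-42")
  assert not is_below_dir("/var/lib/ganeti/queue", "/var/lib/ganeti/queue-x/job")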
4547 4548 -def JobQueueUpdate(file_name, content):
4549 """Updates a file in the queue directory. 4550 4551 This is just a wrapper over L{utils.io.WriteFile}, with proper 4552 checking. 4553 4554 @type file_name: str 4555 @param file_name: the job file name 4556 @type content: str 4557 @param content: the new job contents 4558 @rtype: boolean 4559 @return: the success of the operation 4560 4561 """ 4562 file_name = vcluster.LocalizeVirtualPath(file_name) 4563 4564 _EnsureJobQueueFile(file_name) 4565 getents = runtime.GetEnts() 4566 4567 # Write and replace the file atomically 4568 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid, 4569 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
4570
4571 4572 -def JobQueueRename(old, new):
4573 """Renames a job queue file. 4574 4575 This is just a wrapper over os.rename with proper checking. 4576 4577 @type old: str 4578 @param old: the old (actual) file name 4579 @type new: str 4580 @param new: the desired file name 4581 @rtype: tuple 4582 @return: the success of the operation and payload 4583 4584 """ 4585 old = vcluster.LocalizeVirtualPath(old) 4586 new = vcluster.LocalizeVirtualPath(new) 4587 4588 _EnsureJobQueueFile(old) 4589 _EnsureJobQueueFile(new) 4590 4591 getents = runtime.GetEnts() 4592 4593 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750, 4594 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
4595
4596 4597 -def BlockdevClose(instance_name, disks):
4598 """Closes the given block devices. 4599 4600 This means they will be switched to secondary mode (in case of 4601 DRBD). 4602 4603 @param instance_name: if the argument is not empty, the symlinks 4604 of this instance will be removed 4605 @type disks: list of L{objects.Disk} 4606 @param disks: the list of disks to be closed 4607 @rtype: tuple (success, message) 4608 @return: a tuple of success and message, where success 4609 indicates the success of the operation, and message 4610 contains the error details in case we 4611 failed 4612 4613 """ 4614 bdevs = [] 4615 for cf in disks: 4616 rd = _RecursiveFindBD(cf) 4617 if rd is None: 4618 _Fail("Can't find device %s", cf) 4619 bdevs.append(rd) 4620 4621 msg = [] 4622 for rd in bdevs: 4623 try: 4624 rd.Close() 4625 except errors.BlockDeviceError, err: 4626 msg.append(str(err)) 4627 if msg: 4628 _Fail("Can't make devices secondary: %s", ",".join(msg)) 4629 else: 4630 if instance_name: 4631 _RemoveBlockDevLinks(instance_name, disks)
4632
4633 4634 -def ValidateHVParams(hvname, hvparams):
4635 """Validates the given hypervisor parameters. 4636 4637 @type hvname: string 4638 @param hvname: the hypervisor name 4639 @type hvparams: dict 4640 @param hvparams: the hypervisor parameters to be validated 4641 @rtype: None 4642 4643 """ 4644 try: 4645 hv_type = hypervisor.GetHypervisor(hvname) 4646 hv_type.ValidateParameters(hvparams) 4647 except errors.HypervisorError, err: 4648 _Fail(str(err), log=False)
4649
4650 4651 -def _CheckOSPList(os_obj, parameters):
4652 """Check whether a list of parameters is supported by the OS. 4653 4654 @type os_obj: L{objects.OS} 4655 @param os_obj: OS object to check 4656 @type parameters: list 4657 @param parameters: the list of parameters to check 4658 4659 """ 4660 supported = [v[0] for v in os_obj.supported_parameters] 4661 delta = frozenset(parameters).difference(supported) 4662 if delta: 4663 _Fail("The following parameters are not supported" 4664 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
4665
4666 4667 -def _CheckOSVariant(os_obj, name):
4668 """Check whether an OS name conforms to the os variants specification. 4669 4670 @type os_obj: L{objects.OS} 4671 @param os_obj: OS object to check 4672 4673 @type name: string 4674 @param name: OS name passed by the user, to check for validity 4675 4676 @rtype: NoneType 4677 @return: None 4678 @raise RPCFail: if OS variant is not valid 4679 4680 """ 4681 variant = objects.OS.GetVariant(name) 4682 4683 if not os_obj.supported_variants: 4684 if variant: 4685 _Fail("OS '%s' does not support variants ('%s' passed)" % 4686 (os_obj.name, variant)) 4687 else: 4688 return 4689 4690 if not variant: 4691 _Fail("OS name '%s' must include a variant" % name) 4692 4693 if variant not in os_obj.supported_variants: 4694 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))
4695
4696 4697 -def ValidateOS(required, osname, checks, osparams, force_variant):
4698 """Validate the given OS parameters. 4699 4700 @type required: boolean 4701 @param required: whether absence of the OS should translate into 4702 failure or not 4703 @type osname: string 4704 @param osname: the OS to be validated 4705 @type checks: list 4706 @param checks: list of the checks to run (currently only 'parameters') 4707 @type osparams: dict 4708 @param osparams: dictionary with OS parameters, some of which may be 4709 private. 4710 @rtype: boolean 4711 @return: True if the validation passed, or False if the OS was not 4712 found and L{required} was false 4713 4714 """ 4715 if not constants.OS_VALIDATE_CALLS.issuperset(checks): 4716 _Fail("Unknown checks required for OS %s: %s", osname, 4717 set(checks).difference(constants.OS_VALIDATE_CALLS)) 4718 4719 name_only = objects.OS.GetName(osname) 4720 status, tbv = _TryOSFromDisk(name_only, None) 4721 4722 if not status: 4723 if required: 4724 _Fail(tbv) 4725 else: 4726 return False 4727 4728 if not force_variant: 4729 _CheckOSVariant(tbv, osname) 4730 4731 if max(tbv.api_versions) < constants.OS_API_V20: 4732 return True 4733 4734 if constants.OS_VALIDATE_PARAMETERS in checks: 4735 _CheckOSPList(tbv, osparams.keys()) 4736 4737 validate_env = OSCoreEnv(osname, tbv, osparams) 4738 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env, 4739 cwd=tbv.path, reset_env=True) 4740 if result.failed: 4741 logging.error("os validate command '%s' returned error: %s output: %s", 4742 result.cmd, result.fail_reason, result.output) 4743 _Fail("OS validation script failed (%s), output: %s", 4744 result.fail_reason, result.output, log=False) 4745 4746 return True
4747
4748 4749 -def ExportOS(instance, override_env):
4750 """Creates a GZIPed tarball with an OS definition and environment. 4751 4752 The archive contains a file with the environment variables needed by 4753 the OS scripts. 4754 4755 @type instance: L{objects.Instance} 4756 @param instance: instance for which the OS definition is exported 4757 4758 @type override_env: dict of string to string 4759 @param override_env: if supplied, it overrides, on a 4760 key-by-key basis, the environment that is written into the archive 4761 4762 @rtype: string 4763 @return: filepath of the archive 4764 4765 """ 4766 assert instance 4767 assert instance.os 4768 4769 temp_dir = tempfile.mkdtemp() 4770 inst_os = OSFromDisk(instance.os) 4771 4772 result = utils.RunCmd(["ln", "-s", inst_os.path, 4773 utils.PathJoin(temp_dir, "os")]) 4774 if result.failed: 4775 _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'", 4776 inst_os, temp_dir, result.fail_reason, result.output) 4777 4778 env = OSEnvironment(instance, inst_os) 4779 env.update(override_env) 4780 4781 with open(utils.PathJoin(temp_dir, "environment"), "w") as f: 4782 for var in env: 4783 f.write(var + "=" + env[var] + "\n") 4784 4785 (fd, os_package) = tempfile.mkstemp(suffix=".tgz") 4786 os.close(fd) 4787 4788 result = utils.RunCmd(["tar", "--dereference", "-czv", 4789 "-f", os_package, 4790 "-C", temp_dir, 4791 "."]) 4792 if result.failed: 4793 _Fail("Failed to create OS archive '%s': %s, output '%s'", 4794 os_package, result.fail_reason, result.output) 4795 4796 result = utils.RunCmd(["rm", "-rf", temp_dir]) 4797 if result.failed: 4798 _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'", 4799 inst_os, temp_dir, result.fail_reason, result.output) 4800 4801 return os_package
4802
4803 4804 -def DemoteFromMC():
4805 """Demotes the current node from master candidate role. 4806 4807 """ 4808 # try to ensure we're not the master by mistake 4809 master, myself = ssconf.GetMasterAndMyself() 4810 if master == myself: 4811 _Fail("ssconf status shows I'm the master node, will not demote") 4812 4813 result = utils.RunCmd([pathutils.DAEMON_UTIL, "check", constants.MASTERD]) 4814 if not result.failed: 4815 _Fail("The master daemon is running, will not demote") 4816 4817 try: 4818 if os.path.isfile(pathutils.CLUSTER_CONF_FILE): 4819 utils.CreateBackup(pathutils.CLUSTER_CONF_FILE) 4820 except EnvironmentError, err: 4821 if err.errno != errno.ENOENT: 4822 _Fail("Error while backing up cluster file: %s", err, exc=True) 4823 4824 utils.RemoveFile(pathutils.CLUSTER_CONF_FILE)
4825
4826 4827 -def _GetX509Filenames(cryptodir, name):
4828 """Returns the full paths for the private key and certificate. 4829 4830 """ 4831 return (utils.PathJoin(cryptodir, name), 4832 utils.PathJoin(cryptodir, name, _X509_KEY_FILE), 4833 utils.PathJoin(cryptodir, name, _X509_CERT_FILE))
4834
4835 4836 -def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):
4837 """Creates a new X509 certificate for SSL/TLS. 4838 4839 @type validity: int 4840 @param validity: Validity in seconds 4841 @rtype: tuple; (string, string) 4842 @return: Certificate name and public part 4843 4844 """ 4845 serial_no = int(time.time()) 4846 (key_pem, cert_pem) = \ 4847 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(), 4848 min(validity, _MAX_SSL_CERT_VALIDITY), 4849 serial_no) 4850 4851 cert_dir = tempfile.mkdtemp(dir=cryptodir, 4852 prefix="x509-%s-" % utils.TimestampForFilename()) 4853 try: 4854 name = os.path.basename(cert_dir) 4855 assert len(name) > 5 4856 4857 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 4858 4859 utils.WriteFile(key_file, mode=0400, data=key_pem) 4860 utils.WriteFile(cert_file, mode=0400, data=cert_pem) 4861 4862 # Never return private key as it shouldn't leave the node 4863 return (name, cert_pem) 4864 except Exception: 4865 shutil.rmtree(cert_dir, ignore_errors=True) 4866 raise
4867
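A hypothetical round trip through the two helpers; per the code above, only the certificate's public part is ever returned, while the private key stays under pathutils.CRYPTO_KEYS_DIR:

  # Create a certificate valid for one hour (validity is in seconds) ...
  (name, cert_pem) = CreateX509Certificate(3600)
  # ... hand cert_pem to the peer, and remove the pair when done:
  RemoveX509Certificate(name)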
4868 4869 -def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):
4870 """Removes an X509 certificate. 4871 4872 @type name: string 4873 @param name: Certificate name 4874 4875 """ 4876 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name) 4877 4878 utils.RemoveFile(key_file) 4879 utils.RemoveFile(cert_file) 4880 4881 try: 4882 os.rmdir(cert_dir) 4883 except EnvironmentError, err: 4884 _Fail("Cannot remove certificate directory '%s': %s", 4885 cert_dir, err)
4886
4887 4888 -def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
4889 """Returns the command for the requested input/output. 4890 4891 @type instance: L{objects.Instance} 4892 @param instance: The instance object 4893 @param mode: Import/export mode 4894 @param ieio: Input/output type 4895 @param ieargs: Input/output arguments 4896 4897 """ 4898 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT) 4899 4900 env = None 4901 prefix = None 4902 suffix = None 4903 exp_size = None 4904 4905 if ieio == constants.IEIO_FILE: 4906 (filename, ) = ieargs 4907 4908 if not utils.IsNormAbsPath(filename): 4909 _Fail("Path '%s' is not normalized or absolute", filename) 4910 4911 real_filename = os.path.realpath(filename) 4912 directory = os.path.dirname(real_filename) 4913 4914 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename): 4915 _Fail("File '%s' is not under exports directory '%s': %s", 4916 filename, pathutils.EXPORT_DIR, real_filename) 4917 4918 # Create directory 4919 utils.Makedirs(directory, mode=0750) 4920 4921 quoted_filename = utils.ShellQuote(filename) 4922 4923 if mode == constants.IEM_IMPORT: 4924 suffix = "> %s" % quoted_filename 4925 elif mode == constants.IEM_EXPORT: 4926 suffix = "< %s" % quoted_filename 4927 4928 # Retrieve file size 4929 try: 4930 st = os.stat(filename) 4931 except EnvironmentError, err: 4932 logging.error("Can't stat(2) %s: %s", filename, err) 4933 else: 4934 exp_size = utils.BytesToMebibyte(st.st_size) 4935 4936 elif ieio == constants.IEIO_RAW_DISK: 4937 (disk, ) = ieargs 4938 4939 real_disk = _OpenRealBD(disk) 4940 4941 if mode == constants.IEM_IMPORT: 4942 # we use nocreat to fail if the device is not already there or we pass a 4943 # wrong path; we use notrunc to avoid truncating an LV device 4944 suffix = utils.BuildShellCmd("| dd of=%s conv=nocreat,notrunc bs=%s", 4945 real_disk.dev_path, 4946 str(constants.DD_BLOCK_SIZE)) # 1 MB 4947 4948 elif mode == constants.IEM_EXPORT: 4949 # the block size on the read dd is 1MiB to match our units 4950 prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |", 4951 real_disk.dev_path, 4952 str(constants.DD_BLOCK_SIZE), # 1 MB 4953 str(disk.size)) 4954 exp_size = disk.size 4955 4956 elif ieio == constants.IEIO_SCRIPT: 4957 (disk, disk_index, ) = ieargs 4958 4959 assert isinstance(disk_index, (int, long)) 4960 4961 inst_os = OSFromDisk(instance.os) 4962 env = OSEnvironment(instance, inst_os) 4963 4964 if mode == constants.IEM_IMPORT: 4965 env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index] 4966 env["IMPORT_INDEX"] = str(disk_index) 4967 script = inst_os.import_script 4968 4969 elif mode == constants.IEM_EXPORT: 4970 real_disk = _OpenRealBD(disk) 4971 env["EXPORT_DEVICE"] = real_disk.dev_path 4972 env["EXPORT_INDEX"] = str(disk_index) 4973 script = inst_os.export_script 4974 4975 # TODO: Pass special environment only to script 4976 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script) 4977 4978 if mode == constants.IEM_IMPORT: 4979 suffix = "| %s" % script_cmd 4980 4981 elif mode == constants.IEM_EXPORT: 4982 prefix = "%s |" % script_cmd 4983 4984 # Let script predict size 4985 exp_size = constants.IE_CUSTOM_SIZE 4986 4987 else: 4988 _Fail("Invalid %s I/O mode %r", mode, ieio) 4989 4990 return (env, prefix, suffix, exp_size)
4991
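For orientation, the IEIO_RAW_DISK branch above produces plain dd pipelines; with a hypothetical 1024 MiB disk at /dev/xenvg/disk0 and assuming constants.DD_BLOCK_SIZE is 1 MiB (as the inline comments state), the generated fragments look like:

  block_size = 1024 * 1024  # assumed value of constants.DD_BLOCK_SIZE
  disk_size = 1024          # MiB, hypothetical

  import_suffix = "| dd of=%s conv=nocreat,notrunc bs=%d" % (
      "/dev/xenvg/disk0", block_size)
  export_prefix = "dd if=%s bs=%d count=%d |" % (
      "/dev/xenvg/disk0", block_size, disk_size)
  # the export side also sets exp_size = disk_size (MiB) for progress reporting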
4992 4993 -def _CreateImportExportStatusDir(prefix):
4994 """Creates status directory for import/export. 4995 4996 """ 4997 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR, 4998 prefix=("%s-%s-" % 4999 (prefix, utils.TimestampForFilename())))
5000
5001 5002 -def StartImportExportDaemon(mode, opts, host, port, instance, component, 5003 ieio, ieioargs):
5004 """Starts an import or export daemon. 5005 5006 @param mode: Import/export mode 5007 @type opts: L{objects.ImportExportOptions} 5008 @param opts: Daemon options 5009 @type host: string 5010 @param host: Remote host for export (None for import) 5011 @type port: int 5012 @param port: Remote port for export (None for import) 5013 @type instance: L{objects.Instance} 5014 @param instance: Instance object 5015 @type component: string 5016 @param component: which part of the instance is transferred now, 5017 e.g. 'disk/0' 5018 @param ieio: Input/output type 5019 @param ieioargs: Input/output arguments 5020 5021 """ 5022 if mode == constants.IEM_IMPORT: 5023 prefix = "import" 5024 5025 if not (host is None and port is None): 5026 _Fail("Cannot specify host or port on import") 5027 5028 elif mode == constants.IEM_EXPORT: 5029 prefix = "export" 5030 5031 if host is None or port is None: 5032 _Fail("Host and port must be specified for an export") 5033 5034 else: 5035 _Fail("Invalid mode %r", mode) 5036 5037 if (opts.key_name is None) ^ (opts.ca_pem is None): 5038 _Fail("Cluster certificate can only be used for both key and CA") 5039 5040 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \ 5041 _GetImportExportIoCommand(instance, mode, ieio, ieioargs) 5042 5043 if opts.key_name is None: 5044 # Use server.pem 5045 key_path = pathutils.NODED_CERT_FILE 5046 cert_path = pathutils.NODED_CERT_FILE 5047 assert opts.ca_pem is None 5048 else: 5049 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR, 5050 opts.key_name) 5051 assert opts.ca_pem is not None 5052 5053 for i in [key_path, cert_path]: 5054 if not os.path.exists(i): 5055 _Fail("File '%s' does not exist" % i) 5056 5057 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component)) 5058 try: 5059 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE) 5060 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE) 5061 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE) 5062 5063 if opts.ca_pem is None: 5064 # Use server.pem 5065 ca = utils.ReadFile(pathutils.NODED_CERT_FILE) 5066 else: 5067 ca = opts.ca_pem 5068 5069 # Write CA file 5070 utils.WriteFile(ca_file, data=ca, mode=0400) 5071 5072 cmd = [ 5073 pathutils.IMPORT_EXPORT_DAEMON, 5074 status_file, mode, 5075 "--key=%s" % key_path, 5076 "--cert=%s" % cert_path, 5077 "--ca=%s" % ca_file, 5078 ] 5079 5080 if host: 5081 cmd.append("--host=%s" % host) 5082 5083 if port: 5084 cmd.append("--port=%s" % port) 5085 5086 if opts.ipv6: 5087 cmd.append("--ipv6") 5088 else: 5089 cmd.append("--ipv4") 5090 5091 if opts.compress: 5092 cmd.append("--compress=%s" % opts.compress) 5093 5094 if opts.magic: 5095 cmd.append("--magic=%s" % opts.magic) 5096 5097 if exp_size is not None: 5098 cmd.append("--expected-size=%s" % exp_size) 5099 5100 if cmd_prefix: 5101 cmd.append("--cmd-prefix=%s" % cmd_prefix) 5102 5103 if cmd_suffix: 5104 cmd.append("--cmd-suffix=%s" % cmd_suffix) 5105 5106 if mode == constants.IEM_EXPORT: 5107 # Retry connection a few times when connecting to remote peer 5108 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES) 5109 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT) 5110 elif opts.connect_timeout is not None: 5111 assert mode == constants.IEM_IMPORT 5112 # Overall timeout for establishing connection while listening 5113 cmd.append("--connect-timeout=%s" % opts.connect_timeout) 5114 5115 logfile = _InstanceLogName(prefix, instance.os, instance.name, component) 5116 5117 # TODO: Once _InstanceLogName uses tempfile.mkstemp, StartDaemon has
5118 # support for receiving a file descriptor for output 5119 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file, 5120 output=logfile) 5121 5122 # The import/export name is simply the status directory name 5123 return os.path.basename(status_dir) 5124 5125 except Exception: 5126 shutil.rmtree(status_dir, ignore_errors=True) 5127 raise
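# For illustration, an export might end up starting a daemon command
# line like the following (hypothetical host, port and placeholders;
# assembled exactly as in the code above):
#
#   <IMPORT_EXPORT_DAEMON> <status_file> export \
#     --key=<key_path> --cert=<cert_path> --ca=<status_dir_ca_file> \
#     --host=198.51.100.10 --port=1234 --ipv4 \
#     --cmd-prefix='dd if=<dev_path> bs=<DD_BLOCK_SIZE> count=<size> |' \
#     --connect-retries=<RIE_CONNECT_RETRIES> \
#     --connect-timeout=<RIE_CONNECT_ATTEMPT_TIMEOUT>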
5128
5129 5130 -def GetImportExportStatus(names):
5131 """Returns import/export daemon status. 5132 5133 @type names: sequence 5134 @param names: List of names 5135 @rtype: list of dicts 5136 @return: a list with the state of each named import/export, or C{None} if a 5137 status couldn't be read 5138 5139 """ 5140 result = [] 5141 5142 for name in names: 5143 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name, 5144 _IES_STATUS_FILE) 5145 5146 try: 5147 data = utils.ReadFile(status_file) 5148 except EnvironmentError, err: 5149 if err.errno != errno.ENOENT: 5150 raise 5151 data = None 5152 5153 if not data: 5154 result.append(None) 5155 continue 5156 5157 result.append(serializer.LoadJson(data)) 5158 5159 return result
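# A sketch of the returned structure (hypothetical daemon names; each
# non-None entry is whatever JSON the respective daemon wrote to its
# status file):
#
#   >>> GetImportExportStatus(["export-disk0-<timestamp>-Xa3b2c", "gone"])
#   [{...parsed status fields...}, None]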
5160
5161 5162 -def AbortImportExport(name):
5163 """Sends SIGTERM to a running import/export daemon. 5164 5165 """ 5166 logging.info("Abort import/export %s", name) 5167 5168 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5169 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5170 5171 if pid: 5172 logging.info("Import/export %s is running with PID %s, sending SIGTERM", 5173 name, pid) 5174 utils.IgnoreProcessNotFound(os.kill, pid, signal.SIGTERM)
5175
5176 5177 -def CleanupImportExport(name):
5178 """Cleanup after an import or export. 5179 5180 If the import/export daemon is still running, it's killed. Afterwards the 5181 whole status directory is removed. 5182 5183 """ 5184 logging.info("Finalizing import/export %s", name) 5185 5186 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name) 5187 5188 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE)) 5189 5190 if pid: 5191 logging.info("Import/export %s is still running with PID %s", 5192 name, pid) 5193 utils.KillProcess(pid, waitpid=False) 5194 5195 shutil.rmtree(status_dir, ignore_errors=True)
5196
5197 5198 -def _FindDisks(disks):
5199 """Finds attached L{BlockDev}s for the given disks. 5200 5201 @type disks: list of L{objects.Disk} 5202 @param disks: the disk objects we need to find 5203 5204 @return: list of L{BlockDev} objects or C{None} if a given disk 5205 was not found or was not attached. 5206 5207 """ 5208 bdevs = [] 5209 5210 for disk in disks: 5211 rd = _RecursiveFindBD(disk) 5212 if rd is None: 5213 _Fail("Can't find device %s", disk) 5214 bdevs.append(rd) 5215 return bdevs
5216
5217 5218 -def DrbdDisconnectNet(disks):
5219 """Disconnects the network on a list of drbd devices. 5220 5221 """ 5222 bdevs = _FindDisks(disks) 5223 5224 # disconnect disks 5225 for rd in bdevs: 5226 try: 5227 rd.DisconnectNet() 5228 except errors.BlockDeviceError, err: 5229 _Fail("Can't change network configuration to standalone mode: %s", 5230 err, exc=True)
5231
5232 5233 -def DrbdAttachNet(disks, instance_name, multimaster):
5234 """Attaches the network on a list of drbd devices. 5235 5236 """ 5237 bdevs = _FindDisks(disks) 5238 5239 if multimaster: 5240 for idx, rd in enumerate(bdevs): 5241 try: 5242 _SymlinkBlockDev(instance_name, rd.dev_path, idx) 5243 except EnvironmentError, err: 5244 _Fail("Can't create symlink: %s", err) 5245 # reconnect disks, switch to the new master configuration and, if 5246 # needed, to primary mode 5247 for rd in bdevs: 5248 try: 5249 rd.AttachNet(multimaster) 5250 except errors.BlockDeviceError, err: 5251 _Fail("Can't change network configuration: %s", err) 5252 5253 # wait until the disks are connected; we need to retry the re-attach 5254 # if the device becomes standalone, as this might happen if one 5255 # node disconnects and reconnects in a different mode before the 5256 # other node reconnects; in this case, one or both of the nodes will 5257 # decide it has the wrong configuration and switch to standalone 5258 5259 def _Attach(): 5260 all_connected = True 5261 5262 for rd in bdevs: 5263 stats = rd.GetProcStatus() 5264 5265 if multimaster: 5266 # In the multimaster case we have to wait explicitly until 5267 # the resource is Connected and UpToDate/UpToDate, because 5268 # we promote *both nodes* to primary directly afterwards. 5269 # Being in resync is not enough, since there is a race during which we 5270 # may promote a node with an Outdated disk to primary, effectively 5271 # tearing down the connection. 5272 all_connected = (all_connected and 5273 stats.is_connected and 5274 stats.is_disk_uptodate and 5275 stats.peer_disk_uptodate) 5276 else: 5277 all_connected = (all_connected and 5278 (stats.is_connected or stats.is_in_resync)) 5279 5280 if stats.is_standalone: 5281 # peer had different config info and this node became 5282 # standalone, even though this should not happen with the 5283 # new staged way of changing disk configs 5284 try: 5285 rd.AttachNet(multimaster) 5286 except errors.BlockDeviceError, err: 5287 _Fail("Can't change network configuration: %s", err) 5288 5289 if not all_connected: 5290 raise utils.RetryAgain()
5291 5292 try: 5293 # Start with a delay of 100 milliseconds and go up to 5 seconds 5294 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60) 5295 except utils.RetryTimeout: 5296 _Fail("Timeout in disk reconnecting") 5297 5298 if multimaster: 5299 # change to primary mode 5300 for rd in bdevs: 5301 try: 5302 rd.Open() 5303 except errors.BlockDeviceError, err: 5304 _Fail("Can't change to primary mode: %s", err) 5305
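# A minimal sketch of the utils.Retry contract assumed by the call
# above, where a (start, factor, limit) tuple yields a geometrically
# growing delay; this is an illustration only, not the real
# implementation:
#
#   def _RetrySketch(fn, delay_spec, timeout, _time=time.time,
#                    _sleep=time.sleep):
#     (delay, factor, limit) = delay_spec
#     deadline = _time() + timeout
#     while True:
#       try:
#         return fn()                      # returns when fn succeeds
#       except utils.RetryAgain:
#         if _time() + delay > deadline:
#           raise utils.RetryTimeout()     # give up after the timeout
#         _sleep(delay)
#         delay = min(delay * factor, limit)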
5306 5307 -def DrbdWaitSync(disks):
5308 """Wait until DRBDs have synchronized. 5309 5310 """ 5311 def _helper(rd): 5312 stats = rd.GetProcStatus() 5313 if not (stats.is_connected or stats.is_in_resync): 5314 raise utils.RetryAgain() 5315 return stats
5316 5317 bdevs = _FindDisks(disks) 5318 5319 min_resync = 100 5320 alldone = True 5321 for rd in bdevs: 5322 try: 5323 # poll each second for 15 seconds 5324 stats = utils.Retry(_helper, 1, 15, args=[rd]) 5325 except utils.RetryTimeout: 5326 stats = rd.GetProcStatus() 5327 # last check 5328 if not (stats.is_connected or stats.is_in_resync): 5329 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats) 5330 alldone = alldone and (not stats.is_in_resync) 5331 if stats.sync_percent is not None: 5332 min_resync = min(min_resync, stats.sync_percent) 5333 5334 return (alldone, min_resync) 5335
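# Example return values: (True, 100) when every device is fully
# synchronized, or e.g. (False, 87.3) when at least one device is
# still resyncing and the slowest one reports 87.3 percent done.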
5336 5337 -def DrbdNeedsActivation(disks):
5338 """Checks which of the passed disks needs activation and returns their UUIDs. 5339 5340 """ 5341 faulty_disks = [] 5342 5343 for disk in disks: 5344 rd = _RecursiveFindBD(disk) 5345 if rd is None: 5346 faulty_disks.append(disk) 5347 continue 5348 5349 stats = rd.GetProcStatus() 5350 if stats.is_standalone or stats.is_diskless: 5351 faulty_disks.append(disk) 5352 5353 return [disk.uuid for disk in faulty_disks]
5354
5355 5356 -def GetDrbdUsermodeHelper():
5357 """Returns DRBD usermode helper currently configured. 5358 5359 """ 5360 try: 5361 return drbd.DRBD8.GetUsermodeHelper() 5362 except errors.BlockDeviceError, err: 5363 _Fail(str(err))
5364
5365 5366 -def PowercycleNode(hypervisor_type, hvparams=None):
5367 """Hard-powercycle the node. 5368 5369 Because we need to return first and schedule the powercycle in the 5370 background, we won't be able to report failures nicely. 5371 5372 """ 5373 hyper = hypervisor.GetHypervisor(hypervisor_type) 5374 try: 5375 pid = os.fork() 5376 except OSError: 5377 # if we can't fork, we'll pretend that we're in the child process 5378 pid = 0 5379 if pid > 0: 5380 return "Reboot scheduled in 5 seconds" 5381 # ensure the child is running in RAM 5382 try: 5383 utils.Mlockall() 5384 except Exception: # pylint: disable=W0703 5385 pass 5386 time.sleep(5) 5387 hyper.PowercycleNode(hvparams=hvparams)
5388
5389 5390 -def _VerifyRestrictedCmdName(cmd):
5391 """Verifies a restricted command name. 5392 5393 @type cmd: string 5394 @param cmd: Command name 5395 @rtype: tuple; (boolean, string or None) 5396 @return: The tuple's first element is the status; if C{False}, the second 5397 element is an error message string, otherwise it's C{None} 5398 5399 """ 5400 if not cmd.strip(): 5401 return (False, "Missing command name") 5402 5403 if os.path.basename(cmd) != cmd: 5404 return (False, "Invalid command name") 5405 5406 if not constants.EXT_PLUGIN_MASK.match(cmd): 5407 return (False, "Command name contains forbidden characters") 5408 5409 return (True, None)
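# Illustrative checks, assuming constants.EXT_PLUGIN_MASK accepts
# plain alphanumeric/dash names:
#
#   >>> _VerifyRestrictedCmdName("  ")
#   (False, "Missing command name")
#   >>> _VerifyRestrictedCmdName("sub/cmd")
#   (False, "Invalid command name")
#   >>> _VerifyRestrictedCmdName("list-disks")
#   (True, None)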
5410
5411 5412 -def _CommonRestrictedCmdCheck(path, owner):
5413 """Common checks for restricted command file system directories and files. 5414 5415 @type path: string 5416 @param path: Path to check 5417 @param owner: C{None} or tuple containing UID and GID 5418 @rtype: tuple; (boolean, string or C{os.stat} result) 5419 @return: The tuple's first element is the status; if C{False}, the second 5420 element is an error message string, otherwise it's the result of C{os.stat} 5421 5422 """ 5423 if owner is None: 5424 # Default to root as owner 5425 owner = (0, 0) 5426 5427 try: 5428 st = os.stat(path) 5429 except EnvironmentError, err: 5430 return (False, "Can't stat(2) '%s': %s" % (path, err)) 5431 5432 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE): 5433 return (False, "Permissions on '%s' are too permissive" % path) 5434 5435 if (st.st_uid, st.st_gid) != owner: 5436 (owner_uid, owner_gid) = owner 5437 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid)) 5438 5439 return (True, st)
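# A worked example of the mode test above, assuming _RCMD_MAX_MODE is
# 0755 (its exact value is defined elsewhere in this module): mode 0775
# sets a bit outside the mask (0775 & ~0755 == 0020), so such a path is
# rejected as too permissive, whereas 0750 passes (0750 & ~0755 == 0).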
5440
5441 5442 -def _VerifyRestrictedCmdDirectory(path, _owner=None):
5443 """Verifies restricted command directory. 5444 5445 @type path: string 5446 @param path: Path to check 5447 @rtype: tuple; (boolean, string or None) 5448 @return: The tuple's first element is the status; if C{False}, the second 5449 element is an error message string, otherwise it's C{None} 5450 5451 """ 5452 (status, value) = _CommonRestrictedCmdCheck(path, _owner) 5453 5454 if not status: 5455 return (False, value) 5456 5457 if not stat.S_ISDIR(value.st_mode): 5458 return (False, "Path '%s' is not a directory" % path) 5459 5460 return (True, None)
5461
5462 5463 -def _VerifyRestrictedCmd(path, cmd, _owner=None):
5464 """Verifies a whole restricted command and returns its executable filename. 5465 5466 @type path: string 5467 @param path: Directory containing restricted commands 5468 @type cmd: string 5469 @param cmd: Command name 5470 @rtype: tuple; (boolean, string) 5471 @return: The tuple's first element is the status; if C{False}, the second 5472 element is an error message string, otherwise the second element is the 5473 absolute path to the executable 5474 5475 """ 5476 executable = utils.PathJoin(path, cmd) 5477 5478 (status, msg) = _CommonRestrictedCmdCheck(executable, _owner) 5479 5480 if not status: 5481 return (False, msg) 5482 5483 if not utils.IsExecutable(executable): 5484 return (False, "access(2) thinks '%s' can't be executed" % executable) 5485 5486 return (True, executable)
5487
5488 5489 -def _PrepareRestrictedCmd(path, cmd, 5490 _verify_dir=_VerifyRestrictedCmdDirectory, 5491 _verify_name=_VerifyRestrictedCmdName, 5492 _verify_cmd=_VerifyRestrictedCmd):
5493 """Performs a number of tests on a restricted command. 5494 5495 @type path: string 5496 @param path: Directory containing restricted commands 5497 @type cmd: string 5498 @param cmd: Command name 5499 @return: Same as L{_VerifyRestrictedCmd} 5500 5501 """ 5502 # Verify the directory first 5503 (status, msg) = _verify_dir(path) 5504 if status: 5505 # Check command if everything was alright 5506 (status, msg) = _verify_name(cmd) 5507 5508 if not status: 5509 return (False, msg) 5510 5511 # Check actual executable 5512 return _verify_cmd(path, cmd)
5513
5514 5515 -def RunRestrictedCmd(cmd, 5516 _lock_timeout=_RCMD_LOCK_TIMEOUT, 5517 _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE, 5518 _path=pathutils.RESTRICTED_COMMANDS_DIR, 5519 _sleep_fn=time.sleep, 5520 _prepare_fn=_PrepareRestrictedCmd, 5521 _runcmd_fn=utils.RunCmd, 5522 _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
5523 """Executes a restricted command after performing strict tests. 5524 5525 @type cmd: string 5526 @param cmd: Command name 5527 @rtype: string 5528 @return: Command output 5529 @raise RPCFail: In case of an error 5530 5531 """ 5532 logging.info("Preparing to run restricted command '%s'", cmd) 5533 5534 if not _enabled: 5535 _Fail("Restricted commands disabled at configure time") 5536 5537 lock = None 5538 try: 5539 cmdresult = None 5540 try: 5541 lock = utils.FileLock.Open(_lock_file) 5542 lock.Exclusive(blocking=True, timeout=_lock_timeout) 5543 5544 (status, value) = _prepare_fn(_path, cmd) 5545 5546 if status: 5547 cmdresult = _runcmd_fn([value], env={}, reset_env=True, 5548 postfork_fn=lambda _: lock.Unlock()) 5549 else: 5550 logging.error(value) 5551 except Exception: # pylint: disable=W0703 5552 # Keep original error in log 5553 logging.exception("Caught exception") 5554 5555 if cmdresult is None: 5556 logging.info("Sleeping for %0.1f seconds before returning", 5557 _RCMD_INVALID_DELAY) 5558 _sleep_fn(_RCMD_INVALID_DELAY) 5559 5560 # Do not include original error message in returned error 5561 _Fail("Executing command '%s' failed" % cmd) 5562 elif cmdresult.failed or cmdresult.fail_reason: 5563 _Fail("Restricted command '%s' failed: %s; output: %s", 5564 cmd, cmdresult.fail_reason, cmdresult.output) 5565 else: 5566 return cmdresult.output 5567 finally: 5568 if lock is not None: 5569 # Release lock at last 5570 lock.Close() 5571 lock = None
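# Illustrative call (hypothetical command name): assuming "collect_logs"
# exists in the restricted commands directory and passes all checks, its
# standard output is returned; any verification failure first sleeps for
# _RCMD_INVALID_DELAY seconds and then raises RPCFail without revealing
# which check failed:
#
#   >>> RunRestrictedCmd("collect_logs")
#   '...command output...'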
5572
5573 5574 -def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):
5575 """Creates or removes the watcher pause file. 5576 5577 @type until: None or number 5578 @param until: Unix timestamp saying until when the watcher shouldn't run 5579 5580 """ 5581 if until is None: 5582 logging.info("Received request to no longer pause watcher") 5583 utils.RemoveFile(_filename) 5584 else: 5585 logging.info("Received request to pause watcher until %s", until) 5586 5587 if not ht.TNumber(until): 5588 _Fail("Duration must be numeric") 5589 5590 utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)
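# Example calls (hypothetical timestamp):
#
#   >>> SetWatcherPause(time.time() + 1800)   # pause for 30 minutes
#   >>> SetWatcherPause(None)                 # remove the pause file again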
5591
5592 5593 -def ConfigureOVS(ovs_name, ovs_link):
5594 """Creates an OpenvSwitch on the node. 5595 5596 This function sets up an OpenvSwitch on the node with the given name and 5597 connects it via a given Ethernet device. 5598 5599 @type ovs_name: string 5600 @param ovs_name: Name of the OpenvSwitch to create. 5601 @type ovs_link: None or string 5602 @param ovs_link: Ethernet device for outside connection (can be missing) 5603 5604 """ 5605 # Initialize the OpenvSwitch 5606 result = utils.RunCmd(["ovs-vsctl", "add-br", ovs_name]) 5607 if result.failed: 5608 _Fail("Failed to create openvswitch. Script return value: %s, output: '%s'" 5609 % (result.exit_code, result.output), log=True) 5610 5611 # And connect it to a physical interface, if given 5612 if ovs_link: 5613 result = utils.RunCmd(["ovs-vsctl", "add-port", ovs_name, ovs_link]) 5614 if result.failed: 5615 _Fail("Failed to connect openvswitch to interface %s. Script return" 5616 " value: %s, output: '%s'" % (ovs_link, result.exit_code, 5617 result.output), log=True)
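# The function above is equivalent to running, e.g. (hypothetical switch
# and interface names):
#
#   ovs-vsctl add-br switch1
#   ovs-vsctl add-port switch1 eth0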
5618
5619 5620 -def GetFileInfo(file_path):
5621 """Checks if a file exists and returns information related to it. 5622 5623 Currently returned information: 5624 - file size: int, size in bytes 5625 5626 @type file_path: string 5627 @param file_path: Name of file to examine. 5628 5629 @rtype: tuple of bool, dict 5630 @return: Whether the file exists, and a dictionary of information about the 5631 file gathered by os.stat. 5632 5633 """ 5634 try: 5635 stat_info = os.stat(file_path) 5636 values_dict = { 5637 constants.STAT_SIZE: stat_info.st_size, 5638 } 5639 return True, values_dict 5640 except EnvironmentError: # os.stat raises OSError, which IOError alone would miss 5641 return False, {}
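# Example results (hypothetical path and size; the dict key is shown
# symbolically):
#
#   >>> GetFileInfo("/etc/hostname")
#   (True, {constants.STAT_SIZE: 13})
#   >>> GetFileInfo("/does/not/exist")
#   (False, {})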
5642
5643 5644 -class HooksRunner(object):
5645 """Hook runner. 5646 5647 This class is instantiated on the node side (ganeti-noded) and not 5648 on the master side. 5649 5650 """
5651 - def __init__(self, hooks_base_dir=None):
5652 """Constructor for hooks runner. 5653 5654 @type hooks_base_dir: str or None 5655 @param hooks_base_dir: if not None, this overrides the 5656 L{pathutils.HOOKS_BASE_DIR} (useful for unittests) 5657 5658 """ 5659 if hooks_base_dir is None: 5660 hooks_base_dir = pathutils.HOOKS_BASE_DIR 5661 # yeah, _BASE_DIR is not valid for attributes, we use it like a 5662 # constant 5663 self._BASE_DIR = hooks_base_dir # pylint: disable=C0103
5664
5665 - def RunLocalHooks(self, node_list, hpath, phase, env):
5666 """Check that the hooks will be run only locally and then run them. 5667 5668 """ 5669 assert len(node_list) == 1 5670 node = node_list[0] 5671 _, myself = ssconf.GetMasterAndMyself() 5672 assert node == myself 5673 5674 results = self.RunHooks(hpath, phase, env) 5675 5676 # Return values in the form expected by HooksMaster 5677 return {node: (None, False, results)}
5678
5679 - def RunHooks(self, hpath, phase, env):
5680 """Run the scripts in the hooks directory. 5681 5682 @type hpath: str 5683 @param hpath: the path to the hooks directory which 5684 holds the scripts 5685 @type phase: str 5686 @param phase: either L{constants.HOOKS_PHASE_PRE} or 5687 L{constants.HOOKS_PHASE_POST} 5688 @type env: dict 5689 @param env: dictionary with the environment for the hook 5690 @rtype: list 5691 @return: list of 3-element tuples: 5692 - script path 5693 - script result, either L{constants.HKR_SUCCESS} or 5694 L{constants.HKR_FAIL} 5695 - output of the script 5696 5697 @raise errors.ProgrammerError: for invalid input 5698 parameters 5699 5700 """ 5701 if phase == constants.HOOKS_PHASE_PRE: 5702 suffix = "pre" 5703 elif phase == constants.HOOKS_PHASE_POST: 5704 suffix = "post" 5705 else: 5706 _Fail("Unknown hooks phase '%s'", phase) 5707 5708 subdir = "%s-%s.d" % (hpath, suffix) 5709 dir_name = utils.PathJoin(self._BASE_DIR, subdir) 5710 5711 results = [] 5712 5713 if not os.path.isdir(dir_name): 5714 # for non-existing/non-dirs, we simply exit instead of logging a 5715 # warning at every operation 5716 return results 5717 5718 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True) 5719 5720 for (relname, relstatus, runresult) in runparts_results: 5721 if relstatus == constants.RUNPARTS_SKIP: 5722 rrval = constants.HKR_SKIP 5723 output = "" 5724 elif relstatus == constants.RUNPARTS_ERR: 5725 rrval = constants.HKR_FAIL 5726 output = "Hook script execution error: %s" % runresult 5727 elif relstatus == constants.RUNPARTS_RUN: 5728 if runresult.failed: 5729 rrval = constants.HKR_FAIL 5730 else: 5731 rrval = constants.HKR_SUCCESS 5732 output = utils.SafeEncode(runresult.output.strip()) 5733 results.append(("%s/%s" % (subdir, relname), rrval, output)) 5734 5735 return results
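# A sketch of a possible result list for hpath="instance-start" and the
# pre phase (hypothetical script names):
#
#   [("instance-start-pre.d/10-check-config", constants.HKR_SUCCESS,
#     "config ok"),
#    ("instance-start-pre.d/20-optional", constants.HKR_SKIP, "")]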
5736
5737 5738 -class IAllocatorRunner(object):
5739 """IAllocator runner. 5740 5741 This class is instantiated on the node side (ganeti-noded) and not on 5742 the master side. 5743 5744 """ 5745 @staticmethod
5746 - def Run(name, idata, ial_params):
5747 """Run an iallocator script. 5748 5749 @type name: str 5750 @param name: the iallocator script name 5751 @type idata: str 5752 @param idata: the allocator input data 5753 @type ial_params: list 5754 @param ial_params: the iallocator parameters 5755 5756 @rtype: str 5757 @return: the stdout of the allocator script 5758 5759 @raise RPCFail: if the script can't be found or fails 5760 5761 """ 5762 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH, 5763 os.path.isfile) 5764 if alloc_script is None: 5765 _Fail("iallocator module '%s' not found in the search path", name) 5766 5767 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.") 5768 try: 5769 os.write(fd, idata) 5770 os.close(fd) 5771 result = utils.RunCmd([alloc_script, fin_name] + ial_params) 5772 if result.failed: 5773 _Fail("iallocator module '%s' failed: %s, output '%s'", 5774 name, result.fail_reason, result.output) 5775 finally: 5776 os.unlink(fin_name) 5777 5778 return result.stdout
5779
5780 5781 -class DevCacheManager(object):
5782 """Simple class for managing a cache of block device information. 5783 5784 """ 5785 _DEV_PREFIX = "/dev/" 5786 _ROOT_DIR = pathutils.BDEV_CACHE_DIR 5787 5788 @classmethod
5789 - def _ConvertPath(cls, dev_path):
5790 """Converts a /dev/name path to the cache file name. 5791 5792 This replaces slashes with underscores and strips the /dev 5793 prefix. It then returns the full path to the cache file. 5794 5795 @type dev_path: str 5796 @param dev_path: the C{/dev/} path name 5797 @rtype: str 5798 @return: the converted path name 5799 5800 """ 5801 if dev_path.startswith(cls._DEV_PREFIX): 5802 dev_path = dev_path[len(cls._DEV_PREFIX):] 5803 dev_path = dev_path.replace("/", "_") 5804 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path) 5805 return fpath
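# Example conversion (the cache root is pathutils.BDEV_CACHE_DIR, shown
# here as a placeholder):
#
#   >>> DevCacheManager._ConvertPath("/dev/xenvg/instance1-disk0")
#   '<BDEV_CACHE_DIR>/bdev_xenvg_instance1-disk0'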
5806 5807 @classmethod
5808 - def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
5809 """Updates the cache information for a given device. 5810 5811 @type dev_path: str 5812 @param dev_path: the pathname of the device 5813 @type owner: str 5814 @param owner: the owner (instance name) of the device 5815 @type on_primary: bool 5816 @param on_primary: whether this is the primary 5817 node or not 5818 @type iv_name: str 5819 @param iv_name: the instance-visible name of the 5820 device, as in objects.Disk.iv_name 5821 5822 @rtype: None 5823 5824 """ 5825 if dev_path is None: 5826 logging.error("DevCacheManager.UpdateCache got a None dev_path") 5827 return 5828 fpath = cls._ConvertPath(dev_path) 5829 if on_primary: 5830 state = "primary" 5831 else: 5832 state = "secondary" 5833 if iv_name is None: 5834 iv_name = "not_visible" 5835 fdata = "%s %s %s\n" % (str(owner), state, iv_name) 5836 try: 5837 utils.WriteFile(fpath, data=fdata) 5838 except EnvironmentError, err: 5839 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
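# The cache file then contains a single line in the format built above,
# e.g. (hypothetical instance and device names):
#
#   instance1.example.com primary sda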
5840 5841 @classmethod
5842 - def RemoveCache(cls, dev_path):
5843 """Remove data for a dev_path. 5844 5845 This is just a wrapper over L{utils.io.RemoveFile} with a converted 5846 path name and logging. 5847 5848 @type dev_path: str 5849 @param dev_path: the pathname of the device 5850 5851 @rtype: None 5852 5853 """ 5854 if dev_path is None: 5855 logging.error("DevCacheManager.RemoveCache got a None dev_path") 5856 return 5857 fpath = cls._ConvertPath(dev_path) 5858 try: 5859 utils.RemoveFile(fpath) 5860 except EnvironmentError, err: 5861 logging.exception("Can't remove bdev cache for %s: %s", dev_path, err)
5862