1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Functions used by the node daemon
32
33 @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
34 the L{UploadFile} function
35 @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
36 in the L{_CleanDirectory} function
37
38 """
39
40
41
42
43
44
45
46
47
48
49 import base64
50 import errno
51 import logging
52 import os
53 import os.path
54 import pycurl
55 import random
56 import re
57 import shutil
58 import signal
59 import socket
60 import stat
61 import tempfile
62 import time
63 import zlib
64 import copy
65
66 from ganeti import errors
67 from ganeti import http
68 from ganeti import utils
69 from ganeti import ssh
70 from ganeti import hypervisor
71 from ganeti.hypervisor import hv_base
72 from ganeti import constants
73 from ganeti.storage import bdev
74 from ganeti.storage import drbd
75 from ganeti.storage import extstorage
76 from ganeti.storage import filestorage
77 from ganeti import objects
78 from ganeti import ssconf
79 from ganeti import serializer
80 from ganeti import netutils
81 from ganeti import runtime
82 from ganeti import compat
83 from ganeti import pathutils
84 from ganeti import vcluster
85 from ganeti import ht
86 from ganeti.storage.base import BlockDev
87 from ganeti.storage.drbd import DRBD8
88 from ganeti import hooksmaster
89 from ganeti.rpc import transport
90 from ganeti.rpc.errors import NoMasterError, TimeoutError
91
92
93 _BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
94 _ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
95 pathutils.DATA_DIR,
96 pathutils.JOB_QUEUE_ARCHIVE_DIR,
97 pathutils.QUEUE_DIR,
98 pathutils.CRYPTO_KEYS_DIR,
99 ])
100 _MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
101 _X509_KEY_FILE = "key"
102 _X509_CERT_FILE = "cert"
103 _IES_STATUS_FILE = "status"
104 _IES_PID_FILE = "pid"
105 _IES_CA_FILE = "ca"
106
107
108 _LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")
109
110
111 _MASTER_START = "start"
112 _MASTER_STOP = "stop"
113
114
115 _RCMD_MAX_MODE = (stat.S_IRWXU |
116 stat.S_IRGRP | stat.S_IXGRP |
117 stat.S_IROTH | stat.S_IXOTH)
118
119
120 _RCMD_INVALID_DELAY = 10
121
122
123
124
125 _RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8
129 """Class denoting RPC failure.
130
131 Its argument is the error message.
132
133 """
134
137 """Path of the file containing the reason of the instance status change.
138
139 @type instance_name: string
140 @param instance_name: The name of the instance
141 @rtype: string
142 @return: The path of the file
143
144 """
145 return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
146
149 """Serialize a reason trail related to an instance change of state to file.
150
151 The exact location of the file depends on the name of the instance and on
152 the configuration of the Ganeti cluster defined at deploy time.
153
154 @type instance_name: string
155 @param instance_name: The name of the instance
156
157 @type trail: list of reasons
158 @param trail: reason trail
159
160 @rtype: None
161
162 """
163 json = serializer.DumpJson(trail)
164 filename = _GetInstReasonFilename(instance_name)
165 utils.WriteFile(filename, data=json)
166
167
def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception; %-interpolated with C{args} if any
  @keyword log: if False, skip logging entirely (defaults to True)
  @keyword exc: if true, log with traceback via C{logging.exception}
  @raise RPCFail: always

  """
  if args:
    msg = msg % args
  if kwargs.get("log", True):
    if kwargs.get("exc"):
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)
189
192 """Simple wrapper to return a SimpleStore.
193
194 @rtype: L{ssconf.SimpleStore}
195 @return: a SimpleStore instance
196
197 """
198 return ssconf.SimpleStore()
199
202 """Simple wrapper to return an SshRunner.
203
204 @type cluster_name: str
205 @param cluster_name: the cluster name, which is needed
206 by the SshRunner constructor
207 @rtype: L{ssh.SshRunner}
208 @return: an SshRunner instance
209
210 """
211 return ssh.SshRunner(cluster_name)
212
215 """Unpacks data compressed by the RPC client.
216
217 @type data: list or tuple
218 @param data: Data sent by RPC client
219 @rtype: str
220 @return: Decompressed data
221
222 """
223 assert isinstance(data, (list, tuple))
224 assert len(data) == 2
225 (encoding, content) = data
226 if encoding == constants.RPC_ENCODING_NONE:
227 return content
228 elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
229 return zlib.decompress(base64.b64decode(content))
230 else:
231 raise AssertionError("Unknown data encoding")
232
235 """Removes all regular files in a directory.
236
237 @type path: str
238 @param path: the directory to clean
239 @type exclude: list
240 @param exclude: list of files to be excluded, defaults
241 to the empty list
242
243 """
244 if path not in _ALLOWED_CLEAN_DIRS:
245 _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
246 path)
247
248 if not os.path.isdir(path):
249 return
250 if exclude is None:
251 exclude = []
252 else:
253
254 exclude = [os.path.normpath(i) for i in exclude]
255
256 for rel_name in utils.ListVisibleFiles(path):
257 full_name = utils.PathJoin(path, rel_name)
258 if full_name in exclude:
259 continue
260 if os.path.isfile(full_name) and not os.path.islink(full_name):
261 utils.RemoveFile(full_name)
262
265 """Build the list of allowed upload files.
266
267 This is abstracted so that it's built only once at module import time.
268
269 """
270 allowed_files = set([
271 pathutils.CLUSTER_CONF_FILE,
272 pathutils.ETC_HOSTS,
273 pathutils.SSH_KNOWN_HOSTS_FILE,
274 pathutils.VNC_PASSWORD_FILE,
275 pathutils.RAPI_CERT_FILE,
276 pathutils.SPICE_CERT_FILE,
277 pathutils.SPICE_CACERT_FILE,
278 pathutils.RAPI_USERS_FILE,
279 pathutils.CONFD_HMAC_KEY,
280 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
281 ])
282
283 for hv_name in constants.HYPER_TYPES:
284 hv_class = hypervisor.GetHypervisorClass(hv_name)
285 allowed_files.update(hv_class.GetAncillaryFiles()[0])
286
287 assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
288 "Allowed file storage paths should never be uploaded via RPC"
289
290 return frozenset(allowed_files)
291
292
293 _ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
305
308 """Returns the master node name.
309
310 @rtype: string
311 @return: name of the master node
312 @raise RPCFail: in case of errors
313
314 """
315 try:
316 return _GetConfig().GetMasterNode()
317 except errors.ConfigurationError, err:
318 _Fail("Cluster configuration incomplete: %s", err, exc=True)
319
322 """Decorator that runs hooks before and after the decorated function.
323
324 @type hook_opcode: string
325 @param hook_opcode: opcode of the hook
326 @type hooks_path: string
327 @param hooks_path: path of the hooks
328 @type env_builder_fn: function
329 @param env_builder_fn: function that returns a dictionary containing the
330 environment variables for the hooks. Will get all the parameters of the
331 decorated function.
332 @raise RPCFail: in case of pre-hook failure
333
334 """
335 def decorator(fn):
336 def wrapper(*args, **kwargs):
337 _, myself = ssconf.GetMasterAndMyself()
338 nodes = ([myself], [myself])
339
340 env_fn = compat.partial(env_builder_fn, *args, **kwargs)
341
342 cfg = _GetConfig()
343 hr = HooksRunner()
344 hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
345 hr.RunLocalHooks, None, env_fn, None,
346 logging.warning, cfg.GetClusterName(),
347 cfg.GetMasterNode())
348 hm.RunPhase(constants.HOOKS_PHASE_PRE)
349 result = fn(*args, **kwargs)
350 hm.RunPhase(constants.HOOKS_PHASE_POST)
351
352 return result
353 return wrapper
354 return decorator
355
358 """Builds environment variables for master IP hooks.
359
360 @type master_params: L{objects.MasterNetworkParameters}
361 @param master_params: network parameters of the master
362 @type use_external_mip_script: boolean
363 @param use_external_mip_script: whether to use an external master IP
364 address setup script (unused, but necessary per the implementation of the
365 _RunLocalHooks decorator)
366
367 """
368
369 ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
370 env = {
371 "MASTER_NETDEV": master_params.netdev,
372 "MASTER_IP": master_params.ip,
373 "MASTER_NETMASK": str(master_params.netmask),
374 "CLUSTER_IP_VERSION": str(ver),
375 }
376
377 return env
378
381 """Execute the master IP address setup script.
382
383 @type master_params: L{objects.MasterNetworkParameters}
384 @param master_params: network parameters of the master
385 @type action: string
386 @param action: action to pass to the script. Must be one of
387 L{backend._MASTER_START} or L{backend._MASTER_STOP}
388 @type use_external_mip_script: boolean
389 @param use_external_mip_script: whether to use an external master IP
390 address setup script
391 @raise backend.RPCFail: if there are errors during the execution of the
392 script
393
394 """
395 env = _BuildMasterIpEnv(master_params)
396
397 if use_external_mip_script:
398 setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
399 else:
400 setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT
401
402 result = utils.RunCmd([setup_script, action], env=env, reset_env=True)
403
404 if result.failed:
405 _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
406 (action, result.exit_code, result.output), log=True)
407
408
409 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
410 _BuildMasterIpEnv)
412 """Activate the IP address of the master daemon.
413
414 @type master_params: L{objects.MasterNetworkParameters}
415 @param master_params: network parameters of the master
416 @type use_external_mip_script: boolean
417 @param use_external_mip_script: whether to use an external master IP
418 address setup script
419 @raise RPCFail: in case of errors during the IP startup
420
421 """
422 _RunMasterSetupScript(master_params, _MASTER_START,
423 use_external_mip_script)
424
427 """Activate local node as master node.
428
429 The function will start the master daemons (ganeti-masterd and ganeti-rapi).
430
431 @type no_voting: boolean
432 @param no_voting: whether to start ganeti-masterd without a node vote
433 but still non-interactively
434 @rtype: None
435
436 """
437
438 if no_voting:
439 daemon_args = "--no-voting --yes-do-it"
440 else:
441 daemon_args = ""
442
443 env = {
444 "EXTRA_LUXID_ARGS": daemon_args,
445 "EXTRA_WCONFD_ARGS": daemon_args,
446 }
447
448 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
449 if result.failed:
450 msg = "Can't start Ganeti master: %s" % result.output
451 logging.error(msg)
452 _Fail(msg)
453
454
455 @RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
456 _BuildMasterIpEnv)
458 """Deactivate the master IP on this node.
459
460 @type master_params: L{objects.MasterNetworkParameters}
461 @param master_params: network parameters of the master
462 @type use_external_mip_script: boolean
463 @param use_external_mip_script: whether to use an external master IP
464 address setup script
465 @raise RPCFail: in case of errors during the IP turndown
466
467 """
468 _RunMasterSetupScript(master_params, _MASTER_STOP,
469 use_external_mip_script)
470
473 """Stop the master daemons on this node.
474
475 Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.
476
477 @rtype: None
478
479 """
480
481
482
483 result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
484 if result.failed:
485 logging.error("Could not stop Ganeti master, command %s had exitcode %s"
486 " and error %s",
487 result.cmd, result.exit_code, result.output)
488
491 """Change the netmask of the master IP.
492
493 @param old_netmask: the old value of the netmask
494 @param netmask: the new value of the netmask
495 @param master_ip: the master IP
496 @param master_netdev: the master network device
497
498 """
499 if old_netmask == netmask:
500 return
501
502 if not netutils.IPAddress.Own(master_ip):
503 _Fail("The master IP address is not up, not attempting to change its"
504 " netmask")
505
506 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
507 "%s/%s" % (master_ip, netmask),
508 "dev", master_netdev, "label",
509 "%s:0" % master_netdev])
510 if result.failed:
511 _Fail("Could not set the new netmask on the master IP address")
512
513 result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
514 "%s/%s" % (master_ip, old_netmask),
515 "dev", master_netdev, "label",
516 "%s:0" % master_netdev])
517 if result.failed:
518 _Fail("Could not bring down the master IP address with the old netmask")
519
522 """Modify a host entry in /etc/hosts.
523
524 @param mode: The mode to operate. Either add or remove entry
525 @param host: The host to operate on
526 @param ip: The ip associated with the entry
527
528 """
529 if mode == constants.ETC_HOSTS_ADD:
530 if not ip:
531 RPCFail("Mode 'add' needs 'ip' parameter, but parameter not"
532 " present")
533 utils.AddHostToEtcHosts(host, ip)
534 elif mode == constants.ETC_HOSTS_REMOVE:
535 if ip:
536 RPCFail("Mode 'remove' does not allow 'ip' parameter, but"
537 " parameter is present")
538 utils.RemoveHostFromEtcHosts(host)
539 else:
540 RPCFail("Mode not supported")
541
586
589 """Performs sanity checks for storage parameters.
590
591 @type params: list
592 @param params: list of storage parameters
593 @type num_params: int
594 @param num_params: expected number of parameters
595
596 """
597 if params is None:
598 raise errors.ProgrammerError("No storage parameters for storage"
599 " reporting is provided.")
600 if not isinstance(params, list):
601 raise errors.ProgrammerError("The storage parameters are not of type"
602 " list: '%s'" % params)
603 if not len(params) == num_params:
604 raise errors.ProgrammerError("Did not receive the expected number of"
605 "storage parameters: expected %s,"
606 " received '%s'" % (num_params, len(params)))
607
610 """Performs sanity check for the 'exclusive storage' flag.
611
612 @see: C{_CheckStorageParams}
613
614 """
615 _CheckStorageParams(params, 1)
616 excl_stor = params[0]
617 if not isinstance(params[0], bool):
618 raise errors.ProgrammerError("Exclusive storage parameter is not"
619 " boolean: '%s'." % excl_stor)
620 return excl_stor
621
624 """Wrapper around C{_GetVgInfo} which checks the storage parameters.
625
626 @type name: string
627 @param name: name of the volume group
628 @type params: list
629 @param params: list of storage parameters, which in this case should be
630 containing only one for exclusive storage
631
632 """
633 excl_stor = _CheckLvmStorageParams(params)
634 return _GetVgInfo(name, excl_stor)
635
639 """Retrieves information about a LVM volume group.
640
641 """
642
643 vginfo = info_fn([name], excl_stor)
644 if vginfo:
645 vg_free = int(round(vginfo[0][0], 0))
646 vg_size = int(round(vginfo[0][1], 0))
647 else:
648 vg_free = None
649 vg_size = None
650
651 return {
652 "type": constants.ST_LVM_VG,
653 "name": name,
654 "storage_free": vg_free,
655 "storage_size": vg_size,
656 }
657
667
671 """Retrieves information about spindles in an LVM volume group.
672
673 @type name: string
674 @param name: VG name
675 @type excl_stor: bool
676 @param excl_stor: exclusive storage
677 @rtype: dict
678 @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name,
679 free spindles, total spindles respectively
680
681 """
682 if excl_stor:
683 (vg_free, vg_size) = info_fn(name)
684 else:
685 vg_free = 0
686 vg_size = 0
687 return {
688 "type": constants.ST_LVM_PV,
689 "name": name,
690 "storage_free": vg_free,
691 "storage_size": vg_size,
692 }
693
696 """Retrieves node information from a hypervisor.
697
698 The information returned depends on the hypervisor. Common items:
699
700 - vg_size is the size of the configured volume group in MiB
701 - vg_free is the free size of the volume group in MiB
702 - memory_dom0 is the memory allocated for domain0 in MiB
703 - memory_free is the currently available (free) ram in MiB
704 - memory_total is the total number of ram in MiB
705 - hv_version: the hypervisor version, if available
706
707 @type hvparams: dict of string
708 @param hvparams: the hypervisor's hvparams
709
710 """
711 return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)
712
715 """Retrieves node information for all hypervisors.
716
717 See C{_GetHvInfo} for information on the output.
718
719 @type hv_specs: list of pairs (string, dict of strings)
720 @param hv_specs: list of pairs of a hypervisor's name and its hvparams
721
722 """
723 if hv_specs is None:
724 return None
725
726 result = []
727 for hvname, hvparams in hv_specs:
728 result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
729 return result
730
733 """Calls C{fn} for all names in C{names} and returns a dictionary.
734
735 @rtype: None or dict
736
737 """
738 if names is None:
739 return None
740 else:
741 return map(fn, names)
742
745 """Gives back a hash with different information about the node.
746
747 @type storage_units: list of tuples (string, string, list)
748 @param storage_units: List of tuples (storage unit, identifier, parameters) to
749 ask for disk space information. In case of lvm-vg, the identifier is
750 the VG name. The parameters can contain additional, storage-type-specific
751 parameters, for example exclusive storage for lvm storage.
752 @type hv_specs: list of pairs (string, dict of strings)
753 @param hv_specs: list of pairs of a hypervisor's name and its hvparams
754 @rtype: tuple; (string, None/dict, None/dict)
755 @return: Tuple containing boot ID, volume group information and hypervisor
756 information
757
758 """
759 bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
760 storage_info = _GetNamedNodeInfo(
761 storage_units,
762 (lambda (storage_type, storage_key, storage_params):
763 _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
764 hv_info = _GetHvInfoAll(hv_specs)
765 return (bootid, storage_info, hv_info)
766
769 """Wrapper around filestorage.GetSpaceInfo.
770
771 The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
772 and ignore the *args parameter to not leak it into the filestorage
773 module's code.
774
775 @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
776 parameters.
777
778 """
779 _CheckStorageParams(params, 0)
780 return filestorage.GetFileStorageSpaceInfo(path)
781
782
783
784 _STORAGE_TYPE_INFO_FN = {
785 constants.ST_BLOCK: None,
786 constants.ST_DISKLESS: None,
787 constants.ST_EXT: None,
788 constants.ST_FILE: _GetFileStorageSpaceInfo,
789 constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
790 constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
791 constants.ST_SHARED_FILE: None,
792 constants.ST_GLUSTER: None,
793 constants.ST_RADOS: None,
794 }
798 """Looks up and applies the correct function to calculate free and total
799 storage for the given storage type.
800
801 @type storage_type: string
802 @param storage_type: the storage type for which the storage shall be reported.
803 @type storage_key: string
804 @param storage_key: identifier of a storage unit, e.g. the volume group name
805 of an LVM storage unit
806 @type args: any
807 @param args: various parameters that can be used for storage reporting. These
808 parameters and their semantics vary from storage type to storage type and
809 are just propagated in this function.
810 @return: the results of the application of the storage space function (see
811 _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
812 storage type
813 @raises NotImplementedError: for storage types who don't support space
814 reporting yet
815 """
816 fn = _STORAGE_TYPE_INFO_FN[storage_type]
817 if fn is not None:
818 return fn(storage_key, *args)
819 else:
820 raise NotImplementedError
821
824 """Check that PVs are not shared among LVs
825
826 @type pvi_list: list of L{objects.LvmPvInfo} objects
827 @param pvi_list: information about the PVs
828
829 @rtype: list of tuples (string, list of strings)
830 @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])
831
832 """
833 res = []
834 for pvi in pvi_list:
835 if len(pvi.lv_list) > 1:
836 res.append((pvi.name, pvi.lv_list))
837 return res
838
842 """Verifies the hypervisor. Appends the results to the 'results' list.
843
844 @type what: C{dict}
845 @param what: a dictionary of things to check
846 @type vm_capable: boolean
847 @param vm_capable: whether or not this node is vm capable
848 @type result: dict
849 @param result: dictionary of verification results; results of the
850 verifications in this function will be added here
851 @type all_hvparams: dict of dict of string
852 @param all_hvparams: dictionary mapping hypervisor names to hvparams
853 @type get_hv_fn: function
854 @param get_hv_fn: function to retrieve the hypervisor, to improve testability
855
856 """
857 if not vm_capable:
858 return
859
860 if constants.NV_HYPERVISOR in what:
861 result[constants.NV_HYPERVISOR] = {}
862 for hv_name in what[constants.NV_HYPERVISOR]:
863 hvparams = all_hvparams[hv_name]
864 try:
865 val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
866 except errors.HypervisorError, err:
867 val = "Error while checking hypervisor: %s" % str(err)
868 result[constants.NV_HYPERVISOR][hv_name] = val
869
873 """Verifies the hvparams. Appends the results to the 'results' list.
874
875 @type what: C{dict}
876 @param what: a dictionary of things to check
877 @type vm_capable: boolean
878 @param vm_capable: whether or not this node is vm capable
879 @type result: dict
880 @param result: dictionary of verification results; results of the
881 verifications in this function will be added here
882 @type get_hv_fn: function
883 @param get_hv_fn: function to retrieve the hypervisor, to improve testability
884
885 """
886 if not vm_capable:
887 return
888
889 if constants.NV_HVPARAMS in what:
890 result[constants.NV_HVPARAMS] = []
891 for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
892 try:
893 logging.info("Validating hv %s, %s", hv_name, hvparms)
894 get_hv_fn(hv_name).ValidateParameters(hvparms)
895 except errors.HypervisorError, err:
896 result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))
897
900 """Verifies the instance list.
901
902 @type what: C{dict}
903 @param what: a dictionary of things to check
904 @type vm_capable: boolean
905 @param vm_capable: whether or not this node is vm capable
906 @type result: dict
907 @param result: dictionary of verification results; results of the
908 verifications in this function will be added here
909 @type all_hvparams: dict of dict of string
910 @param all_hvparams: dictionary mapping hypervisor names to hvparams
911
912 """
913 if constants.NV_INSTANCELIST in what and vm_capable:
914
915 try:
916 val = GetInstanceList(what[constants.NV_INSTANCELIST],
917 all_hvparams=all_hvparams)
918 except RPCFail, err:
919 val = str(err)
920 result[constants.NV_INSTANCELIST] = val
921
924 """Verifies the node info.
925
926 @type what: C{dict}
927 @param what: a dictionary of things to check
928 @type vm_capable: boolean
929 @param vm_capable: whether or not this node is vm capable
930 @type result: dict
931 @param result: dictionary of verification results; results of the
932 verifications in this function will be added here
933 @type all_hvparams: dict of dict of string
934 @param all_hvparams: dictionary mapping hypervisor names to hvparams
935
936 """
937 if constants.NV_HVINFO in what and vm_capable:
938 hvname = what[constants.NV_HVINFO]
939 hyper = hypervisor.GetHypervisor(hvname)
940 hvparams = all_hvparams[hvname]
941 result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)
942
945 """Verify the existance and validity of the client SSL certificate.
946
947 Also, verify that the client certificate is not self-signed. Self-
948 signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
949 and should be replaced by client certificates signed by the server
950 certificate. Hence we output a warning when we encounter a self-signed
951 one.
952
953 """
954 create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
955 if not os.path.exists(cert_file):
956 return (constants.CV_ERROR,
957 "The client certificate does not exist. Run '%s' to create"
958 " client certificates for all nodes." % create_cert_cmd)
959
960 (errcode, msg) = utils.VerifyCertificate(cert_file)
961 if errcode is not None:
962 return (errcode, msg)
963
964 (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
965 if errcode is not None:
966 return (errcode, msg)
967
968
969 return (None, utils.GetCertificateDigest(cert_filename=cert_file))
970
974 """Verifies the state of the SSH key files.
975
976 @type node_status_list: list of tuples
977 @param node_status_list: list of nodes of the cluster associated with a
978 couple of flags: (uuid, name, is_master_candidate,
979 is_potential_master_candidate, online)
980 @type my_name: str
981 @param my_name: name of this node
982 @type pub_key_file: str
983 @param pub_key_file: filename of the public key file
984
985 """
986 if node_status_list is None:
987 return ["No node list to check against the pub_key_file received."]
988
989 my_status_list = [(my_uuid, name, mc, pot_mc, online) for
990 (my_uuid, name, mc, pot_mc, online)
991 in node_status_list if name == my_name]
992 if len(my_status_list) == 0:
993 return ["Cannot find node information for node '%s'." % my_name]
994 (my_uuid, _, _, potential_master_candidate, online) = \
995 my_status_list[0]
996
997 result = []
998
999 if not os.path.exists(pub_key_file):
1000 result.append("The public key file '%s' does not exist. Consider running"
1001 " 'gnt-cluster renew-crypto --new-ssh-keys"
1002 " [--no-ssh-key-check]' to fix this." % pub_key_file)
1003 return result
1004
1005 pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list]
1006 offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list
1007 if not online]
1008 pub_keys = ssh.QueryPubKeyFile(None)
1009
1010 if potential_master_candidate:
1011
1012
1013 pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes)
1014 pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes)
1015 missing_uuids = set([])
1016 if pub_uuids_set != pot_mc_uuids_set:
1017 unknown_uuids = pub_uuids_set - pot_mc_uuids_set
1018 if unknown_uuids:
1019 result.append("The following node UUIDs are listed in the public key"
1020 " file on node '%s', but are not potential master"
1021 " candidates: %s."
1022 % (my_name, ", ".join(list(unknown_uuids))))
1023 missing_uuids = pot_mc_uuids_set - pub_uuids_set
1024 if missing_uuids:
1025 result.append("The following node UUIDs of potential master candidates"
1026 " are missing in the public key file on node %s: %s."
1027 % (my_name, ", ".join(list(missing_uuids))))
1028
1029 (_, key_files) = \
1030 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1031 (_, dsa_pub_key_filename) = key_files[constants.SSHK_DSA]
1032
1033 my_keys = pub_keys[my_uuid]
1034
1035 dsa_pub_key = utils.ReadFile(dsa_pub_key_filename)
1036 if dsa_pub_key.strip() not in my_keys:
1037 result.append("The dsa key of node %s does not match this node's key"
1038 " in the pub key file." % (my_name))
1039 if len(my_keys) != 1:
1040 result.append("There is more than one key for node %s in the public key"
1041 " file." % my_name)
1042 else:
1043 if len(pub_keys.keys()) > 0:
1044 result.append("The public key file of node '%s' is not empty, although"
1045 " the node is not a potential master candidate."
1046 % my_name)
1047
1048
1049 (auth_key_file, _) = \
1050 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1051 for (uuid, name, mc, _, online) in node_status_list:
1052 if not online:
1053 continue
1054 if uuid in missing_uuids:
1055 continue
1056 if mc:
1057 for key in pub_keys[uuid]:
1058 if not ssh.HasAuthorizedKey(auth_key_file, key):
1059 result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
1060 " not in the 'authorized_keys' file of node '%s'."
1061 % (name, uuid, my_name))
1062 else:
1063 for key in pub_keys[uuid]:
1064 if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
1065 result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
1066 " 'authorized_keys' file of node '%s'."
1067 % (name, uuid, my_name))
1068 if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
1069 result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
1070 " in the 'authorized_keys' file of itself."
1071 % (my_name, uuid))
1072
1073 return result
1074
1077 """Verifies that the 'authorized_keys' files are not cluttered up.
1078
1079 @type node_status_list: list of tuples
1080 @param node_status_list: list of nodes of the cluster associated with a
1081 couple of flags: (uuid, name, is_master_candidate,
1082 is_potential_master_candidate, online)
1083 @type my_name: str
1084 @param my_name: name of this node
1085
1086 """
1087 result = []
1088 (auth_key_file, _) = \
1089 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1090 node_names = [name for (_, name, _, _) in node_status_list]
1091 multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names)
1092 if multiple_occurrences:
1093 msg = "There are hosts which have more than one SSH key stored for the" \
1094 " same user in the 'authorized_keys' file of node %s. This can be" \
1095 " due to an unsuccessful operation which cluttered up the" \
1096 " 'authorized_keys' file. We recommend to clean this up manually. " \
1097 % my_name
1098 for host, occ in multiple_occurrences.items():
1099 msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ))
1100 result.append(msg)
1101
1102 return result
1103
1104
1105 -def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
1106 """Verify the status of the local node.
1107
1108 Based on the input L{what} parameter, various checks are done on the
1109 local node.
1110
1111 If the I{filelist} key is present, this list of
1112 files is checksummed and the file/checksum pairs are returned.
1113
1114 If the I{nodelist} key is present, we check that we have
1115 connectivity via ssh with the target nodes (and check the hostname
1116 report).
1117
1118 If the I{node-net-test} key is present, we check that we have
1119 connectivity to the given nodes via both primary IP and, if
1120 applicable, secondary IPs.
1121
1122 @type what: C{dict}
1123 @param what: a dictionary of things to check:
1124 - filelist: list of files for which to compute checksums
1125 - nodelist: list of nodes we should check ssh communication with
1126 - node-net-test: list of nodes we should check node daemon port
1127 connectivity with
1128 - hypervisor: list with hypervisors to run the verify for
1129 @type cluster_name: string
1130 @param cluster_name: the cluster's name
1131 @type all_hvparams: dict of dict of strings
1132 @param all_hvparams: a dictionary mapping hypervisor names to hvparams
1133 @type node_groups: a dict of strings
1134 @param node_groups: node _names_ mapped to their group uuids (it's enough to
1135 have only those nodes that are in `what["nodelist"]`)
1136 @type groups_cfg: a dict of dict of strings
1137 @param groups_cfg: a dictionary mapping group uuids to their configuration
1138 @rtype: dict
1139 @return: a dictionary with the same keys as the input dict, and
1140 values representing the result of the checks
1141
1142 """
1143 result = {}
1144 my_name = netutils.Hostname.GetSysName()
1145 port = netutils.GetDaemonPort(constants.NODED)
1146 vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])
1147
1148 _VerifyHypervisors(what, vm_capable, result, all_hvparams)
1149 _VerifyHvparams(what, vm_capable, result)
1150
1151 if constants.NV_FILELIST in what:
1152 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
1153 what[constants.NV_FILELIST]))
1154 result[constants.NV_FILELIST] = \
1155 dict((vcluster.MakeVirtualPath(key), value)
1156 for (key, value) in fingerprints.items())
1157
1158 if constants.NV_CLIENT_CERT in what:
1159 result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()
1160
1161 if constants.NV_SSH_SETUP in what:
1162 result[constants.NV_SSH_SETUP] = \
1163 _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name)
1164 if constants.NV_SSH_CLUTTER in what:
1165 result[constants.NV_SSH_CLUTTER] = \
1166 _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name)
1167
1168 if constants.NV_NODELIST in what:
1169 (nodes, bynode, mcs) = what[constants.NV_NODELIST]
1170
1171
1172 try:
1173 nodes.extend(bynode[my_name])
1174 except KeyError:
1175 pass
1176
1177
1178 random.shuffle(nodes)
1179
1180
1181 val = {}
1182 for node in nodes:
1183 params = groups_cfg.get(node_groups.get(node))
1184 ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
1185 logging.debug("Ssh port %s (None = default) for node %s",
1186 str(ssh_port), node)
1187
1188
1189
1190
1191
1192 if my_name in mcs:
1193 success, message = _GetSshRunner(cluster_name). \
1194 VerifyNodeHostname(node, ssh_port)
1195 if not success:
1196 val[node] = message
1197
1198 result[constants.NV_NODELIST] = val
1199
1200 if constants.NV_NODENETTEST in what:
1201 result[constants.NV_NODENETTEST] = tmp = {}
1202 my_pip = my_sip = None
1203 for name, pip, sip in what[constants.NV_NODENETTEST]:
1204 if name == my_name:
1205 my_pip = pip
1206 my_sip = sip
1207 break
1208 if not my_pip:
1209 tmp[my_name] = ("Can't find my own primary/secondary IP"
1210 " in the node list")
1211 else:
1212 for name, pip, sip in what[constants.NV_NODENETTEST]:
1213 fail = []
1214 if not netutils.TcpPing(pip, port, source=my_pip):
1215 fail.append("primary")
1216 if sip != pip:
1217 if not netutils.TcpPing(sip, port, source=my_sip):
1218 fail.append("secondary")
1219 if fail:
1220 tmp[name] = ("failure using the %s interface(s)" %
1221 " and ".join(fail))
1222
1223 if constants.NV_MASTERIP in what:
1224
1225
1226 master_name, master_ip = what[constants.NV_MASTERIP]
1227 if master_name == my_name:
1228 source = constants.IP4_ADDRESS_LOCALHOST
1229 else:
1230 source = None
1231 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
1232 source=source)
1233
1234 if constants.NV_USERSCRIPTS in what:
1235 result[constants.NV_USERSCRIPTS] = \
1236 [script for script in what[constants.NV_USERSCRIPTS]
1237 if not utils.IsExecutable(script)]
1238
1239 if constants.NV_OOB_PATHS in what:
1240 result[constants.NV_OOB_PATHS] = tmp = []
1241 for path in what[constants.NV_OOB_PATHS]:
1242 try:
1243 st = os.stat(path)
1244 except OSError, err:
1245 tmp.append("error stating out of band helper: %s" % err)
1246 else:
1247 if stat.S_ISREG(st.st_mode):
1248 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
1249 tmp.append(None)
1250 else:
1251 tmp.append("out of band helper %s is not executable" % path)
1252 else:
1253 tmp.append("out of band helper %s is not a file" % path)
1254
1255 if constants.NV_LVLIST in what and vm_capable:
1256 try:
1257 val = GetVolumeList(utils.ListVolumeGroups().keys())
1258 except RPCFail, err:
1259 val = str(err)
1260 result[constants.NV_LVLIST] = val
1261
1262 _VerifyInstanceList(what, vm_capable, result, all_hvparams)
1263
1264 if constants.NV_VGLIST in what and vm_capable:
1265 result[constants.NV_VGLIST] = utils.ListVolumeGroups()
1266
1267 if constants.NV_PVLIST in what and vm_capable:
1268 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
1269 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
1270 filter_allocatable=False,
1271 include_lvs=check_exclusive_pvs)
1272 if check_exclusive_pvs:
1273 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
1274 for pvi in val:
1275
1276 pvi.lv_list = []
1277 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)
1278
1279 if constants.NV_VERSION in what:
1280 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
1281 constants.RELEASE_VERSION)
1282
1283 _VerifyNodeInfo(what, vm_capable, result, all_hvparams)
1284
1285 if constants.NV_DRBDVERSION in what and vm_capable:
1286 try:
1287 drbd_version = DRBD8.GetProcInfo().GetVersionString()
1288 except errors.BlockDeviceError, err:
1289 logging.warning("Can't get DRBD version", exc_info=True)
1290 drbd_version = str(err)
1291 result[constants.NV_DRBDVERSION] = drbd_version
1292
1293 if constants.NV_DRBDLIST in what and vm_capable:
1294 try:
1295 used_minors = drbd.DRBD8.GetUsedDevs()
1296 except errors.BlockDeviceError, err:
1297 logging.warning("Can't get used minors list", exc_info=True)
1298 used_minors = str(err)
1299 result[constants.NV_DRBDLIST] = used_minors
1300
1301 if constants.NV_DRBDHELPER in what and vm_capable:
1302 status = True
1303 try:
1304 payload = drbd.DRBD8.GetUsermodeHelper()
1305 except errors.BlockDeviceError, err:
1306 logging.error("Can't get DRBD usermode helper: %s", str(err))
1307 status = False
1308 payload = str(err)
1309 result[constants.NV_DRBDHELPER] = (status, payload)
1310
1311 if constants.NV_NODESETUP in what:
1312 result[constants.NV_NODESETUP] = tmpr = []
1313 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
1314 tmpr.append("The sysfs filesytem doesn't seem to be mounted"
1315 " under /sys, missing required directories /sys/block"
1316 " and /sys/class/net")
1317 if (not os.path.isdir("/proc/sys") or
1318 not os.path.isfile("/proc/sysrq-trigger")):
1319 tmpr.append("The procfs filesystem doesn't seem to be mounted"
1320 " under /proc, missing required directory /proc/sys and"
1321 " the file /proc/sysrq-trigger")
1322
1323 if constants.NV_TIME in what:
1324 result[constants.NV_TIME] = utils.SplitTime(time.time())
1325
1326 if constants.NV_OSLIST in what and vm_capable:
1327 result[constants.NV_OSLIST] = DiagnoseOS()
1328
1329 if constants.NV_BRIDGES in what and vm_capable:
1330 result[constants.NV_BRIDGES] = [bridge
1331 for bridge in what[constants.NV_BRIDGES]
1332 if not utils.BridgeExists(bridge)]
1333
1334 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
1335 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
1336 filestorage.ComputeWrongFileStoragePaths()
1337
1338 if what.get(constants.NV_FILE_STORAGE_PATH):
1339 pathresult = filestorage.CheckFileStoragePath(
1340 what[constants.NV_FILE_STORAGE_PATH])
1341 if pathresult:
1342 result[constants.NV_FILE_STORAGE_PATH] = pathresult
1343
1344 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
1345 pathresult = filestorage.CheckFileStoragePath(
1346 what[constants.NV_SHARED_FILE_STORAGE_PATH])
1347 if pathresult:
1348 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult
1349
1350 return result
1351
1354 """Perform actions on the node's cryptographic tokens.
1355
1356 Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
1357 for 'ssl'. Action 'get' returns the digest of the public client ssl
1358 certificate. Action 'create' creates a new client certificate and private key
1359 and also returns the digest of the certificate. The third parameter of a
1360 token request are optional parameters for the actions, so far only the
1361 filename is supported.
1362
1363 @type token_requests: list of tuples of (string, string, dict), where the
1364 first string is in constants.CRYPTO_TYPES, the second in
1365 constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
1366 to string.
1367 @param token_requests: list of requests of cryptographic tokens and actions
1368 to perform on them. The actions come with a dictionary of options.
1369 @rtype: list of tuples (string, string)
1370 @return: list of tuples of the token type and the public crypto token
1371
1372 """
1373 tokens = []
1374 for (token_type, action, _) in token_requests:
1375 if token_type not in constants.CRYPTO_TYPES:
1376 raise errors.ProgrammerError("Token type '%s' not supported." %
1377 token_type)
1378 if action not in constants.CRYPTO_ACTIONS:
1379 raise errors.ProgrammerError("Action '%s' is not supported." %
1380 action)
1381 if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
1382 tokens.append((token_type,
1383 utils.GetCertificateDigest()))
1384 return tokens
1385
1388 """Ensures the given daemon is running or stopped.
1389
1390 @type daemon_name: string
1391 @param daemon_name: name of the daemon (e.g., constants.KVMD)
1392
1393 @type run: bool
1394 @param run: whether to start or stop the daemon
1395
1396 @rtype: bool
1397 @return: 'True' if daemon successfully started/stopped,
1398 'False' otherwise
1399
1400 """
1401 allowed_daemons = [constants.KVMD]
1402
1403 if daemon_name not in allowed_daemons:
1404 fn = lambda _: False
1405 elif run:
1406 fn = utils.EnsureDaemon
1407 else:
1408 fn = utils.StopDaemon
1409
1410 return fn(daemon_name)
1411
1420
1431 """Distributes a node's public SSH key across the cluster.
1432
1433 Note that this function should only be executed on the master node, which
1434 then will copy the new node's key to all nodes in the cluster via SSH.
1435
1436 Also note: at least one of the flags C{to_authorized_keys},
1437 C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
1438 the function to actually perform any actions.
1439
1440 @type node_uuid: str
1441 @param node_uuid: the UUID of the node whose key is added
1442 @type node_name: str
1443 @param node_name: the name of the node whose key is added
1444 @type potential_master_candidates: list of str
1445 @param potential_master_candidates: list of node names of potential master
1446 candidates; this should match the list of uuids in the public key file
1447 @type to_authorized_keys: boolean
1448 @param to_authorized_keys: whether the key should be added to the
1449 C{authorized_keys} file of all nodes
1450 @type to_public_keys: boolean
1451 @param to_public_keys: whether the keys should be added to the public key file
1452 @type get_public_keys: boolean
1453 @param get_public_keys: whether the node should add the clusters' public keys
1454 to its {ganeti_pub_keys} file
1455
1456 """
1457
1458
1459 assert (to_authorized_keys or to_public_keys or get_public_keys)
1460
1461 if not ssconf_store:
1462 ssconf_store = ssconf.SimpleStore()
1463
1464
1465 keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file)
1466 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1467
1468 if (not keys_by_name or node_name not in keys_by_name) \
1469 and (not keys_by_uuid or node_uuid not in keys_by_uuid):
1470 raise errors.SshUpdateError(
1471 "No keys found for the new node '%s' (UUID %s) in the list of public"
1472 " SSH keys, neither for the name or the UUID" % (node_name, node_uuid))
1473 else:
1474 if node_name in keys_by_name:
1475 keys_by_uuid = {}
1476
1477
1478 ssh.ReplaceNameByUuid(node_uuid, node_name,
1479 error_fn=errors.SshUpdateError,
1480 key_file=pub_key_file)
1481 keys_by_uuid[node_uuid] = keys_by_name[node_name]
1482
1483
1484 if to_authorized_keys:
1485 (auth_key_file, _) = \
1486 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1487 ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_uuid])
1488
1489 base_data = {}
1490 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
1491 cluster_name = base_data[constants.SSHS_CLUSTER_NAME]
1492
1493 ssh_port_map = ssconf_store.GetSshPortMap()
1494
1495
1496 logging.debug("Updating SSH key files of target node '%s'.", node_name)
1497 if get_public_keys:
1498 node_data = {}
1499 _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
1500 all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
1501 node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1502 (constants.SSHS_OVERRIDE, all_keys)
1503
1504 try:
1505 utils.RetryByNumberOfTimes(
1506 constants.SSHS_MAX_RETRIES,
1507 errors.SshUpdateError,
1508 run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE,
1509 ssh_port_map.get(node_name), node_data,
1510 debug=False, verbose=False, use_cluster_key=False,
1511 ask_key=False, strict_host_check=False)
1512 except errors.SshUpdateError as e:
1513
1514 if to_public_keys:
1515 ssh.RemovePublicKey(node_uuid)
1516 raise e
1517
1518
1519 if to_authorized_keys:
1520 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
1521 (constants.SSHS_ADD, keys_by_uuid)
1522
1523 pot_mc_data = copy.deepcopy(base_data)
1524 if to_public_keys:
1525 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1526 (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid)
1527
1528 all_nodes = ssconf_store.GetNodeList()
1529 master_node = ssconf_store.GetMasterNode()
1530 online_nodes = ssconf_store.GetOnlineNodeList()
1531
1532 node_errors = []
1533 for node in all_nodes:
1534 if node == master_node:
1535 logging.debug("Skipping master node '%s'.", master_node)
1536 continue
1537 if node not in online_nodes:
1538 logging.debug("Skipping offline node '%s'.", node)
1539 continue
1540 if node in potential_master_candidates:
1541 logging.debug("Updating SSH key files of node '%s'.", node)
1542 try:
1543 utils.RetryByNumberOfTimes(
1544 constants.SSHS_MAX_RETRIES,
1545 errors.SshUpdateError,
1546 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
1547 ssh_port_map.get(node), pot_mc_data,
1548 debug=False, verbose=False, use_cluster_key=False,
1549 ask_key=False, strict_host_check=False)
1550 except errors.SshUpdateError as last_exception:
1551 error_msg = ("When adding the key of node '%s', updating SSH key"
1552 " files of node '%s' failed after %s retries."
1553 " Not trying again. Last error was: %s." %
1554 (node, node_name, constants.SSHS_MAX_RETRIES,
1555 last_exception))
1556 node_errors.append((node, error_msg))
1557
1558
1559 logging.error(error_msg)
1560
1561 else:
1562 if to_authorized_keys:
1563 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
1564 ssh_port_map.get(node), base_data,
1565 debug=False, verbose=False, use_cluster_key=False,
1566 ask_key=False, strict_host_check=False)
1567
1568 return node_errors
1569
1570
def RemoveNodeSshKey(node_uuid, node_name,
                     master_candidate_uuids,
                     potential_master_candidates,
                     master_uuid=None,
                     keys_to_remove=None,
                     from_authorized_keys=False,
                     from_public_keys=False,
                     clear_authorized_keys=False,
                     clear_public_keys=False,
                     pub_key_file=pathutils.SSH_PUB_KEYS,
                     ssconf_store=None,
                     noded_cert_file=pathutils.NODED_CERT_FILE,
                     readd=False,
                     run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Removes the node's SSH keys from the key files and distributes those.

  Note that at least one of the flags C{from_authorized_keys},
  C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
  has to be set to C{True} for the function to perform any action at all.
  Not doing so will trigger an exception in the function.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is removed
  @type node_name: str
  @param node_name: name of the node whose key is removed
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of the current master candidates
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of names of potential master
    candidates
  @type master_uuid: str
  @param master_uuid: UUID of the master node, whose key is never removed from
    the removed node's key list (relevant during upgrades)
  @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDs to lists of SSH keys
    to be removed. This list is supposed to be used only if the keys are not
    in the public keys file. This is for example the case when removing a
    master node's key.
  @type from_authorized_keys: boolean
  @param from_authorized_keys: whether or not the key should be removed
    from the C{authorized_keys} file
  @type from_public_keys: boolean
  @param from_public_keys: whether or not the key should be removed from
    the C{ganeti_pub_keys} file
  @type clear_authorized_keys: boolean
  @param clear_authorized_keys: whether or not the C{authorized_keys} file
    should be cleared on the node whose keys are removed
  @type clear_public_keys: boolean
  @param clear_public_keys: whether to clear the node's C{ganeti_pub_key} file
  @type readd: boolean
  @param readd: whether this is called during a readd operation.
  @rtype: list of string
  @returns: list of feedback messages

  """
  result_msgs = []

  # Make sure at least one of these flags is true, otherwise the call is
  # a no-op by construction and most likely a programming error.
  if not (from_authorized_keys or from_public_keys or clear_authorized_keys
          or clear_public_keys):
    raise errors.SshUpdateError("No removal from any key file was requested.")

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  master_node = ssconf_store.GetMasterNode()
  ssh_port_map = ssconf_store.GetSshPortMap()

  # Initialize 'keys' up front so the 'node_uuid in keys' check further
  # down cannot raise a NameError when only the clear_* flags are set.
  keys = None
  if from_authorized_keys or from_public_keys:
    if keys_to_remove:
      keys = keys_to_remove
    else:
      keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
      if (not keys or node_uuid not in keys) and not readd:
        raise errors.SshUpdateError("Node '%s' not found in the list of public"
                                    " SSH keys. It seems someone tries to"
                                    " remove a key from outside the cluster!"
                                    % node_uuid)
      # During an upgrade all nodes have the master key. In this case the
      # one of the master node is not removed, but all others are.
      master_keys = None
      if master_uuid:
        master_keys = ssh.QueryPubKeyFile([master_uuid], key_file=pub_key_file)
        for master_key in master_keys:
          # Guard the lookup: with 'readd=True' the node's UUID may
          # legitimately be absent from 'keys' (see the check above).
          if node_uuid in keys and master_key in keys[node_uuid]:
            keys[node_uuid].remove(master_key)

  if node_name == master_node and not keys_to_remove:
    raise errors.SshUpdateError("Cannot remove the master node's keys.")

  if keys and node_uuid in keys:
    base_data = {}
    _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
    cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

    if from_authorized_keys:
      base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
        (constants.SSHS_REMOVE, keys)
      (auth_key_file, _) = \
        ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                            dircheck=False)
      ssh.RemoveAuthorizedKeys(auth_key_file, keys[node_uuid])

    # Deep-copy so the public-keys entry added below does not leak into the
    # payload sent to non-potential-master-candidate nodes.
    pot_mc_data = copy.deepcopy(base_data)

    if from_public_keys:
      pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_REMOVE, keys)
      ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)

    all_nodes = ssconf_store.GetNodeList()
    online_nodes = ssconf_store.GetOnlineNodeList()
    logging.debug("Removing key of node '%s' from all nodes but itself and"
                  " master.", node_name)
    for node in all_nodes:
      if node == master_node:
        logging.debug("Skipping master node '%s'.", master_node)
        continue
      if node not in online_nodes:
        logging.debug("Skipping offline node '%s'.", node)
        continue
      ssh_port = ssh_port_map.get(node)
      if not ssh_port:
        raise errors.OpExecError("No SSH port information available for"
                                 " node '%s', map: %s." %
                                 (node, ssh_port_map))
      error_msg_final = ("When removing the key of node '%s', updating the"
                         " SSH key files of node '%s' failed. Last error"
                         " was: %s.")
      if node in potential_master_candidates:
        logging.debug("Updating key setup of potential master candidate node"
                      " %s.", node)
        try:
          utils.RetryByNumberOfTimes(
              constants.SSHS_MAX_RETRIES,
              errors.SshUpdateError,
              run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
              ssh_port, pot_mc_data,
              debug=False, verbose=False, use_cluster_key=False,
              ask_key=False, strict_host_check=False)
        except errors.SshUpdateError as last_exception:
          error_msg = error_msg_final % (
              node_name, node, last_exception)
          result_msgs.append((node, error_msg))
          # Errors are collected and reported, not raised: one unreachable
          # node must not abort the key removal on the remaining nodes.
          logging.error(error_msg)

      else:
        # Normal nodes only carry the key in 'authorized_keys', so they only
        # need an update if the key is removed from there.
        if from_authorized_keys:
          logging.debug("Updating key setup of normal node %s.", node)
          try:
            utils.RetryByNumberOfTimes(
                constants.SSHS_MAX_RETRIES,
                errors.SshUpdateError,
                run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
                ssh_port, base_data,
                debug=False, verbose=False, use_cluster_key=False,
                ask_key=False, strict_host_check=False)
          except errors.SshUpdateError as last_exception:
            error_msg = error_msg_final % (
                node_name, node, last_exception)
            result_msgs.append((node, error_msg))
            logging.error(error_msg)

  # Finally, update the key files on the node which is being removed itself.
  if clear_authorized_keys or from_public_keys or clear_public_keys:
    data = {}
    _InitSshUpdateData(data, noded_cert_file, ssconf_store)
    cluster_name = data[constants.SSHS_CLUSTER_NAME]
    ssh_port = ssh_port_map.get(node_name)
    if not ssh_port:
      # FIX: the original raised this message with an unfilled '%s'
      # placeholder; supply the node name.
      raise errors.OpExecError("No SSH port information available for"
                               " node '%s', which is leaving the cluster."
                               % node_name)

    if clear_authorized_keys:
      # The 'authorized_keys' file is not solely managed by Ganeti. Therefore,
      # we have to specify exactly which keys to clear to leave keys untouched
      # that were not added by Ganeti.
      other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids
                                      if uuid != node_uuid]
      candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids,
                                           key_file=pub_key_file)
      data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
        (constants.SSHS_REMOVE, candidate_keys)

    if clear_public_keys:
      data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_CLEAR, {})
    elif from_public_keys:
      # 'keys' is guaranteed to be set here, because 'from_public_keys'
      # implies the key-querying branch above was taken.
      if keys[node_uuid]:
        data[constants.SSHS_SSH_PUBLIC_KEYS] = \
          (constants.SSHS_REMOVE, keys)

    # Do not send any data if no SSH key update is necessary. FIX: return the
    # accumulated messages instead of a bare 'return' (None), which both
    # violated the documented return type and silently dropped the errors
    # collected in the distribution loop above.
    if not (constants.SSHS_SSH_PUBLIC_KEYS in data or
            constants.SSHS_SSH_AUTHORIZED_KEYS in data):
      return result_msgs

    logging.debug("Updating SSH key setup of target node '%s'.", node_name)
    try:
      utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE,
          ssh_port, data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
    except errors.SshUpdateError as last_exception:
      result_msgs.append(
        (node_name,
         ("Removing SSH keys from node '%s' failed."
          " This can happen when the node is already unreachable."
          " Error: %s" % (node_name, last_exception))))

  return result_msgs
1786
1794 """Generates the root SSH key pair on the node.
1795
1796 @type node_uuid: str
1797 @param node_uuid: UUID of the node whose key is removed
1798 @type node_name: str
1799 @param node_name: name of the node whose key is remove
1800 @type ssh_port_map: dict of str to int
1801 @param ssh_port_map: mapping of node names to their SSH port
1802
1803 """
1804 if not ssconf_store:
1805 ssconf_store = ssconf.SimpleStore()
1806
1807 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1808 if not keys_by_uuid or node_uuid not in keys_by_uuid:
1809 raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
1810 " be regenerated is not registered in the"
1811 " public keys file." % (node_name, node_uuid))
1812
1813 data = {}
1814 _InitSshUpdateData(data, noded_cert_file, ssconf_store)
1815 cluster_name = data[constants.SSHS_CLUSTER_NAME]
1816 data[constants.SSHS_GENERATE] = {constants.SSHS_SUFFIX: suffix}
1817
1818 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
1819 ssh_port_map.get(node_name), data,
1820 debug=False, verbose=False, use_cluster_key=False,
1821 ask_key=False, strict_host_check=False)
1822
1825 master_node_uuids = [node_uuid for (node_uuid, node_name)
1826 in node_uuid_name_map
1827 if node_name == master_node_name]
1828 if len(master_node_uuids) != 1:
1829 raise errors.SshUpdateError("No (unique) master UUID found. Master node"
1830 " name: '%s', Master UUID: '%s'" %
1831 (master_node_name, master_node_uuids))
1832 return master_node_uuids[0]
1833
1836 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid],
1837 key_file=pub_key_file)
1838 if not old_master_keys_by_uuid:
1839 raise errors.SshUpdateError("No public key of the master node (UUID '%s')"
1840 " found, not generating a new key."
1841 % master_node_uuid)
1842 return old_master_keys_by_uuid
1843
1846 new_master_keys = []
1847 for (_, (_, public_key_file)) in root_keyfiles.items():
1848 public_key_dir = os.path.dirname(public_key_file)
1849 public_key_file_tmp_filename = \
1850 os.path.splitext(os.path.basename(public_key_file))[0] \
1851 + constants.SSHS_MASTER_SUFFIX + ".pub"
1852 public_key_path_tmp = os.path.join(public_key_dir,
1853 public_key_file_tmp_filename)
1854 if os.path.exists(public_key_path_tmp):
1855
1856 key = utils.ReadFile(public_key_path_tmp)
1857 new_master_keys.append(key)
1858 if not new_master_keys:
1859 raise errors.SshUpdateError("Cannot find any type of temporary SSH key.")
1860 return {master_node_uuid: new_master_keys}
1861
1864 number_of_moves = 0
1865 for (_, (private_key_file, public_key_file)) in root_keyfiles.items():
1866 key_dir = os.path.dirname(public_key_file)
1867 private_key_file_tmp = \
1868 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX
1869 public_key_file_tmp = private_key_file_tmp + ".pub"
1870 private_key_path_tmp = os.path.join(key_dir,
1871 private_key_file_tmp)
1872 public_key_path_tmp = os.path.join(key_dir,
1873 public_key_file_tmp)
1874 if os.path.exists(public_key_file):
1875 utils.CreateBackup(public_key_file)
1876 utils.RemoveFile(public_key_file)
1877 if os.path.exists(private_key_file):
1878 utils.CreateBackup(private_key_file)
1879 utils.RemoveFile(private_key_file)
1880 if os.path.exists(public_key_path_tmp) and \
1881 os.path.exists(private_key_path_tmp):
1882
1883 shutil.move(public_key_path_tmp, public_key_file)
1884 shutil.move(private_key_path_tmp, private_key_file)
1885 number_of_moves += 1
1886 if not number_of_moves:
1887 raise errors.SshUpdateError("Could not move at least one master SSH key.")
1888
1896 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.
1897
1898 @type node_uuids: list of str
1899 @param node_uuids: list of node UUIDs whose keys should be renewed
1900 @type node_names: list of str
1901 @param node_names: list of node names whose keys should be removed. This list
1902 should match the C{node_uuids} parameter
1903 @type master_candidate_uuids: list of str
1904 @param master_candidate_uuids: list of UUIDs of master candidates or
1905 master node
1906 @type pub_key_file: str
1907 @param pub_key_file: file path of the the public key file
1908 @type noded_cert_file: str
1909 @param noded_cert_file: path of the noded SSL certificate file
1910 @type run_cmd_fn: function
1911 @param run_cmd_fn: function to run commands on remote nodes via SSH
1912 @raises ProgrammerError: if node_uuids and node_names don't match;
1913 SshUpdateError if a node's key is missing from the public key file,
1914 if a node's new SSH key could not be fetched from it, if there is
1915 none or more than one entry in the public key list for the master
1916 node.
1917
1918 """
1919 if not ssconf_store:
1920 ssconf_store = ssconf.SimpleStore()
1921 cluster_name = ssconf_store.GetClusterName()
1922
1923 if not len(node_uuids) == len(node_names):
1924 raise errors.ProgrammerError("List of nodes UUIDs and node names"
1925 " does not match in length.")
1926
1927 (_, root_keyfiles) = \
1928 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1929 (_, dsa_pub_keyfile) = root_keyfiles[constants.SSHK_DSA]
1930 old_master_key = utils.ReadFile(dsa_pub_keyfile)
1931
1932 node_uuid_name_map = zip(node_uuids, node_names)
1933
1934 master_node_name = ssconf_store.GetMasterNode()
1935 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name)
1936 ssh_port_map = ssconf_store.GetSshPortMap()
1937
1938
1939
1940 all_node_errors = []
1941
1942
1943 for node_uuid, node_name in node_uuid_name_map:
1944 if node_name == master_node_name:
1945 continue
1946 master_candidate = node_uuid in master_candidate_uuids
1947 potential_master_candidate = node_name in potential_master_candidates
1948
1949 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1950 if not keys_by_uuid:
1951 raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
1952 " not generating a new key."
1953 % (node_name, node_uuid))
1954
1955 if master_candidate:
1956 logging.debug("Fetching old SSH key from node '%s'.", node_name)
1957 old_pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
1958 node_name, cluster_name,
1959 ssh_port_map[node_name],
1960 False,
1961 False)
1962 if old_pub_key != old_master_key:
1963
1964
1965
1966
1967
1968 logging.debug("Removing SSH key of node '%s'.", node_name)
1969 node_errors = RemoveNodeSshKey(
1970 node_uuid, node_name, master_candidate_uuids,
1971 potential_master_candidates,
1972 master_uuid=master_node_uuid, from_authorized_keys=master_candidate,
1973 from_public_keys=False, clear_authorized_keys=False,
1974 clear_public_keys=False)
1975 if node_errors:
1976 all_node_errors = all_node_errors + node_errors
1977 else:
1978 logging.debug("Old key of node '%s' is the same as the current master"
1979 " key. Not deleting that key on the node.", node_name)
1980
1981 logging.debug("Generating new SSH key for node '%s'.", node_name)
1982 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
1983 pub_key_file=pub_key_file,
1984 ssconf_store=ssconf_store,
1985 noded_cert_file=noded_cert_file,
1986 run_cmd_fn=run_cmd_fn)
1987
1988 try:
1989 logging.debug("Fetching newly created SSH key from node '%s'.", node_name)
1990 pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
1991 node_name, cluster_name,
1992 ssh_port_map[node_name],
1993 False,
1994 False)
1995 except:
1996 raise errors.SshUpdateError("Could not fetch key of node %s"
1997 " (UUID %s)" % (node_name, node_uuid))
1998
1999 if potential_master_candidate:
2000 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
2001 ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file)
2002
2003 logging.debug("Add ssh key of node '%s'.", node_name)
2004 node_errors = AddNodeSshKey(
2005 node_uuid, node_name, potential_master_candidates,
2006 to_authorized_keys=master_candidate,
2007 to_public_keys=potential_master_candidate,
2008 get_public_keys=True,
2009 pub_key_file=pub_key_file, ssconf_store=ssconf_store,
2010 noded_cert_file=noded_cert_file,
2011 run_cmd_fn=run_cmd_fn)
2012 if node_errors:
2013 all_node_errors = all_node_errors + node_errors
2014
2015
2016
2017
2018 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, pub_key_file)
2019
2020
2021 logging.debug("Generate new ssh key of master.")
2022 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map,
2023 pub_key_file=pub_key_file,
2024 ssconf_store=ssconf_store,
2025 noded_cert_file=noded_cert_file,
2026 run_cmd_fn=run_cmd_fn,
2027 suffix=constants.SSHS_MASTER_SUFFIX)
2028
2029 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid)
2030
2031
2032 ssh.RemovePublicKey(master_node_uuid, key_file=pub_key_file)
2033 for pub_key in new_master_key_dict[master_node_uuid]:
2034 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=pub_key_file)
2035
2036
2037 logging.debug("Add new master key to all nodes.")
2038 node_errors = AddNodeSshKey(
2039 master_node_uuid, master_node_name, potential_master_candidates,
2040 to_authorized_keys=True, to_public_keys=True,
2041 get_public_keys=False, pub_key_file=pub_key_file,
2042 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
2043 run_cmd_fn=run_cmd_fn)
2044 if node_errors:
2045 all_node_errors = all_node_errors + node_errors
2046
2047
2048 _ReplaceMasterKeyOnMaster(root_keyfiles)
2049
2050
2051 (auth_key_file, _) = \
2052 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2053 ssh.RemoveAuthorizedKeys(auth_key_file,
2054 old_master_keys_by_uuid[master_node_uuid])
2055
2056
2057 logging.debug("Remove the old master key from all nodes.")
2058 node_errors = RemoveNodeSshKey(
2059 master_node_uuid, master_node_name, master_candidate_uuids,
2060 potential_master_candidates,
2061 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
2062 from_public_keys=False, clear_authorized_keys=False,
2063 clear_public_keys=False)
2064 if node_errors:
2065 all_node_errors = all_node_errors + node_errors
2066
2067 return all_node_errors
2068
2071 """Return the size of the given block devices
2072
2073 @type devices: list
2074 @param devices: list of block device nodes to query
2075 @rtype: dict
2076 @return:
2077 dictionary of all block devices under /dev (key). The value is their
2078 size in MiB.
2079
2080 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}
2081
2082 """
2083 DEV_PREFIX = "/dev/"
2084 blockdevs = {}
2085
2086 for devpath in devices:
2087 if not utils.IsBelowDir(DEV_PREFIX, devpath):
2088 continue
2089
2090 try:
2091 st = os.stat(devpath)
2092 except EnvironmentError, err:
2093 logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
2094 continue
2095
2096 if stat.S_ISBLK(st.st_mode):
2097 result = utils.RunCmd(["blockdev", "--getsize64", devpath])
2098 if result.failed:
2099
2100 logging.warning("Cannot get size for block device %s", devpath)
2101 continue
2102
2103 size = int(result.stdout) / (1024 * 1024)
2104 blockdevs[devpath] = size
2105 return blockdevs
2106
2109 """Compute list of logical volumes and their size.
2110
2111 @type vg_names: list
2112 @param vg_names: the volume groups whose LVs we should list, or
2113 empty for all volume groups
2114 @rtype: dict
2115 @return:
2116 dictionary of all partions (key) with value being a tuple of
2117 their size (in MiB), inactive and online status::
2118
2119 {'xenvg/test1': ('20.06', True, True)}
2120
2121 in case of errors, a string is returned with the error
2122 details.
2123
2124 """
2125 lvs = {}
2126 sep = "|"
2127 if not vg_names:
2128 vg_names = []
2129 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2130 "--separator=%s" % sep,
2131 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
2132 if result.failed:
2133 _Fail("Failed to list logical volumes, lvs output: %s", result.output)
2134
2135 for line in result.stdout.splitlines():
2136 line = line.strip()
2137 match = _LVSLINE_REGEX.match(line)
2138 if not match:
2139 logging.error("Invalid line returned from lvs output: '%s'", line)
2140 continue
2141 vg_name, name, size, attr = match.groups()
2142 inactive = attr[4] == "-"
2143 online = attr[5] == "o"
2144 virtual = attr[0] == "v"
2145 if virtual:
2146
2147
2148 continue
2149 lvs[vg_name + "/" + name] = (size, inactive, online)
2150
2151 return lvs
2152
2155 """List the volume groups and their size.
2156
2157 @rtype: dict
2158 @return: dictionary with keys volume name and values the
2159 size of the volume
2160
2161 """
2162 return utils.ListVolumeGroups()
2163
2166 """List all volumes on this node.
2167
2168 @rtype: list
2169 @return:
2170 A list of dictionaries, each having four keys:
2171 - name: the logical volume name,
2172 - size: the size of the logical volume
2173 - dev: the physical device on which the LV lives
2174 - vg: the volume group to which it belongs
2175
2176 In case of errors, we return an empty list and log the
2177 error.
2178
2179 Note that since a logical volume can live on multiple physical
2180 volumes, the resulting list might include a logical volume
2181 multiple times.
2182
2183 """
2184 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2185 "--separator=|",
2186 "--options=lv_name,lv_size,devices,vg_name"])
2187 if result.failed:
2188 _Fail("Failed to list logical volumes, lvs output: %s",
2189 result.output)
2190
2191 def parse_dev(dev):
2192 return dev.split("(")[0]
2193
2194 def handle_dev(dev):
2195 return [parse_dev(x) for x in dev.split(",")]
2196
2197 def map_line(line):
2198 line = [v.strip() for v in line]
2199 return [{"name": line[0], "size": line[1],
2200 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])]
2201
2202 all_devs = []
2203 for line in result.stdout.splitlines():
2204 if line.count("|") >= 3:
2205 all_devs.extend(map_line(line.split("|")))
2206 else:
2207 logging.warning("Strange line in the output from lvs: '%s'", line)
2208 return all_devs
2209
2212 """Check if a list of bridges exist on the current node.
2213
2214 @rtype: boolean
2215 @return: C{True} if all of them exist, C{False} otherwise
2216
2217 """
2218 missing = []
2219 for bridge in bridges_list:
2220 if not utils.BridgeExists(bridge):
2221 missing.append(bridge)
2222
2223 if missing:
2224 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2225
2229 """Provides a list of instances of the given hypervisor.
2230
2231 @type hname: string
2232 @param hname: name of the hypervisor
2233 @type hvparams: dict of strings
2234 @param hvparams: hypervisor parameters for the given hypervisor
2235 @type get_hv_fn: function
2236 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2237 name; optional parameter to increase testability
2238
2239 @rtype: list
2240 @return: a list of all running instances on the current node
2241 - instance1.example.com
2242 - instance2.example.com
2243
2244 """
2245 try:
2246 return get_hv_fn(hname).ListInstances(hvparams=hvparams)
2247 except errors.HypervisorError, err:
2248 _Fail("Error enumerating instances (hypervisor %s): %s",
2249 hname, err, exc=True)
2250
2254 """Provides a list of instances.
2255
2256 @type hypervisor_list: list
2257 @param hypervisor_list: the list of hypervisors to query information
2258 @type all_hvparams: dict of dict of strings
2259 @param all_hvparams: a dictionary mapping hypervisor types to respective
2260 cluster-wide hypervisor parameters
2261 @type get_hv_fn: function
2262 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2263 name; optional parameter to increase testability
2264
2265 @rtype: list
2266 @return: a list of all running instances on the current node
2267 - instance1.example.com
2268 - instance2.example.com
2269
2270 """
2271 results = []
2272 for hname in hypervisor_list:
2273 hvparams = all_hvparams[hname]
2274 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
2275 get_hv_fn=get_hv_fn))
2276 return results
2277
2280 """Gives back the information about an instance as a dictionary.
2281
2282 @type instance: string
2283 @param instance: the instance name
2284 @type hname: string
2285 @param hname: the hypervisor type of the instance
2286 @type hvparams: dict of strings
2287 @param hvparams: the instance's hvparams
2288
2289 @rtype: dict
2290 @return: dictionary with the following keys:
2291 - memory: memory size of instance (int)
2292 - state: state of instance (HvInstanceState)
2293 - time: cpu time of instance (float)
2294 - vcpus: the number of vcpus (int)
2295
2296 """
2297 output = {}
2298
2299 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
2300 hvparams=hvparams)
2301 if iinfo is not None:
2302 output["memory"] = iinfo[2]
2303 output["vcpus"] = iinfo[3]
2304 output["state"] = iinfo[4]
2305 output["time"] = iinfo[5]
2306
2307 return output
2308
2311 """Computes whether an instance can be migrated.
2312
2313 @type instance: L{objects.Instance}
2314 @param instance: object representing the instance to be checked.
2315
2316 @rtype: tuple
2317 @return: tuple of (result, description) where:
2318 - result: whether the instance can be migrated or not
2319 - description: a description of the issue, if relevant
2320
2321 """
2322 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2323 iname = instance.name
2324 if iname not in hyper.ListInstances(hvparams=instance.hvparams):
2325 _Fail("Instance %s is not running", iname)
2326
2327 for idx in range(len(instance.disks_info)):
2328 link_name = _GetBlockDevSymlinkPath(iname, idx)
2329 if not os.path.islink(link_name):
2330 logging.warning("Instance %s is missing symlink %s for disk %d",
2331 iname, link_name, idx)
2332
2335 """Gather data about all instances.
2336
2337 This is the equivalent of L{GetInstanceInfo}, except that it
2338 computes data for all instances at once, thus being faster if one
2339 needs data about more than one instance.
2340
2341 @type hypervisor_list: list
2342 @param hypervisor_list: list of hypervisors to query for instance data
2343 @type all_hvparams: dict of dict of strings
2344 @param all_hvparams: mapping of hypervisor names to hvparams
2345
2346 @rtype: dict
2347 @return: dictionary of instance: data, with data having the following keys:
2348 - memory: memory size of instance (int)
2349 - state: xen state of instance (string)
2350 - time: cpu time of instance (float)
2351 - vcpus: the number of vcpus
2352
2353 """
2354 output = {}
2355 for hname in hypervisor_list:
2356 hvparams = all_hvparams[hname]
2357 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams)
2358 if iinfo:
2359 for name, _, memory, vcpus, state, times in iinfo:
2360 value = {
2361 "memory": memory,
2362 "vcpus": vcpus,
2363 "state": state,
2364 "time": times,
2365 }
2366 if name in output:
2367
2368
2369
2370 for key in "memory", "vcpus":
2371 if value[key] != output[name][key]:
2372 _Fail("Instance %s is running twice"
2373 " with different parameters", name)
2374 output[name] = value
2375
2376 return output
2377
2381 """Gather data about the console access of a set of instances of this node.
2382
2383 This function assumes that the caller already knows which instances are on
2384 this node, by calling a function such as L{GetAllInstancesInfo} or
2385 L{GetInstanceList}.
2386
2387 For every instance, a large amount of configuration data needs to be
2388 provided to the hypervisor interface in order to receive the console
2389 information. Whether this could or should be cut down can be discussed.
2390 The information is provided in a dictionary indexed by instance name,
2391 allowing any number of instance queries to be done.
2392
2393 @type instance_param_dict: dict of string to tuple of dictionaries, where the
2394 dictionaries represent: L{objects.Instance}, L{objects.Node},
2395 L{objects.NodeGroup}, HvParams, BeParams
2396 @param instance_param_dict: mapping of instance name to parameters necessary
2397 for console information retrieval
2398
2399 @rtype: dict
2400 @return: dictionary of instance: data, with data having the following keys:
2401 - instance: instance name
2402 - kind: console kind
2403 - message: used with kind == CONS_MESSAGE, indicates console to be
2404 unavailable, supplies error message
2405 - host: host to connect to
2406 - port: port to use
2407 - user: user for login
2408 - command: the command, broken into parts as an array
2409 - display: unknown, potentially unused?
2410
2411 """
2412
2413 output = {}
2414 for inst_name in instance_param_dict:
2415 instance = instance_param_dict[inst_name]["instance"]
2416 pnode = instance_param_dict[inst_name]["node"]
2417 group = instance_param_dict[inst_name]["group"]
2418 hvparams = instance_param_dict[inst_name]["hvParams"]
2419 beparams = instance_param_dict[inst_name]["beParams"]
2420
2421 instance = objects.Instance.FromDict(instance)
2422 pnode = objects.Node.FromDict(pnode)
2423 group = objects.NodeGroup.FromDict(group)
2424
2425 h = get_hv_fn(instance.hypervisor)
2426 output[inst_name] = h.GetInstanceConsole(instance, pnode, group,
2427 hvparams, beparams).ToDict()
2428
2429 return output
2430
2433 """Compute the OS log filename for a given instance and operation.
2434
2435 The instance name and os name are passed in as strings since not all
2436 operations have these as part of an instance object.
2437
2438 @type kind: string
2439 @param kind: the operation type (e.g. add, import, etc.)
2440 @type os_name: string
2441 @param os_name: the os name
2442 @type instance: string
2443 @param instance: the name of the instance being imported/added/etc.
2444 @type component: string or None
2445 @param component: the name of the component of the instance being
2446 transferred
2447
2448 """
2449
2450 if component:
2451 assert "/" not in component
2452 c_msg = "-%s" % component
2453 else:
2454 c_msg = ""
2455 base = ("%s-%s-%s%s-%s.log" %
2456 (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
2457 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2458
2461 """Add an OS to an instance.
2462
2463 @type instance: L{objects.Instance}
2464 @param instance: Instance whose OS is to be installed
2465 @type reinstall: boolean
2466 @param reinstall: whether this is an instance reinstall
2467 @type debug: integer
2468 @param debug: debug level, passed to the OS scripts
2469 @rtype: None
2470
2471 """
2472 inst_os = OSFromDisk(instance.os)
2473
2474 create_env = OSEnvironment(instance, inst_os, debug)
2475 if reinstall:
2476 create_env["INSTANCE_REINSTALL"] = "1"
2477
2478 logfile = _InstanceLogName("add", instance.os, instance.name, None)
2479
2480 result = utils.RunCmd([inst_os.create_script], env=create_env,
2481 cwd=inst_os.path, output=logfile, reset_env=True)
2482 if result.failed:
2483 logging.error("os create command '%s' returned error: %s, logfile: %s,"
2484 " output: %s", result.cmd, result.fail_reason, logfile,
2485 result.output)
2486 lines = [utils.SafeEncode(val)
2487 for val in utils.TailFile(logfile, lines=20)]
2488 _Fail("OS create script failed (%s), last lines in the"
2489 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2490
2493 """Run the OS rename script for an instance.
2494
2495 @type instance: L{objects.Instance}
2496 @param instance: Instance whose OS is to be installed
2497 @type old_name: string
2498 @param old_name: previous instance name
2499 @type debug: integer
2500 @param debug: debug level, passed to the OS scripts
2501 @rtype: boolean
2502 @return: the success of the operation
2503
2504 """
2505 inst_os = OSFromDisk(instance.os)
2506
2507 rename_env = OSEnvironment(instance, inst_os, debug)
2508 rename_env["OLD_INSTANCE_NAME"] = old_name
2509
2510 logfile = _InstanceLogName("rename", instance.os,
2511 "%s-%s" % (old_name, instance.name), None)
2512
2513 result = utils.RunCmd([inst_os.rename_script], env=rename_env,
2514 cwd=inst_os.path, output=logfile, reset_env=True)
2515
2516 if result.failed:
2517 logging.error("os create command '%s' returned error: %s output: %s",
2518 result.cmd, result.fail_reason, result.output)
2519 lines = [utils.SafeEncode(val)
2520 for val in utils.TailFile(logfile, lines=20)]
2521 _Fail("OS rename script failed (%s), last lines in the"
2522 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2523
2535
2538 """Set up symlinks to a instance's block device.
2539
2540 This is an auxiliary function run when an instance is start (on the primary
2541 node) or when an instance is migrated (on the target node).
2542
2543
2544 @param instance_name: the name of the target instance
2545 @param device_path: path of the physical block device, on the node
2546 @param idx: the disk index
2547 @return: absolute path to the disk's symlink
2548
2549 """
2550 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2551 try:
2552 os.symlink(device_path, link_name)
2553 except OSError, err:
2554 if err.errno == errno.EEXIST:
2555 if (not os.path.islink(link_name) or
2556 os.readlink(link_name) != device_path):
2557 os.remove(link_name)
2558 os.symlink(device_path, link_name)
2559 else:
2560 raise
2561
2562 return link_name
2563
2566 """Remove the block device symlinks belonging to the given instance.
2567
2568 """
2569 for idx, _ in enumerate(disks):
2570 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2571 if os.path.islink(link_name):
2572 try:
2573 os.remove(link_name)
2574 except OSError:
2575 logging.exception("Can't remove symlink '%s'", link_name)
2576
2579 """Get the URI for the device.
2580
2581 @type instance: L{objects.Instance}
2582 @param instance: the instance which disk belongs to
2583 @type disk: L{objects.Disk}
2584 @param disk: the target disk object
2585 @type device: L{bdev.BlockDev}
2586 @param device: the corresponding BlockDevice
2587 @rtype: string
2588 @return: the device uri if any else None
2589
2590 """
2591 access_mode = disk.params.get(constants.LDP_ACCESS,
2592 constants.DISK_KERNELSPACE)
2593 if access_mode == constants.DISK_USERSPACE:
2594
2595 return device.GetUserspaceAccessUri(instance.hypervisor)
2596 else:
2597 return None
2598
2601 """Set up an instance's block device(s).
2602
2603 This is run on the primary node at instance startup. The block
2604 devices must be already assembled.
2605
2606 @type instance: L{objects.Instance}
2607 @param instance: the instance whose disks we should assemble
2608 @rtype: list
2609 @return: list of (disk_object, link_name, drive_uri)
2610
2611 """
2612 block_devices = []
2613 for idx, disk in enumerate(instance.disks_info):
2614 device = _RecursiveFindBD(disk)
2615 if device is None:
2616 raise errors.BlockDeviceError("Block device '%s' is not set up." %
2617 str(disk))
2618 device.Open()
2619 try:
2620 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
2621 except OSError, e:
2622 raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
2623 e.strerror)
2624 uri = _CalculateDeviceURI(instance, disk, device)
2625
2626 block_devices.append((disk, link_name, uri))
2627
2628 return block_devices
2629
2635
2641
2642
def StartInstance(instance, startup_paused, reason, store_reason=True):
  """Start an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type startup_paused: bool
  @param startup_paused: pause instance at startup?
  @type reason: list of reasons
  @param reason: the reason trail for this startup
  @type store_reason: boolean
  @param store_reason: whether to store the startup reason trail on file
  @rtype: None

  """
  instance_info = _GetInstanceInfo(instance)

  # An instance that the hypervisor reports as running (and that was not
  # deliberately marked down by the user) must not be started twice.
  if instance_info and not _IsInstanceUserDown(instance_info):
    logging.info("Instance '%s' already running, not starting", instance.name)
    return

  try:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    hyper.StartInstance(instance, block_devices, startup_paused)
    if store_reason:
      _StoreInstReasonTrail(instance.name, reason)
  except errors.BlockDeviceError as err:
    _Fail("Block device error: %s", err, exc=True)
  except errors.HypervisorError as err:
    # The hypervisor refused to start the instance, so the disk symlinks
    # created above are no longer needed; clean them up before failing.
    _RemoveBlockDevLinks(instance.name, instance.disks_info)
    _Fail("Hypervisor error: %s", err, exc=True)
2674
2677 """Shut an instance down.
2678
2679 @note: this functions uses polling with a hardcoded timeout.
2680
2681 @type instance: L{objects.Instance}
2682 @param instance: the instance object
2683 @type timeout: integer
2684 @param timeout: maximum timeout for soft shutdown
2685 @type reason: list of reasons
2686 @param reason: the reason trail for this shutdown
2687 @type store_reason: boolean
2688 @param store_reason: whether to store the shutdown reason trail on file
2689 @rtype: None
2690
2691 """
2692 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2693
2694 if not _GetInstanceInfo(instance):
2695 logging.info("Instance '%s' not running, doing nothing", instance.name)
2696 return
2697
2698 class _TryShutdown(object):
2699 def __init__(self):
2700 self.tried_once = False
2701
2702 def __call__(self):
2703 if not _GetInstanceInfo(instance):
2704 return
2705
2706 try:
2707 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
2708 if store_reason:
2709 _StoreInstReasonTrail(instance.name, reason)
2710 except errors.HypervisorError, err:
2711
2712
2713 if not _GetInstanceInfo(instance):
2714 return
2715
2716 _Fail("Failed to stop instance '%s': %s", instance.name, err)
2717
2718 self.tried_once = True
2719
2720 raise utils.RetryAgain()
2721
2722 try:
2723 utils.Retry(_TryShutdown(), 5, timeout)
2724 except utils.RetryTimeout:
2725
2726 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)
2727
2728 try:
2729 hyper.StopInstance(instance, force=True)
2730 except errors.HypervisorError, err:
2731
2732
2733 if _GetInstanceInfo(instance):
2734 _Fail("Failed to force stop instance '%s': %s", instance.name, err)
2735
2736 time.sleep(1)
2737
2738 if _GetInstanceInfo(instance):
2739 _Fail("Could not shutdown instance '%s' even by destroy", instance.name)
2740
2741 try:
2742 hyper.CleanupInstance(instance.name)
2743 except errors.HypervisorError, err:
2744 logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
2745
2746 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2747
2748
def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
  """Reboot an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object to reboot
  @type reboot_type: str
  @param reboot_type: the type of reboot, one the following
    constants:
      - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
        instance OS, do not recreate the VM
      - L{constants.INSTANCE_REBOOT_HARD}: tear down and
        restart the VM (at the hypervisor level)
      - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
        not accepted here, since that mode is handled differently, in
        cmdlib, and translates into full stop and start of the
        instance (instead of a call_instance_reboot RPC)
  @type shutdown_timeout: integer
  @param shutdown_timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this reboot
  @rtype: None

  """
  # Rebooting an instance that the hypervisor does not know about makes
  # no sense, so bail out early.
  if not _GetInstanceInfo(instance):
    _Fail("Cannot reboot instance '%s' that is not running", instance.name)

  hv = hypervisor.GetHypervisor(instance.hypervisor)

  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hv.RebootInstance(instance)
    except errors.HypervisorError as err:
      _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
    return

  if reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
      result = StartInstance(instance, False, reason, store_reason=False)
      # Store the reason trail only once for the whole stop/start cycle.
      _StoreInstReasonTrail(instance.name, reason)
      return result
    except errors.HypervisorError as err:
      _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)

  _Fail("Invalid reboot_type received: '%s'", reboot_type)
2794
2815
2830
2833 """Prepare the node to accept an instance.
2834
2835 @type instance: L{objects.Instance}
2836 @param instance: the instance definition
2837 @type info: string/data (opaque)
2838 @param info: migration information, from the source node
2839 @type target: string
2840 @param target: target host (usually ip), on this node
2841
2842 """
2843
2844 if instance.disk_template in constants.DTS_EXT_MIRROR:
2845
2846
2847 try:
2848 _GatherAndLinkBlockDevs(instance)
2849 except errors.BlockDeviceError, err:
2850 _Fail("Block device error: %s", err, exc=True)
2851
2852 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2853 try:
2854 hyper.AcceptInstance(instance, info, target)
2855 except errors.HypervisorError, err:
2856 if instance.disk_template in constants.DTS_EXT_MIRROR:
2857 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2858 _Fail("Failed to accept instance: %s", err, exc=True)
2859
2862 """Finalize any preparation to accept an instance.
2863
2864 @type instance: L{objects.Instance}
2865 @param instance: the instance definition
2866 @type info: string/data (opaque)
2867 @param info: migration information, from the source node
2868 @type success: boolean
2869 @param success: whether the migration was a success or a failure
2870
2871 """
2872 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2873 try:
2874 hyper.FinalizeMigrationDst(instance, info, success)
2875 except errors.HypervisorError, err:
2876 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
2877
2880 """Migrates an instance to another node.
2881
2882 @type cluster_name: string
2883 @param cluster_name: name of the cluster
2884 @type instance: L{objects.Instance}
2885 @param instance: the instance definition
2886 @type target: string
2887 @param target: the target node name
2888 @type live: boolean
2889 @param live: whether the migration should be done live or not (the
2890 interpretation of this parameter is left to the hypervisor)
2891 @raise RPCFail: if migration fails for some reason
2892
2893 """
2894 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2895
2896 try:
2897 hyper.MigrateInstance(cluster_name, instance, target, live)
2898 except errors.HypervisorError, err:
2899 _Fail("Failed to migrate instance: %s", err, exc=True)
2900
2903 """Finalize the instance migration on the source node.
2904
2905 @type instance: L{objects.Instance}
2906 @param instance: the instance definition of the migrated instance
2907 @type success: bool
2908 @param success: whether the migration succeeded or not
2909 @type live: bool
2910 @param live: whether the user requested a live migration or not
2911 @raise RPCFail: If the execution fails for some reason
2912
2913 """
2914 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2915
2916 try:
2917 hyper.FinalizeMigrationSource(instance, success, live)
2918 except Exception, err:
2919 _Fail("Failed to finalize the migration on the source node: %s", err,
2920 exc=True)
2921
2924 """Get the migration status
2925
2926 @type instance: L{objects.Instance}
2927 @param instance: the instance that is being migrated
2928 @rtype: L{objects.MigrationStatus}
2929 @return: the status of the current migration (one of
2930 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
2931 progress info that can be retrieved from the hypervisor
2932 @raise RPCFail: If the migration status cannot be retrieved
2933
2934 """
2935 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2936 try:
2937 return hyper.GetMigrationStatus(instance)
2938 except Exception, err:
2939 _Fail("Failed to get migration status: %s", err, exc=True)
2940
2941
def HotplugDevice(instance, action, dev_type, device, extra, seq):
  """Hotplug a device

  Hotplug is currently supported only for KVM Hypervisor.
  @type instance: L{objects.Instance}
  @param instance: the instance to which we hotplug a device
  @type action: string
  @param action: the hotplug action to perform
  @type dev_type: string
  @param dev_type: the device type to hotplug
  @type device: either L{objects.NIC} or L{objects.Disk}
  @param device: the device object to hotplug
  @type extra: tuple
  @param extra: extra info used for disk hotplug (disk link, drive uri)
  @type seq: int
  @param seq: the index of the device from master perspective
  @raise RPCFail: in case instance does not have KVM hypervisor

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  try:
    hyper.VerifyHotplugSupport(instance, action, dev_type)
  except errors.HotplugError as err:
    _Fail("Hotplug is not supported: %s", err)

  # Dispatch to the hypervisor method implementing the requested action.
  if action == constants.HOTPLUG_ACTION_ADD:
    fn = hyper.HotAddDevice
  elif action == constants.HOTPLUG_ACTION_REMOVE:
    fn = hyper.HotDelDevice
  elif action == constants.HOTPLUG_ACTION_MODIFY:
    fn = hyper.HotModDevice
  else:
    assert action in constants.HOTPLUG_ALL_ACTIONS
    # Reaching this branch means the dispatch above is out of sync with
    # HOTPLUG_ALL_ACTIONS; fail explicitly instead of hitting a NameError
    # on 'fn' below (the assert alone is stripped under "python -O").
    _Fail("Unsupported hotplug action '%s'", action)

  return fn(instance, dev_type, device, extra, seq)
2977
2988
3015
3016 retries = 5
3017
3018 while True:
3019 try:
3020 trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO)
3021 break
3022 except utils.RetryTimeout:
3023 raise TimeoutError("Connection to metadata daemon timed out")
3024 except (socket.error, NoMasterError), err:
3025 if retries == 0:
3026 logging.error("Failed to connect to the metadata daemon",
3027 exc_info=True)
3028 raise TimeoutError("Failed to connect to metadata daemon: %s" % err)
3029 else:
3030 retries -= 1
3031
3032 data = serializer.DumpJson(metadata,
3033 private_encoder=serializer.EncodeWithPrivateFields)
3034
3035 trans.Send(data)
3036 trans.Close()
3037
3038
def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
  """Creates a block device for an instance.

  @type disk: L{objects.Disk}
  @param disk: the object describing the disk we should create
  @type size: int
  @param size: the size of the physical underlying device, in MiB
  @type owner: str
  @param owner: the name of the instance for which disk is created,
      used for device cache data
  @type on_primary: boolean
  @param on_primary: indicates if it is the primary node or not
  @type info: string
  @param info: string that will be sent to the physical device
      creation, used for example to set (LVM) tags on LVs
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active

  @return: the new unique_id of the device (this can sometime be
      computed only after creation), or None. On secondary nodes,
      it's not required to return anything.

  """
  def _AssembleChild(child):
    """Assemble (and possibly open) one child device of the disk."""
    try:
      cdev = _RecursiveAssembleBD(child, owner, on_primary)
    except errors.BlockDeviceError as err:
      _Fail("Can't assemble device %s: %s", child, err)
    if on_primary or disk.AssembleOnSecondary():
      # The children need to be made writable before the composite
      # device can be created on top of them.
      try:
        cdev.Open()
      except errors.BlockDeviceError as err:
        _Fail("Can't make child '%s' read-write: %s", child, err)
    return cdev

  clist = [_AssembleChild(child) for child in (disk.children or [])]

  try:
    device = bdev.Create(disk, clist, excl_stor)
  except errors.BlockDeviceError as err:
    _Fail("Can't create block device: %s", err)

  if on_primary or disk.AssembleOnSecondary():
    try:
      device.Assemble()
    except errors.BlockDeviceError as err:
      _Fail("Can't assemble device after creation, unusual event: %s", err)
    if on_primary or disk.OpenOnSecondary():
      try:
        device.Open(force=True)
      except errors.BlockDeviceError as err:
        _Fail("Can't make device r/w after creation, unusual event: %s", err)
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  return device.unique_id
3102
3103
def _DumpDevice(source_path, target_path, offset, size, truncate):
  """This function images/wipes the device using a local file.

  @type source_path: string
  @param source_path: path of the image or data source (e.g., "/dev/zero")

  @type target_path: string
  @param target_path: path of the device to image/wipe

  @type offset: int
  @param offset: offset in MiB in the output file

  @type size: int
  @param size: maximum size in MiB to write (data source might be smaller)

  @type truncate: bool
  @param truncate: whether the file should be truncated

  @return: None
  @raise RPCFail: in case of failure

  """
  # Build the dd invocation; "oflag=direct" bypasses the page cache when
  # writing to the target device.
  dd_args = [
    constants.DD_CMD,
    "if=%s" % source_path,
    "seek=%d" % offset,
    "bs=%s" % constants.DD_BLOCK_SIZE,
    "oflag=direct",
    "of=%s" % target_path,
    "count=%d" % size,
    ]

  # Without "conv=notrunc", dd would truncate the output before writing.
  if not truncate:
    dd_args.append("conv=notrunc")

  result = utils.RunCmd(dd_args)

  if result.failed:
    _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
          result.fail_reason, result.output)
3143
3146 """This function images a device using a downloaded image file.
3147
3148 @type source_url: string
3149 @param source_url: URL of image to dump to disk
3150
3151 @type target_path: string
3152 @param target_path: path of the device to image
3153
3154 @type size: int
3155 @param size: maximum size in MiB to write (data source might be smaller)
3156
3157 @rtype: NoneType
3158 @return: None
3159 @raise RPCFail: in case of download or write failures
3160
3161 """
3162 class DDParams(object):
3163 def __init__(self, current_size, total_size):
3164 self.current_size = current_size
3165 self.total_size = total_size
3166 self.image_size_error = False
3167
3168 def dd_write(ddparams, out):
3169 if ddparams.current_size < ddparams.total_size:
3170 ddparams.current_size += len(out)
3171 target_file.write(out)
3172 else:
3173 ddparams.image_size_error = True
3174 return -1
3175
3176 target_file = open(target_path, "r+")
3177 ddparams = DDParams(0, 1024 * 1024 * size)
3178
3179 curl = pycurl.Curl()
3180 curl.setopt(pycurl.VERBOSE, True)
3181 curl.setopt(pycurl.NOSIGNAL, True)
3182 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
3183 curl.setopt(pycurl.URL, source_url)
3184 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))
3185
3186 try:
3187 curl.perform()
3188 except pycurl.error:
3189 if ddparams.image_size_error:
3190 _Fail("Disk image larger than the disk")
3191 else:
3192 raise
3193
3194 target_file.close()
3195
3198 """Copies data from source block device to target.
3199
3200 This function gets the export and import commands from the source and
3201 target devices respectively, and then concatenates them to a single
3202 command using a pipe ("|"). Finally, executes the unified command that
3203 will transfer the data between the devices during the disk template
3204 conversion operation.
3205
3206 @type src_disk: L{objects.Disk}
3207 @param src_disk: the disk object we want to copy from
3208 @type target_disk: L{objects.Disk}
3209 @param target_disk: the disk object we want to copy to
3210
3211 @rtype: NoneType
3212 @return: None
3213 @raise RPCFail: in case of failure
3214
3215 """
3216 src_dev = _RecursiveFindBD(src_disk)
3217 if src_dev is None:
3218 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid)
3219
3220 dest_dev = _RecursiveFindBD(target_disk)
3221 if dest_dev is None:
3222 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid)
3223
3224 src_cmd = src_dev.Export()
3225 dest_cmd = dest_dev.Import()
3226 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd),
3227 utils.ShellQuoteArgs(dest_cmd))
3228
3229 result = utils.RunCmd(command)
3230 if result.failed:
3231 _Fail("Disk conversion command '%s' exited with error: %s; output: %s",
3232 result.cmd, result.fail_reason, result.output)
3233
3236 """Wipes a block device.
3237
3238 @type disk: L{objects.Disk}
3239 @param disk: the disk object we want to wipe
3240 @type offset: int
3241 @param offset: The offset in MiB in the file
3242 @type size: int
3243 @param size: The size in MiB to write
3244
3245 """
3246 try:
3247 rdev = _RecursiveFindBD(disk)
3248 except errors.BlockDeviceError:
3249 rdev = None
3250
3251 if not rdev:
3252 _Fail("Cannot wipe device %s: device not found", disk.iv_name)
3253 if offset < 0:
3254 _Fail("Negative offset")
3255 if size < 0:
3256 _Fail("Negative size")
3257 if offset > rdev.size:
3258 _Fail("Wipe offset is bigger than device size")
3259 if (offset + size) > rdev.size:
3260 _Fail("Wipe offset and size are bigger than device size")
3261
3262 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
3263
3266 """Images a block device either by dumping a local file or
3267 downloading a URL.
3268
3269 @type disk: L{objects.Disk}
3270 @param disk: the disk object we want to image
3271
3272 @type image: string
3273 @param image: file path to the disk image be dumped
3274
3275 @type size: int
3276 @param size: The size in MiB to write
3277
3278 @rtype: NoneType
3279 @return: None
3280 @raise RPCFail: in case of failure
3281
3282 """
3283 if not (utils.IsUrl(image) or os.path.exists(image)):
3284 _Fail("Image '%s' not found", image)
3285
3286 try:
3287 rdev = _RecursiveFindBD(disk)
3288 except errors.BlockDeviceError:
3289 rdev = None
3290
3291 if not rdev:
3292 _Fail("Cannot image device %s: device not found", disk.iv_name)
3293 if size < 0:
3294 _Fail("Negative size")
3295 if size > rdev.size:
3296 _Fail("Image size is bigger than device size")
3297
3298 if utils.IsUrl(image):
3299 _DownloadAndDumpDevice(image, rdev.dev_path, size)
3300 else:
3301 _DumpDevice(image, rdev.dev_path, 0, size, False)
3302
3305 """Pause or resume the sync of the block device.
3306
3307 @type disks: list of L{objects.Disk}
3308 @param disks: the disks object we want to pause/resume
3309 @type pause: bool
  @param pause: Whether to pause or resume
3311
3312 """
3313 success = []
3314 for disk in disks:
3315 try:
3316 rdev = _RecursiveFindBD(disk)
3317 except errors.BlockDeviceError:
3318 rdev = None
3319
3320 if not rdev:
3321 success.append((False, ("Cannot change sync for device %s:"
3322 " device not found" % disk.iv_name)))
3323 continue
3324
3325 result = rdev.PauseResumeSync(pause)
3326
3327 if result:
3328 success.append((result, None))
3329 else:
3330 if pause:
3331 msg = "Pause"
3332 else:
3333 msg = "Resume"
3334 success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))
3335
3336 return success
3337
3340 """Remove a block device.
3341
3342 @note: This is intended to be called recursively.
3343
3344 @type disk: L{objects.Disk}
3345 @param disk: the disk object we should remove
3346 @rtype: boolean
3347 @return: the success of the operation
3348
3349 """
3350 msgs = []
3351 try:
3352 rdev = _RecursiveFindBD(disk)
3353 except errors.BlockDeviceError, err:
3354
3355 logging.info("Can't attach to device %s in remove", disk)
3356 rdev = None
3357 if rdev is not None:
3358 r_path = rdev.dev_path
3359
3360 def _TryRemove():
3361 try:
3362 rdev.Remove()
3363 return []
3364 except errors.BlockDeviceError, err:
3365 return [str(err)]
3366
3367 msgs.extend(utils.SimpleRetry([], _TryRemove,
3368 constants.DISK_REMOVE_RETRY_INTERVAL,
3369 constants.DISK_REMOVE_RETRY_TIMEOUT))
3370
3371 if not msgs:
3372 DevCacheManager.RemoveCache(r_path)
3373
3374 if disk.children:
3375 for child in disk.children:
3376 try:
3377 BlockdevRemove(child)
3378 except RPCFail, err:
3379 msgs.append(str(err))
3380
3381 if msgs:
3382 _Fail("; ".join(msgs))
3383
3386 """Activate a block device for an instance.
3387
3388 This is run on the primary and secondary nodes for an instance.
3389
3390 @note: this function is called recursively.
3391
3392 @type disk: L{objects.Disk}
3393 @param disk: the disk we try to assemble
3394 @type owner: str
3395 @param owner: the name of the instance which owns the disk
3396 @type as_primary: boolean
3397 @param as_primary: if we should make the block device
3398 read/write
3399
3400 @return: the assembled device or None (in case no device
3401 was assembled)
3402 @raise errors.BlockDeviceError: in case there is an error
3403 during the activation of the children or the device
3404 itself
3405
3406 """
3407 children = []
3408 if disk.children:
3409 mcn = disk.ChildrenNeeded()
3410 if mcn == -1:
3411 mcn = 0
3412 else:
3413 mcn = len(disk.children) - mcn
3414 for chld_disk in disk.children:
3415 try:
3416 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
3417 except errors.BlockDeviceError, err:
3418 if children.count(None) >= mcn:
3419 raise
3420 cdev = None
3421 logging.error("Error in child activation (but continuing): %s",
3422 str(err))
3423 children.append(cdev)
3424
3425 if as_primary or disk.AssembleOnSecondary():
3426 r_dev = bdev.Assemble(disk, children)
3427 result = r_dev
3428 if as_primary or disk.OpenOnSecondary():
3429 r_dev.Open()
3430 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
3431 as_primary, disk.iv_name)
3432
3433 else:
3434 result = True
3435 return result
3436
3439 """Activate a block device for an instance.
3440
3441 This is a wrapper over _RecursiveAssembleBD.
3442
3443 @rtype: str or boolean
3444 @return: a tuple with the C{/dev/...} path and the created symlink
3445 for primary nodes, and (C{True}, C{True}) for secondary nodes
3446
3447 """
3448 try:
3449 result = _RecursiveAssembleBD(disk, instance.name, as_primary)
3450 if isinstance(result, BlockDev):
3451
3452 dev_path = result.dev_path
3453 link_name = None
3454 uri = None
3455 if as_primary:
3456 link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
3457 uri = _CalculateDeviceURI(instance, disk, result)
3458 elif result:
3459 return result, result
3460 else:
3461 _Fail("Unexpected result from _RecursiveAssembleBD")
3462 except errors.BlockDeviceError, err:
3463 _Fail("Error while assembling disk: %s", err, exc=True)
3464 except OSError, err:
3465 _Fail("Error while symlinking disk: %s", err, exc=True)
3466
3467 return dev_path, link_name, uri
3468
3471 """Shut down a block device.
3472
3473 First, if the device is assembled (Attach() is successful), then
3474 the device is shutdown. Then the children of the device are
3475 shutdown.
3476
3477 This function is called recursively. Note that we don't cache the
  children or such, as opposed to assemble, shutdown of different
3479 devices doesn't require that the upper device was active.
3480
3481 @type disk: L{objects.Disk}
3482 @param disk: the description of the disk we should
3483 shutdown
3484 @rtype: None
3485
3486 """
3487 msgs = []
3488 r_dev = _RecursiveFindBD(disk)
3489 if r_dev is not None:
3490 r_path = r_dev.dev_path
3491 try:
3492 r_dev.Shutdown()
3493 DevCacheManager.RemoveCache(r_path)
3494 except errors.BlockDeviceError, err:
3495 msgs.append(str(err))
3496
3497 if disk.children:
3498 for child in disk.children:
3499 try:
3500 BlockdevShutdown(child)
3501 except RPCFail, err:
3502 msgs.append(str(err))
3503
3504 if msgs:
3505 _Fail("; ".join(msgs))
3506
3509 """Extend a mirrored block device.
3510
3511 @type parent_cdev: L{objects.Disk}
3512 @param parent_cdev: the disk to which we should add children
3513 @type new_cdevs: list of L{objects.Disk}
3514 @param new_cdevs: the list of children which we should add
3515 @rtype: None
3516
3517 """
3518 parent_bdev = _RecursiveFindBD(parent_cdev)
3519 if parent_bdev is None:
3520 _Fail("Can't find parent device '%s' in add children", parent_cdev)
3521 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
3522 if new_bdevs.count(None) > 0:
3523 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
3524 parent_bdev.AddChildren(new_bdevs)
3525
3528 """Shrink a mirrored block device.
3529
3530 @type parent_cdev: L{objects.Disk}
3531 @param parent_cdev: the disk from which we should remove children
3532 @type new_cdevs: list of L{objects.Disk}
3533 @param new_cdevs: the list of children which we should remove
3534 @rtype: None
3535
3536 """
3537 parent_bdev = _RecursiveFindBD(parent_cdev)
3538 if parent_bdev is None:
3539 _Fail("Can't find parent device '%s' in remove children", parent_cdev)
3540 devs = []
3541 for disk in new_cdevs:
3542 rpath = disk.StaticDevPath()
3543 if rpath is None:
3544 bd = _RecursiveFindBD(disk)
3545 if bd is None:
3546 _Fail("Can't find device %s while removing children", disk)
3547 else:
3548 devs.append(bd.dev_path)
3549 else:
3550 if not utils.IsNormAbsPath(rpath):
3551 _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
3552 devs.append(rpath)
3553 parent_bdev.RemoveChildren(devs)
3554
3557 """Get the mirroring status of a list of devices.
3558
3559 @type disks: list of L{objects.Disk}
3560 @param disks: the list of disks which we should query
  @rtype: list
3562 @return: List of L{objects.BlockDevStatus}, one for each disk
3563 @raise errors.BlockDeviceError: if any of the disks cannot be
3564 found
3565
3566 """
3567 stats = []
3568 for dsk in disks:
3569 rbd = _RecursiveFindBD(dsk)
3570 if rbd is None:
3571 _Fail("Can't find device %s", dsk)
3572
3573 stats.append(rbd.CombinedSyncStatus())
3574
3575 return stats
3576
3579 """Get the mirroring status of a list of devices.
3580
3581 @type disks: list of L{objects.Disk}
3582 @param disks: the list of disks which we should query
3583 @rtype: disk
3584 @return: List of tuples, (bool, status), one for each disk; bool denotes
3585 success/failure, status is L{objects.BlockDevStatus} on success, string
3586 otherwise
3587
3588 """
3589 result = []
3590 for disk in disks:
3591 try:
3592 rbd = _RecursiveFindBD(disk)
3593 if rbd is None:
3594 result.append((False, "Can't find device %s" % disk))
3595 continue
3596
3597 status = rbd.CombinedSyncStatus()
3598 except errors.BlockDeviceError, err:
3599 logging.exception("Error while getting disk status")
3600 result.append((False, str(err)))
3601 else:
3602 result.append((True, status))
3603
3604 assert len(disks) == len(result)
3605
3606 return result
3607
3610 """Check if a device is activated.
3611
3612 If so, return information about the real device.
3613
3614 @type disk: L{objects.Disk}
3615 @param disk: the disk object we need to find
3616
3617 @return: None if the device can't be found,
3618 otherwise the device instance
3619
3620 """
3621 children = []
3622 if disk.children:
3623 for chdisk in disk.children:
3624 children.append(_RecursiveFindBD(chdisk))
3625
3626 return bdev.FindDevice(disk, children)
3627
3630 """Opens the underlying block device of a disk.
3631
3632 @type disk: L{objects.Disk}
3633 @param disk: the disk object we want to open
3634
3635 """
3636 real_disk = _RecursiveFindBD(disk)
3637 if real_disk is None:
3638 _Fail("Block device '%s' is not set up", disk)
3639
3640 real_disk.Open()
3641
3642 return real_disk
3643
3646 """Check if a device is activated.
3647
3648 If it is, return information about the real device.
3649
3650 @type disk: L{objects.Disk}
3651 @param disk: the disk to find
3652 @rtype: None or objects.BlockDevStatus
  @return: None if the disk cannot be found, otherwise the current
3654 information
3655
3656 """
3657 try:
3658 rbd = _RecursiveFindBD(disk)
3659 except errors.BlockDeviceError, err:
3660 _Fail("Failed to find device: %s", err, exc=True)
3661
3662 if rbd is None:
3663 return None
3664
3665 return rbd.GetSyncStatus()
3666
3669 """Computes the size of the given disks.
3670
3671 If a disk is not found, returns None instead.
3672
3673 @type disks: list of L{objects.Disk}
3674 @param disks: the list of disk to compute the size for
3675 @rtype: list
3676 @return: list with elements None if the disk cannot be found,
3677 otherwise the pair (size, spindles), where spindles is None if the
3678 device doesn't support that
3679
3680 """
3681 result = []
3682 for cf in disks:
3683 try:
3684 rbd = _RecursiveFindBD(cf)
3685 except errors.BlockDeviceError:
3686 result.append(None)
3687 continue
3688 if rbd is None:
3689 result.append(None)
3690 else:
3691 result.append(rbd.GetActualDimensions())
3692 return result
3693
3694
def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
  """Write a file to the filesystem.

  This allows the master to overwrite(!) a file. It will only perform
  the operation if the file belongs to a list of configuration files.

  @type file_name: str
  @param file_name: the target file name
  @type data: str
  @param data: the new contents of the file
  @type mode: int
  @param mode: the mode to give the file (can be None)
  @type uid: string
  @param uid: the owner of the file
  @type gid: string
  @param gid: the group of the file
  @type atime: float
  @param atime: the atime to set on the file (can be None)
  @type mtime: float
  @param mtime: the mtime to set on the file (can be None)
  @rtype: None

  """
  # translate the cluster-relative ("virtual") path into a node-local one
  file_name = vcluster.LocalizeVirtualPath(file_name)

  if not os.path.isabs(file_name):
    _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)

  # only whitelisted configuration files may be overwritten
  if file_name not in _ALLOWED_UPLOAD_FILES:
    _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
          file_name)

  contents = _Decompress(data)

  if not all(isinstance(name, basestring) for name in (uid, gid)):
    _Fail("Invalid username/groupname type")

  # resolve the symbolic owner/group names to numeric ids
  entity_resolver = runtime.GetEnts()
  numeric_uid = entity_resolver.LookupUser(uid)
  numeric_gid = entity_resolver.LookupGroup(gid)

  utils.SafeWriteFile(file_name, None,
                      data=contents, mode=mode,
                      uid=numeric_uid, gid=numeric_gid,
                      atime=atime, mtime=mtime)
3739
3740
def RunOob(oob_program, command, node, timeout):
  """Executes oob_program with given command on given node.

  @param oob_program: The path to the executable oob_program
  @param command: The command to invoke on oob_program
  @param node: The node given as an argument to the program
  @param timeout: Timeout after which we kill the oob program

  @return: stdout
  @raise RPCFail: If execution fails for some reason

  """
  run_result = utils.RunCmd([oob_program, command, node], timeout=timeout)

  # success path first; any failure is reported back via RPCFail
  if not run_result.failed:
    return run_result.stdout

  _Fail("'%s' failed with reason '%s'; output: %s", run_result.cmd,
        run_result.fail_reason, run_result.output)
3760
3763 """Compute and return the API version of a given OS.
3764
3765 This function will try to read the API version of the OS residing in
3766 the 'os_dir' directory.
3767
3768 @type os_dir: str
3769 @param os_dir: the directory in which we should look for the OS
3770 @rtype: tuple
3771 @return: tuple (status, data) with status denoting the validity and
3772 data holding either the valid versions or an error message
3773
3774 """
3775 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
3776
3777 try:
3778 st = os.stat(api_file)
3779 except EnvironmentError, err:
3780 return False, ("Required file '%s' not found under path %s: %s" %
3781 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err)))
3782
3783 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
3784 return False, ("File '%s' in %s is not a regular file" %
3785 (constants.OS_API_FILE, os_dir))
3786
3787 try:
3788 api_versions = utils.ReadFile(api_file).splitlines()
3789 except EnvironmentError, err:
3790 return False, ("Error while reading the API version file at %s: %s" %
3791 (api_file, utils.ErrnoOrStr(err)))
3792
3793 try:
3794 api_versions = [int(version.strip()) for version in api_versions]
3795 except (TypeError, ValueError), err:
3796 return False, ("API version(s) can't be converted to integer: %s" %
3797 str(err))
3798
3799 return True, api_versions
3800
3803 """Compute the validity for all OSes.
3804
3805 @type top_dirs: list
3806 @param top_dirs: the list of directories in which to
3807 search (if not given defaults to
3808 L{pathutils.OS_SEARCH_PATH})
3809 @rtype: list of L{objects.OS}
3810 @return: a list of tuples (name, path, status, diagnose, variants,
3811 parameters, api_version) for all (potential) OSes under all
3812 search paths, where:
3813 - name is the (potential) OS name
3814 - path is the full path to the OS
3815 - status True/False is the validity of the OS
3816 - diagnose is the error message for an invalid OS, otherwise empty
3817 - variants is a list of supported OS variants, if any
3818 - parameters is a list of (name, help) parameters, if any
      - api_version is a list of supported OS API versions
3820
3821 """
3822 if top_dirs is None:
3823 top_dirs = pathutils.OS_SEARCH_PATH
3824
3825 result = []
3826 for dir_name in top_dirs:
3827 if os.path.isdir(dir_name):
3828 try:
3829 f_names = utils.ListVisibleFiles(dir_name)
3830 except EnvironmentError, err:
3831 logging.exception("Can't list the OS directory %s: %s", dir_name, err)
3832 break
3833 for name in f_names:
3834 os_path = utils.PathJoin(dir_name, name)
3835 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
3836 if status:
3837 diagnose = ""
3838 variants = os_inst.supported_variants
3839 parameters = os_inst.supported_parameters
3840 api_versions = os_inst.api_versions
3841 trusted = False if os_inst.create_script_untrusted else True
3842 else:
3843 diagnose = os_inst
3844 variants = parameters = api_versions = []
3845 trusted = True
3846 result.append((name, os_path, status, diagnose, variants,
3847 parameters, api_versions, trusted))
3848
3849 return result
3850
3853 """Create an OS instance from disk.
3854
3855 This function will return an OS instance if the given name is a
3856 valid OS name.
3857
3858 @type base_dir: string
3859 @keyword base_dir: Base directory containing OS installations.
3860 Defaults to a search in all the OS_SEARCH_PATH dirs.
3861 @rtype: tuple
3862 @return: success and either the OS instance if we find a valid one,
3863 or error message
3864
3865 """
3866 if base_dir is None:
3867 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir)
3868 else:
3869 os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
3870
3871 if os_dir is None:
3872 return False, "Directory for OS %s not found in search path" % name
3873
3874 status, api_versions = _OSOndiskAPIVersion(os_dir)
3875 if not status:
3876
3877 return status, api_versions
3878
3879 if not constants.OS_API_VERSIONS.intersection(api_versions):
3880 return False, ("API version mismatch for path '%s': found %s, want %s." %
3881 (os_dir, api_versions, constants.OS_API_VERSIONS))
3882
3883
3884
3885
3886 os_files = dict.fromkeys(constants.OS_SCRIPTS, True)
3887
3888 os_files[constants.OS_SCRIPT_CREATE] = False
3889 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False
3890
3891 if max(api_versions) >= constants.OS_API_V15:
3892 os_files[constants.OS_VARIANTS_FILE] = False
3893
3894 if max(api_versions) >= constants.OS_API_V20:
3895 os_files[constants.OS_PARAMETERS_FILE] = True
3896 else:
3897 del os_files[constants.OS_SCRIPT_VERIFY]
3898
3899 for (filename, required) in os_files.items():
3900 os_files[filename] = utils.PathJoin(os_dir, filename)
3901
3902 try:
3903 st = os.stat(os_files[filename])
3904 except EnvironmentError, err:
3905 if err.errno == errno.ENOENT and not required:
3906 del os_files[filename]
3907 continue
3908 return False, ("File '%s' under path '%s' is missing (%s)" %
3909 (filename, os_dir, utils.ErrnoOrStr(err)))
3910
3911 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
3912 return False, ("File '%s' under path '%s' is not a regular file" %
3913 (filename, os_dir))
3914
3915 if filename in constants.OS_SCRIPTS:
3916 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
3917 return False, ("File '%s' under path '%s' is not executable" %
3918 (filename, os_dir))
3919
3920 if not constants.OS_SCRIPT_CREATE in os_files and \
3921 not constants.OS_SCRIPT_CREATE_UNTRUSTED in os_files:
3922 return False, ("A create script (trusted or untrusted) under path '%s'"
3923 " must exist" % os_dir)
3924
3925 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None)
3926 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED,
3927 None)
3928
3929 variants = []
3930 if constants.OS_VARIANTS_FILE in os_files:
3931 variants_file = os_files[constants.OS_VARIANTS_FILE]
3932 try:
3933 variants = \
3934 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file))
3935 except EnvironmentError, err:
3936
3937 if err.errno != errno.ENOENT:
3938 return False, ("Error while reading the OS variants file at %s: %s" %
3939 (variants_file, utils.ErrnoOrStr(err)))
3940
3941 parameters = []
3942 if constants.OS_PARAMETERS_FILE in os_files:
3943 parameters_file = os_files[constants.OS_PARAMETERS_FILE]
3944 try:
3945 parameters = utils.ReadFile(parameters_file).splitlines()
3946 except EnvironmentError, err:
3947 return False, ("Error while reading the OS parameters file at %s: %s" %
3948 (parameters_file, utils.ErrnoOrStr(err)))
3949 parameters = [v.split(None, 1) for v in parameters]
3950
3951 os_obj = objects.OS(name=name, path=os_dir,
3952 create_script=create_script,
3953 create_script_untrusted=create_script_untrusted,
3954 export_script=os_files[constants.OS_SCRIPT_EXPORT],
3955 import_script=os_files[constants.OS_SCRIPT_IMPORT],
3956 rename_script=os_files[constants.OS_SCRIPT_RENAME],
3957 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
3958 None),
3959 supported_variants=variants,
3960 supported_parameters=parameters,
3961 api_versions=api_versions)
3962 return True, os_obj
3963
3966 """Create an OS instance from disk.
3967
3968 This function will return an OS instance if the given name is a
3969 valid OS name. Otherwise, it will raise an appropriate
3970 L{RPCFail} exception, detailing why this is not a valid OS.
3971
3972 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise
3973 an exception but returns true/false status data.
3974
3975 @type base_dir: string
3976 @keyword base_dir: Base directory containing OS installations.
3977 Defaults to a search in all the OS_SEARCH_PATH dirs.
3978 @rtype: L{objects.OS}
3979 @return: the OS instance if we find a valid one
3980 @raise RPCFail: if we don't find a valid OS
3981
3982 """
3983 name_only = objects.OS.GetName(name)
3984 status, payload = _TryOSFromDisk(name_only, base_dir)
3985
3986 if not status:
3987 _Fail(payload)
3988
3989 return payload
3990
3991
def OSCoreEnv(os_name, inst_os, os_params, debug=0):
  """Calculate the basic environment for an os script.

  @type os_name: str
  @param os_name: full operating system name (including variant)
  @type inst_os: L{objects.OS}
  @param inst_os: operating system for which the environment is being built
  @type os_params: dict
  @param os_params: the OS parameters
  @type debug: integer
  @param debug: debug level (0 or 1, for OS Api 10)
  @rtype: dict
  @return: dict of environment variables
  @raise errors.BlockDeviceError: if the block device
      cannot be found

  """
  # use the highest OS API version supported by both this node and the OS
  api_version = \
    max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))

  env = {
    "OS_API_VERSION": "%d" % api_version,
    "OS_NAME": inst_os.name,
    "DEBUG_LEVEL": "%d" % debug,
    }

  # OS variants are only meaningful from API version 15 onwards
  if api_version >= constants.OS_API_V15 and inst_os.supported_variants:
    # fall back to the first supported variant if none is encoded in the name
    env["OS_VARIANT"] = (objects.OS.GetVariant(os_name) or
                         inst_os.supported_variants[0])
  else:
    env["OS_VARIANT"] = ""

  # expose each OS parameter as an OSP_* variable
  for param_name, param_value in os_params.items():
    env["OSP_%s" % param_name.upper().replace("-", "_")] = param_value

  # restrict PATH for the script's execution
  env["PATH"] = constants.HOOKS_PATH

  return env
4035
4038 """Calculate the environment for an os script.
4039
4040 @type instance: L{objects.Instance}
4041 @param instance: target instance for the os script run
4042 @type inst_os: L{objects.OS}
4043 @param inst_os: operating system for which the environment is being built
4044 @type debug: integer
4045 @param debug: debug level (0 or 1, for OS Api 10)
4046 @rtype: dict
4047 @return: dict of environment variables
4048 @raise errors.BlockDeviceError: if the block device
4049 cannot be found
4050
4051 """
4052 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams,
4053 instance.osparams_private.Unprivate()), debug=debug)
4054
4055 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]:
4056 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))
4057
4058 result["HYPERVISOR"] = instance.hypervisor
4059 result["DISK_COUNT"] = "%d" % len(instance.disks_info)
4060 result["NIC_COUNT"] = "%d" % len(instance.nics)
4061 result["INSTANCE_SECONDARY_NODES"] = \
4062 ("%s" % " ".join(instance.secondary_nodes))
4063
4064
4065 for idx, disk in enumerate(instance.disks_info):
4066 real_disk = _OpenRealBD(disk)
4067 result["DISK_%d_PATH" % idx] = real_disk.dev_path
4068 result["DISK_%d_ACCESS" % idx] = disk.mode
4069 result["DISK_%d_UUID" % idx] = disk.uuid
4070 if disk.name:
4071 result["DISK_%d_NAME" % idx] = disk.name
4072 if constants.HV_DISK_TYPE in instance.hvparams:
4073 result["DISK_%d_FRONTEND_TYPE" % idx] = \
4074 instance.hvparams[constants.HV_DISK_TYPE]
4075 if disk.dev_type in constants.DTS_BLOCK:
4076 result["DISK_%d_BACKEND_TYPE" % idx] = "block"
4077 elif disk.dev_type in constants.DTS_FILEBASED:
4078 result["DISK_%d_BACKEND_TYPE" % idx] = \
4079 "file:%s" % disk.logical_id[0]
4080
4081
4082 for idx, nic in enumerate(instance.nics):
4083 result["NIC_%d_MAC" % idx] = nic.mac
4084 result["NIC_%d_UUID" % idx] = nic.uuid
4085 if nic.name:
4086 result["NIC_%d_NAME" % idx] = nic.name
4087 if nic.ip:
4088 result["NIC_%d_IP" % idx] = nic.ip
4089 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE]
4090 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4091 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK]
4092 if nic.nicparams[constants.NIC_LINK]:
4093 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK]
4094 if nic.netinfo:
4095 nobj = objects.Network.FromDict(nic.netinfo)
4096 result.update(nobj.HooksDict("NIC_%d_" % idx))
4097 if constants.HV_NIC_TYPE in instance.hvparams:
4098 result["NIC_%d_FRONTEND_TYPE" % idx] = \
4099 instance.hvparams[constants.HV_NIC_TYPE]
4100
4101
4102 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
4103 for key, value in source.items():
4104 result["INSTANCE_%s_%s" % (kind, key)] = str(value)
4105
4106 return result
4107
4110 """Compute the validity for all ExtStorage Providers.
4111
4112 @type top_dirs: list
4113 @param top_dirs: the list of directories in which to
4114 search (if not given defaults to
4115 L{pathutils.ES_SEARCH_PATH})
4116 @rtype: list of L{objects.ExtStorage}
4117 @return: a list of tuples (name, path, status, diagnose, parameters)
4118 for all (potential) ExtStorage Providers under all
4119 search paths, where:
4120 - name is the (potential) ExtStorage Provider
4121 - path is the full path to the ExtStorage Provider
4122 - status True/False is the validity of the ExtStorage Provider
4123 - diagnose is the error message for an invalid ExtStorage Provider,
4124 otherwise empty
4125 - parameters is a list of (name, help) parameters, if any
4126
4127 """
4128 if top_dirs is None:
4129 top_dirs = pathutils.ES_SEARCH_PATH
4130
4131 result = []
4132 for dir_name in top_dirs:
4133 if os.path.isdir(dir_name):
4134 try:
4135 f_names = utils.ListVisibleFiles(dir_name)
4136 except EnvironmentError, err:
4137 logging.exception("Can't list the ExtStorage directory %s: %s",
4138 dir_name, err)
4139 break
4140 for name in f_names:
4141 es_path = utils.PathJoin(dir_name, name)
4142 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name)
4143 if status:
4144 diagnose = ""
4145 parameters = es_inst.supported_parameters
4146 else:
4147 diagnose = es_inst
4148 parameters = []
4149 result.append((name, es_path, status, diagnose, parameters))
4150
4151 return result
4152
4153
def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
  """Grow a stack of block devices.

  This function is called recursively, with the children being the
  first ones to resize.

  @type disk: L{objects.Disk}
  @param disk: the disk to be grown
  @type amount: integer
  @param amount: the amount (in mebibytes) to grow with
  @type dryrun: boolean
  @param dryrun: whether to execute the operation in simulation mode
      only, without actually increasing the size
  @param backingstore: whether to execute the operation on backing storage
      only, or on "logical" storage only; e.g. DRBD is logical storage,
      whereas LVM, file, RBD are backing storage
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active
  @rtype: (status, result)
  @return: a tuple with the status of the operation (True/False), and
      the errors message if status is False

  """
  device = _RecursiveFindBD(disk)
  if device is None:
    _Fail("Cannot find block device %s", disk)

  try:
    device.Grow(amount, dryrun, backingstore, excl_stor)
  except errors.BlockDeviceError as err:
    _Fail("Failed to grow block device: %s", err, exc=True)
4185
4188 """Create a snapshot copy of a block device.
4189
4190 This function is called recursively, and the snapshot is actually created
4191 just for the leaf lvm backend device.
4192
4193 @type disk: L{objects.Disk}
4194 @param disk: the disk to be snapshotted
4195 @type snap_name: string
4196 @param snap_name: the name of the snapshot
4197 @type snap_size: int
4198 @param snap_size: the size of the snapshot
4199 @rtype: string
4200 @return: snapshot disk ID as (vg, lv)
4201
4202 """
4203 def _DiskSnapshot(disk, snap_name=None, snap_size=None):
4204 r_dev = _RecursiveFindBD(disk)
4205 if r_dev is not None:
4206 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size)
4207 else:
4208 _Fail("Cannot find block device %s", disk)
4209
4210 if disk.dev_type == constants.DT_DRBD8:
4211 if not disk.children:
4212 _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
4213 disk.unique_id)
4214 return BlockdevSnapshot(disk.children[0], snap_name, snap_size)
4215 elif disk.dev_type == constants.DT_PLAIN:
4216 return _DiskSnapshot(disk, snap_name, snap_size)
4217 elif disk.dev_type == constants.DT_EXT:
4218 return _DiskSnapshot(disk, snap_name, snap_size)
4219 else:
4220 _Fail("Cannot snapshot block device '%s' of type '%s'",
4221 disk.logical_id, disk.dev_type)
4222
4225 """Sets 'metadata' information on block devices.
4226
4227 This function sets 'info' metadata on block devices. Initial
4228 information is set at device creation; this function should be used
4229 for example after renames.
4230
4231 @type disk: L{objects.Disk}
4232 @param disk: the disk to be grown
4233 @type info: string
4234 @param info: new 'info' metadata
4235 @rtype: (status, result)
4236 @return: a tuple with the status of the operation (True/False), and
4237 the errors message if status is False
4238
4239 """
4240 r_dev = _RecursiveFindBD(disk)
4241 if r_dev is None:
4242 _Fail("Cannot find block device %s", disk)
4243
4244 try:
4245 r_dev.SetInfo(info)
4246 except errors.BlockDeviceError, err:
4247 _Fail("Failed to set information on block device: %s", err, exc=True)
4248
4251 """Write out the export configuration information.
4252
4253 @type instance: L{objects.Instance}
4254 @param instance: the instance which we export, used for
4255 saving configuration
4256 @type snap_disks: list of L{objects.Disk}
4257 @param snap_disks: list of snapshot block devices, which
4258 will be used to get the actual name of the dump file
4259
4260 @rtype: None
4261
4262 """
4263 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new")
4264 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name)
4265
4266 config = objects.SerializableConfigParser()
4267
4268 config.add_section(constants.INISECT_EXP)
4269 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION))
4270 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time()))
4271 config.set(constants.INISECT_EXP, "source", instance.primary_node)
4272 config.set(constants.INISECT_EXP, "os", instance.os)
4273 config.set(constants.INISECT_EXP, "compression", "none")
4274
4275 config.add_section(constants.INISECT_INS)
4276 config.set(constants.INISECT_INS, "name", instance.name)
4277 config.set(constants.INISECT_INS, "maxmem", "%d" %
4278 instance.beparams[constants.BE_MAXMEM])
4279 config.set(constants.INISECT_INS, "minmem", "%d" %
4280 instance.beparams[constants.BE_MINMEM])
4281
4282 config.set(constants.INISECT_INS, "memory", "%d" %
4283 instance.beparams[constants.BE_MAXMEM])
4284 config.set(constants.INISECT_INS, "vcpus", "%d" %
4285 instance.beparams[constants.BE_VCPUS])
4286 config.set(constants.INISECT_INS, "disk_template", instance.disk_template)
4287 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor)
4288 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags()))
4289
4290 nic_total = 0
4291 for nic_count, nic in enumerate(instance.nics):
4292 nic_total += 1
4293 config.set(constants.INISECT_INS, "nic%d_mac" %
4294 nic_count, "%s" % nic.mac)
4295 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip)
4296 config.set(constants.INISECT_INS, "nic%d_network" % nic_count,
4297 "%s" % nic.network)
4298 config.set(constants.INISECT_INS, "nic%d_name" % nic_count,
4299 "%s" % nic.name)
4300 for param in constants.NICS_PARAMETER_TYPES:
4301 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param),
4302 "%s" % nic.nicparams.get(param, None))
4303
4304 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total)
4305
4306 disk_total = 0
4307 for disk_count, disk in enumerate(snap_disks):
4308 if disk:
4309 disk_total += 1
4310 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count,
4311 ("%s" % disk.iv_name))
4312 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count,
4313 ("%s" % disk.logical_id[1]))
4314 config.set(constants.INISECT_INS, "disk%d_size" % disk_count,
4315 ("%d" % disk.size))
4316 config.set(constants.INISECT_INS, "disk%d_name" % disk_count,
4317 "%s" % disk.name)
4318
4319 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total)
4320
4321
4322
4323 config.add_section(constants.INISECT_HYP)
4324 for name, value in instance.hvparams.items():
4325 if name not in constants.HVC_GLOBALS:
4326 config.set(constants.INISECT_HYP, name, str(value))
4327
4328 config.add_section(constants.INISECT_BEP)
4329 for name, value in instance.beparams.items():
4330 config.set(constants.INISECT_BEP, name, str(value))
4331
4332 config.add_section(constants.INISECT_OSP)
4333 for name, value in instance.osparams.items():
4334 config.set(constants.INISECT_OSP, name, str(value))
4335
4336 config.add_section(constants.INISECT_OSP_PRIVATE)
4337 for name, value in instance.osparams_private.items():
4338 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get()))
4339
4340 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
4341 data=config.Dumps())
4342 shutil.rmtree(finaldestdir, ignore_errors=True)
4343 shutil.move(destdir, finaldestdir)
4344
4367
4370 """Return a list of exports currently available on this machine.
4371
4372 @rtype: list
4373 @return: list of the exports
4374
4375 """
4376 if os.path.isdir(pathutils.EXPORT_DIR):
4377 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR))
4378 else:
4379 _Fail("No exports directory")
4380
4383 """Remove an existing export from the node.
4384
4385 @type export: str
4386 @param export: the name of the export to remove
4387 @rtype: None
4388
4389 """
4390 target = utils.PathJoin(pathutils.EXPORT_DIR, export)
4391
4392 try:
4393 shutil.rmtree(target)
4394 except EnvironmentError, err:
4395 _Fail("Error while removing the export: %s", err, exc=True)
4396
4399 """Rename a list of block devices.
4400
4401 @type devlist: list of tuples
4402 @param devlist: list of tuples of the form (disk, new_unique_id); disk is
4403 an L{objects.Disk} object describing the current disk, and new
4404 unique_id is the name we rename it to
4405 @rtype: boolean
4406 @return: True if all renames succeeded, False otherwise
4407
4408 """
4409 msgs = []
4410 result = True
4411 for disk, unique_id in devlist:
4412 dev = _RecursiveFindBD(disk)
4413 if dev is None:
4414 msgs.append("Can't find device %s in rename" % str(disk))
4415 result = False
4416 continue
4417 try:
4418 old_rpath = dev.dev_path
4419 dev.Rename(unique_id)
4420 new_rpath = dev.dev_path
4421 if old_rpath != new_rpath:
4422 DevCacheManager.RemoveCache(old_rpath)
4423
4424
4425
4426
4427
4428 except errors.BlockDeviceError, err:
4429 msgs.append("Can't rename device '%s' to '%s': %s" %
4430 (dev, unique_id, err))
4431 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
4432 result = False
4433 if not result:
4434 _Fail("; ".join(msgs))
4435
4453
4456 """Create file storage directory.
4457
4458 @type file_storage_dir: str
4459 @param file_storage_dir: directory to create
4460
4461 @rtype: tuple
4462 @return: tuple with first element a boolean indicating wheter dir
4463 creation was successful or not
4464
4465 """
4466 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
4467 if os.path.exists(file_storage_dir):
4468 if not os.path.isdir(file_storage_dir):
4469 _Fail("Specified storage dir '%s' is not a directory",
4470 file_storage_dir)
4471 else:
4472 try:
4473 os.makedirs(file_storage_dir, 0750)
4474 except OSError, err:
4475 _Fail("Cannot create file storage directory '%s': %s",
4476 file_storage_dir, err, exc=True)
4477
4480 """Remove file storage directory.
4481
4482 Remove it only if it's empty. If not log an error and return.
4483
4484 @type file_storage_dir: str
4485 @param file_storage_dir: the directory we should cleanup
4486 @rtype: tuple (success,)
4487 @return: tuple of one element, C{success}, denoting
4488 whether the operation was successful
4489
4490 """
4491 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
4492 if os.path.exists(file_storage_dir):
4493 if not os.path.isdir(file_storage_dir):
4494 _Fail("Specified Storage directory '%s' is not a directory",
4495 file_storage_dir)
4496
4497 try:
4498 os.rmdir(file_storage_dir)
4499 except OSError, err:
4500 _Fail("Cannot remove file storage directory '%s': %s",
4501 file_storage_dir, err)
4502
4505 """Rename the file storage directory.
4506
4507 @type old_file_storage_dir: str
4508 @param old_file_storage_dir: the current path
4509 @type new_file_storage_dir: str
4510 @param new_file_storage_dir: the name we should rename to
4511 @rtype: tuple (success,)
4512 @return: tuple of one element, C{success}, denoting
4513 whether the operation was successful
4514
4515 """
4516 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
4517 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
4518 if not os.path.exists(new_file_storage_dir):
4519 if os.path.isdir(old_file_storage_dir):
4520 try:
4521 os.rename(old_file_storage_dir, new_file_storage_dir)
4522 except OSError, err:
4523 _Fail("Cannot rename '%s' to '%s': %s",
4524 old_file_storage_dir, new_file_storage_dir, err)
4525 else:
4526 _Fail("Specified storage dir '%s' is not a directory",
4527 old_file_storage_dir)
4528 else:
4529 if os.path.exists(old_file_storage_dir):
4530 _Fail("Cannot rename '%s' to '%s': both locations exist",
4531 old_file_storage_dir, new_file_storage_dir)
4532
4535 """Checks whether the given filename is in the queue directory.
4536
4537 @type file_name: str
4538 @param file_name: the file name we should check
4539 @rtype: None
4540 @raises RPCFail: if the file is not valid
4541
4542 """
4543 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name):
4544 _Fail("Passed job queue file '%s' does not belong to"
4545 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
4546
4549 """Updates a file in the queue directory.
4550
4551 This is just a wrapper over L{utils.io.WriteFile}, with proper
4552 checking.
4553
4554 @type file_name: str
4555 @param file_name: the job file name
4556 @type content: str
4557 @param content: the new job contents
4558 @rtype: boolean
4559 @return: the success of the operation
4560
4561 """
4562 file_name = vcluster.LocalizeVirtualPath(file_name)
4563
4564 _EnsureJobQueueFile(file_name)
4565 getents = runtime.GetEnts()
4566
4567
4568 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid,
4569 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
4570
4573 """Renames a job queue file.
4574
4575 This is just a wrapper over os.rename with proper checking.
4576
4577 @type old: str
4578 @param old: the old (actual) file name
4579 @type new: str
4580 @param new: the desired file name
4581 @rtype: tuple
4582 @return: the success of the operation and payload
4583
4584 """
4585 old = vcluster.LocalizeVirtualPath(old)
4586 new = vcluster.LocalizeVirtualPath(new)
4587
4588 _EnsureJobQueueFile(old)
4589 _EnsureJobQueueFile(new)
4590
4591 getents = runtime.GetEnts()
4592
4593 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750,
4594 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
4595
4598 """Closes the given block devices.
4599
4600 This means they will be switched to secondary mode (in case of
4601 DRBD).
4602
4603 @param instance_name: if the argument is not empty, the symlinks
4604 of this instance will be removed
4605 @type disks: list of L{objects.Disk}
4606 @param disks: the list of disks to be closed
4607 @rtype: tuple (success, message)
4608 @return: a tuple of success and message, where success
4609 indicates the succes of the operation, and message
4610 which will contain the error details in case we
4611 failed
4612
4613 """
4614 bdevs = []
4615 for cf in disks:
4616 rd = _RecursiveFindBD(cf)
4617 if rd is None:
4618 _Fail("Can't find device %s", cf)
4619 bdevs.append(rd)
4620
4621 msg = []
4622 for rd in bdevs:
4623 try:
4624 rd.Close()
4625 except errors.BlockDeviceError, err:
4626 msg.append(str(err))
4627 if msg:
4628 _Fail("Can't make devices secondary: %s", ",".join(msg))
4629 else:
4630 if instance_name:
4631 _RemoveBlockDevLinks(instance_name, disks)
4632
4635 """Validates the given hypervisor parameters.
4636
4637 @type hvname: string
4638 @param hvname: the hypervisor name
4639 @type hvparams: dict
4640 @param hvparams: the hypervisor parameters to be validated
4641 @rtype: None
4642
4643 """
4644 try:
4645 hv_type = hypervisor.GetHypervisor(hvname)
4646 hv_type.ValidateParameters(hvparams)
4647 except errors.HypervisorError, err:
4648 _Fail(str(err), log=False)
4649
4652 """Check whether a list of parameters is supported by the OS.
4653
4654 @type os_obj: L{objects.OS}
4655 @param os_obj: OS object to check
4656 @type parameters: list
4657 @param parameters: the list of parameters to check
4658
4659 """
4660 supported = [v[0] for v in os_obj.supported_parameters]
4661 delta = frozenset(parameters).difference(supported)
4662 if delta:
4663 _Fail("The following parameters are not supported"
4664 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
4665
4668 """Check whether an OS name conforms to the os variants specification.
4669
4670 @type os_obj: L{objects.OS}
4671 @param os_obj: OS object to check
4672
4673 @type name: string
4674 @param name: OS name passed by the user, to check for validity
4675
4676 @rtype: NoneType
4677 @return: None
4678 @raise RPCFail: if OS variant is not valid
4679
4680 """
4681 variant = objects.OS.GetVariant(name)
4682
4683 if not os_obj.supported_variants:
4684 if variant:
4685 _Fail("OS '%s' does not support variants ('%s' passed)" %
4686 (os_obj.name, variant))
4687 else:
4688 return
4689
4690 if not variant:
4691 _Fail("OS name '%s' must include a variant" % name)
4692
4693 if variant not in os_obj.supported_variants:
4694 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))
4695
4696
def ValidateOS(required, osname, checks, osparams, force_variant):
  """Validate the given OS parameters.

  @type required: boolean
  @param required: whether absence of the OS should translate into
      failure or not
  @type osname: string
  @param osname: the OS to be validated
  @type checks: list
  @param checks: list of the checks to run (currently only 'parameters')
  @type osparams: dict
  @param osparams: dictionary with OS parameters, some of which may be
      private.
  @rtype: boolean
  @return: True if the validation passed, or False if the OS was not
      found and L{required} was false

  """
  # Reject check names we don't know how to run
  if not constants.OS_VALIDATE_CALLS.issuperset(checks):
    _Fail("Unknown checks required for OS %s: %s", osname,
          set(checks).difference(constants.OS_VALIDATE_CALLS))

  base_name = objects.OS.GetName(osname)
  (found, os_obj) = _TryOSFromDisk(base_name, None)

  # On lookup failure os_obj holds the error message instead of the OS
  if not found:
    if not required:
      return False
    _Fail(os_obj)

  if not force_variant:
    _CheckOSVariant(os_obj, osname)

  # Pre-2.0 OS API has no validation script; nothing more to check
  if max(os_obj.api_versions) < constants.OS_API_V20:
    return True

  if constants.OS_VALIDATE_PARAMETERS in checks:
    _CheckOSPList(os_obj, osparams.keys())

  venv = OSCoreEnv(osname, os_obj, osparams)
  result = utils.RunCmd([os_obj.verify_script] + checks, env=venv,
                        cwd=os_obj.path, reset_env=True)
  if result.failed:
    logging.error("os validate command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    _Fail("OS validation script failed (%s), output: %s",
          result.fail_reason, result.output, log=False)

  return True
4747
4748
def ExportOS(instance, override_env):
  """Creates a GZIPed tarball with an OS definition and environment.

  The archive contains a file with the environment variables needed by
  the OS scripts.

  @type instance: L{objects.Instance}
  @param instance: instance for which the OS definition is exported

  @type override_env: dict of string to string
  @param override_env: if supplied, it overrides the environment on a
      key-by-key basis that is part of the archive

  @rtype: string
  @return: filepath of the archive

  """
  assert instance
  assert instance.os

  temp_dir = tempfile.mkdtemp()
  try:
    inst_os = OSFromDisk(instance.os)

    # Expose the OS definition inside the staging directory via a symlink;
    # "tar --dereference" below archives the actual contents
    result = utils.RunCmd(["ln", "-s", inst_os.path,
                           utils.PathJoin(temp_dir, "os")])
    if result.failed:
      _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'",
            inst_os, temp_dir, result.fail_reason, result.output)

    env = OSEnvironment(instance, inst_os)
    env.update(override_env)

    with open(utils.PathJoin(temp_dir, "environment"), "w") as f:
      for var in env:
        f.write(var + "=" + env[var] + "\n")

    (fd, os_package) = tempfile.mkstemp(suffix=".tgz")
    os.close(fd)

    result = utils.RunCmd(["tar", "--dereference", "-czv",
                           "-f", os_package,
                           "-C", temp_dir,
                           "."])
    if result.failed:
      # Don't leave a partial archive behind when the export failed
      utils.RemoveFile(os_package)
      _Fail("Failed to create OS archive '%s': %s, output '%s'",
            os_package, result.fail_reason, result.output)
  finally:
    # Always remove the staging directory: previously it was leaked on
    # every error path (_Fail raised before the cleanup ran), and a
    # cleanup failure aborted an otherwise successful export
    shutil.rmtree(temp_dir, ignore_errors=True)

  return os_package
4802
4825
4834
4837 """Creates a new X509 certificate for SSL/TLS.
4838
4839 @type validity: int
4840 @param validity: Validity in seconds
4841 @rtype: tuple; (string, string)
4842 @return: Certificate name and public part
4843
4844 """
4845 serial_no = int(time.time())
4846 (key_pem, cert_pem) = \
4847 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
4848 min(validity, _MAX_SSL_CERT_VALIDITY),
4849 serial_no)
4850
4851 cert_dir = tempfile.mkdtemp(dir=cryptodir,
4852 prefix="x509-%s-" % utils.TimestampForFilename())
4853 try:
4854 name = os.path.basename(cert_dir)
4855 assert len(name) > 5
4856
4857 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
4858
4859 utils.WriteFile(key_file, mode=0400, data=key_pem)
4860 utils.WriteFile(cert_file, mode=0400, data=cert_pem)
4861
4862
4863 return (name, cert_pem)
4864 except Exception:
4865 shutil.rmtree(cert_dir, ignore_errors=True)
4866 raise
4867
4870 """Removes a X509 certificate.
4871
4872 @type name: string
4873 @param name: Certificate name
4874
4875 """
4876 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
4877
4878 utils.RemoveFile(key_file)
4879 utils.RemoveFile(cert_file)
4880
4881 try:
4882 os.rmdir(cert_dir)
4883 except EnvironmentError, err:
4884 _Fail("Cannot remove certificate directory '%s': %s",
4885 cert_dir, err)
4886
4889 """Returns the command for the requested input/output.
4890
4891 @type instance: L{objects.Instance}
4892 @param instance: The instance object
4893 @param mode: Import/export mode
4894 @param ieio: Input/output type
4895 @param ieargs: Input/output arguments
4896
4897 """
4898 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT)
4899
4900 env = None
4901 prefix = None
4902 suffix = None
4903 exp_size = None
4904
4905 if ieio == constants.IEIO_FILE:
4906 (filename, ) = ieargs
4907
4908 if not utils.IsNormAbsPath(filename):
4909 _Fail("Path '%s' is not normalized or absolute", filename)
4910
4911 real_filename = os.path.realpath(filename)
4912 directory = os.path.dirname(real_filename)
4913
4914 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename):
4915 _Fail("File '%s' is not under exports directory '%s': %s",
4916 filename, pathutils.EXPORT_DIR, real_filename)
4917
4918
4919 utils.Makedirs(directory, mode=0750)
4920
4921 quoted_filename = utils.ShellQuote(filename)
4922
4923 if mode == constants.IEM_IMPORT:
4924 suffix = "> %s" % quoted_filename
4925 elif mode == constants.IEM_EXPORT:
4926 suffix = "< %s" % quoted_filename
4927
4928
4929 try:
4930 st = os.stat(filename)
4931 except EnvironmentError, err:
4932 logging.error("Can't stat(2) %s: %s", filename, err)
4933 else:
4934 exp_size = utils.BytesToMebibyte(st.st_size)
4935
4936 elif ieio == constants.IEIO_RAW_DISK:
4937 (disk, ) = ieargs
4938
4939 real_disk = _OpenRealBD(disk)
4940
4941 if mode == constants.IEM_IMPORT:
4942
4943
4944 suffix = utils.BuildShellCmd("| dd of=%s conv=nocreat,notrunc bs=%s",
4945 real_disk.dev_path,
4946 str(constants.DD_BLOCK_SIZE))
4947
4948 elif mode == constants.IEM_EXPORT:
4949
4950 prefix = utils.BuildShellCmd("dd if=%s bs=%s count=%s |",
4951 real_disk.dev_path,
4952 str(constants.DD_BLOCK_SIZE),
4953 str(disk.size))
4954 exp_size = disk.size
4955
4956 elif ieio == constants.IEIO_SCRIPT:
4957 (disk, disk_index, ) = ieargs
4958
4959 assert isinstance(disk_index, (int, long))
4960
4961 inst_os = OSFromDisk(instance.os)
4962 env = OSEnvironment(instance, inst_os)
4963
4964 if mode == constants.IEM_IMPORT:
4965 env["IMPORT_DEVICE"] = env["DISK_%d_PATH" % disk_index]
4966 env["IMPORT_INDEX"] = str(disk_index)
4967 script = inst_os.import_script
4968
4969 elif mode == constants.IEM_EXPORT:
4970 real_disk = _OpenRealBD(disk)
4971 env["EXPORT_DEVICE"] = real_disk.dev_path
4972 env["EXPORT_INDEX"] = str(disk_index)
4973 script = inst_os.export_script
4974
4975
4976 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script)
4977
4978 if mode == constants.IEM_IMPORT:
4979 suffix = "| %s" % script_cmd
4980
4981 elif mode == constants.IEM_EXPORT:
4982 prefix = "%s |" % script_cmd
4983
4984
4985 exp_size = constants.IE_CUSTOM_SIZE
4986
4987 else:
4988 _Fail("Invalid %s I/O mode %r", mode, ieio)
4989
4990 return (env, prefix, suffix, exp_size)
4991
4994 """Creates status directory for import/export.
4995
4996 """
4997 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR,
4998 prefix=("%s-%s-" %
4999 (prefix, utils.TimestampForFilename())))
5000
5004 """Starts an import or export daemon.
5005
5006 @param mode: Import/output mode
5007 @type opts: L{objects.ImportExportOptions}
5008 @param opts: Daemon options
5009 @type host: string
5010 @param host: Remote host for export (None for import)
5011 @type port: int
5012 @param port: Remote port for export (None for import)
5013 @type instance: L{objects.Instance}
5014 @param instance: Instance object
5015 @type component: string
5016 @param component: which part of the instance is transferred now,
5017 e.g. 'disk/0'
5018 @param ieio: Input/output type
5019 @param ieioargs: Input/output arguments
5020
5021 """
5022 if mode == constants.IEM_IMPORT:
5023 prefix = "import"
5024
5025 if not (host is None and port is None):
5026 _Fail("Can not specify host or port on import")
5027
5028 elif mode == constants.IEM_EXPORT:
5029 prefix = "export"
5030
5031 if host is None or port is None:
5032 _Fail("Host and port must be specified for an export")
5033
5034 else:
5035 _Fail("Invalid mode %r", mode)
5036
5037 if (opts.key_name is None) ^ (opts.ca_pem is None):
5038 _Fail("Cluster certificate can only be used for both key and CA")
5039
5040 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
5041 _GetImportExportIoCommand(instance, mode, ieio, ieioargs)
5042
5043 if opts.key_name is None:
5044
5045 key_path = pathutils.NODED_CERT_FILE
5046 cert_path = pathutils.NODED_CERT_FILE
5047 assert opts.ca_pem is None
5048 else:
5049 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR,
5050 opts.key_name)
5051 assert opts.ca_pem is not None
5052
5053 for i in [key_path, cert_path]:
5054 if not os.path.exists(i):
5055 _Fail("File '%s' does not exist" % i)
5056
5057 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component))
5058 try:
5059 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
5060 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
5061 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)
5062
5063 if opts.ca_pem is None:
5064
5065 ca = utils.ReadFile(pathutils.NODED_CERT_FILE)
5066 else:
5067 ca = opts.ca_pem
5068
5069
5070 utils.WriteFile(ca_file, data=ca, mode=0400)
5071
5072 cmd = [
5073 pathutils.IMPORT_EXPORT_DAEMON,
5074 status_file, mode,
5075 "--key=%s" % key_path,
5076 "--cert=%s" % cert_path,
5077 "--ca=%s" % ca_file,
5078 ]
5079
5080 if host:
5081 cmd.append("--host=%s" % host)
5082
5083 if port:
5084 cmd.append("--port=%s" % port)
5085
5086 if opts.ipv6:
5087 cmd.append("--ipv6")
5088 else:
5089 cmd.append("--ipv4")
5090
5091 if opts.compress:
5092 cmd.append("--compress=%s" % opts.compress)
5093
5094 if opts.magic:
5095 cmd.append("--magic=%s" % opts.magic)
5096
5097 if exp_size is not None:
5098 cmd.append("--expected-size=%s" % exp_size)
5099
5100 if cmd_prefix:
5101 cmd.append("--cmd-prefix=%s" % cmd_prefix)
5102
5103 if cmd_suffix:
5104 cmd.append("--cmd-suffix=%s" % cmd_suffix)
5105
5106 if mode == constants.IEM_EXPORT:
5107
5108 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES)
5109 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT)
5110 elif opts.connect_timeout is not None:
5111 assert mode == constants.IEM_IMPORT
5112
5113 cmd.append("--connect-timeout=%s" % opts.connect_timeout)
5114
5115 logfile = _InstanceLogName(prefix, instance.os, instance.name, component)
5116
5117
5118
5119 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file,
5120 output=logfile)
5121
5122
5123 return os.path.basename(status_dir)
5124
5125 except Exception:
5126 shutil.rmtree(status_dir, ignore_errors=True)
5127 raise
5128
5131 """Returns import/export daemon status.
5132
5133 @type names: sequence
5134 @param names: List of names
5135 @rtype: List of dicts
5136 @return: Returns a list of the state of each named import/export or None if a
5137 status couldn't be read
5138
5139 """
5140 result = []
5141
5142 for name in names:
5143 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name,
5144 _IES_STATUS_FILE)
5145
5146 try:
5147 data = utils.ReadFile(status_file)
5148 except EnvironmentError, err:
5149 if err.errno != errno.ENOENT:
5150 raise
5151 data = None
5152
5153 if not data:
5154 result.append(None)
5155 continue
5156
5157 result.append(serializer.LoadJson(data))
5158
5159 return result
5160
5175
5178 """Cleanup after an import or export.
5179
5180 If the import/export daemon is still running it's killed. Afterwards the
5181 whole status directory is removed.
5182
5183 """
5184 logging.info("Finalizing import/export %s", name)
5185
5186 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name)
5187
5188 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
5189
5190 if pid:
5191 logging.info("Import/export %s is still running with PID %s",
5192 name, pid)
5193 utils.KillProcess(pid, waitpid=False)
5194
5195 shutil.rmtree(status_dir, ignore_errors=True)
5196
5199 """Finds attached L{BlockDev}s for the given disks.
5200
5201 @type disks: list of L{objects.Disk}
5202 @param disks: the disk objects we need to find
5203
5204 @return: list of L{BlockDev} objects or C{None} if a given disk
5205 was not found or was no attached.
5206
5207 """
5208 bdevs = []
5209
5210 for disk in disks:
5211 rd = _RecursiveFindBD(disk)
5212 if rd is None:
5213 _Fail("Can't find device %s", disk)
5214 bdevs.append(rd)
5215 return bdevs
5216
5219 """Disconnects the network on a list of drbd devices.
5220
5221 """
5222 bdevs = _FindDisks(disks)
5223
5224
5225 for rd in bdevs:
5226 try:
5227 rd.DisconnectNet()
5228 except errors.BlockDeviceError, err:
5229 _Fail("Can't change network configuration to standalone mode: %s",
5230 err, exc=True)
5231
5234 """Attaches the network on a list of drbd devices.
5235
5236 """
5237 bdevs = _FindDisks(disks)
5238
5239 if multimaster:
5240 for idx, rd in enumerate(bdevs):
5241 try:
5242 _SymlinkBlockDev(instance_name, rd.dev_path, idx)
5243 except EnvironmentError, err:
5244 _Fail("Can't create symlink: %s", err)
5245
5246
5247 for rd in bdevs:
5248 try:
5249 rd.AttachNet(multimaster)
5250 except errors.BlockDeviceError, err:
5251 _Fail("Can't change network configuration: %s", err)
5252
5253
5254
5255
5256
5257
5258
5259 def _Attach():
5260 all_connected = True
5261
5262 for rd in bdevs:
5263 stats = rd.GetProcStatus()
5264
5265 if multimaster:
5266
5267
5268
5269
5270
5271
5272 all_connected = (all_connected and
5273 stats.is_connected and
5274 stats.is_disk_uptodate and
5275 stats.peer_disk_uptodate)
5276 else:
5277 all_connected = (all_connected and
5278 (stats.is_connected or stats.is_in_resync))
5279
5280 if stats.is_standalone:
5281
5282
5283
5284 try:
5285 rd.AttachNet(multimaster)
5286 except errors.BlockDeviceError, err:
5287 _Fail("Can't change network configuration: %s", err)
5288
5289 if not all_connected:
5290 raise utils.RetryAgain()
5291
5292 try:
5293
5294 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
5295 except utils.RetryTimeout:
5296 _Fail("Timeout in disk reconnecting")
5297
5298 if multimaster:
5299
5300 for rd in bdevs:
5301 try:
5302 rd.Open()
5303 except errors.BlockDeviceError, err:
5304 _Fail("Can't change to primary mode: %s", err)
5305
5308 """Wait until DRBDs have synchronized.
5309
5310 """
5311 def _helper(rd):
5312 stats = rd.GetProcStatus()
5313 if not (stats.is_connected or stats.is_in_resync):
5314 raise utils.RetryAgain()
5315 return stats
5316
5317 bdevs = _FindDisks(disks)
5318
5319 min_resync = 100
5320 alldone = True
5321 for rd in bdevs:
5322 try:
5323
5324 stats = utils.Retry(_helper, 1, 15, args=[rd])
5325 except utils.RetryTimeout:
5326 stats = rd.GetProcStatus()
5327
5328 if not (stats.is_connected or stats.is_in_resync):
5329 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
5330 alldone = alldone and (not stats.is_in_resync)
5331 if stats.sync_percent is not None:
5332 min_resync = min(min_resync, stats.sync_percent)
5333
5334 return (alldone, min_resync)
5335
5338 """Checks which of the passed disks needs activation and returns their UUIDs.
5339
5340 """
5341 faulty_disks = []
5342
5343 for disk in disks:
5344 rd = _RecursiveFindBD(disk)
5345 if rd is None:
5346 faulty_disks.append(disk)
5347 continue
5348
5349 stats = rd.GetProcStatus()
5350 if stats.is_standalone or stats.is_diskless:
5351 faulty_disks.append(disk)
5352
5353 return [disk.uuid for disk in faulty_disks]
5354
5364
5367 """Hard-powercycle the node.
5368
5369 Because we need to return first, and schedule the powercycle in the
5370 background, we won't be able to report failures nicely.
5371
5372 """
5373 hyper = hypervisor.GetHypervisor(hypervisor_type)
5374 try:
5375 pid = os.fork()
5376 except OSError:
5377
5378 pid = 0
5379 if pid > 0:
5380 return "Reboot scheduled in 5 seconds"
5381
5382 try:
5383 utils.Mlockall()
5384 except Exception:
5385 pass
5386 time.sleep(5)
5387 hyper.PowercycleNode(hvparams=hvparams)
5388
5391 """Verifies a restricted command name.
5392
5393 @type cmd: string
5394 @param cmd: Command name
5395 @rtype: tuple; (boolean, string or None)
5396 @return: The tuple's first element is the status; if C{False}, the second
5397 element is an error message string, otherwise it's C{None}
5398
5399 """
5400 if not cmd.strip():
5401 return (False, "Missing command name")
5402
5403 if os.path.basename(cmd) != cmd:
5404 return (False, "Invalid command name")
5405
5406 if not constants.EXT_PLUGIN_MASK.match(cmd):
5407 return (False, "Command name contains forbidden characters")
5408
5409 return (True, None)
5410
5413 """Common checks for restricted command file system directories and files.
5414
5415 @type path: string
5416 @param path: Path to check
5417 @param owner: C{None} or tuple containing UID and GID
5418 @rtype: tuple; (boolean, string or C{os.stat} result)
5419 @return: The tuple's first element is the status; if C{False}, the second
5420 element is an error message string, otherwise it's the result of C{os.stat}
5421
5422 """
5423 if owner is None:
5424
5425 owner = (0, 0)
5426
5427 try:
5428 st = os.stat(path)
5429 except EnvironmentError, err:
5430 return (False, "Can't stat(2) '%s': %s" % (path, err))
5431
5432 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE):
5433 return (False, "Permissions on '%s' are too permissive" % path)
5434
5435 if (st.st_uid, st.st_gid) != owner:
5436 (owner_uid, owner_gid) = owner
5437 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid))
5438
5439 return (True, st)
5440
def _VerifyRestrictedCmdDirectory(path, _owner=None):
  """Verifies restricted command directory.

  @type path: string
  @param path: Path to check
  @rtype: tuple; (boolean, string or None)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise it's C{None}

  """
  (ok, details) = _CommonRestrictedCmdCheck(path, _owner)
  if not ok:
    return (False, details)

  # On success, details is the os.stat result of the path
  if stat.S_ISDIR(details.st_mode):
    return (True, None)

  return (False, "Path '%s' is not a directory" % path)
5461
def _VerifyRestrictedCmd(path, cmd, _owner=None):
  """Verifies a whole restricted command and returns its executable filename.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @rtype: tuple; (boolean, string)
  @return: The tuple's first element is the status; if C{False}, the second
    element is an error message string, otherwise the second element is the
    absolute path to the executable

  """
  full_path = utils.PathJoin(path, cmd)

  (ok, details) = _CommonRestrictedCmdCheck(full_path, _owner)
  if not ok:
    return (False, details)

  if utils.IsExecutable(full_path):
    return (True, full_path)

  return (False, "access(2) thinks '%s' can't be executed" % full_path)
5487
def _PrepareRestrictedCmd(path, cmd,
                          _verify_dir=_VerifyRestrictedCmdDirectory,
                          _verify_name=_VerifyRestrictedCmdName,
                          _verify_cmd=_VerifyRestrictedCmd):
  """Performs a number of tests on a restricted command.

  @type path: string
  @param path: Directory containing restricted commands
  @type cmd: string
  @param cmd: Command name
  @return: Same as L{_VerifyRestrictedCmd}

  """
  # The directory must be valid before the name is even looked at;
  # the first failing check wins
  for (check, argument) in [(_verify_dir, path), (_verify_name, cmd)]:
    (ok, err_msg) = check(argument)
    if not ok:
      return (False, err_msg)

  # Check and return the executable's full path
  return _verify_cmd(path, cmd)
5513
def RunRestrictedCmd(cmd,
                     _lock_timeout=_RCMD_LOCK_TIMEOUT,
                     _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE,
                     _path=pathutils.RESTRICTED_COMMANDS_DIR,
                     _sleep_fn=time.sleep,
                     _prepare_fn=_PrepareRestrictedCmd,
                     _runcmd_fn=utils.RunCmd,
                     _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
  """Executes a restricted command after performing strict tests.

  @type cmd: string
  @param cmd: Command name
  @rtype: string
  @return: Command output
  @raise RPCFail: In case of an error

  """
  logging.info("Preparing to run restricted command '%s'", cmd)

  if not _enabled:
    _Fail("Restricted commands disabled at configure time")

  lock = None
  try:
    result = None
    try:
      lock = utils.FileLock.Open(_lock_file)
      lock.Exclusive(blocking=True, timeout=_lock_timeout)

      (ok, payload) = _prepare_fn(_path, cmd)

      if ok:
        # payload is the verified executable's absolute path; the lock is
        # released in the child right after forking
        result = _runcmd_fn([payload], env={}, reset_env=True,
                            postfork_fn=lambda _: lock.Unlock())
      else:
        # payload is an error message; keep it in the logs only
        logging.error(payload)
    except Exception: # pylint: disable=W0703
      # Keep troubleshooting information in the logs, never in the reply
      logging.exception("Caught exception")

    if result is None:
      # Delay the (deliberately vague) failure reply, so callers cannot
      # probe which of the checks rejected the command
      logging.info("Sleeping for %0.1f seconds before returning",
                   _RCMD_INVALID_DELAY)
      _sleep_fn(_RCMD_INVALID_DELAY)

      # Do not include original error message in returned error
      _Fail("Executing command '%s' failed" % cmd)
    elif result.failed or result.fail_reason:
      _Fail("Restricted command '%s' failed: %s; output: %s",
            cmd, result.fail_reason, result.output)
    else:
      return result.output
  finally:
    if lock is not None:
      # Release lock at last; closing is safe even if already unlocked
      lock.Close()
      lock = None
5572
def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSEFILE):
  """Creates or removes the watcher pause file.

  @type until: None or number
  @param until: Unix timestamp saying until when the watcher shouldn't run

  """
  if until is None:
    logging.info("Received request to no longer pause watcher")
    utils.RemoveFile(_filename)
    return

  logging.info("Received request to pause watcher until %s", until)

  if not ht.TNumber(until):
    _Fail("Duration must be numeric")

  # The pause file simply holds the timestamp, one per line
  utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0o644)
5591
5618
def GetFileInfo(file_path):
  """Checks if a file exists and returns information related to it.

  Currently returned information:
    - file size: int, size in bytes

  @type file_path: string
  @param file_path: Name of file to examine.

  @rtype: tuple of bool, dict
  @return: Whether the file exists, and a dictionary of information about the
    file gathered by os.stat.

  """
  try:
    stat_info = os.stat(file_path)
  except EnvironmentError:
    # os.stat raises OSError (not IOError) for a missing or inaccessible
    # file; catching EnvironmentError covers both, whereas the previous
    # "except IOError" let the OSError escape under Python 2
    return (False, {})

  values_dict = {
    constants.STAT_SIZE: stat_info.st_size,
    }
  return (True, values_dict)
5642
5645 """Hook runner.
5646
5647 This class is instantiated on the node side (ganeti-noded) and not
5648 on the master side.
5649
5650 """
5651 - def __init__(self, hooks_base_dir=None):
5652 """Constructor for hooks runner.
5653
5654 @type hooks_base_dir: str or None
5655 @param hooks_base_dir: if not None, this overrides the
5656 L{pathutils.HOOKS_BASE_DIR} (useful for unittests)
5657
5658 """
5659 if hooks_base_dir is None:
5660 hooks_base_dir = pathutils.HOOKS_BASE_DIR
5661
5662
5663 self._BASE_DIR = hooks_base_dir
5664
5666 """Check that the hooks will be run only locally and then run them.
5667
5668 """
5669 assert len(node_list) == 1
5670 node = node_list[0]
5671 _, myself = ssconf.GetMasterAndMyself()
5672 assert node == myself
5673
5674 results = self.RunHooks(hpath, phase, env)
5675
5676
5677 return {node: (None, False, results)}
5678
5679 - def RunHooks(self, hpath, phase, env):
5680 """Run the scripts in the hooks directory.
5681
5682 @type hpath: str
5683 @param hpath: the path to the hooks directory which
5684 holds the scripts
5685 @type phase: str
5686 @param phase: either L{constants.HOOKS_PHASE_PRE} or
5687 L{constants.HOOKS_PHASE_POST}
5688 @type env: dict
5689 @param env: dictionary with the environment for the hook
5690 @rtype: list
5691 @return: list of 3-element tuples:
5692 - script path
5693 - script result, either L{constants.HKR_SUCCESS} or
5694 L{constants.HKR_FAIL}
5695 - output of the script
5696
5697 @raise errors.ProgrammerError: for invalid input
5698 parameters
5699
5700 """
5701 if phase == constants.HOOKS_PHASE_PRE:
5702 suffix = "pre"
5703 elif phase == constants.HOOKS_PHASE_POST:
5704 suffix = "post"
5705 else:
5706 _Fail("Unknown hooks phase '%s'", phase)
5707
5708 subdir = "%s-%s.d" % (hpath, suffix)
5709 dir_name = utils.PathJoin(self._BASE_DIR, subdir)
5710
5711 results = []
5712
5713 if not os.path.isdir(dir_name):
5714
5715
5716 return results
5717
5718 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
5719
5720 for (relname, relstatus, runresult) in runparts_results:
5721 if relstatus == constants.RUNPARTS_SKIP:
5722 rrval = constants.HKR_SKIP
5723 output = ""
5724 elif relstatus == constants.RUNPARTS_ERR:
5725 rrval = constants.HKR_FAIL
5726 output = "Hook script execution error: %s" % runresult
5727 elif relstatus == constants.RUNPARTS_RUN:
5728 if runresult.failed:
5729 rrval = constants.HKR_FAIL
5730 else:
5731 rrval = constants.HKR_SUCCESS
5732 output = utils.SafeEncode(runresult.output.strip())
5733 results.append(("%s/%s" % (subdir, relname), rrval, output))
5734
5735 return results
5736
5739 """IAllocator runner.
5740
5741 This class is instantiated on the node side (ganeti-noded) and not on
5742 the master side.
5743
5744 """
5745 @staticmethod
5746 - def Run(name, idata, ial_params):
5747 """Run an iallocator script.
5748
5749 @type name: str
5750 @param name: the iallocator script name
5751 @type idata: str
5752 @param idata: the allocator input data
5753 @type ial_params: list
5754 @param ial_params: the iallocator parameters
5755
5756 @rtype: tuple
5757 @return: two element tuple of:
5758 - status
5759 - either error message or stdout of allocator (for success)
5760
5761 """
5762 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
5763 os.path.isfile)
5764 if alloc_script is None:
5765 _Fail("iallocator module '%s' not found in the search path", name)
5766
5767 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
5768 try:
5769 os.write(fd, idata)
5770 os.close(fd)
5771 result = utils.RunCmd([alloc_script, fin_name] + ial_params)
5772 if result.failed:
5773 _Fail("iallocator module '%s' failed: %s, output '%s'",
5774 name, result.fail_reason, result.output)
5775 finally:
5776 os.unlink(fin_name)
5777
5778 return result.stdout
5779
  """Simple class for managing a cache of block device information.

  """
  # Prefix stripped from device paths when deriving cache file names
  _DEV_PREFIX = "/dev/"
  # Directory holding the per-device cache files
  _ROOT_DIR = pathutils.BDEV_CACHE_DIR
5787
5788 @classmethod
5790 """Converts a /dev/name path to the cache file name.
5791
5792 This replaces slashes with underscores and strips the /dev
5793 prefix. It then returns the full path to the cache file.
5794
5795 @type dev_path: str
5796 @param dev_path: the C{/dev/} path name
5797 @rtype: str
5798 @return: the converted path name
5799
5800 """
5801 if dev_path.startswith(cls._DEV_PREFIX):
5802 dev_path = dev_path[len(cls._DEV_PREFIX):]
5803 dev_path = dev_path.replace("/", "_")
5804 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
5805 return fpath
5806
5807 @classmethod
5808 - def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
5809 """Updates the cache information for a given device.
5810
5811 @type dev_path: str
5812 @param dev_path: the pathname of the device
5813 @type owner: str
5814 @param owner: the owner (instance name) of the device
5815 @type on_primary: bool
5816 @param on_primary: whether this is the primary
5817 node nor not
5818 @type iv_name: str
5819 @param iv_name: the instance-visible name of the
5820 device, as in objects.Disk.iv_name
5821
5822 @rtype: None
5823
5824 """
5825 if dev_path is None:
5826 logging.error("DevCacheManager.UpdateCache got a None dev_path")
5827 return
5828 fpath = cls._ConvertPath(dev_path)
5829 if on_primary:
5830 state = "primary"
5831 else:
5832 state = "secondary"
5833 if iv_name is None:
5834 iv_name = "not_visible"
5835 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
5836 try:
5837 utils.WriteFile(fpath, data=fdata)
5838 except EnvironmentError, err:
5839 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
5840
5841 @classmethod
5843 """Remove data for a dev_path.
5844
5845 This is just a wrapper over L{utils.io.RemoveFile} with a converted
5846 path name and logging.
5847
5848 @type dev_path: str
5849 @param dev_path: the pathname of the device
5850
5851 @rtype: None
5852
5853 """
5854 if dev_path is None:
5855 logging.error("DevCacheManager.RemoveCache got a None dev_path")
5856 return
5857 fpath = cls._ConvertPath(dev_path)
5858 try:
5859 utils.RemoveFile(fpath)
5860 except EnvironmentError, err:
5861 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
5862