31 """Functions used by the node daemon
32
33 @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
34 the L{UploadFile} function
35 @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
36 in the L{_CleanDirectory} function
37
38 """
39
40
41
42
43
44
45
46
47
48
import base64
import errno
import logging
import os
import os.path
import pycurl
import random
import re
import shutil
import signal
import socket
import stat
import tempfile
import time
import zlib
import copy

from ganeti import errors
from ganeti import http
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti.hypervisor import hv_base
from ganeti import constants
from ganeti.storage import bdev
from ganeti.storage import drbd
from ganeti.storage import extstorage
from ganeti.storage import filestorage
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
from ganeti.storage.base import BlockDev
from ganeti.storage.drbd import DRBD8
from ganeti import hooksmaster
from ganeti.rpc import transport
from ganeti.rpc.errors import NoMasterError, TimeoutError

_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")

_MASTER_START = "start"
_MASTER_STOP = "stop"

_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

_RCMD_INVALID_DELAY = 10

_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8
129 """Class denoting RPC failure.
130
131 Its argument is the error message.
132
133 """
134
137 """Path of the file containing the reason of the instance status change.
138
139 @type instance_name: string
140 @param instance_name: The name of the instance
141 @rtype: string
142 @return: The path of the file
143
144 """
145 return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)


def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  json = serializer.DumpJson(trail)
  filename = _GetInstReasonFilename(instance_name)
  utils.WriteFile(filename, data=json)
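
# Usage sketch (illustrative; the instance name and trail values are made
# up): a reason trail is a list of (source, reason, timestamp) triples, so
#   _StoreInstReasonTrail("inst1.example.com",
#                         [("gnt:client:gnt-instance", "shutdown",
#                           1234567890)])
# writes the JSON-serialized trail to
# pathutils.INSTANCE_REASON_DIR/inst1.example.com.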


def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args
  if "log" not in kwargs or kwargs["log"]:
    if "exc" in kwargs and kwargs["exc"]:
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)
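
# Usage sketch (illustrative): arguments are interpolated printf-style and
# "exc=True" logs the current traceback before raising, e.g.
#   _Fail("Failed to remove file %s", path, exc=True)
# logs the message plus traceback and then raises RPCFail(msg).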


def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()


def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)


def _DecompressData(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")
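
# Illustrative round-trip, assuming the two standard RPC encodings:
#   _DecompressData((constants.RPC_ENCODING_NONE, "abc"))
#     -> "abc"
#   blob = base64.b64encode(zlib.compress("abc"))
#   _DecompressData((constants.RPC_ENCODING_ZLIB_BASE64, blob))
#     -> "abc"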


def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)
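
# Usage sketch (illustrative; "lock_file_path" is a hypothetical name):
#   _CleanDirectory(pathutils.QUEUE_DIR, exclude=[lock_file_path])
# removes all regular files from the queue directory except the given one.
# Passing a path outside _ALLOWED_CLEAN_DIRS fails via _Fail/RPCFail.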


def _BuildUploadFileList():
  """Build the list of allowed upload files.

  This is abstracted so that it's built only once at module import time.

  """
  allowed_files = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    hv_class = hypervisor.GetHypervisorClass(hv_name)
    allowed_files.update(hv_class.GetAncillaryFiles()[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed_files)


_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()


def GetMasterNodeName():
  """Returns the master node name.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: in case of errors

  """
  try:
    return _GetConfig().GetMasterNode()
  except errors.ConfigurationError, err:
    _Fail("Cluster configuration incomplete: %s", err, exc=True)


def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
      environment variables for the hooks. Will get all the parameters of
      the decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  def decorator(fn):
    def wrapper(*args, **kwargs):
      _, myself = ssconf.GetMasterAndMyself()
      nodes = ([myself], [myself])

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hr = HooksRunner()
      hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
                                   hr.RunLocalHooks, None, env_fn, None,
                                   logging.warning, cfg.GetClusterName(),
                                   cfg.GetMasterNode())
      hm.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hm.RunPhase(constants.HOOKS_PHASE_POST)

      return result
    return wrapper
  return decorator


def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Builds environment variables for master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script (unused, but necessary per the implementation of
      the _RunLocalHooks decorator)

  """
  # pylint: disable=W0613
  ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  env = {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ver),
  }

  return env
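
# Example result (illustrative values): for a master with IP 192.0.2.1,
# netmask 24 on device "eth0" in an IPv4 cluster, this returns
#   {"MASTER_NETDEV": "eth0", "MASTER_IP": "192.0.2.1",
#    "MASTER_NETMASK": "24", "CLUSTER_IP_VERSION": "4"}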


def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Execute the master IP address setup script.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
      L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
      script

  """
  env = _BuildMasterIpEnv(master_params)

  if use_external_mip_script:
    setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  result = utils.RunCmd([setup_script, action], env=env, reset_env=True)

  if result.failed:
    _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
          (action, result.exit_code, result.output), log=True)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Activate the IP address of the master daemon.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  _RunMasterSetupScript(master_params, _MASTER_START,
                        use_external_mip_script)


def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  The function will start the master daemons (ganeti-masterd and ganeti-rapi).

  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
      but still non-interactively
  @rtype: None

  """

  if no_voting:
    daemon_args = "--no-voting --yes-do-it"
  else:
    daemon_args = ""

  env = {
    "EXTRA_LUXID_ARGS": daemon_args,
    "EXTRA_WCONFD_ARGS": daemon_args,
  }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    msg = "Can't start Ganeti master: %s" % result.output
    logging.error(msg)
    _Fail(msg)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Deactivate the master IP on this node.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  _RunMasterSetupScript(master_params, _MASTER_STOP,
                        use_external_mip_script)


def StopMasterDaemons():
  """Stop the master daemons on this node.

  Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  if result.failed:
    logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                  " and error %s",
                  result.cmd, result.exit_code, result.output)


def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
                         "%s/%s" % (master_ip, netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not set the new netmask on the master IP address")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
                         "%s/%s" % (master_ip, old_netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not bring down the master IP address with the old netmask")
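
# The two RunCmd calls above are equivalent to running (illustrative
# values, old netmask 16, new netmask 24):
#   ip address add 192.0.2.1/24 dev eth0 label eth0:0
#   ip address del 192.0.2.1/16 dev eth0 label eth0:0
# The new netmask is added before the old one is removed, so the master IP
# never disappears from the device.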


def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry

  """
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
            " present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but"
            " parameter is present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")
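
# Usage sketch (illustrative host name and IP):
#   EtcHostsModify(constants.ETC_HOSTS_ADD, "node1.example.com",
#                  "192.0.2.10")
#   EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node1.example.com", None)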


def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting is provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if not len(params) == num_params:
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))


def _CheckLvmStorageParams(params):
  """Performs sanity check for the 'exclusive storage' flag.

  @see: C{_CheckStorageParams}

  """
  _CheckStorageParams(params, 1)
  excl_stor = params[0]
  if not isinstance(params[0], bool):
    raise errors.ProgrammerError("Exclusive storage parameter is not"
                                 " boolean: '%s'." % excl_stor)
  return excl_stor
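
# Example (illustrative): LVM storage units carry exactly one parameter,
# the exclusive-storage flag, so
#   _CheckLvmStorageParams([True])    # returns True
#   _CheckLvmStorageParams([])        # raises ProgrammerError (bad count)
#   _CheckLvmStorageParams(["yes"])   # raises ProgrammerError (not a bool)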


def _GetLvmVgSpaceInfo(name, params):
  """Wrapper around C{_GetVgInfo} which checks the storage parameters.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should be
      containing only one for exclusive storage

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgInfo(name, excl_stor)


def _GetVgInfo(name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about a LVM volume group.

  """
  # info_fn returns a list of (free, size, ...) tuples, one per queried VG
  vginfo = info_fn([name], excl_stor)
  if vginfo:
    vg_free = int(round(vginfo[0][0], 0))
    vg_size = int(round(vginfo[0][1], 0))
  else:
    vg_free = None
    vg_size = None

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }
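
# Example result (illustrative values): for a volume group "xenvg" with
# 20 GiB free out of 100 GiB, this returns
#   {"type": constants.ST_LVM_VG, "name": "xenvg",
#    "storage_free": 20480, "storage_size": 102400}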


def _GetLvmPvSpaceInfo(name, params):
  """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.

  @see: C{_GetLvmVgSpaceInfo}

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgSpindlesInfo(name, excl_stor)


def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @rtype: dict
  @return: dictionary whose keys are "type", "name", "storage_free" and
      "storage_size" for the storage type, VG name, free spindles and total
      spindles respectively

  """
  if excl_stor:
    (vg_free, vg_size) = info_fn(name)
  else:
    vg_free = 0
    vg_size = 0
  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }


def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The information returned depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) ram in MiB
    - memory_total is the total amount of ram in MiB
    - hv_version: the hypervisor version, if available

  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams

  """
  return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)


def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams

  """
  if hv_specs is None:
    return None

  result = []
  for hvname, hvparams in hv_specs:
    result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
  return result


def _GetNamedNodeInfo(names, fn):
  """Calls C{fn} for all names in C{names} and returns a list of results.

  @rtype: None or list
  @return: None if C{names} is None, otherwise the list of C{fn} results

  """
  if names is None:
    return None
  else:
    return map(fn, names)


def GetNodeInfo(storage_units, hv_specs):
  """Gives back a hash with different information about the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters)
      to ask for disk space information. In case of lvm-vg, the identifier
      is the VG name. The parameters can contain additional,
      storage-type-specific parameters, for example exclusive storage for
      lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/dict, None/dict)
  @return: Tuple containing boot ID, volume group information and hypervisor
      information

  """
  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  storage_info = _GetNamedNodeInfo(
    storage_units,
    (lambda (storage_type, storage_key, storage_params):
        _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
  hv_info = _GetHvInfoAll(hv_specs)
  return (bootid, storage_info, hv_info)
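
# Usage sketch (illustrative; "xenvg" and the kvm_hvparams dict are made
# up):
#   (boot_id, storage_info, hv_info) = GetNodeInfo(
#     [(constants.ST_LVM_VG, "xenvg", [False])],
#     [(constants.HT_KVM, kvm_hvparams)])
# storage_info is then a one-element list of dicts as returned by
# _GetLvmVgSpaceInfo, and hv_info a one-element list of KVM node info.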


def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetSpaceInfo.

  The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  and ignore the *args parameter to not leak it into the filestorage
  module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
      parameters.

  """
  _CheckStorageParams(params, 0)
  return filestorage.GetFileStorageSpaceInfo(path)


# Dispatch table mapping storage types to space reporting functions; None
# marks storage types without space reporting support yet.
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
  }


def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Looks up and applies the correct function to calculate free and total
  storage for the given storage type.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be
      reported.
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group
      name of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting.
      These parameters and their semantics vary from storage type to storage
      type and are just propagated in this function.
  @return: the results of the application of the storage space function (see
      _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for
      that storage type
  @raises NotImplementedError: for storage types who don't support space
      reporting yet

  """
  fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if fn is not None:
    return fn(storage_key, *args)
  else:
    raise NotImplementedError
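
# Dispatch example (illustrative): for an LVM volume group this resolves to
#   _ApplyStorageInfoFunction(constants.ST_LVM_VG, "xenvg", [excl_stor])
#     -> _GetLvmVgSpaceInfo("xenvg", [excl_stor])
# while types mapped to None above (e.g. constants.ST_BLOCK) raise
# NotImplementedError.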


def _CheckExclusivePvs(pvi_list):
  """Check that PVs are not shared among LVs.

  @type pvi_list: list of L{objects.LvmPvInfo} objects
  @param pvi_list: information about the PVs

  @rtype: list of tuples (string, list of strings)
  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])

  """
  res = []
  for pvi in pvi_list:
    if len(pvi.lv_list) > 1:
      res.append((pvi.name, pvi.lv_list))
  return res
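
# Example (illustrative): a PV "/dev/sda3" hosting LVs "vol1" and "vol2"
# yields ("/dev/sda3", ["vol1", "vol2"]) in the result; PVs with at most
# one LV are not reported.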


def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisor. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
      testability

  """
  if not vm_capable:
    return

  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      hvparams = all_hvparams[hv_name]
      try:
        val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
      except errors.HypervisorError, err:
        val = "Error while checking hypervisor: %s" % str(err)
      result[constants.NV_HYPERVISOR][hv_name] = val


def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
      testability

  """
  if not vm_capable:
    return

  if constants.NV_HVPARAMS in what:
    result[constants.NV_HVPARAMS] = []
    for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
      try:
        logging.info("Validating hv %s, %s", hv_name, hvparms)
        get_hv_fn(hv_name).ValidateParameters(hvparms)
      except errors.HypervisorError, err:
        result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))


def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST in what and vm_capable:
    # GetInstanceList can fail
    try:
      val = GetInstanceList(what[constants.NV_INSTANCELIST],
                            all_hvparams=all_hvparams)
    except RPCFail, err:
      val = str(err)
    result[constants.NV_INSTANCELIST] = val


def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO in what and vm_capable:
    hvname = what[constants.NV_HVINFO]
    hyper = hypervisor.GetHypervisor(hvname)
    hvparams = all_hvparams[hvname]
    result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)


def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  Also, verify that the client certificate is not self-signed. Self-
  signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  and should be replaced by client certificates signed by the server
  certificate. Hence we output a warning when we encounter a self-signed
  one.

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  (errcode, msg) = utils.VerifyCertificate(cert_file)
  if errcode is not None:
    return (errcode, msg)

  (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
  if errcode is not None:
    return (errcode, msg)

  # If everything is fine, return the digest to be compared to the config
  return (None, utils.GetCertificateDigest(cert_filename=cert_file))


def _VerifySshSetup(node_status_list, my_name,
                    pub_key_file=pathutils.SSH_PUB_KEYS):
  """Verifies the state of the SSH key files.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
      couple of flags: (uuid, name, is_master_candidate,
      is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node
  @type pub_key_file: str
  @param pub_key_file: filename of the public key file

  """
  if node_status_list is None:
    return ["No node list to check against the pub_key_file received."]

  my_status_list = [(my_uuid, name, mc, pot_mc, online) for
                    (my_uuid, name, mc, pot_mc, online)
                    in node_status_list if name == my_name]
  if len(my_status_list) == 0:
    return ["Cannot find node information for node '%s'." % my_name]
  (my_uuid, _, _, potential_master_candidate, online) = \
     my_status_list[0]

  result = []

  if not os.path.exists(pub_key_file):
    result.append("The public key file '%s' does not exist. Consider running"
                  " 'gnt-cluster renew-crypto --new-ssh-keys"
                  " [--no-ssh-key-check]' to fix this." % pub_key_file)
    return result

  pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list]
  offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list
                   if not online]
  pub_keys = ssh.QueryPubKeyFile(None)
  # Initialized here so the authorized_keys check below also works for
  # nodes that are not potential master candidates
  missing_uuids = set([])

  if potential_master_candidate:
    # The public key file must list exactly the potential master candidates
    pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes)
    pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes)
    if pub_uuids_set != pot_mc_uuids_set:
      unknown_uuids = pub_uuids_set - pot_mc_uuids_set
      if unknown_uuids:
        result.append("The following node UUIDs are listed in the public key"
                      " file on node '%s', but are not potential master"
                      " candidates: %s."
                      % (my_name, ", ".join(list(unknown_uuids))))
      missing_uuids = pot_mc_uuids_set - pub_uuids_set
      if missing_uuids:
        result.append("The following node UUIDs of potential master"
                      " candidates are missing in the public key file on"
                      " node %s: %s."
                      % (my_name, ", ".join(list(missing_uuids))))

    (_, key_files) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                          dircheck=False)
    (_, dsa_pub_key_filename) = key_files[constants.SSHK_DSA]

    my_keys = pub_keys[my_uuid]

    dsa_pub_key = utils.ReadFile(dsa_pub_key_filename)
    if dsa_pub_key.strip() not in my_keys:
      result.append("The dsa key of node %s does not match this node's key"
                    " in the pub key file." % (my_name))
    if len(my_keys) != 1:
      result.append("There is more than one key for node %s in the public key"
                    " file." % my_name)
  else:
    if len(pub_keys.keys()) > 0:
      result.append("The public key file of node '%s' is not empty, although"
                    " the node is not a potential master candidate."
                    % my_name)

  # Check that all master candidate keys are in the authorized_keys file
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for (uuid, name, mc, _, online) in node_status_list:
    if not online:
      continue
    if uuid in missing_uuids:
      continue
    if mc:
      for key in pub_keys[uuid]:
        if not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
                        " not in the 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
    else:
      for key in pub_keys[uuid]:
        if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
                        " 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
        if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
                        " in the 'authorized_keys' file of itself."
                        % (my_name, uuid))

  return result


def _VerifySshClutter(node_status_list, my_name):
  """Verifies that the 'authorized_keys' files are not cluttered up.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
      couple of flags: (uuid, name, is_master_candidate,
      is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node

  """
  result = []
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  node_names = [name for (_, name, _, _, _) in node_status_list]
  multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names)
  if multiple_occurrences:
    msg = "There are hosts which have more than one SSH key stored for the" \
          " same user in the 'authorized_keys' file of node %s. This can be" \
          " due to an unsuccessful operation which cluttered up the" \
          " 'authorized_keys' file. We recommend to clean this up manually. " \
          % my_name
    for host, occ in multiple_occurrences.items():
      msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ))
    result.append(msg)

  return result


def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @type node_groups: a dict of strings
  @param node_groups: node _names_ mapped to their group uuids (it's enough
      to have only those nodes that are in `what["nodelist"]`)
  @type groups_cfg: a dict of dict of strings
  @param groups_cfg: a dictionary mapping group uuids to their configuration
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])

  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_CLIENT_CERT in what:
    result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()

  if constants.NV_SSH_SETUP in what:
    result[constants.NV_SSH_SETUP] = \
      _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name)
    if constants.NV_SSH_CLUTTER in what:
      result[constants.NV_SSH_CLUTTER] = \
        _VerifySshClutter(what[constants.NV_SSH_SETUP], my_name)

  if constants.NV_NODELIST in what:
    (nodes, bynode, mcs) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes
    val = {}
    for node in nodes:
      params = groups_cfg.get(node_groups.get(node))
      ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
      logging.debug("Ssh port %s (None = default) for node %s",
                    str(ssh_port), node)

      # We only test if master candidates can communicate to other nodes.
      # We cannot test if normal nodes cannot communicate with other nodes,
      # because the test is run on all nodes and has to stay distributed.
      if my_name in mcs:
        success, message = _GetSshRunner(cluster_name). \
                              VerifyNodeHostname(node, ssh_port)
        if not success:
          val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of backend.py)
    master_name, master_ip = what[constants.NV_MASTERIP]
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stating out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesystem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result
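
# Usage sketch (illustrative): a minimal verification request checking
# only file checksums and the protocol version could look like
#   VerifyNode({constants.NV_FILELIST: [pathutils.CLUSTER_CONF_FILE],
#               constants.NV_VERSION: None},
#              "cluster.example.com", all_hvparams={}, node_groups={},
#              groups_cfg={})
# and returns a dict with the NV_FILELIST fingerprints and the
# (PROTOCOL_VERSION, RELEASE_VERSION) pair.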


def CryptoTokens(token_requests):
  """Perform actions on the node's cryptographic tokens.

  Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
  for 'ssl'. Action 'get' returns the digest of the public client ssl
  certificate. Action 'create' creates a new client certificate and private
  key and also returns the digest of the certificate. The third parameter of
  a token request are optional parameters for the actions, so far only the
  filename is supported.

  @type token_requests: list of tuples of (string, string, dict), where the
      first string is in constants.CRYPTO_TYPES, the second in
      constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
      to string.
  @param token_requests: list of requests of cryptographic tokens and actions
      to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token

  """
  tokens = []
  for (token_type, action, _) in token_requests:
    if token_type not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   token_type)
    if action not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   action)
    if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
      tokens.append((token_type,
                     utils.GetCertificateDigest()))
  return tokens
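
# Usage sketch (illustrative): fetch the digest of this node's client SSL
# certificate:
#   CryptoTokens([(constants.CRYPTO_TYPE_SSL_DIGEST,
#                  constants.CRYPTO_ACTION_GET, {})])
#     -> [(constants.CRYPTO_TYPE_SSL_DIGEST, "<digest>")]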


def EnsureDaemon(daemon_name, run):
  """Ensures the given daemon is running or stopped.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: 'True' if daemon successfully started/stopped,
      'False' otherwise

  """
  allowed_daemons = [constants.KVMD]

  if daemon_name not in allowed_daemons:
    fn = lambda _: False
  elif run:
    fn = utils.EnsureDaemon
  else:
    fn = utils.StopDaemon

  return fn(daemon_name)
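
# Usage sketch (illustrative):
#   EnsureDaemon(constants.KVMD, True)   # ensure the KVM daemon runs
#   EnsureDaemon(constants.KVMD, False)  # stop it
# Names outside the whitelist simply return False.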


def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
  # Common payload for the ssh-update helper: the cluster name and the
  # noded certificate, used by the remote side to verify the caller
  cluster_name = ssconf_store.GetClusterName()
  data[constants.SSHS_CLUSTER_NAME] = cluster_name
  data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = \
    utils.ReadFile(noded_cert_file)


def AddNodeSshKey(node_uuid, node_name,
                  potential_master_candidates,
                  to_authorized_keys=False,
                  to_public_keys=False,
                  get_public_keys=False,
                  pub_key_file=pathutils.SSH_PUB_KEYS,
                  ssconf_store=None,
                  noded_cert_file=pathutils.NODED_CERT_FILE,
                  run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_uuid: str
  @param node_uuid: the UUID of the node whose key is added
  @type node_name: str
  @param node_name: the name of the node whose key is added
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
      candidates; this should match the list of uuids in the public key file
  @type to_authorized_keys: boolean
  @param to_authorized_keys: whether the key should be added to the
      C{authorized_keys} file of all nodes
  @type to_public_keys: boolean
  @param to_public_keys: whether the keys should be added to the public key
      file
  @type get_public_keys: boolean
  @param get_public_keys: whether the node should add the clusters' public
      keys to its C{ganeti_pub_keys} file

  """
  # Make sure at least one of the flags is true, as the function would
  # not do anything otherwise
  assert (to_authorized_keys or to_public_keys or get_public_keys)

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  # Check and fix sanity of the public key file
  keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file)
  keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)

  if (not keys_by_name or node_name not in keys_by_name) \
      and (not keys_by_uuid or node_uuid not in keys_by_uuid):
    raise errors.SshUpdateError(
      "No keys found for the new node '%s' (UUID %s) in the list of public"
      " SSH keys, neither for the name or the UUID" % (node_name, node_uuid))
  else:
    if node_name in keys_by_name:
      keys_by_uuid = {}
      # Replace the name by the UUID in the file, as the name should only
      # be used temporarily
      ssh.ReplaceNameByUuid(node_uuid, node_name,
                            error_fn=errors.SshUpdateError,
                            key_file=pub_key_file)
      keys_by_uuid[node_uuid] = keys_by_name[node_name]

  # Update the master node's key files
  if to_authorized_keys:
    (auth_key_file, _) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                          dircheck=False)
    ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_uuid])

  base_data = {}
  _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
  cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

  ssh_port_map = ssconf_store.GetSshPortMap()

  # Update the target node itself
  logging.debug("Updating SSH key files of target node '%s'.", node_name)
  if get_public_keys:
    node_data = {}
    _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
    all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
    node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_OVERRIDE, all_keys)

    try:
      utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE,
          ssh_port_map.get(node_name), node_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
    except errors.SshUpdateError as e:
      # Clean up the public key file if updating the target node failed
      if to_public_keys:
        ssh.RemovePublicKey(node_uuid)
      raise e

  # Update all other nodes except the master node
  if to_authorized_keys:
    base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
      (constants.SSHS_ADD, keys_by_uuid)

  pot_mc_data = copy.deepcopy(base_data)
  if to_public_keys:
    pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid)

  all_nodes = ssconf_store.GetNodeList()
  master_node = ssconf_store.GetMasterNode()
  online_nodes = ssconf_store.GetOnlineNodeList()

  node_errors = []
  for node in all_nodes:
    if node == master_node:
      logging.debug("Skipping master node '%s'.", master_node)
      continue
    if node not in online_nodes:
      logging.debug("Skipping offline node '%s'.", node)
      continue
    if node in potential_master_candidates:
      logging.debug("Updating SSH key files of node '%s'.", node)
      try:
        utils.RetryByNumberOfTimes(
            constants.SSHS_MAX_RETRIES,
            errors.SshUpdateError,
            run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
            ssh_port_map.get(node), pot_mc_data,
            debug=False, verbose=False, use_cluster_key=False,
            ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as last_exception:
        error_msg = ("When adding the key of node '%s', updating SSH key"
                     " files of node '%s' failed after %s retries."
                     " Not trying again. Last error was: %s." %
                     (node, node_name, constants.SSHS_MAX_RETRIES,
                      last_exception))
        node_errors.append((node, error_msg))
        # We only log the error and don't raise an exception, because one
        # unreachable node shall not abort the entire procedure.
        logging.error(error_msg)

    else:
      if to_authorized_keys:
        run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                   ssh_port_map.get(node), base_data,
                   debug=False, verbose=False, use_cluster_key=False,
                   ask_key=False, strict_host_check=False)

  return node_errors


def RemoveNodeSshKey(node_uuid, node_name,
                     master_candidate_uuids,
                     potential_master_candidates,
                     master_uuid=None,
                     keys_to_remove=None,
                     from_authorized_keys=False,
                     from_public_keys=False,
                     clear_authorized_keys=False,
                     clear_public_keys=False,
                     pub_key_file=pathutils.SSH_PUB_KEYS,
                     ssconf_store=None,
                     noded_cert_file=pathutils.NODED_CERT_FILE,
                     readd=False,
                     run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Removes the node's SSH keys from the key files and distributes those.

  Note that at least one of the flags C{from_authorized_keys},
  C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
  has to be set to C{True} for the function to perform any action at all.
  Not doing so will trigger an assertion in the function.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is removed
  @type node_name: str
  @param node_name: name of the node whose key is removed
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of the current master
      candidates
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of names of potential master
      candidates
  @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDS to lists of SSH keys
      to be removed. This list is supposed to be used only if the keys are
      not in the public keys file. This is for example the case when removing
      a master node's key.
  @type from_authorized_keys: boolean
  @param from_authorized_keys: whether or not the key should be removed
      from the C{authorized_keys} file
  @type from_public_keys: boolean
  @param from_public_keys: whether or not the key should be removed from
      the C{ganeti_pub_keys} file
  @type clear_authorized_keys: boolean
  @param clear_authorized_keys: whether or not the C{authorized_keys} file
      should be cleared on the node whose keys are removed
  @type clear_public_keys: boolean
  @param clear_public_keys: whether to clear the node's C{ganeti_pub_key}
      file
  @type readd: boolean
  @param readd: whether this is called during a readd operation.
  @rtype: list of string
  @returns: list of feedback messages

  """
  # Non-disruptive error messages, list of (node, msg) pairs
  result_msgs = []
  # Keys of the node to remove, populated below if any of the from_* flags
  # is set
  keys = {}

  # Make sure at least one of these flags is true.
  if not (from_authorized_keys or from_public_keys or clear_authorized_keys
          or clear_public_keys):
    raise errors.SshUpdateError("No removal from any key file was requested.")

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  master_node = ssconf_store.GetMasterNode()
  ssh_port_map = ssconf_store.GetSshPortMap()

  if from_authorized_keys or from_public_keys:
    if keys_to_remove:
      keys = keys_to_remove
    else:
      keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
      if (not keys or node_uuid not in keys) and not readd:
        raise errors.SshUpdateError("Node '%s' not found in the list of public"
                                    " SSH keys. It seems someone tries to"
                                    " remove a key from outside the cluster!"
                                    % node_uuid)
      # During an upgrade all nodes have the master key. In this case the
      # one of the master node is kept.
      master_keys = None
      if master_uuid:
        master_keys = ssh.QueryPubKeyFile([master_uuid],
                                          key_file=pub_key_file)
        for master_key in master_keys:
          if master_key in keys[node_uuid]:
            keys[node_uuid].remove(master_key)

  if node_name == master_node and not keys_to_remove:
    raise errors.SshUpdateError("Cannot remove the master node's keys.")

  if node_uuid in keys:
    base_data = {}
    _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
    cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

    if from_authorized_keys:
      base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
        (constants.SSHS_REMOVE, keys)
      (auth_key_file, _) = \
        ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
                            dircheck=False)
      ssh.RemoveAuthorizedKeys(auth_key_file, keys[node_uuid])

    pot_mc_data = copy.deepcopy(base_data)

    if from_public_keys:
      pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_REMOVE, keys)
      ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)

    all_nodes = ssconf_store.GetNodeList()
    online_nodes = ssconf_store.GetOnlineNodeList()
    logging.debug("Removing key of node '%s' from all nodes but itself and"
                  " master.", node_name)
    for node in all_nodes:
      if node == master_node:
        logging.debug("Skipping master node '%s'.", master_node)
        continue
      if node not in online_nodes:
        logging.debug("Skipping offline node '%s'.", node)
        continue
      ssh_port = ssh_port_map.get(node)
      if not ssh_port:
        raise errors.OpExecError("No SSH port information available for"
                                 " node '%s', map: %s." %
                                 (node, ssh_port_map))
      error_msg_final = ("When removing the key of node '%s', updating the"
                         " SSH key files of node '%s' failed. Last error"
                         " was: %s.")
      if node in potential_master_candidates:
        logging.debug("Updating key setup of potential master candidate node"
                      " %s.", node)
        try:
          utils.RetryByNumberOfTimes(
              constants.SSHS_MAX_RETRIES,
              errors.SshUpdateError,
              run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
              ssh_port, pot_mc_data,
              debug=False, verbose=False, use_cluster_key=False,
              ask_key=False, strict_host_check=False)
        except errors.SshUpdateError as last_exception:
          error_msg = error_msg_final % (
            node_name, node, last_exception)
          result_msgs.append((node, error_msg))
          logging.error(error_msg)

      else:
        if from_authorized_keys:
          logging.debug("Updating key setup of normal node %s.", node)
          try:
            utils.RetryByNumberOfTimes(
                constants.SSHS_MAX_RETRIES,
                errors.SshUpdateError,
                run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
                ssh_port, base_data,
                debug=False, verbose=False, use_cluster_key=False,
                ask_key=False, strict_host_check=False)
          except errors.SshUpdateError as last_exception:
            error_msg = error_msg_final % (
              node_name, node, last_exception)
            result_msgs.append((node, error_msg))
            logging.error(error_msg)

  if clear_authorized_keys or from_public_keys or clear_public_keys:
    data = {}
    _InitSshUpdateData(data, noded_cert_file, ssconf_store)
    cluster_name = data[constants.SSHS_CLUSTER_NAME]
    ssh_port = ssh_port_map.get(node_name)
    if not ssh_port:
      raise errors.OpExecError("No SSH port information available for"
                               " node '%s', which is leaving the cluster."
                               % node_name)

    if clear_authorized_keys:
      # The 'authorized_keys' file is not solely managed by Ganeti.
      # Therefore, we have to specify exactly which keys to clear to leave
      # keys untouched that were not added by Ganeti.
      other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids
                                      if uuid != node_uuid]
      candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids,
                                           key_file=pub_key_file)
      data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
        (constants.SSHS_REMOVE, candidate_keys)

    if clear_public_keys:
      data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_CLEAR, {})
    elif from_public_keys:
      # Since clearing the public keys subsumes removing just a single key,
      # this is only done when clear_public_keys is not set
      if keys[node_uuid]:
        data[constants.SSHS_SSH_PUBLIC_KEYS] = \
          (constants.SSHS_REMOVE, keys)

    # If we have no changes to any key file, just return
    if not (constants.SSHS_SSH_PUBLIC_KEYS in data or
            constants.SSHS_SSH_AUTHORIZED_KEYS in data):
      return

    logging.debug("Updating SSH key setup of target node '%s'.", node_name)
    try:
      utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_name, pathutils.SSH_UPDATE,
          ssh_port, data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
    except errors.SshUpdateError as last_exception:
      result_msgs.append(
        (node_name,
         ("Removing SSH keys from node '%s' failed."
          " This can happen when the node is already unreachable."
          " Error: %s" % (node_name, last_exception))))

  return result_msgs


def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
                        pub_key_file=pathutils.SSH_PUB_KEYS,
                        ssconf_store=None,
                        noded_cert_file=pathutils.NODED_CERT_FILE,
                        run_cmd_fn=ssh.RunSshCmdWithStdin,
                        suffix=""):
  """Generates the root SSH key pair on the node.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is generated
  @type node_name: str
  @param node_name: name of the node whose key is generated
  @type ssh_port_map: dict of str to int
  @param ssh_port_map: mapping of node names to their SSH port

  """
  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
  if not keys_by_uuid or node_uuid not in keys_by_uuid:
    raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
                                " be regenerated is not registered in the"
                                " public keys file." % (node_name, node_uuid))

  data = {}
  _InitSshUpdateData(data, noded_cert_file, ssconf_store)
  cluster_name = data[constants.SSHS_CLUSTER_NAME]
  data[constants.SSHS_GENERATE] = {constants.SSHS_SUFFIX: suffix}

  run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
             ssh_port_map.get(node_name), data,
             debug=False, verbose=False, use_cluster_key=False,
             ask_key=False, strict_host_check=False)


def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
  master_node_uuids = [node_uuid for (node_uuid, node_name)
                       in node_uuid_name_map
                       if node_name == master_node_name]
  if len(master_node_uuids) != 1:
    raise errors.SshUpdateError("No (unique) master UUID found. Master node"
                                " name: '%s', Master UUID: '%s'" %
                                (master_node_name, master_node_uuids))
  return master_node_uuids[0]


def _GetOldMasterKeys(master_node_uuid, pub_key_file):
  old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid],
                                                key_file=pub_key_file)
  if not old_master_keys_by_uuid:
    raise errors.SshUpdateError("No public key of the master node (UUID '%s')"
                                " found, not generating a new key."
                                % master_node_uuid)
  return old_master_keys_by_uuid


def _GetNewMasterKey(root_keyfiles, master_node_uuid):
  new_master_keys = []
  for (_, (_, public_key_file)) in root_keyfiles.items():
    public_key_dir = os.path.dirname(public_key_file)
    public_key_file_tmp_filename = \
      os.path.splitext(os.path.basename(public_key_file))[0] \
      + constants.SSHS_MASTER_SUFFIX + ".pub"
    public_key_path_tmp = os.path.join(public_key_dir,
                                       public_key_file_tmp_filename)
    if os.path.exists(public_key_path_tmp):
      # Collect the temporary master key of each key type, if present
      key = utils.ReadFile(public_key_path_tmp)
      new_master_keys.append(key)
  if not new_master_keys:
    raise errors.SshUpdateError("Cannot find any type of temporary SSH key.")
  return {master_node_uuid: new_master_keys}
1861
1864 number_of_moves = 0
1865 for (_, (private_key_file, public_key_file)) in root_keyfiles.items():
1866 key_dir = os.path.dirname(public_key_file)
1867 private_key_file_tmp = \
1868 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX
1869 public_key_file_tmp = private_key_file_tmp + ".pub"
1870 private_key_path_tmp = os.path.join(key_dir,
1871 private_key_file_tmp)
1872 public_key_path_tmp = os.path.join(key_dir,
1873 public_key_file_tmp)
1874 if os.path.exists(public_key_file):
1875 utils.CreateBackup(public_key_file)
1876 utils.RemoveFile(public_key_file)
1877 if os.path.exists(private_key_file):
1878 utils.CreateBackup(private_key_file)
1879 utils.RemoveFile(private_key_file)
1880 if os.path.exists(public_key_path_tmp) and \
1881 os.path.exists(private_key_path_tmp):
1882
1883 shutil.move(public_key_path_tmp, public_key_file)
1884 shutil.move(private_key_path_tmp, private_key_file)
1885 number_of_moves += 1
1886 if not number_of_moves:
1887 raise errors.SshUpdateError("Could not move any master SSH key.")
1888
1896 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.
1897
1898 @type node_uuids: list of str
1899 @param node_uuids: list of node UUIDs whose keys should be renewed
1900 @type node_names: list of str
1901 @param node_names: list of node names whose keys should be renewed. This
1902 list should match the C{node_uuids} parameter
1903 @type master_candidate_uuids: list of str
1904 @param master_candidate_uuids: list of UUIDs of master candidates or
1905 master node
1906 @type pub_key_file: str
1907 @param pub_key_file: file path of the public key file
1908 @type noded_cert_file: str
1909 @param noded_cert_file: path of the noded SSL certificate file
1910 @type run_cmd_fn: function
1911 @param run_cmd_fn: function to run commands on remote nodes via SSH
1912 @raise errors.ProgrammerError: if node_uuids and node_names don't match
1913 @raise errors.SshUpdateError: if a node's key is missing from the
1914 public key file, if a node's new SSH key could not be fetched from
1915 it, or if there is no entry or more than one entry in the public
1916 key list for the master node
1917
1918 """
1919 if not ssconf_store:
1920 ssconf_store = ssconf.SimpleStore()
1921 cluster_name = ssconf_store.GetClusterName()
1922
1923 if len(node_uuids) != len(node_names):
1924 raise errors.ProgrammerError("List of node UUIDs and node names"
1925 " does not match in length.")
1926
1927 (_, root_keyfiles) = \
1928 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
1929 (_, dsa_pub_keyfile) = root_keyfiles[constants.SSHK_DSA]
1930 old_master_key = utils.ReadFile(dsa_pub_keyfile)
1931
1932 node_uuid_name_map = zip(node_uuids, node_names)
1933
1934 master_node_name = ssconf_store.GetMasterNode()
1935 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name)
1936 ssh_port_map = ssconf_store.GetSshPortMap()
1937
1938
1939
1940 all_node_errors = []
1941
1942
1943 for node_uuid, node_name in node_uuid_name_map:
1944 if node_name == master_node_name:
1945 continue
1946 master_candidate = node_uuid in master_candidate_uuids
1947 potential_master_candidate = node_name in potential_master_candidates
1948
1949 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1950 if not keys_by_uuid:
1951 raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
1952 " not generating a new key."
1953 % (node_name, node_uuid))
1954
1955 if master_candidate:
1956 logging.debug("Fetching old SSH key from node '%s'.", node_name)
1957 old_pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
1958 node_name, cluster_name,
1959 ssh_port_map[node_name],
1960 False,
1961 False)
1962 if old_pub_key != old_master_key:
1963
1964
1965
1966
1967
1968 logging.debug("Removing SSH key of node '%s'.", node_name)
1969 node_errors = RemoveNodeSshKey(
1970 node_uuid, node_name, master_candidate_uuids,
1971 potential_master_candidates,
1972 master_uuid=master_node_uuid, from_authorized_keys=master_candidate,
1973 from_public_keys=False, clear_authorized_keys=False,
1974 clear_public_keys=False)
1975 if node_errors:
1976 all_node_errors = all_node_errors + node_errors
1977 else:
1978 logging.debug("Old key of node '%s' is the same as the current master"
1979 " key. Not deleting that key on the node.", node_name)
1980
1981 logging.debug("Generating new SSH key for node '%s'.", node_name)
1982 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
1983 pub_key_file=pub_key_file,
1984 ssconf_store=ssconf_store,
1985 noded_cert_file=noded_cert_file,
1986 run_cmd_fn=run_cmd_fn)
1987
1988 try:
1989 logging.debug("Fetching newly created SSH key from node '%s'.", node_name)
1990 pub_key = ssh.ReadRemoteSshPubKeys(dsa_pub_keyfile,
1991 node_name, cluster_name,
1992 ssh_port_map[node_name],
1993 False,
1994 False)
1995 except Exception as err:
1996 raise errors.SshUpdateError("Could not fetch key of node %s"
1997 " (UUID %s): %s" % (node_name, node_uuid, err))
1998
1999 if potential_master_candidate:
2000 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
2001 ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file)
2002
2003 logging.debug("Adding SSH key of node '%s'.", node_name)
2004 node_errors = AddNodeSshKey(
2005 node_uuid, node_name, potential_master_candidates,
2006 to_authorized_keys=master_candidate,
2007 to_public_keys=potential_master_candidate,
2008 get_public_keys=True,
2009 pub_key_file=pub_key_file, ssconf_store=ssconf_store,
2010 noded_cert_file=noded_cert_file,
2011 run_cmd_fn=run_cmd_fn)
2012 if node_errors:
2013 all_node_errors = all_node_errors + node_errors
2014
2015
2016
2017
2018 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid, pub_key_file)
2019
2020
2021 logging.debug("Generating new SSH key for the master node.")
2022 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map,
2023 pub_key_file=pub_key_file,
2024 ssconf_store=ssconf_store,
2025 noded_cert_file=noded_cert_file,
2026 run_cmd_fn=run_cmd_fn,
2027 suffix=constants.SSHS_MASTER_SUFFIX)
2028
2029 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid)
2030
2031
2032 ssh.RemovePublicKey(master_node_uuid, key_file=pub_key_file)
2033 for pub_key in new_master_key_dict[master_node_uuid]:
2034 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=pub_key_file)
2035
2036
2037 logging.debug("Adding new master key to all nodes.")
2038 node_errors = AddNodeSshKey(
2039 master_node_uuid, master_node_name, potential_master_candidates,
2040 to_authorized_keys=True, to_public_keys=True,
2041 get_public_keys=False, pub_key_file=pub_key_file,
2042 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
2043 run_cmd_fn=run_cmd_fn)
2044 if node_errors:
2045 all_node_errors = all_node_errors + node_errors
2046
2047
2048 _ReplaceMasterKeyOnMaster(root_keyfiles)
2049
2050
2051 (auth_key_file, _) = \
2052 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2053 ssh.RemoveAuthorizedKeys(auth_key_file,
2054 old_master_keys_by_uuid[master_node_uuid])
2055
2056
2057 logging.debug("Removing the old master key from all nodes.")
2058 node_errors = RemoveNodeSshKey(
2059 master_node_uuid, master_node_name, master_candidate_uuids,
2060 potential_master_candidates,
2061 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
2062 from_public_keys=False, clear_authorized_keys=False,
2063 clear_public_keys=False)
2064 if node_errors:
2065 all_node_errors = all_node_errors + node_errors
2066
2067 return all_node_errors
2068
2071 """Return the size of the given block devices
2072
2073 @type devices: list
2074 @param devices: list of block device nodes to query
2075 @rtype: dict
2076 @return:
2077 dictionary mapping the queried block device paths (all under /dev)
2078 to their size in MiB, e.g.::
2079
2080 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}
2081
2082 """
2083 DEV_PREFIX = "/dev/"
2084 blockdevs = {}
2085
2086 for devpath in devices:
2087 if not utils.IsBelowDir(DEV_PREFIX, devpath):
2088 continue
2089
2090 try:
2091 st = os.stat(devpath)
2092 except EnvironmentError, err:
2093 logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
2094 continue
2095
2096 if stat.S_ISBLK(st.st_mode):
2097 result = utils.RunCmd(["blockdev", "--getsize64", devpath])
2098 if result.failed:
2099
2100 logging.warning("Cannot get size for block device %s", devpath)
2101 continue
2102
2103 size = int(result.stdout) / (1024 * 1024)
2104 blockdevs[devpath] = size
2105 return blockdevs
2106
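# Illustrative sketch of the contract documented above (the function name
# GetBlockDevSizes and the device paths are assumptions for the example):
# only paths below /dev that stat() as block devices make it into the
# result; character devices, regular files and missing paths are skipped.
#
#   sizes = GetBlockDevSizes(["/dev/sda", "/dev/null", "/tmp/not-a-dev"])
#   # => {"/dev/sda": 476940}   (value in MiB; the other paths are omitted)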
2109 """Compute list of logical volumes and their size.
2110
2111 @type vg_names: list
2112 @param vg_names: the volume groups whose LVs we should list, or
2113 empty for all volume groups
2114 @rtype: dict
2115 @return:
2116 dictionary of all logical volumes (key) with value being a tuple of
2117 their size (in MiB), inactive and online status::
2118
2119 {'xenvg/test1': ('20.06', True, True)}
2120
2121 in case of errors, the request fails via L{_Fail} with the error
2122 details.
2123
2124 """
2125 lvs = {}
2126 sep = "|"
2127 if not vg_names:
2128 vg_names = []
2129 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2130 "--separator=%s" % sep,
2131 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
2132 if result.failed:
2133 _Fail("Failed to list logical volumes, lvs output: %s", result.output)
2134
2135 for line in result.stdout.splitlines():
2136 line = line.strip()
2137 match = _LVSLINE_REGEX.match(line)
2138 if not match:
2139 logging.error("Invalid line returned from lvs output: '%s'", line)
2140 continue
2141 vg_name, name, size, attr = match.groups()
2142 inactive = attr[4] == "-"
2143 online = attr[5] == "o"
2144 virtual = attr[0] == "v"
2145 if virtual:
2146
2147
2148 continue
2149 lvs[vg_name + "/" + name] = (size, inactive, online)
2150
2151 return lvs
2152
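# Illustrative sketch for the LV listing above (the function name
# GetVolumeList and the volume group are assumptions for the example):
# keys are "vg/lv" strings, sizes are the strings printed by lvs, and
# volumes flagged as virtual in lv_attr are filtered out.
#
#   lvs_info = GetVolumeList(["xenvg"])
#   # => {"xenvg/test1": ("20.06", True, True)}
#   for name, (size_mib, inactive, online) in lvs_info.items():
#     ...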
2155 """List the volume groups and their size.
2156
2157 @rtype: dict
2158 @return: dictionary with volume group names as keys and their sizes
2159 as values
2160
2161 """
2162 return utils.ListVolumeGroups()
2163
2166 """List all volumes on this node.
2167
2168 @rtype: list
2169 @return:
2170 A list of dictionaries, each having four keys:
2171 - name: the logical volume name,
2172 - size: the size of the logical volume
2173 - dev: the physical device on which the LV lives
2174 - vg: the volume group to which it belongs
2175
2176 In case of errors, we return an empty list and log the
2177 error.
2178
2179 Note that since a logical volume can live on multiple physical
2180 volumes, the resulting list might include a logical volume
2181 multiple times.
2182
2183 """
2184 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2185 "--separator=|",
2186 "--options=lv_name,lv_size,devices,vg_name"])
2187 if result.failed:
2188 _Fail("Failed to list logical volumes, lvs output: %s",
2189 result.output)
2190
2191 def parse_dev(dev):
2192 return dev.split("(")[0]
2193
2194 def handle_dev(dev):
2195 return [parse_dev(x) for x in dev.split(",")]
2196
2197 def map_line(line):
2198 line = [v.strip() for v in line]
2199 return [{"name": line[0], "size": line[1],
2200 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])]
2201
2202 all_devs = []
2203 for line in result.stdout.splitlines():
2204 if line.count("|") >= 3:
2205 all_devs.extend(map_line(line.split("|")))
2206 else:
2207 logging.warning("Strange line in the output from lvs: '%s'", line)
2208 return all_devs
2209
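# Illustrative sketch of the lvs parsing above (values are assumptions):
# the "devices" column printed by lvs looks like "/dev/sda1(0),/dev/sdb1(0)",
# so parse_dev() drops the "(extent)" suffix and an LV spanning two physical
# volumes yields two entries in the result:
#
#   [{"name": "lv0", "size": "1024.00", "dev": "/dev/sda1", "vg": "xenvg"},
#    {"name": "lv0", "size": "1024.00", "dev": "/dev/sdb1", "vg": "xenvg"}]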
2212 """Check if a list of bridges exist on the current node.
2213
2214 @rtype: boolean
2215 @return: C{True} if all of them exist, C{False} otherwise
2216
2217 """
2218 missing = []
2219 for bridge in bridges_list:
2220 if not utils.BridgeExists(bridge):
2221 missing.append(bridge)
2222
2223 if missing:
2224 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2225
2229 """Provides a list of instances of the given hypervisor.
2230
2231 @type hname: string
2232 @param hname: name of the hypervisor
2233 @type hvparams: dict of strings
2234 @param hvparams: hypervisor parameters for the given hypervisor
2235 @type get_hv_fn: function
2236 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2237 name; optional parameter to increase testability
2238
2239 @rtype: list
2240 @return: a list of all running instances on the current node
2241 - instance1.example.com
2242 - instance2.example.com
2243
2244 """
2245 try:
2246 return get_hv_fn(hname).ListInstances(hvparams=hvparams)
2247 except errors.HypervisorError, err:
2248 _Fail("Error enumerating instances (hypervisor %s): %s",
2249 hname, err, exc=True)
2250
2254 """Provides a list of instances.
2255
2256 @type hypervisor_list: list
2257 @param hypervisor_list: the list of hypervisors to query information
2258 @type all_hvparams: dict of dict of strings
2259 @param all_hvparams: a dictionary mapping hypervisor types to respective
2260 cluster-wide hypervisor parameters
2261 @type get_hv_fn: function
2262 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2263 name; optional parameter to increase testability
2264
2265 @rtype: list
2266 @return: a list of all running instances on the current node
2267 - instance1.example.com
2268 - instance2.example.com
2269
2270 """
2271 results = []
2272 for hname in hypervisor_list:
2273 hvparams = all_hvparams[hname]
2274 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
2275 get_hv_fn=get_hv_fn))
2276 return results
2277
2280 """Gives back the information about an instance as a dictionary.
2281
2282 @type instance: string
2283 @param instance: the instance name
2284 @type hname: string
2285 @param hname: the hypervisor type of the instance
2286 @type hvparams: dict of strings
2287 @param hvparams: the instance's hvparams
2288
2289 @rtype: dict
2290 @return: dictionary with the following keys:
2291 - memory: memory size of instance (int)
2292 - state: state of instance (HvInstanceState)
2293 - time: cpu time of instance (float)
2294 - vcpus: the number of vcpus (int)
2295
2296 """
2297 output = {}
2298
2299 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
2300 hvparams=hvparams)
2301 if iinfo is not None:
2302 output["memory"] = iinfo[2]
2303 output["vcpus"] = iinfo[3]
2304 output["state"] = iinfo[4]
2305 output["time"] = iinfo[5]
2306
2307 return output
2308
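# Illustrative sketch (hypervisor name and returned values are assumptions
# for the example): the wrapper simply repacks the hypervisor's info tuple
# into a dictionary.
#
#   info = GetInstanceInfo("instance1.example.com", constants.HT_KVM,
#                          hvparams=hvparams)
#   # => {"memory": 2048, "vcpus": 2, "state": state, "time": 123.45}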
2311 """Computes whether an instance can be migrated.
2312
2313 @type instance: L{objects.Instance}
2314 @param instance: object representing the instance to be checked.
2315
2316 @rtype: None
2317 @raise RPCFail: if the instance is not running on this node
2318 @note: missing disk symlinks are only logged as warnings, they do
2319 not make the check fail
2320
2321 """
2322 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2323 iname = instance.name
2324 if iname not in hyper.ListInstances(hvparams=instance.hvparams):
2325 _Fail("Instance %s is not running", iname)
2326
2327 for idx in range(len(instance.disks_info)):
2328 link_name = _GetBlockDevSymlinkPath(iname, idx)
2329 if not os.path.islink(link_name):
2330 logging.warning("Instance %s is missing symlink %s for disk %d",
2331 iname, link_name, idx)
2332
2335 """Gather data about all instances.
2336
2337 This is the equivalent of L{GetInstanceInfo}, except that it
2338 computes data for all instances at once, thus being faster if one
2339 needs data about more than one instance.
2340
2341 @type hypervisor_list: list
2342 @param hypervisor_list: list of hypervisors to query for instance data
2343 @type all_hvparams: dict of dict of strings
2344 @param all_hvparams: mapping of hypervisor names to hvparams
2345
2346 @rtype: dict
2347 @return: dictionary of instance: data, with data having the following keys:
2348 - memory: memory size of instance (int)
2349 - state: state of instance (HvInstanceState)
2350 - time: cpu time of instance (float)
2351 - vcpus: the number of vcpus
2352
2353 """
2354 output = {}
2355 for hname in hypervisor_list:
2356 hvparams = all_hvparams[hname]
2357 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams)
2358 if iinfo:
2359 for name, _, memory, vcpus, state, times in iinfo:
2360 value = {
2361 "memory": memory,
2362 "vcpus": vcpus,
2363 "state": state,
2364 "time": times,
2365 }
2366 if name in output:
2367
2368
2369
2370 for key in "memory", "vcpus":
2371 if value[key] != output[name][key]:
2372 _Fail("Instance %s is running twice"
2373 " with different parameters", name)
2374 output[name] = value
2375
2376 return output
2377
2381 """Gather data about the console access of a set of instances of this node.
2382
2383 This function assumes that the caller already knows which instances are on
2384 this node, by calling a function such as L{GetAllInstancesInfo} or
2385 L{GetInstanceList}.
2386
2387 For every instance, a large amount of configuration data needs to be
2388 provided to the hypervisor interface in order to receive the console
2389 information. Whether this could or should be cut down can be discussed.
2390 The information is provided in a dictionary indexed by instance name,
2391 allowing any number of instance queries to be done.
2392
2393 @type instance_param_dict: dict of string to tuple of dictionaries, where the
2394 dictionaries represent: L{objects.Instance}, L{objects.Node},
2395 L{objects.NodeGroup}, HvParams, BeParams
2396 @param instance_param_dict: mapping of instance name to parameters necessary
2397 for console information retrieval
2398
2399 @rtype: dict
2400 @return: dictionary of instance: data, with data having the following keys:
2401 - instance: instance name
2402 - kind: console kind
2403 - message: used with kind == CONS_MESSAGE, indicates console to be
2404 unavailable, supplies error message
2405 - host: host to connect to
2406 - port: port to use
2407 - user: user for login
2408 - command: the command, broken into parts as an array
2409 - display: unknown, potentially unused?
2410
2411 """
2412
2413 output = {}
2414 for inst_name in instance_param_dict:
2415 instance = instance_param_dict[inst_name]["instance"]
2416 pnode = instance_param_dict[inst_name]["node"]
2417 group = instance_param_dict[inst_name]["group"]
2418 hvparams = instance_param_dict[inst_name]["hvParams"]
2419 beparams = instance_param_dict[inst_name]["beParams"]
2420
2421 instance = objects.Instance.FromDict(instance)
2422 pnode = objects.Node.FromDict(pnode)
2423 group = objects.NodeGroup.FromDict(group)
2424
2425 h = get_hv_fn(instance.hypervisor)
2426 output[inst_name] = h.GetInstanceConsole(instance, pnode, group,
2427 hvparams, beparams).ToDict()
2428
2429 return output
2430
2433 """Compute the OS log filename for a given instance and operation.
2434
2435 The instance name and os name are passed in as strings since not all
2436 operations have these as part of an instance object.
2437
2438 @type kind: string
2439 @param kind: the operation type (e.g. add, import, etc.)
2440 @type os_name: string
2441 @param os_name: the os name
2442 @type instance: string
2443 @param instance: the name of the instance being imported/added/etc.
2444 @type component: string or None
2445 @param component: the name of the component of the instance being
2446 transferred
2447
2448 """
2449
2450 if component:
2451 assert "/" not in component
2452 c_msg = "-%s" % component
2453 else:
2454 c_msg = ""
2455 base = ("%s-%s-%s%s-%s.log" %
2456 (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
2457 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2458
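# Illustrative sketch (OS and instance names are assumptions for the
# example): importing the "disk0" component of an instance yields a log
# file under pathutils.LOG_OS_DIR named like the following (the timestamp
# part will vary):
#
#   _InstanceLogName("import", "debootstrap", "inst1.example.com", "disk0")
#   # => ".../import-debootstrap-inst1.example.com-disk0-<timestamp>.log"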
2461 """Add an OS to an instance.
2462
2463 @type instance: L{objects.Instance}
2464 @param instance: Instance whose OS is to be installed
2465 @type reinstall: boolean
2466 @param reinstall: whether this is an instance reinstall
2467 @type debug: integer
2468 @param debug: debug level, passed to the OS scripts
2469 @rtype: None
2470
2471 """
2472 inst_os = OSFromDisk(instance.os)
2473
2474 create_env = OSEnvironment(instance, inst_os, debug)
2475 if reinstall:
2476 create_env["INSTANCE_REINSTALL"] = "1"
2477
2478 logfile = _InstanceLogName("add", instance.os, instance.name, None)
2479
2480 result = utils.RunCmd([inst_os.create_script], env=create_env,
2481 cwd=inst_os.path, output=logfile, reset_env=True)
2482 if result.failed:
2483 logging.error("os create command '%s' returned error: %s, logfile: %s,"
2484 " output: %s", result.cmd, result.fail_reason, logfile,
2485 result.output)
2486 lines = [utils.SafeEncode(val)
2487 for val in utils.TailFile(logfile, lines=20)]
2488 _Fail("OS create script failed (%s), last lines in the"
2489 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2490
2493 """Run the OS rename script for an instance.
2494
2495 @type instance: L{objects.Instance}
2496 @param instance: Instance whose OS is to be installed
2497 @type old_name: string
2498 @param old_name: previous instance name
2499 @type debug: integer
2500 @param debug: debug level, passed to the OS scripts
2501 @rtype: None
2502 @raise RPCFail: in case the rename script fails
2503
2504 """
2505 inst_os = OSFromDisk(instance.os)
2506
2507 rename_env = OSEnvironment(instance, inst_os, debug)
2508 rename_env["OLD_INSTANCE_NAME"] = old_name
2509
2510 logfile = _InstanceLogName("rename", instance.os,
2511 "%s-%s" % (old_name, instance.name), None)
2512
2513 result = utils.RunCmd([inst_os.rename_script], env=rename_env,
2514 cwd=inst_os.path, output=logfile, reset_env=True)
2515
2516 if result.failed:
2517 logging.error("os rename command '%s' returned error: %s, output: %s",
2518 result.cmd, result.fail_reason, result.output)
2519 lines = [utils.SafeEncode(val)
2520 for val in utils.TailFile(logfile, lines=20)]
2521 _Fail("OS rename script failed (%s), last lines in the"
2522 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2523
2535
2538 """Set up symlinks to a instance's block device.
2539
2540 This is an auxiliary function run when an instance is start (on the primary
2541 node) or when an instance is migrated (on the target node).
2542
2543
2544 @param instance_name: the name of the target instance
2545 @param device_path: path of the physical block device, on the node
2546 @param idx: the disk index
2547 @return: absolute path to the disk's symlink
2548
2549 """
2550
2551 if not device_path:
2552 return None
2553
2554 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2555 try:
2556 os.symlink(device_path, link_name)
2557 except OSError, err:
2558 if err.errno == errno.EEXIST:
2559 if (not os.path.islink(link_name) or
2560 os.readlink(link_name) != device_path):
2561 os.remove(link_name)
2562 os.symlink(device_path, link_name)
2563 else:
2564 raise
2565
2566 return link_name
2567
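# Illustrative sketch (the device path is an assumption for the example):
# linking disk 0 of an instance to its DRBD device; an existing stale
# symlink is replaced, while a correct one is left alone.
#
#   link_name = _SymlinkBlockDev("inst1.example.com", "/dev/drbd0", 0)
#   # returns the path computed by _GetBlockDevSymlinkPath() for disk 0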
2570 """Remove the block device symlinks belonging to the given instance.
2571
2572 """
2573 for idx, _ in enumerate(disks):
2574 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2575 if os.path.islink(link_name):
2576 try:
2577 os.remove(link_name)
2578 except OSError:
2579 logging.exception("Can't remove symlink '%s'", link_name)
2580
2583 """Get the URI for the device.
2584
2585 @type instance: L{objects.Instance}
2586 @param instance: the instance which disk belongs to
2587 @type disk: L{objects.Disk}
2588 @param disk: the target disk object
2589 @type device: L{bdev.BlockDev}
2590 @param device: the corresponding BlockDevice
2591 @rtype: string
2592 @return: the device URI if available, else None
2593
2594 """
2595 access_mode = disk.params.get(constants.LDP_ACCESS,
2596 constants.DISK_KERNELSPACE)
2597 if access_mode == constants.DISK_USERSPACE:
2598
2599 return device.GetUserspaceAccessUri(instance.hypervisor)
2600 else:
2601 return None
2602
2605 """Set up an instance's block device(s).
2606
2607 This is run on the primary node at instance startup. The block
2608 devices must be already assembled.
2609
2610 @type instance: L{objects.Instance}
2611 @param instance: the instance whose disks we should assemble
2612 @rtype: list
2613 @return: list of (disk_object, link_name, drive_uri)
2614
2615 """
2616 block_devices = []
2617 for idx, disk in enumerate(instance.disks_info):
2618 device = _RecursiveFindBD(disk)
2619 if device is None:
2620 raise errors.BlockDeviceError("Block device '%s' is not set up." %
2621 str(disk))
2622 device.Open()
2623 try:
2624 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
2625 except OSError, e:
2626 raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
2627 e.strerror)
2628 uri = _CalculateDeviceURI(instance, disk, device)
2629
2630 block_devices.append((disk, link_name, uri))
2631
2632 return block_devices
2633
2639
2645
2646
2647 -def StartInstance(instance, startup_paused, reason, store_reason=True):
2648 """Start an instance.
2649
2650 @type instance: L{objects.Instance}
2651 @param instance: the instance object
2652 @type startup_paused: bool
2653 @param startup_paused: whether to pause the instance at startup
2654 @type reason: list of reasons
2655 @param reason: the reason trail for this startup
2656 @type store_reason: boolean
2657 @param store_reason: whether to store the startup reason trail on file
2658 @rtype: None
2659
2660 """
2661 instance_info = _GetInstanceInfo(instance)
2662
2663 if instance_info and not _IsInstanceUserDown(instance_info):
2664 logging.info("Instance '%s' already running, not starting", instance.name)
2665 return
2666
2667 try:
2668 block_devices = _GatherAndLinkBlockDevs(instance)
2669 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2670 hyper.StartInstance(instance, block_devices, startup_paused)
2671 if store_reason:
2672 _StoreInstReasonTrail(instance.name, reason)
2673 except errors.BlockDeviceError, err:
2674 _Fail("Block device error: %s", err, exc=True)
2675 except errors.HypervisorError, err:
2676 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2677 _Fail("Hypervisor error: %s", err, exc=True)
2678
2681 """Shut an instance down.
2682
2683 @note: this function uses polling with a hardcoded timeout.
2684
2685 @type instance: L{objects.Instance}
2686 @param instance: the instance object
2687 @type timeout: integer
2688 @param timeout: maximum timeout for soft shutdown
2689 @type reason: list of reasons
2690 @param reason: the reason trail for this shutdown
2691 @type store_reason: boolean
2692 @param store_reason: whether to store the shutdown reason trail on file
2693 @rtype: None
2694
2695 """
2696 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2697
2698 if not _GetInstanceInfo(instance):
2699 logging.info("Instance '%s' not running, doing nothing", instance.name)
2700 return
2701
2702 class _TryShutdown(object):
2703 def __init__(self):
2704 self.tried_once = False
2705
2706 def __call__(self):
2707 if not _GetInstanceInfo(instance):
2708 return
2709
2710 try:
2711 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
2712 if store_reason:
2713 _StoreInstReasonTrail(instance.name, reason)
2714 except errors.HypervisorError, err:
2715
2716
2717 if not _GetInstanceInfo(instance):
2718 return
2719
2720 _Fail("Failed to stop instance '%s': %s", instance.name, err)
2721
2722 self.tried_once = True
2723
2724 raise utils.RetryAgain()
2725
2726 try:
2727 utils.Retry(_TryShutdown(), 5, timeout)
2728 except utils.RetryTimeout:
2729
2730 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)
2731
2732 try:
2733 hyper.StopInstance(instance, force=True)
2734 except errors.HypervisorError, err:
2735
2736
2737 if _GetInstanceInfo(instance):
2738 _Fail("Failed to force stop instance '%s': %s", instance.name, err)
2739
2740 time.sleep(1)
2741
2742 if _GetInstanceInfo(instance):
2743 _Fail("Could not shutdown instance '%s' even by destroy", instance.name)
2744
2745 try:
2746 hyper.CleanupInstance(instance.name)
2747 except errors.HypervisorError, err:
2748 logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
2749
2750 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2751
2752
2753 -def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2754 """Reboot an instance.
2755
2756 @type instance: L{objects.Instance}
2757 @param instance: the instance object to reboot
2758 @type reboot_type: str
2759 @param reboot_type: the type of reboot, one of the following
2760 constants:
2761 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
2762 instance OS, do not recreate the VM
2763 - L{constants.INSTANCE_REBOOT_HARD}: tear down and
2764 restart the VM (at the hypervisor level)
2765 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
2766 not accepted here, since that mode is handled differently, in
2767 cmdlib, and translates into full stop and start of the
2768 instance (instead of a call_instance_reboot RPC)
2769 @type shutdown_timeout: integer
2770 @param shutdown_timeout: maximum timeout for soft shutdown
2771 @type reason: list of reasons
2772 @param reason: the reason trail for this reboot
2773 @rtype: None
2774
2775 """
2776
2777
2778
2779 if not _GetInstanceInfo(instance):
2780 _Fail("Cannot reboot instance '%s' that is not running", instance.name)
2781
2782 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2783 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
2784 try:
2785 hyper.RebootInstance(instance)
2786 except errors.HypervisorError, err:
2787 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
2788 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
2789 try:
2790 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
2791 result = StartInstance(instance, False, reason, store_reason=False)
2792 _StoreInstReasonTrail(instance.name, reason)
2793 return result
2794 except errors.HypervisorError, err:
2795 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)
2796 else:
2797 _Fail("Invalid reboot_type received: '%s'", reboot_type)
2798
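# Illustrative sketch (the reason trail contents are an assumption for the
# example): a hard reboot is a full stop/start at the hypervisor level,
# with the reason trail stored only once, after the restart succeeded.
#
#   InstanceReboot(instance, constants.INSTANCE_REBOOT_HARD,
#                  shutdown_timeout=120, reason=reason_trail)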
2819
2834
2837 """Prepare the node to accept an instance.
2838
2839 @type instance: L{objects.Instance}
2840 @param instance: the instance definition
2841 @type info: string/data (opaque)
2842 @param info: migration information, from the source node
2843 @type target: string
2844 @param target: target host (usually an IP address), on this node
2845
2846 """
2847 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2848 try:
2849 hyper.AcceptInstance(instance, info, target)
2850 except errors.HypervisorError, err:
2851 _Fail("Failed to accept instance: %s", err, exc=True)
2852
2855 """Finalize any preparation to accept an instance.
2856
2857 @type instance: L{objects.Instance}
2858 @param instance: the instance definition
2859 @type info: string/data (opaque)
2860 @param info: migration information, from the source node
2861 @type success: boolean
2862 @param success: whether the migration was a success or a failure
2863
2864 """
2865 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2866 try:
2867 hyper.FinalizeMigrationDst(instance, info, success)
2868 except errors.HypervisorError, err:
2869 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
2870
2873 """Migrates an instance to another node.
2874
2875 @type cluster_name: string
2876 @param cluster_name: name of the cluster
2877 @type instance: L{objects.Instance}
2878 @param instance: the instance definition
2879 @type target: string
2880 @param target: the target node name
2881 @type live: boolean
2882 @param live: whether the migration should be done live or not (the
2883 interpretation of this parameter is left to the hypervisor)
2884 @raise RPCFail: if migration fails for some reason
2885
2886 """
2887 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2888
2889 try:
2890 hyper.MigrateInstance(cluster_name, instance, target, live)
2891 except errors.HypervisorError, err:
2892 _Fail("Failed to migrate instance: %s", err, exc=True)
2893
2896 """Finalize the instance migration on the source node.
2897
2898 @type instance: L{objects.Instance}
2899 @param instance: the instance definition of the migrated instance
2900 @type success: bool
2901 @param success: whether the migration succeeded or not
2902 @type live: bool
2903 @param live: whether the user requested a live migration or not
2904 @raise RPCFail: If the execution fails for some reason
2905
2906 """
2907 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2908
2909 try:
2910 hyper.FinalizeMigrationSource(instance, success, live)
2911 except Exception, err:
2912 _Fail("Failed to finalize the migration on the source node: %s", err,
2913 exc=True)
2914
2917 """Get the migration status
2918
2919 @type instance: L{objects.Instance}
2920 @param instance: the instance that is being migrated
2921 @rtype: L{objects.MigrationStatus}
2922 @return: the status of the current migration (one of
2923 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
2924 progress info that can be retrieved from the hypervisor
2925 @raise RPCFail: If the migration status cannot be retrieved
2926
2927 """
2928 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2929 try:
2930 return hyper.GetMigrationStatus(instance)
2931 except Exception, err:
2932 _Fail("Failed to get migration status: %s", err, exc=True)
2933
2934
2935 -def HotplugDevice(instance, action, dev_type, device, extra, seq):
2936 """Hotplug a device
2937
2938 Hotplug is currently supported only for KVM Hypervisor.
2939 @type instance: L{objects.Instance}
2940 @param instance: the instance to which we hotplug a device
2941 @type action: string
2942 @param action: the hotplug action to perform
2943 @type dev_type: string
2944 @param dev_type: the device type to hotplug
2945 @type device: either L{objects.NIC} or L{objects.Disk}
2946 @param device: the device object to hotplug
2947 @type extra: tuple
2948 @param extra: extra info used for disk hotplug (disk link, drive uri)
2949 @type seq: int
2950 @param seq: the index of the device from the master's perspective
2951 @raise RPCFail: in case the instance does not use the KVM hypervisor
2952
2953 """
2954 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2955 try:
2956 hyper.VerifyHotplugSupport(instance, action, dev_type)
2957 except errors.HotplugError, err:
2958 _Fail("Hotplug is not supported: %s", err)
2959
2960 if action == constants.HOTPLUG_ACTION_ADD:
2961 fn = hyper.HotAddDevice
2962 elif action == constants.HOTPLUG_ACTION_REMOVE:
2963 fn = hyper.HotDelDevice
2964 elif action == constants.HOTPLUG_ACTION_MODIFY:
2965 fn = hyper.HotModDevice
2966 else:
2967 assert action in constants.HOTPLUG_ALL_ACTIONS
2968
2969 return fn(instance, dev_type, device, extra, seq)
2970
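# Illustrative sketch (the target constant and the nic object are
# assumptions for the example): hypervisor support is verified first, then
# the call dispatches to the matching method, e.g. HotAddDevice for an add.
#
#   HotplugDevice(instance, constants.HOTPLUG_ACTION_ADD,
#                 constants.HOTPLUG_TARGET_NIC, nic, None, 0)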
2981
3008
3009 retries = 5
3010
3011 while True:
3012 try:
3013 trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO)
3014 break
3015 except utils.RetryTimeout:
3016 raise TimeoutError("Connection to metadata daemon timed out")
3017 except (socket.error, NoMasterError), err:
3018 if retries == 0:
3019 logging.error("Failed to connect to the metadata daemon",
3020 exc_info=True)
3021 raise TimeoutError("Failed to connect to metadata daemon: %s" % err)
3022 else:
3023 retries -= 1
3024
3025 data = serializer.DumpJson(metadata,
3026 private_encoder=serializer.EncodeWithPrivateFields)
3027
3028 trans.Send(data)
3029 trans.Close()
3030
3031
3032 -def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
3033 """Creates a block device for an instance.
3034
3035 @type disk: L{objects.Disk}
3036 @param disk: the object describing the disk we should create
3037 @type size: int
3038 @param size: the size of the physical underlying device, in MiB
3039 @type owner: str
3040 @param owner: the name of the instance for which disk is created,
3041 used for device cache data
3042 @type on_primary: boolean
3043 @param on_primary: indicates if it is the primary node or not
3044 @type info: string
3045 @param info: string that will be sent to the physical device
3046 creation, used for example to set (LVM) tags on LVs
3047 @type excl_stor: boolean
3048 @param excl_stor: Whether exclusive_storage is active
3049
3050 @return: the new unique_id of the device (this can sometimes be
3051 computed only after creation), or None. On secondary nodes,
3052 it's not required to return anything.
3053
3054 """
3055
3056
3057 clist = []
3058 if disk.children:
3059 for child in disk.children:
3060 try:
3061 crdev = _RecursiveAssembleBD(child, owner, on_primary)
3062 except errors.BlockDeviceError, err:
3063 _Fail("Can't assemble device %s: %s", child, err)
3064 if on_primary or disk.AssembleOnSecondary():
3065
3066
3067 try:
3068
3069 crdev.Open()
3070 except errors.BlockDeviceError, err:
3071 _Fail("Can't make child '%s' read-write: %s", child, err)
3072 clist.append(crdev)
3073
3074 try:
3075 device = bdev.Create(disk, clist, excl_stor)
3076 except errors.BlockDeviceError, err:
3077 _Fail("Can't create block device: %s", err)
3078
3079 if on_primary or disk.AssembleOnSecondary():
3080 try:
3081 device.Assemble()
3082 except errors.BlockDeviceError, err:
3083 _Fail("Can't assemble device after creation, unusual event: %s", err)
3084 if on_primary or disk.OpenOnSecondary():
3085 try:
3086 device.Open(force=True)
3087 except errors.BlockDeviceError, err:
3088 _Fail("Can't make device r/w after creation, unusual event: %s", err)
3089 DevCacheManager.UpdateCache(device.dev_path, owner,
3090 on_primary, disk.iv_name)
3091
3092 device.SetInfo(info)
3093
3094 return device.unique_id
3095
3096
3097 -def _DumpDevice(source_path, target_path, offset, size, truncate):
3098 """This function images/wipes the device using a local file.
3099
3100 @type source_path: string
3101 @param source_path: path of the image or data source (e.g., "/dev/zero")
3102
3103 @type target_path: string
3104 @param target_path: path of the device to image/wipe
3105
3106 @type offset: int
3107 @param offset: offset in MiB in the output file
3108
3109 @type size: int
3110 @param size: maximum size in MiB to write (data source might be smaller)
3111
3112 @type truncate: bool
3113 @param truncate: whether the file should be truncated
3114
3115 @return: None
3116 @raise RPCFail: in case of failure
3117
3118 """
3119
3120
3121
3122 block_size = constants.DD_BLOCK_SIZE
3123
3124 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset,
3125 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path,
3126 "count=%d" % size]
3127
3128 if not truncate:
3129 cmd.append("conv=notrunc")
3130
3131 result = utils.RunCmd(cmd)
3132
3133 if result.failed:
3134 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
3135 result.fail_reason, result.output)
3136
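# Illustrative sketch (the target path is an assumption; this also relies
# on constants.DD_BLOCK_SIZE being one MiB, which the MiB-based offset and
# size parameters imply): wiping the first GiB of a volume builds a command
# equivalent to "dd if=/dev/zero of=... seek=0 bs=1M count=1024
# oflag=direct conv=notrunc".
#
#   _DumpDevice("/dev/zero", "/dev/xenvg/disk0", 0, 1024, False)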
3139 """This function images a device using a downloaded image file.
3140
3141 @type source_url: string
3142 @param source_url: URL of image to dump to disk
3143
3144 @type target_path: string
3145 @param target_path: path of the device to image
3146
3147 @type size: int
3148 @param size: maximum size in MiB to write (data source might be smaller)
3149
3150 @rtype: NoneType
3151 @return: None
3152 @raise RPCFail: in case of download or write failures
3153
3154 """
3155 class DDParams(object):
3156 def __init__(self, current_size, total_size):
3157 self.current_size = current_size
3158 self.total_size = total_size
3159 self.image_size_error = False
3160
3161 def dd_write(ddparams, out):
3162 if ddparams.current_size < ddparams.total_size:
3163 ddparams.current_size += len(out)
3164 target_file.write(out)
3165 else:
3166 ddparams.image_size_error = True
3167 return -1
3168
3169 target_file = open(target_path, "r+")
3170 ddparams = DDParams(0, 1024 * 1024 * size)
3171
3172 curl = pycurl.Curl()
3173 curl.setopt(pycurl.VERBOSE, True)
3174 curl.setopt(pycurl.NOSIGNAL, True)
3175 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
3176 curl.setopt(pycurl.URL, source_url)
3177 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))
3178
3179 try:
3180 curl.perform()
3181 except pycurl.error:
3182 if ddparams.image_size_error:
3183 _Fail("Disk image larger than the disk")
3184 else:
3185 raise
3186 finally:
3187 target_file.close()
3188
3191 """Copies data from source block device to target.
3192
3193 This function gets the export and import commands from the source and
3194 target devices respectively, and then concatenates them to a single
3195 command using a pipe ("|"). Finally, executes the unified command that
3196 will transfer the data between the devices during the disk template
3197 conversion operation.
3198
3199 @type src_disk: L{objects.Disk}
3200 @param src_disk: the disk object we want to copy from
3201 @type target_disk: L{objects.Disk}
3202 @param target_disk: the disk object we want to copy to
3203
3204 @rtype: NoneType
3205 @return: None
3206 @raise RPCFail: in case of failure
3207
3208 """
3209 src_dev = _RecursiveFindBD(src_disk)
3210 if src_dev is None:
3211 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid)
3212
3213 dest_dev = _RecursiveFindBD(target_disk)
3214 if dest_dev is None:
3215 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid)
3216
3217 src_cmd = src_dev.Export()
3218 dest_cmd = dest_dev.Import()
3219 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd),
3220 utils.ShellQuoteArgs(dest_cmd))
3221
3222 result = utils.RunCmd(command)
3223 if result.failed:
3224 _Fail("Disk conversion command '%s' exited with error: %s; output: %s",
3225 result.cmd, result.fail_reason, result.output)
3226
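# Illustrative sketch (the exact commands come from the Export()/Import()
# methods of the source and target device classes and are assumptions
# here): for an LV-to-RBD conversion the resulting shell pipeline could
# look like "dd if=/dev/xenvg/disk0 ... | rbd import ... -".
#
#   BlockdevConvert(src_disk, target_disk)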
3229 """Wipes a block device.
3230
3231 @type disk: L{objects.Disk}
3232 @param disk: the disk object we want to wipe
3233 @type offset: int
3234 @param offset: The offset in MiB in the device
3235 @type size: int
3236 @param size: The size in MiB to write
3237
3238 """
3239 try:
3240 rdev = _RecursiveFindBD(disk)
3241 except errors.BlockDeviceError:
3242 rdev = None
3243
3244 if not rdev:
3245 _Fail("Cannot wipe device %s: device not found", disk.iv_name)
3246 if offset < 0:
3247 _Fail("Negative offset")
3248 if size < 0:
3249 _Fail("Negative size")
3250 if offset > rdev.size:
3251 _Fail("Wipe offset is bigger than device size")
3252 if (offset + size) > rdev.size:
3253 _Fail("Wipe offset and size are bigger than device size")
3254
3255 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
3256
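# Illustrative sketch (the function name BlockdevWipe is an assumption for
# the example): zeroing the first GiB of a disk; offset and size are
# validated against the device size before _DumpDevice does the work.
#
#   BlockdevWipe(disk, 0, 1024)   # wipes MiB 0..1023 with /dev/zero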
3259 """Images a block device either by dumping a local file or
3260 downloading a URL.
3261
3262 @type disk: L{objects.Disk}
3263 @param disk: the disk object we want to image
3264
3265 @type image: string
3266 @param image: file path or URL of the disk image to be dumped
3267
3268 @type size: int
3269 @param size: The size in MiB to write
3270
3271 @rtype: NoneType
3272 @return: None
3273 @raise RPCFail: in case of failure
3274
3275 """
3276 if not (utils.IsUrl(image) or os.path.exists(image)):
3277 _Fail("Image '%s' not found", image)
3278
3279 try:
3280 rdev = _RecursiveFindBD(disk)
3281 except errors.BlockDeviceError:
3282 rdev = None
3283
3284 if not rdev:
3285 _Fail("Cannot image device %s: device not found", disk.iv_name)
3286 if size < 0:
3287 _Fail("Negative size")
3288 if size > rdev.size:
3289 _Fail("Image size is bigger than device size")
3290
3291 if utils.IsUrl(image):
3292 _DownloadAndDumpDevice(image, rdev.dev_path, size)
3293 else:
3294 _DumpDevice(image, rdev.dev_path, 0, size, False)
3295
3298 """Pause or resume the sync of the block device.
3299
3300 @type disks: list of L{objects.Disk}
3301 @param disks: the disk objects we want to pause/resume
3302 @type pause: bool
3303 @param pause: whether to pause or resume
3304
3305 """
3306 success = []
3307 for disk in disks:
3308 try:
3309 rdev = _RecursiveFindBD(disk)
3310 except errors.BlockDeviceError:
3311 rdev = None
3312
3313 if not rdev:
3314 success.append((False, ("Cannot change sync for device %s:"
3315 " device not found" % disk.iv_name)))
3316 continue
3317
3318 result = rdev.PauseResumeSync(pause)
3319
3320 if result:
3321 success.append((result, None))
3322 else:
3323 if pause:
3324 msg = "Pause"
3325 else:
3326 msg = "Resume"
3327 success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))
3328
3329 return success
3330
3333 """Remove a block device.
3334
3335 @note: This is intended to be called recursively.
3336
3337 @type disk: L{objects.Disk}
3338 @param disk: the disk object we should remove
3339 @rtype: boolean
3340 @return: the success of the operation
3341
3342 """
3343 msgs = []
3344 try:
3345 rdev = _RecursiveFindBD(disk)
3346 except errors.BlockDeviceError, err:
3347
3348 logging.info("Can't attach to device %s in remove: %s", disk, err)
3349 rdev = None
3350 if rdev is not None:
3351 r_path = rdev.dev_path
3352
3353 def _TryRemove():
3354 try:
3355 rdev.Remove()
3356 return []
3357 except errors.BlockDeviceError, err:
3358 return [str(err)]
3359
3360 msgs.extend(utils.SimpleRetry([], _TryRemove,
3361 constants.DISK_REMOVE_RETRY_INTERVAL,
3362 constants.DISK_REMOVE_RETRY_TIMEOUT))
3363
3364 if not msgs:
3365 DevCacheManager.RemoveCache(r_path)
3366
3367 if disk.children:
3368 for child in disk.children:
3369 try:
3370 BlockdevRemove(child)
3371 except RPCFail, err:
3372 msgs.append(str(err))
3373
3374 if msgs:
3375 _Fail("; ".join(msgs))
3376
3379 """Activate a block device for an instance.
3380
3381 This is run on the primary and secondary nodes for an instance.
3382
3383 @note: this function is called recursively.
3384
3385 @type disk: L{objects.Disk}
3386 @param disk: the disk we try to assemble
3387 @type owner: str
3388 @param owner: the name of the instance which owns the disk
3389 @type as_primary: boolean
3390 @param as_primary: if we should make the block device
3391 read/write
3392
3393 @return: the assembled device or None (in case no device
3394 was assembled)
3395 @raise errors.BlockDeviceError: in case there is an error
3396 during the activation of the children or the device
3397 itself
3398
3399 """
3400 children = []
3401 if disk.children:
3402 mcn = disk.ChildrenNeeded()
3403 if mcn == -1:
3404 mcn = 0
3405 else:
3406 mcn = len(disk.children) - mcn
3407 for chld_disk in disk.children:
3408 try:
3409 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
3410 except errors.BlockDeviceError, err:
3411 if children.count(None) >= mcn:
3412 raise
3413 cdev = None
3414 logging.error("Error in child activation (but continuing): %s",
3415 str(err))
3416 children.append(cdev)
3417
3418 if as_primary or disk.AssembleOnSecondary():
3419 r_dev = bdev.Assemble(disk, children)
3420 result = r_dev
3421 if as_primary or disk.OpenOnSecondary():
3422 r_dev.Open()
3423 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
3424 as_primary, disk.iv_name)
3425
3426 else:
3427 result = True
3428 return result
3429
3432 """Activate a block device for an instance.
3433
3434 This is a wrapper over _RecursiveAssembleBD.
3435
3436 @rtype: tuple
3437 @return: a tuple of (C{/dev/...} path, symlink name, device URI) for
3438 primary nodes, and (C{True}, C{True}) for secondary nodes
3439
3440 """
3441 try:
3442 result = _RecursiveAssembleBD(disk, instance.name, as_primary)
3443 if isinstance(result, BlockDev):
3444
3445 dev_path = result.dev_path
3446 link_name = None
3447 uri = None
3448 if as_primary:
3449 link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
3450 uri = _CalculateDeviceURI(instance, disk, result)
3451 elif result:
3452 return result, result
3453 else:
3454 _Fail("Unexpected result from _RecursiveAssembleBD")
3455 except errors.BlockDeviceError, err:
3456 _Fail("Error while assembling disk: %s", err, exc=True)
3457 except OSError, err:
3458 _Fail("Error while symlinking disk: %s", err, exc=True)
3459
3460 return dev_path, link_name, uri
3461
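# Illustrative sketch of the two return shapes (the function name
# BlockdevAssemble, argument order and device path are assumptions taken
# from the body above):
#
#   BlockdevAssemble(disk, instance, True, 0)
#   # => ("/dev/drbd0", "<symlink path>", None)   on the primary node
#   BlockdevAssemble(disk, instance, False, 0)
#   # => (True, True)                             on a secondary node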
3464 """Shut down a block device.
3465
3466 First, if the device is assembled (Attach() is successful), then
3467 the device is shutdown. Then the children of the device are
3468 shutdown.
3469
3470 This function is called recursively. Note that we don't cache the
3471 children or such; as opposed to assemble, shutting down different
3472 devices doesn't require that the upper device was active.
3473
3474 @type disk: L{objects.Disk}
3475 @param disk: the description of the disk we should
3476 shutdown
3477 @rtype: None
3478
3479 """
3480 msgs = []
3481 r_dev = _RecursiveFindBD(disk)
3482 if r_dev is not None:
3483 r_path = r_dev.dev_path
3484 try:
3485 r_dev.Shutdown()
3486 DevCacheManager.RemoveCache(r_path)
3487 except errors.BlockDeviceError, err:
3488 msgs.append(str(err))
3489
3490 if disk.children:
3491 for child in disk.children:
3492 try:
3493 BlockdevShutdown(child)
3494 except RPCFail, err:
3495 msgs.append(str(err))
3496
3497 if msgs:
3498 _Fail("; ".join(msgs))
3499
3502 """Extend a mirrored block device.
3503
3504 @type parent_cdev: L{objects.Disk}
3505 @param parent_cdev: the disk to which we should add children
3506 @type new_cdevs: list of L{objects.Disk}
3507 @param new_cdevs: the list of children which we should add
3508 @rtype: None
3509
3510 """
3511 parent_bdev = _RecursiveFindBD(parent_cdev)
3512 if parent_bdev is None:
3513 _Fail("Can't find parent device '%s' in add children", parent_cdev)
3514 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
3515 if new_bdevs.count(None) > 0:
3516 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
3517 parent_bdev.AddChildren(new_bdevs)
3518
3521 """Shrink a mirrored block device.
3522
3523 @type parent_cdev: L{objects.Disk}
3524 @param parent_cdev: the disk from which we should remove children
3525 @type new_cdevs: list of L{objects.Disk}
3526 @param new_cdevs: the list of children which we should remove
3527 @rtype: None
3528
3529 """
3530 parent_bdev = _RecursiveFindBD(parent_cdev)
3531 if parent_bdev is None:
3532 _Fail("Can't find parent device '%s' in remove children", parent_cdev)
3533 devs = []
3534 for disk in new_cdevs:
3535 rpath = disk.StaticDevPath()
3536 if rpath is None:
3537 bd = _RecursiveFindBD(disk)
3538 if bd is None:
3539 _Fail("Can't find device %s while removing children", disk)
3540 else:
3541 devs.append(bd.dev_path)
3542 else:
3543 if not utils.IsNormAbsPath(rpath):
3544 _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
3545 devs.append(rpath)
3546 parent_bdev.RemoveChildren(devs)
3547
3550 """Get the mirroring status of a list of devices.
3551
3552 @type disks: list of L{objects.Disk}
3553 @param disks: the list of disks which we should query
3554 @rtype: list
3555 @return: List of L{objects.BlockDevStatus}, one for each disk
3556 @raise errors.BlockDeviceError: if any of the disks cannot be
3557 found
3558
3559 """
3560 stats = []
3561 for dsk in disks:
3562 rbd = _RecursiveFindBD(dsk)
3563 if rbd is None:
3564 _Fail("Can't find device %s", dsk)
3565
3566 stats.append(rbd.CombinedSyncStatus())
3567
3568 return stats
3569
3572 """Get the mirroring status of a list of devices.
3573
3574 @type disks: list of L{objects.Disk}
3575 @param disks: the list of disks which we should query
3576 @rtype: list
3577 @return: List of tuples, (bool, status), one for each disk; bool denotes
3578 success/failure, status is L{objects.BlockDevStatus} on success, string
3579 otherwise
3580
3581 """
3582 result = []
3583 for disk in disks:
3584 try:
3585 rbd = _RecursiveFindBD(disk)
3586 if rbd is None:
3587 result.append((False, "Can't find device %s" % disk))
3588 continue
3589
3590 status = rbd.CombinedSyncStatus()
3591 except errors.BlockDeviceError, err:
3592 logging.exception("Error while getting disk status")
3593 result.append((False, str(err)))
3594 else:
3595 result.append((True, status))
3596
3597 assert len(disks) == len(result)
3598
3599 return result
3600
3603 """Check if a device is activated.
3604
3605 If so, return information about the real device.
3606
3607 @type disk: L{objects.Disk}
3608 @param disk: the disk object we need to find
3609
3610 @return: None if the device can't be found,
3611 otherwise the device instance
3612
3613 """
3614 children = []
3615 if disk.children:
3616 for chdisk in disk.children:
3617 children.append(_RecursiveFindBD(chdisk))
3618
3619 return bdev.FindDevice(disk, children)
3620
3623 """Opens the underlying block device of a disk.
3624
3625 @type disk: L{objects.Disk}
3626 @param disk: the disk object we want to open
3627
3628 """
3629 real_disk = _RecursiveFindBD(disk)
3630 if real_disk is None:
3631 _Fail("Block device '%s' is not set up", disk)
3632
3633 real_disk.Open()
3634
3635 return real_disk
3636
3639 """Check if a device is activated.
3640
3641 If it is, return information about the real device.
3642
3643 @type disk: L{objects.Disk}
3644 @param disk: the disk to find
3645 @rtype: None or objects.BlockDevStatus
3646 @return: None if the disk cannot be found, otherwise the current
3647 sync status information
3648
3649 """
3650 try:
3651 rbd = _RecursiveFindBD(disk)
3652 except errors.BlockDeviceError, err:
3653 _Fail("Failed to find device: %s", err, exc=True)
3654
3655 if rbd is None:
3656 return None
3657
3658 return rbd.GetSyncStatus()
3659
3662 """Computes the size of the given disks.
3663
3664 If a disk is not found, returns None instead.
3665
3666 @type disks: list of L{objects.Disk}
3667 @param disks: the list of disks to compute the size for
3668 @rtype: list
3669 @return: list with elements None if the disk cannot be found,
3670 otherwise the pair (size, spindles), where spindles is None if the
3671 device doesn't support that
3672
3673 """
3674 result = []
3675 for cf in disks:
3676 try:
3677 rbd = _RecursiveFindBD(cf)
3678 except errors.BlockDeviceError:
3679 result.append(None)
3680 continue
3681 if rbd is None:
3682 result.append(None)
3683 else:
3684 result.append(rbd.GetActualDimensions())
3685 return result
3686
3687
3688 -def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
3689 """Write a file to the filesystem.
3690
3691 This allows the master to overwrite(!) a file. It will only perform
3692 the operation if the file belongs to a list of configuration files.
3693
3694 @type file_name: str
3695 @param file_name: the target file name
3696 @type data: str
3697 @param data: the new contents of the file
3698 @type mode: int
3699 @param mode: the mode to give the file (can be None)
3700 @type uid: string
3701 @param uid: the owner of the file
3702 @type gid: string
3703 @param gid: the group of the file
3704 @type atime: float
3705 @param atime: the atime to set on the file (can be None)
3706 @type mtime: float
3707 @param mtime: the mtime to set on the file (can be None)
3708 @rtype: None
3709
3710 """
3711 file_name = vcluster.LocalizeVirtualPath(file_name)
3712
3713 if not os.path.isabs(file_name):
3714 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)
3715
3716 if file_name not in _ALLOWED_UPLOAD_FILES:
3717 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
3718 file_name)
3719
3720 raw_data = _Decompress(data)
3721
3722 if not (isinstance(uid, basestring) and isinstance(gid, basestring)):
3723 _Fail("Invalid username/groupname type")
3724
3725 getents = runtime.GetEnts()
3726 uid = getents.LookupUser(uid)
3727 gid = getents.LookupGroup(gid)
3728
3729 utils.SafeWriteFile(file_name, None,
3730 data=raw_data, mode=mode, uid=uid, gid=gid,
3731 atime=atime, mtime=mtime)
3732
3733
3734 -def RunOob(oob_program, command, node, timeout):
3735 """Executes oob_program with given command on given node.
3736
3737 @param oob_program: The path to the executable oob_program
3738 @param command: The command to invoke on oob_program
3739 @param node: The node given as an argument to the program
3740 @param timeout: Timeout after which we kill the oob program
3741
3742 @return: stdout
3743 @raise RPCFail: If execution fails for some reason
3744
3745 """
3746 result = utils.RunCmd([oob_program, command, node], timeout=timeout)
3747
3748 if result.failed:
3749 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd,
3750 result.fail_reason, result.output)
3751
3752 return result.stdout
3753
3756 """Compute and return the API version of a given OS.
3757
3758 This function will try to read the API version of the OS residing in
3759 the 'os_dir' directory.
3760
3761 @type os_dir: str
3762 @param os_dir: the directory in which we should look for the OS
3763 @rtype: tuple
3764 @return: tuple (status, data) with status denoting the validity and
3765 data holding either the valid versions or an error message
3766
3767 """
3768 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
3769
3770 try:
3771 st = os.stat(api_file)
3772 except EnvironmentError, err:
3773 return False, ("Required file '%s' not found under path %s: %s" %
3774 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err)))
3775
3776 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
3777 return False, ("File '%s' in %s is not a regular file" %
3778 (constants.OS_API_FILE, os_dir))
3779
3780 try:
3781 api_versions = utils.ReadFile(api_file).splitlines()
3782 except EnvironmentError, err:
3783 return False, ("Error while reading the API version file at %s: %s" %
3784 (api_file, utils.ErrnoOrStr(err)))
3785
3786 try:
3787 api_versions = [int(version.strip()) for version in api_versions]
3788 except (TypeError, ValueError), err:
3789 return False, ("API version(s) can't be converted to integer: %s" %
3790 str(err))
3791
3792 return True, api_versions
3793
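# Illustrative sketch (the OS directory is an assumption for the example):
# the API file simply holds one integer version per line, so a file
# containing "20\n15\n" yields:
#
#   _OSOndiskAPIVersion("/srv/ganeti/os/debootstrap")
#   # => (True, [20, 15]); on any problem => (False, "<error message>")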
3796 """Compute the validity for all OSes.
3797
3798 @type top_dirs: list
3799 @param top_dirs: the list of directories in which to
3800 search (if not given defaults to
3801 L{pathutils.OS_SEARCH_PATH})
3802 @rtype: list
3803 @return: a list of tuples (name, path, status, diagnose, variants,
3804 parameters, api_version, trusted) for all (potential) OSes under
3805 all search paths, where:
3806 - name is the (potential) OS name; path is its full path
3807 - status True/False is the validity of the OS
3808 - diagnose is the error message for an invalid OS, otherwise empty
3809 - variants is a list of supported OS variants, if any
3810 - parameters is a list of (name, help) parameters, if any
3811 - api_version is a list of supported OS API versions
3812 - trusted denotes whether the OS create script is trusted
3813
3814 """
3815 if top_dirs is None:
3816 top_dirs = pathutils.OS_SEARCH_PATH
3817
3818 result = []
3819 for dir_name in top_dirs:
3820 if os.path.isdir(dir_name):
3821 try:
3822 f_names = utils.ListVisibleFiles(dir_name)
3823 except EnvironmentError, err:
3824 logging.exception("Can't list the OS directory %s: %s", dir_name, err)
3825 break
3826 for name in f_names:
3827 os_path = utils.PathJoin(dir_name, name)
3828 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
3829 if status:
3830 diagnose = ""
3831 variants = os_inst.supported_variants
3832 parameters = os_inst.supported_parameters
3833 api_versions = os_inst.api_versions
3834           trusted = not os_inst.create_script_untrusted
3835 else:
3836 diagnose = os_inst
3837 variants = parameters = api_versions = []
3838 trusted = True
3839 result.append((name, os_path, status, diagnose, variants,
3840 parameters, api_versions, trusted))
3841
3842 return result
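
# Example (illustrative; names and paths hypothetical): a valid and a
# broken OS definition would yield result entries roughly like
#
#   ("debootstrap", "/srv/ganeti/os/debootstrap", True, "",
#    ["buster"], [["dhcp", "Use DHCP on the first NIC"]], [20], True)
#   ("broken-os", "/srv/ganeti/os/broken-os", False,
#    "API version mismatch ...", [], [], [], True)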
3843
3845 def _TryOSFromDisk(name, base_dir=None):
3846   """Create an OS instance from disk.
3847
3848 This function will return an OS instance if the given name is a
3849 valid OS name.
3850
3851 @type base_dir: string
3852 @keyword base_dir: Base directory containing OS installations.
3853 Defaults to a search in all the OS_SEARCH_PATH dirs.
3854 @rtype: tuple
3855 @return: success and either the OS instance if we find a valid one,
3856 or error message
3857
3858 """
3859 if base_dir is None:
3860 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir)
3861 else:
3862 os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
3863
3864 if os_dir is None:
3865 return False, "Directory for OS %s not found in search path" % name
3866
3867 status, api_versions = _OSOndiskAPIVersion(os_dir)
3868 if not status:
3869
3870 return status, api_versions
3871
3872 if not constants.OS_API_VERSIONS.intersection(api_versions):
3873 return False, ("API version mismatch for path '%s': found %s, want %s." %
3874 (os_dir, api_versions, constants.OS_API_VERSIONS))
3875
3876
3877
3878
3879 os_files = dict.fromkeys(constants.OS_SCRIPTS, True)
3880
3881 os_files[constants.OS_SCRIPT_CREATE] = False
3882 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False
3883
3884 if max(api_versions) >= constants.OS_API_V15:
3885 os_files[constants.OS_VARIANTS_FILE] = False
3886
3887 if max(api_versions) >= constants.OS_API_V20:
3888 os_files[constants.OS_PARAMETERS_FILE] = True
3889 else:
3890 del os_files[constants.OS_SCRIPT_VERIFY]
3891
3892 for (filename, required) in os_files.items():
3893 os_files[filename] = utils.PathJoin(os_dir, filename)
3894
3895 try:
3896 st = os.stat(os_files[filename])
3897 except EnvironmentError, err:
3898 if err.errno == errno.ENOENT and not required:
3899 del os_files[filename]
3900 continue
3901 return False, ("File '%s' under path '%s' is missing (%s)" %
3902 (filename, os_dir, utils.ErrnoOrStr(err)))
3903
3904 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
3905 return False, ("File '%s' under path '%s' is not a regular file" %
3906 (filename, os_dir))
3907
3908 if filename in constants.OS_SCRIPTS:
3909 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
3910 return False, ("File '%s' under path '%s' is not executable" %
3911 (filename, os_dir))
3912
3913   if constants.OS_SCRIPT_CREATE not in os_files and \
3914       constants.OS_SCRIPT_CREATE_UNTRUSTED not in os_files:
3915 return False, ("A create script (trusted or untrusted) under path '%s'"
3916 " must exist" % os_dir)
3917
3918 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None)
3919 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED,
3920 None)
3921
3922 variants = []
3923 if constants.OS_VARIANTS_FILE in os_files:
3924 variants_file = os_files[constants.OS_VARIANTS_FILE]
3925 try:
3926 variants = \
3927 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file))
3928 except EnvironmentError, err:
3929
3930 if err.errno != errno.ENOENT:
3931 return False, ("Error while reading the OS variants file at %s: %s" %
3932 (variants_file, utils.ErrnoOrStr(err)))
3933
3934 parameters = []
3935 if constants.OS_PARAMETERS_FILE in os_files:
3936 parameters_file = os_files[constants.OS_PARAMETERS_FILE]
3937 try:
3938 parameters = utils.ReadFile(parameters_file).splitlines()
3939 except EnvironmentError, err:
3940 return False, ("Error while reading the OS parameters file at %s: %s" %
3941 (parameters_file, utils.ErrnoOrStr(err)))
3942 parameters = [v.split(None, 1) for v in parameters]
3943
3944 os_obj = objects.OS(name=name, path=os_dir,
3945 create_script=create_script,
3946 create_script_untrusted=create_script_untrusted,
3947 export_script=os_files[constants.OS_SCRIPT_EXPORT],
3948 import_script=os_files[constants.OS_SCRIPT_IMPORT],
3949 rename_script=os_files[constants.OS_SCRIPT_RENAME],
3950 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
3951 None),
3952 supported_variants=variants,
3953 supported_parameters=parameters,
3954 api_versions=api_versions)
3955 return True, os_obj
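
# Example (illustrative): a parameters file with a line such as
#
#   dhcp Whether to run a DHCP client on the first NIC
#
# is turned by the split(None, 1) above into the pair
# ["dhcp", "Whether to run a DHCP client on the first NIC"].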
3956
3958 def OSFromDisk(name, base_dir=None):
3959   """Create an OS instance from disk.
3960
3961 This function will return an OS instance if the given name is a
3962 valid OS name. Otherwise, it will raise an appropriate
3963 L{RPCFail} exception, detailing why this is not a valid OS.
3964
3965   This is just a wrapper over L{_TryOSFromDisk}, which returns a
3966   (status, payload) pair instead of raising an exception.
3967
3968 @type base_dir: string
3969 @keyword base_dir: Base directory containing OS installations.
3970 Defaults to a search in all the OS_SEARCH_PATH dirs.
3971 @rtype: L{objects.OS}
3972 @return: the OS instance if we find a valid one
3973 @raise RPCFail: if we don't find a valid OS
3974
3975 """
3976 name_only = objects.OS.GetName(name)
3977 status, payload = _TryOSFromDisk(name_only, base_dir)
3978
3979 if not status:
3980 _Fail(payload)
3981
3982 return payload
3983
3984
3985 def OSCoreEnv(os_name, inst_os, os_params, debug=0):
3986 """Calculate the basic environment for an os script.
3987
3988 @type os_name: str
3989 @param os_name: full operating system name (including variant)
3990 @type inst_os: L{objects.OS}
3991 @param inst_os: operating system for which the environment is being built
3992 @type os_params: dict
3993 @param os_params: the OS parameters
3994 @type debug: integer
3995   @param debug: debug level (0 or 1, for OS API 10)
3996 @rtype: dict
3997 @return: dict of environment variables
3998 @raise errors.BlockDeviceError: if the block device
3999 cannot be found
4000
4001 """
4002 result = {}
4003 api_version = \
4004 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
4005 result["OS_API_VERSION"] = "%d" % api_version
4006 result["OS_NAME"] = inst_os.name
4007 result["DEBUG_LEVEL"] = "%d" % debug
4008
4009
4010 if api_version >= constants.OS_API_V15 and inst_os.supported_variants:
4011 variant = objects.OS.GetVariant(os_name)
4012 if not variant:
4013 variant = inst_os.supported_variants[0]
4014 else:
4015 variant = ""
4016 result["OS_VARIANT"] = variant
4017
4018
4019 for pname, pvalue in os_params.items():
4020 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue
4021
4022
4023
4024
4025 result["PATH"] = constants.HOOKS_PATH
4026
4027 return result
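
# Example (illustrative): an OS parameter "dhcp-timeout" set to "30" is
# exported to the scripts as OSP_DHCP_TIMEOUT=30; a sketch of the name
# mangling used above:
#
#   >>> "OSP_%s" % "dhcp-timeout".upper().replace("-", "_")
#   'OSP_DHCP_TIMEOUT'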
4028
4030 def OSEnvironment(instance, inst_os, debug=0):
4031   """Calculate the environment for an os script.
4032
4033 @type instance: L{objects.Instance}
4034 @param instance: target instance for the os script run
4035 @type inst_os: L{objects.OS}
4036 @param inst_os: operating system for which the environment is being built
4037 @type debug: integer
4038   @param debug: debug level (0 or 1, for OS API 10)
4039 @rtype: dict
4040 @return: dict of environment variables
4041 @raise errors.BlockDeviceError: if the block device
4042 cannot be found
4043
4044 """
4045 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams,
4046 instance.osparams_private.Unprivate()), debug=debug)
4047
4048 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]:
4049 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))
4050
4051 result["HYPERVISOR"] = instance.hypervisor
4052 result["DISK_COUNT"] = "%d" % len(instance.disks_info)
4053 result["NIC_COUNT"] = "%d" % len(instance.nics)
4054 result["INSTANCE_SECONDARY_NODES"] = \
4055 ("%s" % " ".join(instance.secondary_nodes))
4056
4057
4058 for idx, disk in enumerate(instance.disks_info):
4059 real_disk = _OpenRealBD(disk)
4060 uri = _CalculateDeviceURI(instance, disk, real_disk)
4061 result["DISK_%d_ACCESS" % idx] = disk.mode
4062 result["DISK_%d_UUID" % idx] = disk.uuid
4063 if real_disk.dev_path:
4064 result["DISK_%d_PATH" % idx] = real_disk.dev_path
4065 if uri:
4066 result["DISK_%d_URI" % idx] = uri
4067 if disk.name:
4068 result["DISK_%d_NAME" % idx] = disk.name
4069 if constants.HV_DISK_TYPE in instance.hvparams:
4070 result["DISK_%d_FRONTEND_TYPE" % idx] = \
4071 instance.hvparams[constants.HV_DISK_TYPE]
4072 if disk.dev_type in constants.DTS_BLOCK:
4073 result["DISK_%d_BACKEND_TYPE" % idx] = "block"
4074 elif disk.dev_type in constants.DTS_FILEBASED:
4075 result["DISK_%d_BACKEND_TYPE" % idx] = \
4076 "file:%s" % disk.logical_id[0]
4077
4078
4079 for idx, nic in enumerate(instance.nics):
4080 result["NIC_%d_MAC" % idx] = nic.mac
4081 result["NIC_%d_UUID" % idx] = nic.uuid
4082 if nic.name:
4083 result["NIC_%d_NAME" % idx] = nic.name
4084 if nic.ip:
4085 result["NIC_%d_IP" % idx] = nic.ip
4086 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE]
4087 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4088 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK]
4089 if nic.nicparams[constants.NIC_LINK]:
4090 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK]
4091 if nic.netinfo:
4092 nobj = objects.Network.FromDict(nic.netinfo)
4093 result.update(nobj.HooksDict("NIC_%d_" % idx))
4094 if constants.HV_NIC_TYPE in instance.hvparams:
4095 result["NIC_%d_FRONTEND_TYPE" % idx] = \
4096 instance.hvparams[constants.HV_NIC_TYPE]
4097
4098
4099 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
4100 for key, value in source.items():
4101 result["INSTANCE_%s_%s" % (kind, key)] = str(value)
4102
4103 return result
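
# Example (illustrative; all values hypothetical): for a one-disk,
# one-NIC KVM instance the resulting dict contains entries such as
#
#   INSTANCE_NAME=web1.example.com    HYPERVISOR=kvm
#   DISK_COUNT=1                      DISK_0_PATH=/dev/drbd0
#   NIC_COUNT=1                       NIC_0_MAC=aa:00:00:12:34:56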
4104
4106 def DiagnoseExtStorage(top_dirs=None):
4107   """Compute the validity for all ExtStorage Providers.
4108
4109 @type top_dirs: list
4110 @param top_dirs: the list of directories in which to
4111 search (if not given defaults to
4112 L{pathutils.ES_SEARCH_PATH})
4113   @rtype: list of tuples
4114 @return: a list of tuples (name, path, status, diagnose, parameters)
4115 for all (potential) ExtStorage Providers under all
4116 search paths, where:
4117 - name is the (potential) ExtStorage Provider
4118 - path is the full path to the ExtStorage Provider
4119 - status True/False is the validity of the ExtStorage Provider
4120 - diagnose is the error message for an invalid ExtStorage Provider,
4121 otherwise empty
4122 - parameters is a list of (name, help) parameters, if any
4123
4124 """
4125 if top_dirs is None:
4126 top_dirs = pathutils.ES_SEARCH_PATH
4127
4128 result = []
4129 for dir_name in top_dirs:
4130 if os.path.isdir(dir_name):
4131 try:
4132 f_names = utils.ListVisibleFiles(dir_name)
4133 except EnvironmentError, err:
4134 logging.exception("Can't list the ExtStorage directory %s: %s",
4135 dir_name, err)
4136 break
4137 for name in f_names:
4138 es_path = utils.PathJoin(dir_name, name)
4139 status, es_inst = extstorage.ExtStorageFromDisk(name, base_dir=dir_name)
4140 if status:
4141 diagnose = ""
4142 parameters = es_inst.supported_parameters
4143 else:
4144 diagnose = es_inst
4145 parameters = []
4146 result.append((name, es_path, status, diagnose, parameters))
4147
4148 return result
4149
4150
4151 def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
4152 """Grow a stack of block devices.
4153
4154   This function is called recursively, with the children being the
4155   first ones to be resized.
4156
4157 @type disk: L{objects.Disk}
4158 @param disk: the disk to be grown
4159 @type amount: integer
4160 @param amount: the amount (in mebibytes) to grow with
4161 @type dryrun: boolean
4162 @param dryrun: whether to execute the operation in simulation mode
4163 only, without actually increasing the size
4164 @param backingstore: whether to execute the operation on backing storage
4165 only, or on "logical" storage only; e.g. DRBD is logical storage,
4166 whereas LVM, file, RBD are backing storage
4167   @type excl_stor: boolean
4168   @param excl_stor: Whether exclusive_storage is active
4169   @rtype: None
4170   @return: None; an L{RPCFail} exception is raised with the error
4171       message if growing the device fails
4172
4173 """
4174 r_dev = _RecursiveFindBD(disk)
4175 if r_dev is None:
4176 _Fail("Cannot find block device %s", disk)
4177
4178 try:
4179 r_dev.Grow(amount, dryrun, backingstore, excl_stor)
4180 except errors.BlockDeviceError, err:
4181 _Fail("Failed to grow block device: %s", err, exc=True)
4182
4184 def BlockdevSnapshot(disk, snap_name, snap_size):
4185   """Create a snapshot copy of a block device.
4186
4187 This function is called recursively, and the snapshot is actually created
4188 just for the leaf lvm backend device.
4189
4190 @type disk: L{objects.Disk}
4191 @param disk: the disk to be snapshotted
4192 @type snap_name: string
4193 @param snap_name: the name of the snapshot
4194 @type snap_size: int
4195 @param snap_size: the size of the snapshot
4196 @rtype: string
4197 @return: snapshot disk ID as (vg, lv)
4198
4199 """
4200 def _DiskSnapshot(disk, snap_name=None, snap_size=None):
4201 r_dev = _RecursiveFindBD(disk)
4202 if r_dev is not None:
4203 return r_dev.Snapshot(snap_name=snap_name, snap_size=snap_size)
4204 else:
4205 _Fail("Cannot find block device %s", disk)
4206
4207 if disk.SupportsSnapshots():
4208 if disk.dev_type == constants.DT_DRBD8:
4209 if not disk.children:
4210 _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
4211 disk.unique_id)
4212 return BlockdevSnapshot(disk.children[0], snap_name, snap_size)
4213 else:
4214 return _DiskSnapshot(disk, snap_name, snap_size)
4215 else:
4216 _Fail("Cannot snapshot block device '%s' of type '%s'",
4217 disk.logical_id, disk.dev_type)
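
# Example (illustrative; names hypothetical): for a DRBD8 disk backed by
# an LVM volume, the recursion above descends to disk.children[0] and the
# snapshot is taken of the LVM leaf only, returning a (vg, lv) pair such
# as ("xenvg", "web1-disk0.snap").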
4218
4220 def BlockdevSetInfo(disk, info):
4221   """Sets 'metadata' information on block devices.
4222
4223 This function sets 'info' metadata on block devices. Initial
4224 information is set at device creation; this function should be used
4225 for example after renames.
4226
4227 @type disk: L{objects.Disk}
4228   @param disk: the disk whose metadata should be updated
4229   @type info: string
4230   @param info: new 'info' metadata
4231   @rtype: None
4232   @return: None; an L{RPCFail} exception is raised with the error
4233       message if setting the metadata fails
4234
4235 """
4236 r_dev = _RecursiveFindBD(disk)
4237 if r_dev is None:
4238 _Fail("Cannot find block device %s", disk)
4239
4240 try:
4241 r_dev.SetInfo(info)
4242 except errors.BlockDeviceError, err:
4243 _Fail("Failed to set information on block device: %s", err, exc=True)
4244
4246 def FinalizeExport(instance, snap_disks):
4247   """Write out the export configuration information.
4248
4249 @type instance: L{objects.Instance}
4250 @param instance: the instance which we export, used for
4251 saving configuration
4252 @type snap_disks: list of L{objects.Disk}
4253 @param snap_disks: list of snapshot block devices, which
4254 will be used to get the actual name of the dump file
4255
4256 @rtype: None
4257
4258 """
4259 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new")
4260 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name)
4261 disk_template = utils.GetDiskTemplate(snap_disks)
4262
4263 config = objects.SerializableConfigParser()
4264
4265 config.add_section(constants.INISECT_EXP)
4266 config.set(constants.INISECT_EXP, "version", str(constants.EXPORT_VERSION))
4267 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time()))
4268 config.set(constants.INISECT_EXP, "source", instance.primary_node)
4269 config.set(constants.INISECT_EXP, "os", instance.os)
4270 config.set(constants.INISECT_EXP, "compression", "none")
4271
4272 config.add_section(constants.INISECT_INS)
4273 config.set(constants.INISECT_INS, "name", instance.name)
4274 config.set(constants.INISECT_INS, "maxmem", "%d" %
4275 instance.beparams[constants.BE_MAXMEM])
4276 config.set(constants.INISECT_INS, "minmem", "%d" %
4277 instance.beparams[constants.BE_MINMEM])
4278
4279 config.set(constants.INISECT_INS, "memory", "%d" %
4280 instance.beparams[constants.BE_MAXMEM])
4281 config.set(constants.INISECT_INS, "vcpus", "%d" %
4282 instance.beparams[constants.BE_VCPUS])
4283 config.set(constants.INISECT_INS, "disk_template", disk_template)
4284 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor)
4285 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags()))
4286
4287 nic_total = 0
4288 for nic_count, nic in enumerate(instance.nics):
4289 nic_total += 1
4290 config.set(constants.INISECT_INS, "nic%d_mac" %
4291 nic_count, "%s" % nic.mac)
4292 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip)
4293 config.set(constants.INISECT_INS, "nic%d_network" % nic_count,
4294 "%s" % nic.network)
4295 config.set(constants.INISECT_INS, "nic%d_name" % nic_count,
4296 "%s" % nic.name)
4297 for param in constants.NICS_PARAMETER_TYPES:
4298 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param),
4299 "%s" % nic.nicparams.get(param, None))
4300
4301 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total)
4302
4303 disk_total = 0
4304 for disk_count, disk in enumerate(snap_disks):
4305 if disk:
4306 disk_total += 1
4307 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count,
4308 ("%s" % disk.iv_name))
4309 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count,
4310 ("%s" % disk.uuid))
4311 config.set(constants.INISECT_INS, "disk%d_size" % disk_count,
4312 ("%d" % disk.size))
4313 config.set(constants.INISECT_INS, "disk%d_name" % disk_count,
4314 "%s" % disk.name)
4315
4316 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total)
4317
4318
4319
4320 config.add_section(constants.INISECT_HYP)
4321 for name, value in instance.hvparams.items():
4322 if name not in constants.HVC_GLOBALS:
4323 config.set(constants.INISECT_HYP, name, str(value))
4324
4325 config.add_section(constants.INISECT_BEP)
4326 for name, value in instance.beparams.items():
4327 config.set(constants.INISECT_BEP, name, str(value))
4328
4329 config.add_section(constants.INISECT_OSP)
4330 for name, value in instance.osparams.items():
4331 config.set(constants.INISECT_OSP, name, str(value))
4332
4333 config.add_section(constants.INISECT_OSP_PRIVATE)
4334 for name, value in instance.osparams_private.items():
4335 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get()))
4336
4337 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
4338 data=config.Dumps())
4339 shutil.rmtree(finaldestdir, ignore_errors=True)
4340 shutil.move(destdir, finaldestdir)
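
# Example (illustrative; values hypothetical): the resulting file uses
# plain INI syntax, roughly
#
#   [export]
#   version = 0
#   os = debootstrap
#
#   [instance]
#   name = web1.example.com
#   disk_count = 1
#   disk0_size = 10240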
4341
4364
4366 def ListExports():
4367   """Return a list of exports currently available on this machine.
4368
4369 @rtype: list
4370 @return: list of the exports
4371
4372 """
4373 if os.path.isdir(pathutils.EXPORT_DIR):
4374 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR))
4375 else:
4376 _Fail("No exports directory")
4377
4379 def RemoveExport(export):
4380   """Remove an existing export from the node.
4381
4382 @type export: str
4383 @param export: the name of the export to remove
4384 @rtype: None
4385
4386 """
4387 target = utils.PathJoin(pathutils.EXPORT_DIR, export)
4388
4389 try:
4390 shutil.rmtree(target)
4391 except EnvironmentError, err:
4392 _Fail("Error while removing the export: %s", err, exc=True)
4393
4395 def BlockdevRename(devlist):
4396   """Rename a list of block devices.
4397
4398 @type devlist: list of tuples
4399 @param devlist: list of tuples of the form (disk, new_unique_id); disk is
4400 an L{objects.Disk} object describing the current disk, and new
4401 unique_id is the name we rename it to
4402   @rtype: None
4403   @return: None; an L{RPCFail} exception is raised if any rename fails
4404
4405 """
4406 msgs = []
4407 result = True
4408 for disk, unique_id in devlist:
4409 dev = _RecursiveFindBD(disk)
4410 if dev is None:
4411 msgs.append("Can't find device %s in rename" % str(disk))
4412 result = False
4413 continue
4414 try:
4415 old_rpath = dev.dev_path
4416 dev.Rename(unique_id)
4417 new_rpath = dev.dev_path
4418 if old_rpath != new_rpath:
4419 DevCacheManager.RemoveCache(old_rpath)
4420
4421
4422
4423
4424
4425 except errors.BlockDeviceError, err:
4426 msgs.append("Can't rename device '%s' to '%s': %s" %
4427 (dev, unique_id, err))
4428 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
4429 result = False
4430 if not result:
4431 _Fail("; ".join(msgs))
4432
4450
4452 def CreateFileStorageDir(file_storage_dir):
4453   """Create file storage directory.
4454
4455 @type file_storage_dir: str
4456 @param file_storage_dir: directory to create
4457
4458   @rtype: None
4459   @return: None; an L{RPCFail} exception is raised if the path exists
4460       but is not a directory, or if the directory cannot be created
4461
4462 """
4463 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
4464 if os.path.exists(file_storage_dir):
4465 if not os.path.isdir(file_storage_dir):
4466 _Fail("Specified storage dir '%s' is not a directory",
4467 file_storage_dir)
4468 else:
4469 try:
4470 os.makedirs(file_storage_dir, 0750)
4471 except OSError, err:
4472 _Fail("Cannot create file storage directory '%s': %s",
4473 file_storage_dir, err, exc=True)
4474
4476 def RemoveFileStorageDir(file_storage_dir):
4477   """Remove file storage directory.
4478
4479   Remove it only if it's empty. If it is not, an error is raised.
4480
4481 @type file_storage_dir: str
4482 @param file_storage_dir: the directory we should cleanup
4483   @rtype: None
4484   @return: None; an L{RPCFail} exception is raised in case of errors
4486
4487 """
4488 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
4489 if os.path.exists(file_storage_dir):
4490 if not os.path.isdir(file_storage_dir):
4491 _Fail("Specified Storage directory '%s' is not a directory",
4492 file_storage_dir)
4493
4494 try:
4495 os.rmdir(file_storage_dir)
4496 except OSError, err:
4497 _Fail("Cannot remove file storage directory '%s': %s",
4498 file_storage_dir, err)
4499
4501 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
4502   """Rename the file storage directory.
4503
4504 @type old_file_storage_dir: str
4505 @param old_file_storage_dir: the current path
4506 @type new_file_storage_dir: str
4507 @param new_file_storage_dir: the name we should rename to
4508   @rtype: None
4509   @return: None; an L{RPCFail} exception is raised in case of errors
4511
4512 """
4513 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
4514 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
4515 if not os.path.exists(new_file_storage_dir):
4516 if os.path.isdir(old_file_storage_dir):
4517 try:
4518 os.rename(old_file_storage_dir, new_file_storage_dir)
4519 except OSError, err:
4520 _Fail("Cannot rename '%s' to '%s': %s",
4521 old_file_storage_dir, new_file_storage_dir, err)
4522 else:
4523 _Fail("Specified storage dir '%s' is not a directory",
4524 old_file_storage_dir)
4525 else:
4526 if os.path.exists(old_file_storage_dir):
4527 _Fail("Cannot rename '%s' to '%s': both locations exist",
4528 old_file_storage_dir, new_file_storage_dir)
4529
4531 def _EnsureJobQueueFile(file_name):
4532   """Checks whether the given filename is in the queue directory.
4533
4534 @type file_name: str
4535 @param file_name: the file name we should check
4536 @rtype: None
4537 @raises RPCFail: if the file is not valid
4538
4539 """
4540 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name):
4541 _Fail("Passed job queue file '%s' does not belong to"
4542 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
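
# Example (illustrative; the directory shown is the conventional default
# for pathutils.QUEUE_DIR): "/var/lib/ganeti/queue/job-42" passes the
# check above, while a path escaping the directory through "..", e.g.
# "/var/lib/ganeti/queue/../config.data", is rejected.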
4543
4545 def JobQueueUpdate(file_name, content):
4546   """Updates a file in the queue directory.
4547
4548 This is just a wrapper over L{utils.io.WriteFile}, with proper
4549 checking.
4550
4551 @type file_name: str
4552 @param file_name: the job file name
4553 @type content: str
4554 @param content: the new job contents
4555   @rtype: None
4556   @return: None; an L{RPCFail} exception is raised on failure
4557
4558 """
4559 file_name = vcluster.LocalizeVirtualPath(file_name)
4560
4561 _EnsureJobQueueFile(file_name)
4562 getents = runtime.GetEnts()
4563
4564
4565 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid,
4566 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
4567
4569 def JobQueueRename(old, new):
4570   """Renames a job queue file.
4571
4572 This is just a wrapper over os.rename with proper checking.
4573
4574 @type old: str
4575 @param old: the old (actual) file name
4576 @type new: str
4577 @param new: the desired file name
4578   @rtype: None
4579   @return: None; an L{RPCFail} exception is raised on failure
4580
4581 """
4582 old = vcluster.LocalizeVirtualPath(old)
4583 new = vcluster.LocalizeVirtualPath(new)
4584
4585 _EnsureJobQueueFile(old)
4586 _EnsureJobQueueFile(new)
4587
4588 getents = runtime.GetEnts()
4589
4590 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750,
4591 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
4592
4594 def BlockdevClose(instance_name, disks):
4595   """Closes the given block devices.
4596
4597 This means they will be switched to secondary mode (in case of
4598 DRBD).
4599
4600 @param instance_name: if the argument is not empty, the symlinks
4601 of this instance will be removed
4602 @type disks: list of L{objects.Disk}
4603 @param disks: the list of disks to be closed
4604   @rtype: None
4605   @return: None; an L{RPCFail} exception listing the per-device error
4606       details is raised if closing any of the devices fails
4609
4610 """
4611 bdevs = []
4612 for cf in disks:
4613 rd = _RecursiveFindBD(cf)
4614 if rd is None:
4615 _Fail("Can't find device %s", cf)
4616 bdevs.append(rd)
4617
4618 msg = []
4619 for rd in bdevs:
4620 try:
4621 rd.Close()
4622 except errors.BlockDeviceError, err:
4623 msg.append(str(err))
4624 if msg:
4625 _Fail("Can't close devices: %s", ",".join(msg))
4626 else:
4627 if instance_name:
4628 _RemoveBlockDevLinks(instance_name, disks)
4629
4631 def BlockdevOpen(instance_name, disks, exclusive):
4632   """Opens the given block devices.
4633
4634 """
4635 bdevs = []
4636 for cf in disks:
4637 rd = _RecursiveFindBD(cf)
4638 if rd is None:
4639 _Fail("Can't find device %s", cf)
4640 bdevs.append(rd)
4641
4642 msg = []
4643 for idx, rd in enumerate(bdevs):
4644 try:
4645 rd.Open(exclusive=exclusive)
4646 _SymlinkBlockDev(instance_name, rd.dev_path, idx)
4647 except errors.BlockDeviceError, err:
4648 msg.append(str(err))
4649
4650 if msg:
4651 _Fail("Can't open devices: %s", ",".join(msg))
4652
4654 def ValidateHVParams(hvname, hvparams):
4655   """Validates the given hypervisor parameters.
4656
4657 @type hvname: string
4658 @param hvname: the hypervisor name
4659 @type hvparams: dict
4660 @param hvparams: the hypervisor parameters to be validated
4661 @rtype: None
4662
4663 """
4664 try:
4665 hv_type = hypervisor.GetHypervisor(hvname)
4666 hv_type.ValidateParameters(hvparams)
4667 except errors.HypervisorError, err:
4668 _Fail(str(err), log=False)
4669
4671 def _CheckOSPList(os_obj, parameters):
4672   """Check whether a list of parameters is supported by the OS.
4673
4674 @type os_obj: L{objects.OS}
4675 @param os_obj: OS object to check
4676 @type parameters: list
4677 @param parameters: the list of parameters to check
4678
4679 """
4680 supported = [v[0] for v in os_obj.supported_parameters]
4681 delta = frozenset(parameters).difference(supported)
4682 if delta:
4683 _Fail("The following parameters are not supported"
4684 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
4685
4687 def _CheckOSVariant(os_obj, name):
4688   """Check whether an OS name conforms to the os variants specification.
4689
4690 @type os_obj: L{objects.OS}
4691 @param os_obj: OS object to check
4692
4693 @type name: string
4694 @param name: OS name passed by the user, to check for validity
4695
4696 @rtype: NoneType
4697 @return: None
4698 @raise RPCFail: if OS variant is not valid
4699
4700 """
4701 variant = objects.OS.GetVariant(name)
4702
4703 if not os_obj.supported_variants:
4704 if variant:
4705 _Fail("OS '%s' does not support variants ('%s' passed)" %
4706 (os_obj.name, variant))
4707 else:
4708 return
4709
4710 if not variant:
4711 _Fail("OS name '%s' must include a variant" % name)
4712
4713 if variant not in os_obj.supported_variants:
4714 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))
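
# Example (illustrative): for name = "debootstrap+buster" the variant is
# "buster" and must be listed in the OS's supported_variants; the plain
# name "debootstrap" is rejected by the check above if the OS declares
# any variants at all.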
4715
4716
4717 def ValidateOS(required, osname, checks, osparams, force_variant):
4718 """Validate the given OS parameters.
4719
4720 @type required: boolean
4721 @param required: whether absence of the OS should translate into
4722 failure or not
4723 @type osname: string
4724 @param osname: the OS to be validated
4725 @type checks: list
4726 @param checks: list of the checks to run (currently only 'parameters')
4727 @type osparams: dict
4728 @param osparams: dictionary with OS parameters, some of which may be
4729 private.
4730 @rtype: boolean
4731 @return: True if the validation passed, or False if the OS was not
4732 found and L{required} was false
4733
4734 """
4735 if not constants.OS_VALIDATE_CALLS.issuperset(checks):
4736 _Fail("Unknown checks required for OS %s: %s", osname,
4737 set(checks).difference(constants.OS_VALIDATE_CALLS))
4738
4739 name_only = objects.OS.GetName(osname)
4740 status, tbv = _TryOSFromDisk(name_only, None)
4741
4742 if not status:
4743 if required:
4744 _Fail(tbv)
4745 else:
4746 return False
4747
4748 if not force_variant:
4749 _CheckOSVariant(tbv, osname)
4750
4751 if max(tbv.api_versions) < constants.OS_API_V20:
4752 return True
4753
4754 if constants.OS_VALIDATE_PARAMETERS in checks:
4755 _CheckOSPList(tbv, osparams.keys())
4756
4757 validate_env = OSCoreEnv(osname, tbv, osparams)
4758 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env,
4759 cwd=tbv.path, reset_env=True)
4760 if result.failed:
4761 logging.error("os validate command '%s' returned error: %s output: %s",
4762 result.cmd, result.fail_reason, result.output)
4763 _Fail("OS validation script failed (%s), output: %s",
4764 result.fail_reason, result.output, log=False)
4765
4766 return True
4767
4768
4769 def ExportOS(instance, override_env):
4770 """Creates a GZIPed tarball with an OS definition and environment.
4771
4772 The archive contains a file with the environment variables needed by
4773 the OS scripts.
4774
4775 @type instance: L{objects.Instance}
4776 @param instance: instance for which the OS definition is exported
4777
4778 @type override_env: dict of string to string
4779 @param override_env: if supplied, it overrides the environment on a
4780 key-by-key basis that is part of the archive
4781
4782 @rtype: string
4783 @return: filepath of the archive
4784
4785 """
4786 assert instance
4787 assert instance.os
4788
4789 temp_dir = tempfile.mkdtemp()
4790 inst_os = OSFromDisk(instance.os)
4791
4792 result = utils.RunCmd(["ln", "-s", inst_os.path,
4793 utils.PathJoin(temp_dir, "os")])
4794 if result.failed:
4795 _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'",
4796 inst_os, temp_dir, result.fail_reason, result.output)
4797
4798 env = OSEnvironment(instance, inst_os)
4799 env.update(override_env)
4800
4801 with open(utils.PathJoin(temp_dir, "environment"), "w") as f:
4802 for var in env:
4803 f.write(var + "=" + env[var] + "\n")
4804
4805 (fd, os_package) = tempfile.mkstemp(suffix=".tgz")
4806 os.close(fd)
4807
4808 result = utils.RunCmd(["tar", "--dereference", "-czv",
4809 "-f", os_package,
4810 "-C", temp_dir,
4811 "."])
4812 if result.failed:
4813 _Fail("Failed to create OS archive '%s': %s, output '%s'",
4814 os_package, result.fail_reason, result.output)
4815
4816 result = utils.RunCmd(["rm", "-rf", temp_dir])
4817 if result.failed:
4818 _Fail("Failed to remove copy of OS package '%s' in '%s': %s, output '%s'",
4819 inst_os, temp_dir, result.fail_reason, result.output)
4820
4821 return os_package
4822
4845
4846
4847 def _GetX509Filenames(cryptodir, name):
4848   """Returns the paths of an X509 certificate's key and certificate file.
4849
4850   """
4851   return (utils.PathJoin(cryptodir, name),
4852           utils.PathJoin(cryptodir, name, _X509_KEY_FILE),
4853           utils.PathJoin(cryptodir, name, _X509_CERT_FILE))
4854
4855
4856 def CreateX509Certificate(validity, cryptodir=pathutils.CRYPTO_KEYS_DIR):
4857   """Creates a new X509 certificate for SSL/TLS.
4858
4859 @type validity: int
4860 @param validity: Validity in seconds
4861 @rtype: tuple; (string, string)
4862 @return: Certificate name and public part
4863
4864 """
4865 serial_no = int(time.time())
4866 (key_pem, cert_pem) = \
4867 utils.GenerateSelfSignedX509Cert(netutils.Hostname.GetSysName(),
4868 min(validity, _MAX_SSL_CERT_VALIDITY),
4869 serial_no)
4870
4871 cert_dir = tempfile.mkdtemp(dir=cryptodir,
4872 prefix="x509-%s-" % utils.TimestampForFilename())
4873 try:
4874 name = os.path.basename(cert_dir)
4875 assert len(name) > 5
4876
4877 (_, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
4878
4879 utils.WriteFile(key_file, mode=0400, data=key_pem)
4880 utils.WriteFile(cert_file, mode=0400, data=cert_pem)
4881
4882
4883 return (name, cert_pem)
4884 except Exception:
4885 shutil.rmtree(cert_dir, ignore_errors=True)
4886 raise
4887
4889 def RemoveX509Certificate(name, cryptodir=pathutils.CRYPTO_KEYS_DIR):
4890   """Removes an X509 certificate.
4891
4892 @type name: string
4893 @param name: Certificate name
4894
4895 """
4896 (cert_dir, key_file, cert_file) = _GetX509Filenames(cryptodir, name)
4897
4898 utils.RemoveFile(key_file)
4899 utils.RemoveFile(cert_file)
4900
4901 try:
4902 os.rmdir(cert_dir)
4903 except EnvironmentError, err:
4904 _Fail("Cannot remove certificate directory '%s': %s",
4905 cert_dir, err)
4906
4908 def _GetImportExportIoCommand(instance, mode, ieio, ieargs):
4909   """Returns the command for the requested input/output.
4910
4911 @type instance: L{objects.Instance}
4912 @param instance: The instance object
4913 @param mode: Import/export mode
4914 @param ieio: Input/output type
4915 @param ieargs: Input/output arguments
4916
4917 """
4918 assert mode in (constants.IEM_IMPORT, constants.IEM_EXPORT)
4919
4920 env = None
4921 prefix = None
4922 suffix = None
4923 exp_size = None
4924
4925 if ieio == constants.IEIO_FILE:
4926 (filename, ) = ieargs
4927
4928 if not utils.IsNormAbsPath(filename):
4929 _Fail("Path '%s' is not normalized or absolute", filename)
4930
4931 real_filename = os.path.realpath(filename)
4932 directory = os.path.dirname(real_filename)
4933
4934 if not utils.IsBelowDir(pathutils.EXPORT_DIR, real_filename):
4935 _Fail("File '%s' is not under exports directory '%s': %s",
4936 filename, pathutils.EXPORT_DIR, real_filename)
4937
4938
4939 utils.Makedirs(directory, mode=0750)
4940
4941 quoted_filename = utils.ShellQuote(filename)
4942
4943 if mode == constants.IEM_IMPORT:
4944 suffix = "> %s" % quoted_filename
4945 elif mode == constants.IEM_EXPORT:
4946 suffix = "< %s" % quoted_filename
4947
4948
4949 try:
4950 st = os.stat(filename)
4951 except EnvironmentError, err:
4952 logging.error("Can't stat(2) %s: %s", filename, err)
4953 else:
4954 exp_size = utils.BytesToMebibyte(st.st_size)
4955
4956 elif ieio == constants.IEIO_RAW_DISK:
4957 (disk, ) = ieargs
4958 real_disk = _OpenRealBD(disk)
4959
4960 if mode == constants.IEM_IMPORT:
4961 suffix = "| %s" % utils.ShellQuoteArgs(real_disk.Import())
4962
4963 elif mode == constants.IEM_EXPORT:
4964 prefix = "%s |" % utils.ShellQuoteArgs(real_disk.Export())
4965 exp_size = disk.size
4966
4967 elif ieio == constants.IEIO_SCRIPT:
4968 (disk, disk_index, ) = ieargs
4969
4970 assert isinstance(disk_index, (int, long))
4971
4972 inst_os = OSFromDisk(instance.os)
4973 env = OSEnvironment(instance, inst_os)
4974
4975 if mode == constants.IEM_IMPORT:
4976 disk_path_var = "DISK_%d_PATH" % disk_index
4977 if disk_path_var in env:
4978 env["IMPORT_DEVICE"] = env[disk_path_var]
4979 env["IMPORT_DISK_PATH"] = env[disk_path_var]
4980
4981 disk_uri_var = "DISK_%d_URI" % disk_index
4982 if disk_uri_var in env:
4983 env["IMPORT_DISK_URI"] = env[disk_uri_var]
4984
4985 env["IMPORT_INDEX"] = str(disk_index)
4986 script = inst_os.import_script
4987
4988 elif mode == constants.IEM_EXPORT:
4989 disk_path_var = "DISK_%d_PATH" % disk_index
4990 if disk_path_var in env:
4991 env["EXPORT_DEVICE"] = env[disk_path_var]
4992 env["EXPORT_DISK_PATH"] = env[disk_path_var]
4993
4994 disk_uri_var = "DISK_%d_URI" % disk_index
4995 if disk_uri_var in env:
4996 env["EXPORT_DISK_URI"] = env[disk_uri_var]
4997
4998 env["EXPORT_INDEX"] = str(disk_index)
4999 script = inst_os.export_script
5000
5001
5002 script_cmd = utils.BuildShellCmd("( cd %s && %s; )", inst_os.path, script)
5003
5004 if mode == constants.IEM_IMPORT:
5005 suffix = "| %s" % script_cmd
5006
5007 elif mode == constants.IEM_EXPORT:
5008 prefix = "%s |" % script_cmd
5009
5010
5011 exp_size = constants.IE_CUSTOM_SIZE
5012
5013 else:
5014 _Fail("Invalid %s I/O mode %r", mode, ieio)
5015
5016 return (env, prefix, suffix, exp_size)
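
# Example (illustrative; path hypothetical): exporting to a file yields
# no prefix and a suffix of "< /srv/ganeti/export/web1.new/disk0.dump",
# i.e. the import/export daemon later runs its transfer command with
# stdin redirected from the dump file; imports use "> file" instead.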
5017
5019 def _CreateImportExportStatusDir(prefix):
5020   """Creates status directory for import/export.
5021
5022 """
5023 return tempfile.mkdtemp(dir=pathutils.IMPORT_EXPORT_DIR,
5024 prefix=("%s-%s-" %
5025 (prefix, utils.TimestampForFilename())))
5026
5028 def StartImportExportDaemon(mode, opts, host, port, instance, component,
5029                             ieio, ieioargs):
5030   """Starts an import or export daemon.
5031
5032   @param mode: Import/export mode
5033 @type opts: L{objects.ImportExportOptions}
5034 @param opts: Daemon options
5035 @type host: string
5036 @param host: Remote host for export (None for import)
5037 @type port: int
5038 @param port: Remote port for export (None for import)
5039 @type instance: L{objects.Instance}
5040 @param instance: Instance object
5041 @type component: string
5042 @param component: which part of the instance is transferred now,
5043 e.g. 'disk/0'
5044 @param ieio: Input/output type
5045 @param ieioargs: Input/output arguments
5046
5047 """
5048
5049
5050
5051
5052
5053 if mode == constants.IEM_IMPORT:
5054 prefix = "import"
5055
5056 if not (host is None and port is None):
5057 _Fail("Can not specify host or port on import")
5058
5059 elif mode == constants.IEM_EXPORT:
5060 prefix = "export"
5061
5062 if host is None or port is None:
5063 _Fail("Host and port must be specified for an export")
5064
5065 else:
5066 _Fail("Invalid mode %r", mode)
5067
5068 if (opts.key_name is None) ^ (opts.ca_pem is None):
5069 _Fail("Cluster certificate can only be used for both key and CA")
5070
5071 (cmd_env, cmd_prefix, cmd_suffix, exp_size) = \
5072 _GetImportExportIoCommand(instance, mode, ieio, ieioargs)
5073
5074 if opts.key_name is None:
5075
5076 key_path = pathutils.NODED_CERT_FILE
5077 cert_path = pathutils.NODED_CERT_FILE
5078 assert opts.ca_pem is None
5079 else:
5080 (_, key_path, cert_path) = _GetX509Filenames(pathutils.CRYPTO_KEYS_DIR,
5081 opts.key_name)
5082 assert opts.ca_pem is not None
5083
5084 for i in [key_path, cert_path]:
5085 if not os.path.exists(i):
5086 _Fail("File '%s' does not exist" % i)
5087
5088 status_dir = _CreateImportExportStatusDir("%s-%s" % (prefix, component))
5089 try:
5090 status_file = utils.PathJoin(status_dir, _IES_STATUS_FILE)
5091 pid_file = utils.PathJoin(status_dir, _IES_PID_FILE)
5092 ca_file = utils.PathJoin(status_dir, _IES_CA_FILE)
5093
5094 if opts.ca_pem is None:
5095
5096 ca = utils.ReadFile(pathutils.NODED_CERT_FILE)
5097 else:
5098 ca = opts.ca_pem
5099
5100
5101 utils.WriteFile(ca_file, data=ca, mode=0400)
5102
5103 cmd = [
5104 pathutils.IMPORT_EXPORT_DAEMON,
5105 status_file, mode,
5106 "--key=%s" % key_path,
5107 "--cert=%s" % cert_path,
5108 "--ca=%s" % ca_file,
5109 ]
5110
5111 if host:
5112 cmd.append("--host=%s" % host)
5113
5114 if port:
5115 cmd.append("--port=%s" % port)
5116
5117 if opts.ipv6:
5118 cmd.append("--ipv6")
5119 else:
5120 cmd.append("--ipv4")
5121
5122 if opts.compress:
5123 cmd.append("--compress=%s" % opts.compress)
5124
5125 if opts.magic:
5126 cmd.append("--magic=%s" % opts.magic)
5127
5128 if exp_size is not None:
5129 cmd.append("--expected-size=%s" % exp_size)
5130
5131 if cmd_prefix:
5132 cmd.append("--cmd-prefix=%s" % cmd_prefix)
5133
5134 if cmd_suffix:
5135 cmd.append("--cmd-suffix=%s" % cmd_suffix)
5136
5137 if mode == constants.IEM_EXPORT:
5138
5139 cmd.append("--connect-retries=%s" % constants.RIE_CONNECT_RETRIES)
5140 cmd.append("--connect-timeout=%s" % constants.RIE_CONNECT_ATTEMPT_TIMEOUT)
5141 elif opts.connect_timeout is not None:
5142 assert mode == constants.IEM_IMPORT
5143
5144 cmd.append("--connect-timeout=%s" % opts.connect_timeout)
5145
5146 logfile = _InstanceLogName(prefix, instance.os, instance.name, component)
5147
5148
5149
5150 utils.StartDaemon(cmd, env=cmd_env, pidfile=pid_file,
5151 output=logfile)
5152
5153
5154 return os.path.basename(status_dir)
5155
5156 except Exception:
5157 shutil.rmtree(status_dir, ignore_errors=True)
5158 raise
5159
5161 def GetImportExportStatus(names):
5162   """Returns import/export daemon status.
5163
5164 @type names: sequence
5165 @param names: List of names
5166 @rtype: List of dicts
5167   @return: a list with the state of each named import/export; an entry
5168       is C{None} if its status file couldn't be read
5169
5170 """
5171 result = []
5172
5173 for name in names:
5174 status_file = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name,
5175 _IES_STATUS_FILE)
5176
5177 try:
5178 data = utils.ReadFile(status_file)
5179 except EnvironmentError, err:
5180 if err.errno != errno.ENOENT:
5181 raise
5182 data = None
5183
5184 if not data:
5185 result.append(None)
5186 continue
5187
5188 result.append(serializer.LoadJson(data))
5189
5190 return result
5191
5206
5208 def CleanupImportExport(name):
5209   """Cleanup after an import or export.
5210
5211 If the import/export daemon is still running it's killed. Afterwards the
5212 whole status directory is removed.
5213
5214 """
5215 logging.info("Finalizing import/export %s", name)
5216
5217 status_dir = utils.PathJoin(pathutils.IMPORT_EXPORT_DIR, name)
5218
5219 pid = utils.ReadLockedPidFile(utils.PathJoin(status_dir, _IES_PID_FILE))
5220
5221 if pid:
5222 logging.info("Import/export %s is still running with PID %s",
5223 name, pid)
5224 utils.KillProcess(pid, waitpid=False)
5225
5226 shutil.rmtree(status_dir, ignore_errors=True)
5227
5229 def _FindDisks(disks):
5230   """Finds attached L{BlockDev}s for the given disks.
5231
5232 @type disks: list of L{objects.Disk}
5233 @param disks: the disk objects we need to find
5234
5235   @return: list of L{BlockDev} objects; an L{RPCFail} is raised if a
5236       given disk was not found or was not attached
5238 """
5239 bdevs = []
5240
5241 for disk in disks:
5242 rd = _RecursiveFindBD(disk)
5243 if rd is None:
5244 _Fail("Can't find device %s", disk)
5245 bdevs.append(rd)
5246 return bdevs
5247
5249 def DrbdDisconnectNet(disks):
5250   """Disconnects the network on a list of drbd devices.
5251
5252 """
5253 bdevs = _FindDisks(disks)
5254
5255
5256 for rd in bdevs:
5257 try:
5258 rd.DisconnectNet()
5259 except errors.BlockDeviceError, err:
5260 _Fail("Can't change network configuration to standalone mode: %s",
5261 err, exc=True)
5262
5264 def DrbdAttachNet(disks, multimaster):
5265   """Attaches the network on a list of drbd devices.
5266
5267 """
5268 bdevs = _FindDisks(disks)
5269
5270
5271
5272 for rd in bdevs:
5273 try:
5274 rd.AttachNet(multimaster)
5275 except errors.BlockDeviceError, err:
5276 _Fail("Can't change network configuration: %s", err)
5277
5278
5279
5280
5281
5282
5283
5284 def _Attach():
5285 all_connected = True
5286
5287 for rd in bdevs:
5288 stats = rd.GetProcStatus()
5289
5290 if multimaster:
5291
5292
5293
5294
5295
5296
5297 all_connected = (all_connected and
5298 stats.is_connected and
5299 stats.is_disk_uptodate and
5300 stats.peer_disk_uptodate)
5301 else:
5302 all_connected = (all_connected and
5303 (stats.is_connected or stats.is_in_resync))
5304
5305 if stats.is_standalone:
5306
5307
5308
5309 try:
5310 rd.AttachNet(multimaster)
5311 except errors.BlockDeviceError, err:
5312 _Fail("Can't change network configuration: %s", err)
5313
5314 if not all_connected:
5315 raise utils.RetryAgain()
5316
5317 try:
5318
5319 utils.Retry(_Attach, (0.1, 1.5, 5.0), 2 * 60)
5320 except utils.RetryTimeout:
5321 _Fail("Timeout in disk reconnecting")
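
# Note on the retry above (a sketch of the semantics as used here): the
# (0.1, 1.5, 5.0) tuple makes utils.Retry start with a 0.1s delay that
# grows by a factor of 1.5 up to a 5.0s cap, while 2 * 60 bounds the
# total wait at two minutes before utils.RetryTimeout is raised.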
5322
5324 def DrbdWaitSync(disks):
5325   """Wait until DRBDs have synchronized.
5326
5327 """
5328 def _helper(rd):
5329 stats = rd.GetProcStatus()
5330 if not (stats.is_connected or stats.is_in_resync):
5331 raise utils.RetryAgain()
5332 return stats
5333
5334 bdevs = _FindDisks(disks)
5335
5336 min_resync = 100
5337 alldone = True
5338 for rd in bdevs:
5339 try:
5340
5341 stats = utils.Retry(_helper, 1, 15, args=[rd])
5342 except utils.RetryTimeout:
5343 stats = rd.GetProcStatus()
5344
5345 if not (stats.is_connected or stats.is_in_resync):
5346 _Fail("DRBD device %s is not in sync: stats=%s", rd, stats)
5347 alldone = alldone and (not stats.is_in_resync)
5348 if stats.sync_percent is not None:
5349 min_resync = min(min_resync, stats.sync_percent)
5350
5351 return (alldone, min_resync)
5352
5354 def GetDrbdNeedsActivation(disks):
5355   """Checks which of the passed disks needs activation and returns their UUIDs.
5356
5357 """
5358 faulty_disks = []
5359
5360 for disk in disks:
5361 rd = _RecursiveFindBD(disk)
5362 if rd is None:
5363 faulty_disks.append(disk)
5364 continue
5365
5366 stats = rd.GetProcStatus()
5367 if stats.is_standalone or stats.is_diskless:
5368 faulty_disks.append(disk)
5369
5370 return [disk.uuid for disk in faulty_disks]
5371
5381
5383 def PowercycleNode(hypervisor_type, hvparams):
5384   """Hard-powercycle the node.
5385
5386 Because we need to return first, and schedule the powercycle in the
5387 background, we won't be able to report failures nicely.
5388
5389 """
5390 hyper = hypervisor.GetHypervisor(hypervisor_type)
5391 try:
5392 pid = os.fork()
5393 except OSError:
5394
5395 pid = 0
5396 if pid > 0:
5397 return "Reboot scheduled in 5 seconds"
5398
5399 try:
5400 utils.Mlockall()
5401 except Exception:
5402 pass
5403 time.sleep(5)
5404 hyper.PowercycleNode(hvparams=hvparams)
5405
5407 def _VerifyRestrictedCmdName(cmd):
5408   """Verifies a restricted command name.
5409
5410 @type cmd: string
5411 @param cmd: Command name
5412 @rtype: tuple; (boolean, string or None)
5413 @return: The tuple's first element is the status; if C{False}, the second
5414 element is an error message string, otherwise it's C{None}
5415
5416 """
5417 if not cmd.strip():
5418 return (False, "Missing command name")
5419
5420 if os.path.basename(cmd) != cmd:
5421 return (False, "Invalid command name")
5422
5423 if not constants.EXT_PLUGIN_MASK.match(cmd):
5424 return (False, "Command name contains forbidden characters")
5425
5426 return (True, None)
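
# Example (illustrative): "reboot-helper" is accepted by the checks
# above, whereas an empty string or "../reboot-helper" (a path rather
# than a bare command name) is rejected before any filesystem lookup.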
5427
5429 def _CommonRestrictedCmdCheck(path, owner):
5430   """Common checks for restricted command file system directories and files.
5431
5432 @type path: string
5433 @param path: Path to check
5434 @param owner: C{None} or tuple containing UID and GID
5435 @rtype: tuple; (boolean, string or C{os.stat} result)
5436 @return: The tuple's first element is the status; if C{False}, the second
5437 element is an error message string, otherwise it's the result of C{os.stat}
5438
5439 """
5440 if owner is None:
5441
5442 owner = (0, 0)
5443
5444 try:
5445 st = os.stat(path)
5446 except EnvironmentError, err:
5447 return (False, "Can't stat(2) '%s': %s" % (path, err))
5448
5449 if stat.S_IMODE(st.st_mode) & (~_RCMD_MAX_MODE):
5450 return (False, "Permissions on '%s' are too permissive" % path)
5451
5452 if (st.st_uid, st.st_gid) != owner:
5453 (owner_uid, owner_gid) = owner
5454 return (False, "'%s' is not owned by %s:%s" % (path, owner_uid, owner_gid))
5455
5456 return (True, st)
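
# Example (illustrative): _RCMD_MAX_MODE is mode 0755 (rwxr-xr-x), so a
# file with mode 0755 passes the permission check above while 0775 does
# not, since the group-write bit falls outside the mask:
#
#   >>> 0775 & ~0755
#   16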
5457
5459 def _VerifyRestrictedCmdDirectory(path, _owner=None):
5460   """Verifies restricted command directory.
5461
5462 @type path: string
5463 @param path: Path to check
5464 @rtype: tuple; (boolean, string or None)
5465 @return: The tuple's first element is the status; if C{False}, the second
5466 element is an error message string, otherwise it's C{None}
5467
5468 """
5469 (status, value) = _CommonRestrictedCmdCheck(path, _owner)
5470
5471 if not status:
5472 return (False, value)
5473
5474 if not stat.S_ISDIR(value.st_mode):
5475 return (False, "Path '%s' is not a directory" % path)
5476
5477 return (True, None)
5478
5480 def _VerifyRestrictedCmd(path, cmd, _owner=None):
5481   """Verifies a whole restricted command and returns its executable filename.
5482
5483 @type path: string
5484 @param path: Directory containing restricted commands
5485 @type cmd: string
5486 @param cmd: Command name
5487 @rtype: tuple; (boolean, string)
5488 @return: The tuple's first element is the status; if C{False}, the second
5489 element is an error message string, otherwise the second element is the
5490 absolute path to the executable
5491
5492 """
5493 executable = utils.PathJoin(path, cmd)
5494
5495 (status, msg) = _CommonRestrictedCmdCheck(executable, _owner)
5496
5497 if not status:
5498 return (False, msg)
5499
5500 if not utils.IsExecutable(executable):
5501 return (False, "access(2) thinks '%s' can't be executed" % executable)
5502
5503 return (True, executable)
5504
5505
5506 def _PrepareRestrictedCmd(path, cmd,
5507                           _verify_dir=_VerifyRestrictedCmdDirectory,
5508                           _verify_name=_VerifyRestrictedCmdName,
5509                           _verify_cmd=_VerifyRestrictedCmd):
5510   """Performs a number of tests on a restricted command.
5511
5512 @type path: string
5513 @param path: Directory containing restricted commands
5514 @type cmd: string
5515 @param cmd: Command name
5516 @return: Same as L{_VerifyRestrictedCmd}
5517
5518 """
5519
5520 (status, msg) = _verify_dir(path)
5521 if status:
5522
5523 (status, msg) = _verify_name(cmd)
5524
5525 if not status:
5526 return (False, msg)
5527
5528
5529 return _verify_cmd(path, cmd)
5530
5531
5532 def RunRestrictedCmd(cmd,
5533                      _lock_timeout=_RCMD_LOCK_TIMEOUT,
5534                      _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE,
5535                      _path=pathutils.RESTRICTED_COMMANDS_DIR,
5536                      _sleep_fn=time.sleep,
5537                      _prepare_fn=_PrepareRestrictedCmd,
5538                      _runcmd_fn=utils.RunCmd,
5539                      _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
5540   """Executes a restricted command after performing strict tests.
5541
5542 @type cmd: string
5543 @param cmd: Command name
5544 @rtype: string
5545 @return: Command output
5546 @raise RPCFail: In case of an error
5547
5548 """
5549 logging.info("Preparing to run restricted command '%s'", cmd)
5550
5551 if not _enabled:
5552 _Fail("Restricted commands disabled at configure time")
5553
5554 lock = None
5555 try:
5556 cmdresult = None
5557 try:
5558 lock = utils.FileLock.Open(_lock_file)
5559 lock.Exclusive(blocking=True, timeout=_lock_timeout)
5560
5561 (status, value) = _prepare_fn(_path, cmd)
5562
5563 if status:
5564 cmdresult = _runcmd_fn([value], env={}, reset_env=True,
5565 postfork_fn=lambda _: lock.Unlock())
5566 else:
5567 logging.error(value)
5568 except Exception:
5569
5570 logging.exception("Caught exception")
5571
5572 if cmdresult is None:
5573 logging.info("Sleeping for %0.1f seconds before returning",
5574 _RCMD_INVALID_DELAY)
5575 _sleep_fn(_RCMD_INVALID_DELAY)
5576
5577
5578 _Fail("Executing command '%s' failed" % cmd)
5579 elif cmdresult.failed or cmdresult.fail_reason:
5580 _Fail("Restricted command '%s' failed: %s; output: %s",
5581 cmd, cmdresult.fail_reason, cmdresult.output)
5582 else:
5583 return cmdresult.output
5584 finally:
5585 if lock is not None:
5586
5587 lock.Close()
5588 lock = None
5589
5591 def SetWatcherPause(until, _filename=pathutils.WATCHER_PAUSE_FILE):
5592   """Creates or removes the watcher pause file.
5593
5594 @type until: None or number
5595 @param until: Unix timestamp saying until when the watcher shouldn't run
5596
5597 """
5598 if until is None:
5599 logging.info("Received request to no longer pause watcher")
5600 utils.RemoveFile(_filename)
5601 else:
5602 logging.info("Received request to pause watcher until %s", until)
5603
5604 if not ht.TNumber(until):
5605 _Fail("Duration must be numeric")
5606
5607 utils.WriteFile(_filename, data="%d\n" % (until, ), mode=0644)
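
# Example (illustrative): a caller pausing the watcher for 30 minutes
# would use something like
#
#   SetWatcherPause(time.time() + 1800)
#
# which writes the decimal timestamp into the pause file;
# SetWatcherPause(None) removes the file again.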
5608
5635
5637 def GetFileInfo(file_path):
5638   """Checks if a file exists and returns information related to it.
5639
5640 Currently returned information:
5641 - file size: int, size in bytes
5642
5643 @type file_path: string
5644 @param file_path: Name of file to examine.
5645
5646 @rtype: tuple of bool, dict
5647 @return: Whether the file exists, and a dictionary of information about the
5648 file gathered by os.stat.
5649
5650 """
5651 try:
5652 stat_info = os.stat(file_path)
5653 values_dict = {
5654 constants.STAT_SIZE: stat_info.st_size,
5655 }
5656 return True, values_dict
5657   except EnvironmentError:  # os.stat raises OSError, which IOError misses
5658 return False, {}
5659
5661 class HooksRunner(object):
5662   """Hook runner.
5663
5664 This class is instantiated on the node side (ganeti-noded) and not
5665 on the master side.
5666
5667 """
5668   def __init__(self, hooks_base_dir=None):
5669 """Constructor for hooks runner.
5670
5671 @type hooks_base_dir: str or None
5672 @param hooks_base_dir: if not None, this overrides the
5673 L{pathutils.HOOKS_BASE_DIR} (useful for unittests)
5674
5675 """
5676 if hooks_base_dir is None:
5677 hooks_base_dir = pathutils.HOOKS_BASE_DIR
5678
5679
5680 self._BASE_DIR = hooks_base_dir
5681
5682   def RunLocalHooks(self, node_list, hpath, phase, env):
5683     """Check that the hooks will be run only locally and then run them.
5684
5685 """
5686 assert len(node_list) == 1
5687 node = node_list[0]
5688 _, myself = ssconf.GetMasterAndMyself()
5689 assert node == myself
5690
5691 results = self.RunHooks(hpath, phase, env)
5692
5693
5694 return {node: (None, False, results)}
5695
5696   def RunHooks(self, hpath, phase, env):
5697 """Run the scripts in the hooks directory.
5698
5699 @type hpath: str
5700 @param hpath: the path to the hooks directory which
5701 holds the scripts
5702 @type phase: str
5703 @param phase: either L{constants.HOOKS_PHASE_PRE} or
5704 L{constants.HOOKS_PHASE_POST}
5705 @type env: dict
5706 @param env: dictionary with the environment for the hook
5707 @rtype: list
5708 @return: list of 3-element tuples:
5709 - script path
5710 - script result, either L{constants.HKR_SUCCESS} or
5711 L{constants.HKR_FAIL}
5712 - output of the script
5713
5714 @raise errors.ProgrammerError: for invalid input
5715 parameters
5716
5717 """
5718 if phase == constants.HOOKS_PHASE_PRE:
5719 suffix = "pre"
5720 elif phase == constants.HOOKS_PHASE_POST:
5721 suffix = "post"
5722 else:
5723 _Fail("Unknown hooks phase '%s'", phase)
5724
5725 subdir = "%s-%s.d" % (hpath, suffix)
5726 dir_name = utils.PathJoin(self._BASE_DIR, subdir)
5727
5728 results = []
5729
5730 if not os.path.isdir(dir_name):
5731
5732
5733 return results
5734
5735 runparts_results = utils.RunParts(dir_name, env=env, reset_env=True)
5736
5737 for (relname, relstatus, runresult) in runparts_results:
5738 if relstatus == constants.RUNPARTS_SKIP:
5739 rrval = constants.HKR_SKIP
5740 output = ""
5741 elif relstatus == constants.RUNPARTS_ERR:
5742 rrval = constants.HKR_FAIL
5743 output = "Hook script execution error: %s" % runresult
5744 elif relstatus == constants.RUNPARTS_RUN:
5745 if runresult.failed:
5746 rrval = constants.HKR_FAIL
5747 else:
5748 rrval = constants.HKR_SUCCESS
5749 output = utils.SafeEncode(runresult.output.strip())
5750 results.append(("%s/%s" % (subdir, relname), rrval, output))
5751
5752 return results
5753
5755 class IAllocatorRunner(object):
5756   """IAllocator runner.
5757
5758 This class is instantiated on the node side (ganeti-noded) and not on
5759 the master side.
5760
5761 """
5762 @staticmethod
5763   def Run(name, idata, ial_params):
5764 """Run an iallocator script.
5765
5766 @type name: str
5767 @param name: the iallocator script name
5768 @type idata: str
5769 @param idata: the allocator input data
5770 @type ial_params: list
5771 @param ial_params: the iallocator parameters
5772
5773 @rtype: tuple
5774 @return: two element tuple of:
5775 - status
5776 - either error message or stdout of allocator (for success)
5777
5778 """
5779 alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
5780 os.path.isfile)
5781 if alloc_script is None:
5782 _Fail("iallocator module '%s' not found in the search path", name)
5783
5784 fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
5785 try:
5786 os.write(fd, idata)
5787 os.close(fd)
5788 result = utils.RunCmd([alloc_script, fin_name] + ial_params)
5789 if result.failed:
5790 _Fail("iallocator module '%s' failed: %s, output '%s'",
5791 name, result.fail_reason, result.output)
5792 finally:
5793 os.unlink(fin_name)
5794
5795 return result.stdout
5796
5798 class DevCacheManager(object):
5799   """Simple class for managing a cache of block device information.
5800
5801 """
5802 _DEV_PREFIX = "/dev/"
5803 _ROOT_DIR = pathutils.BDEV_CACHE_DIR
5804
5805 @classmethod
5806   def _ConvertPath(cls, dev_path):
5807     """Converts a /dev/name path to the cache file name.
5808
5809 This replaces slashes with underscores and strips the /dev
5810 prefix. It then returns the full path to the cache file.
5811
5812 @type dev_path: str
5813 @param dev_path: the C{/dev/} path name
5814 @rtype: str
5815 @return: the converted path name
5816
5817 """
5818 if dev_path.startswith(cls._DEV_PREFIX):
5819 dev_path = dev_path[len(cls._DEV_PREFIX):]
5820 dev_path = dev_path.replace("/", "_")
5821 fpath = utils.PathJoin(cls._ROOT_DIR, "bdev_%s" % dev_path)
5822 return fpath
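
# Example (illustrative; VG/LV names hypothetical): a dev_path of
# "/dev/xenvg/web1-disk0" becomes the cache file "bdev_xenvg_web1-disk0"
# under pathutils.BDEV_CACHE_DIR: the "/dev/" prefix is stripped and the
# remaining slashes are replaced by underscores.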
5823
5824 @classmethod
5825   def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
5826 """Updates the cache information for a given device.
5827
5828 @type dev_path: str
5829 @param dev_path: the pathname of the device
5830 @type owner: str
5831 @param owner: the owner (instance name) of the device
5832 @type on_primary: bool
5833     @param on_primary: whether this is the primary
5834         node or not
5835 @type iv_name: str
5836 @param iv_name: the instance-visible name of the
5837 device, as in objects.Disk.iv_name
5838
5839 @rtype: None
5840
5841 """
5842 if dev_path is None:
5843 logging.error("DevCacheManager.UpdateCache got a None dev_path")
5844 return
5845 fpath = cls._ConvertPath(dev_path)
5846 if on_primary:
5847 state = "primary"
5848 else:
5849 state = "secondary"
5850 if iv_name is None:
5851 iv_name = "not_visible"
5852 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
5853 try:
5854 utils.WriteFile(fpath, data=fdata)
5855 except EnvironmentError, err:
5856 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
5857
5858 @classmethod
5859   def RemoveCache(cls, dev_path):
5860     """Remove data for a dev_path.
5861
5862 This is just a wrapper over L{utils.io.RemoveFile} with a converted
5863 path name and logging.
5864
5865 @type dev_path: str
5866 @param dev_path: the pathname of the device
5867
5868 @rtype: None
5869
5870 """
5871 if dev_path is None:
5872 logging.error("DevCacheManager.RemoveCache got a None dev_path")
5873 return
5874 fpath = cls._ConvertPath(dev_path)
5875 try:
5876 utils.RemoveFile(fpath)
5877 except EnvironmentError, err:
5878 logging.exception("Can't update bdev cache for %s: %s", dev_path, err)
5879