31 """Logical units dealing with backup operations."""
32
33 import OpenSSL
34 import logging
35
36 from ganeti import compat
37 from ganeti import constants
38 from ganeti import errors
39 from ganeti import locking
40 from ganeti import masterd
41 from ganeti import utils
42 from ganeti.utils import retry
43
44 from ganeti.cmdlib.base import NoHooksLU, LogicalUnit
45 from ganeti.cmdlib.common import CheckNodeOnline, ExpandNodeUuidAndName, \
46 IsInstanceRunning, DetermineImageSize
47 from ganeti.cmdlib.instance_storage import StartInstanceDisks, \
48 ShutdownInstanceDisks, TemporaryDisk, ImageDisks
49 from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \
50 BuildInstanceHookEnvByObject, CheckNodeNotDrained, RemoveInstance, \
51 CheckCompressionTool


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    self._cds = GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" %
                  self.cfg.GetNodeName(self.instance.primary_node))
      result = self.rpc.call_x509_cert_create(self.instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" %
                   self.cfg.GetNodeName(result.node))

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

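      # The handshake proves knowledge of the cluster domain secret, while
      # the key name is tied to it with a salted HMAC, so the destination
      # cluster can verify the name before accepting the export.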
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

112 """Check the arguments.
113
114 """
115 self.x509_key_name = self.op.x509_key_name
116 self.dest_x509_ca_pem = self.op.destination_x509_ca
117
118 if self.op.mode == constants.EXPORT_MODE_REMOTE:
119 if not self.x509_key_name:
120 raise errors.OpPrereqError("Missing X509 key name for encryption",
121 errors.ECODE_INVAL)
122
123 if not self.dest_x509_ca_pem:
124 raise errors.OpPrereqError("Missing destination X509 CA",
125 errors.ECODE_INVAL)
126
127 if self.op.zero_free_space and not self.op.compress:
128 raise errors.OpPrereqError("Zeroing free space does not make sense "
129 "unless compression is used")
    if self.op.zero_free_space and not self.op.shutdown:
      raise errors.OpPrereqError("Zeroing free space cannot be done unless "
                                 "the instance is shut down",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

165 """Build hooks env.
166
167 This will run on the master, primary node and target node.
168
169 """
170 env = {
171 "EXPORT_MODE": self.op.mode,
172 "EXPORT_NODE": self.op.target_node,
173 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
174 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
175
176 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
177 }
178
179 env.update(BuildInstanceHookEnvByObject(
180 self, self.instance,
181 secondary_nodes=self.secondary_nodes, disks=self.inst_disks))
182
183 return env
184
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node_uuid)

    return (nl, nl)

197 """Check prerequisites.
198
199 This checks that the instance and node names are valid.
200
201 """
202 self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
203 assert self.instance is not None, \
204 "Cannot retrieve locked instance %s" % self.op.instance_name
205 CheckNodeOnline(self, self.instance.primary_node)
206
207 if (self.op.remove_instance and
208 self.instance.admin_state == constants.ADMINST_UP and
209 not self.op.shutdown):
210 raise errors.OpPrereqError("Can not remove instance without shutting it"
211 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
      assert self.dst_node is not None

      CheckNodeOnline(self, self.dst_node.uuid)
      CheckNodeNotDrained(self, self.dst_node.uuid)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node),
                                    self.op.instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)
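
      # Note: the triple matches what LUBackupPrepare.Exec returned as
      # "x509_key_name", i.e. (name, Sha1Hmac(cds, name, salt=salt), salt),
      # so a valid HMAC proves the name was issued under the same cluster
      # domain secret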

      # Load and verify the destination CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error as err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError as err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # Check prerequisites for zeroing
    if self.op.zero_free_space:
      # Check that user shutdown detection has been enabled
      hvparams = self.cfg.GetClusterInfo().FillHV(self.instance)
      if self.instance.hypervisor == constants.HT_KVM and \
          not hvparams.get(constants.HV_KVM_USER_SHUTDOWN, False):
        raise errors.OpPrereqError("Instance shutdown detection must be "
                                   "enabled for zeroing to work",
                                   errors.ECODE_INVAL)

      # Check that the instance is set to boot from the disk
      if constants.HV_BOOT_ORDER in hvparams and \
          hvparams[constants.HV_BOOT_ORDER] != constants.HT_BO_DISK:
        raise errors.OpPrereqError("Booting from disk must be set for zeroing "
                                   "to work", errors.ECODE_INVAL)

      # Check that a zeroing image is configured
      if not self.cfg.GetZeroingImage():
        raise errors.OpPrereqError("A zeroing image must be set for zeroing to"
                                   " work", errors.ECODE_INVAL)

      if self.op.zeroing_timeout_fixed is None:
        self.op.zeroing_timeout_fixed = constants.HELPER_VM_STARTUP

      if self.op.zeroing_timeout_per_mib is None:
        self.op.zeroing_timeout_per_mib = constants.ZEROING_TIMEOUT_PER_MIB

    else:
      if (self.op.zeroing_timeout_fixed is not None or
          self.op.zeroing_timeout_per_mib is not None):
        raise errors.OpPrereqError("Zeroing timeout options can only be used"
                                   " with the --zero-free-space option",
                                   errors.ECODE_INVAL)

    if self.op.long_sleep and not self.op.shutdown:
      raise errors.OpPrereqError("The long sleep option only makes sense when"
                                 " the instance can be shut down.",
                                 errors.ECODE_INVAL)

    self.secondary_nodes = \
      self.cfg.GetInstanceSecondaryNodes(self.instance.uuid)
    self.inst_disks = self.cfg.GetInstanceDisks(self.instance.uuid)

    # Check that the requested compression tool is available
    CheckCompressionTool(self, self.op.compress)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    node_uuids = self.cfg.GetNodeList()
    node_uuids.remove(self.dst_node.uuid)

    # On one-node clusters nodelist will be empty after the removal;
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if node_uuids:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(node_uuids)
      for node_uuid in exportlist:
        if exportlist[node_uuid].fail_msg:
          continue
        if iname in exportlist[node_uuid].payload:
          msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname,
                            self.cfg.GetNodeName(node_uuid), msg)

  def _InstanceDiskSizeSum(self):
    """Calculates the size of all the disks of the instance used in this LU.

    @rtype: int
    @return: Size of the disks in MiB

    """
    inst_disks = self.cfg.GetInstanceDisks(self.instance.uuid)
    return sum([d.size for d in inst_disks])

  def ZeroFreeSpace(self, feedback_fn):
    """Zeroes the free space on a shutdown instance.

    @type feedback_fn: function
    @param feedback_fn: Function used to log progress

    """
    assert self.op.zeroing_timeout_fixed is not None
    assert self.op.zeroing_timeout_per_mib is not None

    zeroing_image = self.cfg.GetZeroingImage()
    src_node_uuid = self.instance.primary_node

    try:
      disk_size = DetermineImageSize(self, zeroing_image, src_node_uuid)
    except errors.OpExecError as err:
      raise errors.OpExecError("Could not create temporary disk for zeroing:"
                               " %s" % err)

    # Calculate the sum of all the disks' sizes
    instance_disks_size_sum = self._InstanceDiskSizeSum()

    with TemporaryDisk(self,
                       self.instance,
                       [(constants.DT_PLAIN, constants.DISK_RDWR, disk_size)],
                       feedback_fn):
      feedback_fn("Activating instance disks")
      StartInstanceDisks(self, self.instance, False)

      feedback_fn("Imaging disk with zeroing image")
      ImageDisks(self, self.instance, zeroing_image)

      feedback_fn("Starting instance with zeroing image")
      result = self.rpc.call_instance_start(src_node_uuid,
                                            (self.instance, [], []),
                                            False, self.op.reason)
      result.Raise("Could not start instance %s when using the zeroing image "
                   "%s" % (self.instance.name, zeroing_image))

      # First wait for the instance to start up
      running_check = lambda: IsInstanceRunning(self, self.instance,
                                                prereq=False)
      instance_up = retry.SimpleRetry(True, running_check, 5.0,
                                      self.op.shutdown_timeout)
      if not instance_up:
        raise errors.OpExecError("Could not boot instance when using the "
                                 "zeroing image %s" % zeroing_image)

      feedback_fn("Instance is up, now awaiting shutdown")

      # Then wait for the zeroing to finish, detected via the instance
      # shutting itself down
      timeout = self.op.zeroing_timeout_fixed + \
        self.op.zeroing_timeout_per_mib * instance_disks_size_sum
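      # For illustration only (assumed values, not the real constants): with
      # a fixed timeout of 300s and 0.1s per MiB, an instance with 10240 MiB
      # of disk would get 300 + 0.1 * 10240 = 1324 seconds to finish zeroing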
      instance_up = retry.SimpleRetry(False, running_check, 20.0, timeout)
      if instance_up:
        self.LogWarning("Zeroing not completed prior to timeout; instance"
                        " will be shut down forcibly")

      feedback_fn("Zeroing completed!")

  def StartInstance(self, feedback_fn, src_node_uuid):
    """Send the node instructions to start the instance.

    @raise errors.OpExecError: If the instance didn't start up.

    """
    assert self.instance.disks_active
    feedback_fn("Starting instance %s" % self.instance.name)
    result = self.rpc.call_instance_start(src_node_uuid,
                                          (self.instance, None, None),
                                          False, self.op.reason)
    msg = result.fail_msg
    if msg:
      feedback_fn("Failed to start instance: %s" % msg)
      ShutdownInstanceDisks(self, self.instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)

  def TrySnapshot(self):
    """Returns true if there is a reason to prefer a snapshot."""
    return (not self.op.remove_instance and
            self.instance.admin_state == constants.ADMINST_UP)

  def DoReboot(self):
    """Returns true iff the instance needs to be started after transfer."""
    return (self.op.shutdown and
            self.instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance)
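
  # Note on the instance lifecycle during export: when snapshots are
  # available, the instance is restarted before the transfer, so downtime
  # covers only the snapshot itself; otherwise it is restarted after the
  # transfer has finished (see the DoReboot() calls in Exec below).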

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    src_node_uuid = self.instance.primary_node

    if self.op.shutdown:
      # Shut down the instance, but not the disks
      feedback_fn("Shutting down instance %s" % self.instance.name)
      result = self.rpc.call_instance_shutdown(src_node_uuid, self.instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)

      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (self.instance.name,
                                 self.cfg.GetNodeName(src_node_uuid)))

    if self.op.zero_free_space:
      self.ZeroFreeSpace(feedback_fn)

    activate_disks = not self.instance.disks_active

    if activate_disks:
      # Activate the instance disks if we are exporting a stopped instance
      feedback_fn("Activating disks for %s" % self.instance.name)
      StartInstanceDisks(self, self.instance, None)
      self.instance = self.cfg.GetInstanceInfo(self.instance.uuid)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     self.instance)

      snapshots_available = False
      if self.TrySnapshot():
        snapshots_available = helper.CreateSnapshots()
        if not snapshots_available:
          if not self.op.shutdown:
            raise errors.OpExecError(
              "Not all disks could be snapshotted, and you requested a live "
              "export; aborting"
            )
          if not self.op.long_sleep:
            raise errors.OpExecError(
              "Not all disks could be snapshotted, and you did not allow the "
              "instance to remain offline for a longer time through the "
              "--long-sleep option; aborting"
            )
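
      # Reaching this point without snapshots means the instance is (and may
      # remain) shut down, so the disks are transferred directly rather than
      # from a snapshot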

      try:
        if self.DoReboot() and snapshots_available:
          self.StartInstance(feedback_fn, src_node_uuid)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node,
                                                    self.op.compress)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     self.op.compress,
                                                     timeouts)

        if self.DoReboot() and not snapshots_available:
          self.StartInstance(feedback_fn, src_node_uuid)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(self.instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
        "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % self.instance.name)
        ShutdownInstanceDisks(self, self.instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful; we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % self.instance.name)
      RemoveInstance(self, feedback_fn, self.instance,
                     self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
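
# Typical client-side use of LUBackupExport (illustrative, assuming the
# standard Ganeti CLI): an intra-cluster export of an instance to a given
# node is submitted as
#   gnt-backup export -n target-node instance1.example.com
# which runs this logical unit with mode EXPORT_MODE_LOCAL.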


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Exports can live on any node, so all nodes have to be locked; the
    # instance itself is not locked, as it may already be gone
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    (_, inst_name) = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found, we will try with the name that was
    # passed in; this will only work if it was an FQDN
    fqdn_warn = False
    if not inst_name:
      fqdn_warn = True
      inst_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node_uuid in exportlist:
      msg = exportlist[node_uuid].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        self.cfg.GetNodeName(node_uuid), msg)
        continue
      if inst_name in exportlist[node_uuid].payload:
        found = True
        result = self.rpc.call_export_remove(node_uuid, inst_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", inst_name,
                        self.cfg.GetNodeName(node_uuid), msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")