
Source Code for Module ganeti.cmdlib.backup

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Logical units dealing with backup operations."""

import OpenSSL
import logging

from ganeti import compat
from ganeti import constants
from ganeti import errors
from ganeti import locking
from ganeti import masterd
from ganeti import utils

from ganeti.cmdlib.base import NoHooksLU, LogicalUnit
from ganeti.cmdlib.common import CheckNodeOnline, ExpandNodeUuidAndName
from ganeti.cmdlib.instance_helpervm import RunWithHelperVM
from ganeti.cmdlib.instance_storage import StartInstanceDisks, \
  ShutdownInstanceDisks
from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \
  BuildInstanceHookEnvByObject, CheckNodeNotDrained, RemoveInstance, \
  CheckCompressionTool


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    self._cds = GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" %
                  self.cfg.GetNodeName(self.instance.primary_node))
      result = self.rpc.call_x509_cert_create(self.instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" %
                   self.cfg.GetNodeName(result.node))

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


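# Editorial note (not part of the upstream source): for EXPORT_MODE_REMOTE the
# dictionary returned above is handed back to the caller.  Its "x509_key_name"
# entry is a (name, HMAC, salt) triple computed with the cluster domain secret
# (utils.Sha1Hmac); LUBackupExport.CheckPrereq below accepts a remote export
# only if such a triple verifies against the same secret
# (utils.VerifySha1Hmac), which ties the later export request back to this
# preparation step.
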
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

    if self.op.zero_free_space and not self.op.compress:
      raise errors.OpPrereqError("Zeroing free space does not make sense "
                                 "unless compression is used")

    if self.op.zero_free_space and not self.op.shutdown:
      raise errors.OpPrereqError("Unless the instance is shut down, zeroing "
                                 "cannot be used.")

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # In case we are zeroing, a node lock is required as we will be creating and
    # destroying a disk - allocations should be stopped, but not on the entire
    # cluster
    if self.op.zero_free_space:
      self.recalculate_locks = {locking.LEVEL_NODE: constants.LOCKS_REPLACE}
      self._LockInstancesNodes(primary_only=True)

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (self.op.target_node_uuid, self.op.target_node) = \
        ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid,
                              self.op.target_node)
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(BuildInstanceHookEnvByObject(
      self, self.instance,
      secondary_nodes=self.secondary_nodes, disks=self.inst_disks))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node_uuid)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
      assert self.dst_node is not None

      CheckNodeOnline(self, self.dst_node.uuid)
      CheckNodeNotDrained(self, self.dst_node.uuid)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node),
                                    self.op.instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # Check prerequisites for zeroing
    if self.op.zero_free_space:
      # Check that user shutdown detection has been enabled
      hvparams = self.cfg.GetClusterInfo().FillHV(self.instance)
      if self.instance.hypervisor == constants.HT_KVM and \
         not hvparams.get(constants.HV_KVM_USER_SHUTDOWN, False):
        raise errors.OpPrereqError("Instance shutdown detection must be "
                                   "enabled for zeroing to work",
                                   errors.ECODE_INVAL)

      # Check that the instance is set to boot from the disk
      if constants.HV_BOOT_ORDER in hvparams and \
         hvparams[constants.HV_BOOT_ORDER] != constants.HT_BO_DISK:
        raise errors.OpPrereqError("Booting from disk must be set for zeroing "
                                   "to work", errors.ECODE_INVAL)

      # Check that the zeroing image is set
      if not self.cfg.GetZeroingImage():
        raise errors.OpPrereqError("A zeroing image must be set for zeroing to"
                                   " work", errors.ECODE_INVAL)

      if self.op.zeroing_timeout_fixed is None:
        self.op.zeroing_timeout_fixed = constants.HELPER_VM_STARTUP

      if self.op.zeroing_timeout_per_mib is None:
        self.op.zeroing_timeout_per_mib = constants.ZEROING_TIMEOUT_PER_MIB

    else:
      if (self.op.zeroing_timeout_fixed is not None or
          self.op.zeroing_timeout_per_mib is not None):
        raise errors.OpPrereqError("Zeroing timeout options can only be used"
                                   " with the --zero-free-space option",
                                   errors.ECODE_INVAL)

    if self.op.long_sleep and not self.op.shutdown:
      raise errors.OpPrereqError("The long sleep option only makes sense when"
                                 " the instance can be shut down.",
                                 errors.ECODE_INVAL)

    self.secondary_nodes = \
      self.cfg.GetInstanceSecondaryNodes(self.instance.uuid)
    self.inst_disks = self.cfg.GetInstanceDisks(self.instance.uuid)

    # Check if the compression tool is whitelisted
    CheckCompressionTool(self, self.op.compress)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    node_uuids = self.cfg.GetNodeList()
    node_uuids.remove(self.dst_node.uuid)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if node_uuids:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(node_uuids)
      for node_uuid in exportlist:
        if exportlist[node_uuid].fail_msg:
          continue
        if iname in exportlist[node_uuid].payload:
          msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname,
                            self.cfg.GetNodeName(node_uuid), msg)

  def _InstanceDiskSizeSum(self):
    """Calculates the size of all the disks of the instance used in this LU.

    @rtype: int
    @return: Size of the disks in MiB

    """
    inst_disks = self.cfg.GetInstanceDisks(self.instance.uuid)
    return sum([d.size for d in inst_disks])

  def ZeroFreeSpace(self, feedback_fn):
    """Zeroes the free space on a shutdown instance.

    @type feedback_fn: function
    @param feedback_fn: Function used to log progress

    """
    assert self.op.zeroing_timeout_fixed is not None
    assert self.op.zeroing_timeout_per_mib is not None

    zeroing_image = self.cfg.GetZeroingImage()

    # Calculate the sum prior to adding the temporary disk
    instance_disks_size_sum = self._InstanceDiskSizeSum()
    timeout = self.op.zeroing_timeout_fixed + \
              self.op.zeroing_timeout_per_mib * instance_disks_size_sum

    RunWithHelperVM(self, self.instance, zeroing_image,
                    self.op.shutdown_timeout, timeout,
                    log_prefix="Zeroing free disk space",
                    feedback_fn=feedback_fn)

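  # Editorial note, worked example for the timeout computed above (the numbers
  # are illustrative, not the real constants): with zeroing_timeout_fixed =
  # 600 s and zeroing_timeout_per_mib = 2 s/MiB, an instance whose disks total
  # 10240 MiB would get a helper-VM timeout of 600 + 2 * 10240 = 21080 seconds.
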
  def StartInstance(self, feedback_fn, src_node_uuid):
    """Send the node instructions to start the instance.

    @raise errors.OpExecError: If the instance didn't start up.

    """
    assert self.instance.disks_active
    feedback_fn("Starting instance %s" % self.instance.name)
    result = self.rpc.call_instance_start(src_node_uuid,
                                          (self.instance, None, None),
                                          False, self.op.reason)
    msg = result.fail_msg
    if msg:
      feedback_fn("Failed to start instance: %s" % msg)
      ShutdownInstanceDisks(self, self.instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)

  def TrySnapshot(self):
    """Returns true if there is a reason to prefer a snapshot."""
    return (not self.op.remove_instance and
            self.instance.admin_state == constants.ADMINST_UP)

  def DoReboot(self):
    """Returns true iff the instance needs to be started after transfer."""
    return (self.op.shutdown and
            self.instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance)

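  # Editorial note: Exec below ties the pieces together. It optionally shuts
  # the instance down, optionally zeroes its free space, activates the disks
  # if they were inactive, snapshots the disks when the instance should keep
  # running (TrySnapshot), runs the local or remote transfer, restarts the
  # instance when needed (DoReboot), then deactivates any disks it activated,
  # optionally removes the instance and, for local exports, cleans up stale
  # exports on other nodes.
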
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    src_node_uuid = self.instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % self.instance.name)
      result = self.rpc.call_instance_shutdown(src_node_uuid, self.instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (self.instance.name,
                                 self.cfg.GetNodeName(src_node_uuid)))

    if self.op.zero_free_space:
      self.ZeroFreeSpace(feedback_fn)

    activate_disks = not self.instance.disks_active

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % self.instance.name)
      StartInstanceDisks(self, self.instance, None)
      self.instance = self.cfg.GetInstanceInfo(self.instance.uuid)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     self.instance)

      snapshots_available = False
      if self.TrySnapshot():
        snapshots_available = helper.CreateSnapshots()
        if not snapshots_available:
          if not self.op.shutdown:
            raise errors.OpExecError(
              "Not all disks could be snapshotted, and you requested a live "
              "export; aborting"
            )
          if not self.op.long_sleep:
            raise errors.OpExecError(
              "Not all disks could be snapshotted, and you did not allow the "
              "instance to remain offline for a longer time through the "
              "--long-sleep option; aborting"
            )

      try:
        if self.DoReboot() and snapshots_available:
          self.StartInstance(feedback_fn, src_node_uuid)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node,
                                                    self.op.compress)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     self.op.compress,
                                                     timeouts)

        if self.DoReboot() and not snapshots_available:
          self.StartInstance(feedback_fn, src_node_uuid)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(self.instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % self.instance.name)
        ShutdownInstanceDisks(self, self.instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % self.instance.name)
      RemoveInstance(self, feedback_fn, self.instance,
                     self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    (_, inst_name) = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not inst_name:
      fqdn_warn = True
      inst_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node_uuid in exportlist:
      msg = exportlist[node_uuid].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        self.cfg.GetNodeName(node_uuid), msg)
        continue
      if inst_name in exportlist[node_uuid].payload:
        found = True
        result = self.rpc.call_export_remove(node_uuid, inst_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", inst_name,
                        self.cfg.GetNodeName(node_uuid), msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
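
The logical units above are not invoked directly; they run when the matching opcodes (OpBackupPrepare, OpBackupExport, OpBackupRemove) are submitted to the master daemon, which is what the gnt-backup command-line tool does. As a rough, illustrative sketch only (the instance and node names are made up, and the exact opcode fields and their defaults should be checked against ganeti.opcodes), a local export could be requested from Python along these lines:

# Illustrative sketch, not part of this module: ask the master to run
# LUBackupExport for one instance by submitting its opcode.
from ganeti import cli, constants, opcodes

op = opcodes.OpBackupExport(instance_name="instance1.example.com",
                            target_node="node2.example.com",
                            mode=constants.EXPORT_MODE_LOCAL,
                            shutdown=True)

# SubmitOpCode submits a single-opcode job and waits for it; the value it
# returns is the LU's result, i.e. the (fin_resu, dresults) tuple produced
# at the end of LUBackupExport.Exec above.
result = cli.SubmitOpCode(op)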