31 """Logical units dealing with instance migration an failover."""

import logging
import time

from ganeti import constants
from ganeti import errors
from ganeti import locking
from ganeti.masterd import iallocator
from ganeti import utils
from ganeti.cmdlib.base import LogicalUnit, Tasklet
from ganeti.cmdlib.common import ExpandInstanceUuidAndName, \
  CheckIAllocatorOrNode, ExpandNodeUuidAndName
from ganeti.cmdlib.instance_storage import CheckDiskConsistency, \
  ExpandCheckDisks, ShutdownInstanceDisks, AssembleInstanceDisks
from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \
  CheckTargetNodeIPolicy, ReleaseLocks, CheckNodeNotDrained, \
  CopyLockList, CheckNodeFreeMemory, CheckInstanceBridgesExist

import ganeti.masterd.instance


111 """Failover an instance.
112
113 """
114 HPATH = "instance-failover"
115 HTYPE = constants.HTYPE_INSTANCE
116 REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

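    # The positional flags below follow TLMigrateInstance.__init__:
    # cleanup, failover=True, fallback=False, ignore_consistency,
    # allow_runtime_changes=True, shutdown_timeout, ignore_ipolicy.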
    self._migrater = \
      TLMigrateInstance(self, self.op.instance_uuid, self.op.instance_name,
                        self.op.cleanup, True, False,
                        self.op.ignore_consistency, True,
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node_uuid = instance.primary_node
    target_node_uuid = self._migrater.target_node_uuid
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid),
      "NEW_PRIMARY": self.cfg.GetNodeName(target_node_uuid),
      "FAILOVER_CLEANUP": self.op.cleanup,
      }

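    # With internally mirrored disks (DRBD), the old primary becomes the new
    # secondary after the failover; other disk templates have no secondary.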
    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0])
      env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid)
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(BuildInstanceHookEnvByObject(self, instance))

    return env


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down the instance; a failover, by
  contrast, is done with a shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
205 """Build hooks env.
206
207 This runs on master, primary and secondary nodes of the instance.
208
209 """
210 instance = self._migrater.instance
211 source_node_uuid = instance.primary_node
212 target_node_uuid = self._migrater.target_node_uuid
213 env = BuildInstanceHookEnvByObject(self, instance)
214 env.update({
215 "MIGRATE_LIVE": self._migrater.live,
216 "MIGRATE_CLEANUP": self.op.cleanup,
217 "OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid),
218 "NEW_PRIMARY": self.cfg.GetNodeName(target_node_uuid),
219 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
220 })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0])
      env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid)
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    return env


242 """Tasklet class for instance migration.
243
244 @type live: boolean
245 @ivar live: whether the migration will be done live or non-live;
246 this variable is initalized only after CheckPrereq has run
247 @type cleanup: boolean
248 @ivar cleanup: Wheater we cleanup from a failed migration
249 @type iallocator: string
250 @ivar iallocator: The iallocator used to determine target_node
251 @type target_node_uuid: string
252 @ivar target_node_uuid: If given, the target node UUID to reallocate the
253 instance to
254 @type failover: boolean
255 @ivar failover: Whether operation results in failover or migration
256 @type fallback: boolean
257 @ivar fallback: Whether fallback to failover is allowed if migration not
258 possible
259 @type ignore_consistency: boolean
260 @ivar ignore_consistency: Wheter we should ignore consistency between source
261 and target node
262 @type shutdown_timeout: int
263 @ivar shutdown_timeout: In case of failover timeout of the shutdown
264 @type ignore_ipolicy: bool
265 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
266
267 """
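  # Poll and feedback intervals for the migration status loop, in seconds.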
  _MIGRATION_POLL_INTERVAL = 1
  _MIGRATION_FEEDBACK_INTERVAL = 10

  def __init__(self, lu, instance_uuid, instance_name, cleanup, failover,
               fallback, ignore_consistency, allow_runtime_changes,
               shutdown_timeout, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

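    # Parameters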
    self.instance_uuid = instance_uuid
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

294 """Check prerequisites.
295
296 This checks that the instance is in the cluster.
297
298 """
299 (self.instance_uuid, self.instance_name) = \
300 ExpandInstanceUuidAndName(self.lu.cfg, self.instance_uuid,
301 self.instance_name)
302 self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)
303 assert self.instance is not None
304 cluster = self.cfg.GetClusterInfo()
305
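    # A plain migration of an instance that is not running makes no sense;
    # if fallback is allowed, degrade the request to a failover.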
    if (not self.cleanup and
        not self.instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if self.instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (self.instance.disk_template, text),
                                 errors.ECODE_STATE)

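    # Externally mirrored disks (shared storage) can move to any node, so the
    # destination has to come from an iallocator or an explicit target node.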
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()
      else:
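        # No iallocator was given, so the target node comes directly from
        # the opcode.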
        self.target_node_uuid = self.lu.op.target_node_uuid

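      # Verify that the chosen target node satisfies the instance policy of
      # its node group.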
      nodeinfo = self.cfg.GetNodeInfo(self.target_node_uuid)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                             self.cfg, ignore=self.ignore_ipolicy)

      target_node_uuid = self.target_node_uuid
      if self.target_node_uuid == self.instance.primary_node:
        raise errors.OpPrereqError(
          "Cannot migrate instance %s to its primary (%s)" %
          (self.instance.name,
           self.cfg.GetNodeName(self.instance.primary_node)),
          errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
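        # This LU is the only tasklet, so it is safe to narrow the node locks
        # down to the source and target node only.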
        ReleaseLocks(self.lu, locking.LEVEL_NODE,
                     keep=[self.instance.primary_node, self.target_node_uuid])
        ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

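      # Internally mirrored disks (DRBD) can only be moved to the current
      # secondary node; no other target may be requested.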
      secondary_node_uuids = self.instance.secondary_nodes
      if not secondary_node_uuids:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        self.instance.disk_template)
      self.target_node_uuid = target_node_uuid = secondary_node_uuids[0]
      if self.lu.op.iallocator or \
         (self.lu.op.target_node_uuid and
          self.lu.op.target_node_uuid != target_node_uuid):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (self.instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node_uuid)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                             self.cfg, ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(self.instance)

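    # Check free memory on the target node, unless we are only cleaning up or
    # failing over an instance that is down (it will not be started there).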
    if (not self.cleanup and
        (not self.failover or
         self.instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = CheckNodeFreeMemory(
          self.lu, target_node_uuid,
          "migrating instance %s" % self.instance.name,
          i_be[constants.BE_MINMEM], self.instance.hypervisor,
          self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

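    # Check that the network bridges required by the instance exist on the
    # target node.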
    CheckInstanceBridgesExist(self.lu, self.instance,
                              node_uuid=target_node_uuid)

    if not self.cleanup:
      CheckNodeNotDrained(self.lu, target_node_uuid)
      if not self.failover:
        result = self.rpc.call_instance_migratable(self.instance.primary_node,
                                                   self.instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

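    # Determine whether the migration should be live: an explicit 'live' flag
    # is translated into 'mode', otherwise the hypervisor's default migration
    # mode is used.  A failover is never live.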
    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters is accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      self.live = False

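    # Remember how much memory the instance currently uses, so that
    # _ExecMigration can balloon it down if the target node is short on
    # memory.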
    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(
          self.instance.primary_node, self.instance.name,
          self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor])
      remote_info.Raise("Error checking instance on node %s" %
                        self.cfg.GetNodeName(self.instance.primary_node),
                        prereq=True)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
487 """Poll with custom rpc for disk sync.
488
489 This uses our own step-based rpc call.
490
491 """
492 self.feedback_fn("* wait until resync is done")
493 all_done = False
494 while not all_done:
495 all_done = True
496 result = self.rpc.call_drbd_wait_sync(self.all_node_uuids,
497 (self.instance.disks,
498 self.instance))
499 min_percent = 100
500 for node_uuid, nres in result.items():
501 nres.Raise("Cannot resync disks on node %s" %
502 self.cfg.GetNodeName(node_uuid))
503 node_done, node_percent = nres.payload
504 all_done = all_done and node_done
505 if node_percent is not None:
506 min_percent = min(min_percent, node_percent)
507 if not all_done:
508 if min_percent < 100:
509 self.feedback_fn(" - progress: %.1f%%" % min_percent)
510 time.sleep(2)
525 """Disconnect from the network.
526
527 """
528 self.feedback_fn("* changing into standalone mode")
529 result = self.rpc.call_drbd_disconnect_net(
530 self.all_node_uuids, (self.instance.disks, self.instance))
531 for node_uuid, nres in result.items():
532 nres.Raise("Cannot disconnect disks node %s" %
533 self.cfg.GetNodeName(node_uuid))

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_node_uuids,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node_uuid, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" %
                 self.cfg.GetNodeName(node_uuid))

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    cluster_hvparams = self.cfg.GetClusterInfo().hvparams
    ins_l = self.rpc.call_instance_list(self.all_node_uuids,
                                        [self.instance.hypervisor],
                                        cluster_hvparams)
    for node_uuid, result in ins_l.items():
      result.Raise("Can't contact node %s" % node_uuid)

    runningon_source = self.instance.name in \
                         ins_l[self.source_node_uuid].payload
    runningon_target = self.instance.name in \
                         ins_l[self.target_node_uuid].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
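      # The migration actually succeeded at the hypervisor level; record the
      # new primary node in the configuration.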
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" %
                       self.cfg.GetNodeName(self.target_node_uuid))
      self.instance.primary_node = self.target_node_uuid
      self.cfg.Update(self.instance, self.feedback_fn)
      demoted_node_uuid = self.source_node_uuid
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" %
                       self.cfg.GetNodeName(self.source_node_uuid))
      demoted_node_uuid = self.target_node_uuid

    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node_uuid)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
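        # Errors are deliberately ignored here: the disks are reconfigured
        # and re-synchronized right below anyway.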
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")


  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    abort_result = self.rpc.call_instance_finalize_migration_dst(
                     self.target_node_uuid, self.instance, self.migration_info,
                     False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    self.cfg.GetNodeName(self.target_node_uuid), abort_msg)

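    # Failures are only logged, not raised: the caller still has to revert
    # the disk status even if aborting on one of the nodes failed.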
    abort_result = self.rpc.call_instance_finalize_migration_src(
                     self.source_node_uuid, self.instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    self.cfg.GetNodeName(self.source_node_uuid), abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    hvspecs = [(self.instance.hypervisor,
                self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])]
    nodeinfo = self.rpc.call_node_info(
                 [self.source_node_uuid, self.target_node_uuid], None, hvspecs)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[self.source_node_uuid].payload
    (_, _, (dst_info, )) = nodeinfo[self.target_node_uuid].payload

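    # Warn (but do not abort) if the hypervisor versions on the two nodes
    # differ.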
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(self.instance.disks):
      if not CheckDiskConsistency(self.lu, self.instance, dev,
                                  self.target_node_uuid,
                                  False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

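    # If the instance currently uses more memory than the target node has
    # free, try to balloon it down to the available amount first.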
    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (self.instance.name,
                                  self.cfg.GetNodeName(self.target_node_uuid),
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(self.instance.primary_node,
                                                     self.instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

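    # First, fetch the hypervisor-specific migration information from the
    # source node; it is later handed to the target node.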
    result = self.rpc.call_migration_info(self.source_node_uuid, self.instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (self.cfg.GetNodeName(self.source_node_uuid), msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
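      # Switch the disks into dual-master mode so that both nodes can write
      # to them during the live migration.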
      self._EnsureSecondary(self.target_node_uuid)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    result = self.rpc.call_accept_instance(self.target_node_uuid,
                                           self.instance,
                                           migration_info,
                                           self.nodes_ip[self.target_node_uuid])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (self.instance.name, msg))

    self.feedback_fn("* migrating instance to %s" %
                     self.cfg.GetNodeName(self.target_node_uuid))
    cluster = self.cfg.GetClusterInfo()
    result = self.rpc.call_instance_migrate(
               self.source_node_uuid, cluster.cluster_name, self.instance,
               self.nodes_ip[self.target_node_uuid], self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (self.instance.name, msg))

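    # Poll the migration status on the source node until the hypervisor
    # reports the memory transfer as finished (or failed), giving periodic
    # progress feedback.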
    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(
                 self.source_node_uuid, self.instance)
      msg = result.fail_msg
      ms = result.payload
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        if not msg:
          msg = "hypervisor returned failure"
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (self.instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(
               self.source_node_uuid, self.instance, True, self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self.instance.primary_node = self.target_node_uuid

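    # Distribute the updated instance configuration to all nodes.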
    self.cfg.Update(self.instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(
               self.target_node_uuid, self.instance, migration_info, True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(self.source_node_uuid)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

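    # For externally mirrored templates (RBD/Ext), unmap the disks from the
    # source node after the successful migration.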
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = ExpandCheckDisks(self.instance, self.instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" %
                       self.cfg.GetNodeName(self.source_node_uuid))
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(self.source_node_uuid,
                                                 (disk, self.instance))
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid), msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name,
                        self.cfg.GetNodeName(self.source_node_uuid))

    self.feedback_fn("* done")

847 """Failover an instance.
848
849 The failover is done by shutting it down on its present node and
850 starting it on the secondary.
851
852 """
    primary_node = self.cfg.GetNodeInfo(self.instance.primary_node)

    source_node_uuid = self.instance.primary_node

    if self.instance.disks_active:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(self.instance.disks):
        if not CheckDiskConsistency(self.lu, self.instance, dev,
                                    self.target_node_uuid, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx,
                              self.cfg.GetNodeName(self.target_node_uuid)))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 self.instance.name, self.cfg.GetNodeName(source_node_uuid))

    result = self.rpc.call_instance_shutdown(source_node_uuid, self.instance,
                                             self.shutdown_timeout,
                                             self.lu.op.reason)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           self.instance.name,
                           self.cfg.GetNodeName(source_node_uuid),
                           self.cfg.GetNodeName(source_node_uuid), msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (self.instance.name,
                                  self.cfg.GetNodeName(source_node_uuid), msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not ShutdownInstanceDisks(self.lu, self.instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    self.instance.primary_node = self.target_node_uuid
    self.cfg.Update(self.instance, self.feedback_fn)

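    # Only start the instance on the new primary node if it was marked as
    # running.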
    if self.instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       self.cfg.GetNodeName(self.target_node_uuid))
      logging.info("Starting instance %s on node %s", self.instance.name,
                   self.cfg.GetNodeName(self.target_node_uuid))

      disks_ok, _ = AssembleInstanceDisks(self.lu, self.instance,
                                          ignore_secondaries=True)
      if not disks_ok:
        ShutdownInstanceDisks(self.lu, self.instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       self.cfg.GetNodeName(self.target_node_uuid))
      result = self.rpc.call_instance_start(self.target_node_uuid,
                                            (self.instance, None, None), False,
                                            self.lu.op.reason)
      msg = result.fail_msg
      if msg:
        ShutdownInstanceDisks(self.lu, self.instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (self.instance.name,
                                  self.cfg.GetNodeName(self.target_node_uuid),
                                  msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node_uuid = self.instance.primary_node

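    # With internally mirrored disks (DRBD) the only possible target is the
    # current secondary node; otherwise CheckPrereq has already chosen
    # self.target_node_uuid.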
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node_uuid = self.instance.secondary_nodes[0]

    self.all_node_uuids = [self.source_node_uuid, self.target_node_uuid]
    self.nodes_ip = dict((uuid, node.secondary_ip) for (uuid, node)
                         in self.cfg.GetMultiNodeInfo(self.all_node_uuids))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()