1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Module doing node maintenance for Ganeti watcher.
32
33 """
34
35 import logging
36
37 from ganeti import constants
38 from ganeti import errors
39 from ganeti import hypervisor
40 from ganeti import netutils
41 from ganeti import ssconf
42 from ganeti import utils
43 from ganeti import confd
44 from ganeti.storage import drbd
45
46 import ganeti.confd.client
47
48
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  The local node asks confd for its own role. If the answer is the
  "offline" role, every running instance and every used DRBD minor found
  on this node is shut down; for any other role nothing is done.

  """
  def __init__(self):
    """Create the confd client together with its reply callbacks.

    """
    # The client is handed the filter callback, which wraps the store
    # callback; Exec() reads the reply from the store callback and the
    # consistency flag from the filter callback.
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  # NOTE(review): the "def" line of this method was missing from the
  # reviewed text; the name "ShouldRun" was restored -- confirm against
  # the callers.
  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: the value of the "maintain node health" setting as read via
        ssconf; C{False} if reading it raises a configuration error

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    @rtype: list of tuples
    @return: one C{(instance_name, hypervisor_name)} pair per instance
        reported by each hypervisor named in ssconf

    """
    store = ssconf.SimpleStore()
    hyp_list = store.GetHypervisorList()
    hvparams = store.GetHvparams()
    results = []
    for hv_name in hyp_list:
      # Best effort: a hypervisor that cannot be queried is logged and
      # skipped so the remaining hypervisors are still inspected.
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances(hvparams=hvparams)
        results.extend([(iname, hv_name) for iname in ilist])
      except Exception:
        # "Exception" rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @return: the result of C{drbd.DRBD8.GetUsedDevs()}, unmodified

    """
    return drbd.DRBD8.GetUsedDevs()

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    @param role: node role as answered by confd; only
        C{constants.CONFD_NODE_ROLE_OFFLINE} triggers any action

    """
    if role != constants.CONFD_NODE_ROLE_OFFLINE:
      logging.debug("Not doing anything for role %s", role)
      return

    # Instances are stopped first, then the DRBD minors still in use.
    inst_running = cls.GetRunningInstances()
    cls.ShutdownInstances(inst_running)
    drbd_running = cls.GetUsedDRBDs()
    cls.ShutdownDRBD(drbd_running)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @type inst_running: list of tuples
    @param inst_running: C{(instance_name, hypervisor_name)} pairs, as
        returned by L{GetRunningInstances}

    """
    names_running = set(name for (name, _) in inst_running)
    if not names_running:
      return

    logging.info("Following instances should not be running,"
                 " shutting them down: %s", utils.CommaJoin(names_running))

    # dict() keeps a single hypervisor per instance name (the last pair
    # wins), so each name is stopped exactly once.
    i2h = dict(inst_running)
    for name in names_running:
      hv_name = i2h[name]
      hv = hypervisor.GetHypervisor(hv_name)
      hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: the DRBD minors to shut down; an empty value
        makes this a no-op

    """
    if not drbd_running:
      return

    logging.info("Following DRBD minors should not be active,"
                 " shutting them down: %s", utils.CommaJoin(drbd_running))
    for minor in drbd_running:
      drbd.DRBD8.ShutdownAll(minor)

  # NOTE(review): the "def" line of this method was missing from the
  # reviewed text; the name "Exec" was restored -- confirm against the
  # callers.
  def Exec(self):
    """Check node status versus cluster desired state.

    Sends a "node role by name" request for the local host name to confd
    and waits for the reply. On timeout or on inconsistent replies only a
    warning is logged; otherwise the answered role is passed to
    L{DoMaintenance}.

    """
    my_name = netutils.Hostname.GetSysName()
    req = confd.client.ConfdClientRequest(
      type=constants.CONFD_REQ_NODE_ROLE_BYNAME, query=my_name)
    # "async" is a reserved word from Python 3.7 on, so the keyword is
    # passed through dict unpacking; the callee still receives async=False.
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # No timeout, so the store callback is expected to hold a response.
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)
156