1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Module keeping state for Ganeti watcher.
32
33 """
34
35 import os
36 import time
37 import logging
38
39 from ganeti import utils
40 from ganeti import serializer
41 from ganeti import errors
42
43
44
45
46
47
# Maximum age, in seconds (8 hours), of an instance's restart record; older
# records are dropped by MaintainInstanceList.
RETRY_EXPIRATION = 8 * 3600

# Keys used inside the per-instance and per-node records of the state data.
KEY_RESTART_COUNT = "restart_count"  # number of recorded restart attempts
KEY_RESTART_WHEN = "restart_when"  # time.time() of the last recorded attempt
KEY_BOOT_ID = "bootid"  # last boot ID stored for a node
53
54
56 """Opens the state file and acquires a lock on it.
57
58 @type path: string
59 @param path: Path to state file
60
61 """
62
63
64
65 statefile_fd = os.open(path, os.O_RDWR | os.O_CREAT)
66
67
68
69
70 try:
71 utils.LockFile(statefile_fd)
72 except errors.LockError, err:
73 logging.error("Can't acquire lock on state file %s: %s", path, err)
74 return None
75
76 return os.fdopen(statefile_fd, "w+")
77
78
80 """Interface to a state file recording restart attempts.
81
82 """
84 """Open, lock, read and parse the file.
85
86 @type statefile: file
87 @param statefile: State file object
88
89 """
90 self.statefile = statefile
91
92 try:
93 state_data = self.statefile.read()
94 if not state_data:
95 self._data = {}
96 else:
97 self._data = serializer.Load(state_data)
98 except Exception, msg:
99
100 self._data = {}
101 logging.warning(("Invalid state file. Using defaults."
102 " Error message: %s"), msg)
103
104 if "instance" not in self._data:
105 self._data["instance"] = {}
106 if "node" not in self._data:
107 self._data["node"] = {}
108
109 self._orig_data = serializer.Dump(self._data)
110
def Save(self, filename):
  """Persist the in-memory state to C{filename} if it has changed.

  The state is serialized and compared with the snapshot kept in
  C{self._orig_data}. When both are equal only the timestamps of
  C{filename} are refreshed. Otherwise the new contents are handed to
  L{utils.WriteFile} and the descriptor it returns is wrapped in a file
  object which replaces C{self.statefile}.

  @type filename: string
  @param filename: path of the state file to update

  """
  assert self.statefile

  new_contents = serializer.Dump(self._data)

  if not (self._orig_data == new_contents):
    # L{utils.LockFile} is passed as the pre-write hook and the descriptor
    # is kept open (close=False) so it can be stored on this object.
    # NOTE(review): presumably this keeps the newly written file locked for
    # as long as the watcher holds it -- confirm against utils.WriteFile.
    new_fd = utils.WriteFile(filename,
                             data=new_contents,
                             prewrite=utils.LockFile,
                             close=False)
    self.statefile = os.fdopen(new_fd, "w+")
    return

  # Nothing to write; only bump the access/modification times
  logging.debug("Data didn't change, just touching status file")
  os.utime(filename, None)
129
131 """Unlock configuration file and close it.
132
133 """
134 assert self.statefile
135
136
137 self.statefile.close()
138 self.statefile = None
139
141 """Returns the last boot ID of a node or None.
142
143 """
144 ndata = self._data["node"]
145
146 if name in ndata and KEY_BOOT_ID in ndata[name]:
147 return ndata[name][KEY_BOOT_ID]
148 return None
149
151 """Sets the boot ID of a node.
152
153 """
154 assert bootid
155
156 ndata = self._data["node"]
157
158 ndata.setdefault(name, {})[KEY_BOOT_ID] = bootid
159
161 """Returns number of previous restart attempts.
162
163 @type instance_name: string
164 @param instance_name: the name of the instance to look up
165
166 """
167 idata = self._data["instance"]
168
169 if instance_name in idata:
170 return idata[instance_name][KEY_RESTART_COUNT]
171
172 return 0
173
def MaintainInstanceList(self, instances):
  """Prune stale entries from the recorded instance data.

  Two kinds of records are removed: records for names which are not
  part of C{instances}, and records whose last restart timestamp is more
  than L{RETRY_EXPIRATION} seconds in the past.

  @type instances: list of string
  @param instances: the list of currently existing instances

  """
  records = self._data["instance"]

  # First pass: forget everything that is no longer an existing instance
  for name in set(records).difference(instances):
    logging.debug("Forgetting obsolete instance %s", name)
    records.pop(name, None)

  # Second pass: drop records whose last restart attempt is too old. The
  # names are collected up front so the dict is not modified while it is
  # being iterated.
  cutoff = time.time() - RETRY_EXPIRATION
  stale = [name for (name, record) in records.items()
           if record[KEY_RESTART_WHEN] < cutoff]
  for name in stale:
    logging.debug("Expiring record for instance %s", name)
    records.pop(name, None)
196
198 """Record a restart attempt.
199
200 @type instance_name: string
201 @param instance_name: the name of the instance being restarted
202
203 """
204 idata = self._data["instance"]
205
206 inst = idata.setdefault(instance_name, {})
207 inst[KEY_RESTART_WHEN] = time.time()
208 inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1
209
211 """Update state to reflect that a machine is running.
212
213 This method removes the record for a named instance (as we only
214 track down instances).
215
216 @type instance_name: string
217 @param instance_name: the name of the instance to remove from books
218
219 """
220 idata = self._data["instance"]
221
222 idata.pop(instance_name, None)
223