1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module keeping state for Ganeti watcher.
23
24 """
25
26 import os
27 import time
28 import logging
29
30 from ganeti import utils
31 from ganeti import serializer
32 from ganeti import errors
33
34
35
36
37
38
# Maximum age, in seconds, of a recorded restart attempt (8 hours). Instance
# records whose latest restart timestamp is older than this are dropped when
# the instance list is maintained.
RETRY_EXPIRATION = 8 * 60 * 60

# Keys used inside the per-instance and per-node records of the state data
KEY_RESTART_COUNT = "restart_count"  # number of restart attempts recorded
KEY_RESTART_WHEN = "restart_when"  # time.time() of the latest attempt
KEY_BOOT_ID = "bootid"  # boot ID stored for a node
44
45
def OpenStateFile(path):
    """Opens the state file and acquires a lock on it.

    @type path: string
    @param path: Path to state file
    @rtype: file or None
    @return: the state file opened in "w+" mode with the lock held, or None
        if the lock could not be acquired (the error is logged)

    """
    # os.open() with O_RDWR | O_CREAT opens an existing file for reading and
    # writing without truncating it and creates the file if it is missing;
    # the builtin open() offers no mode doing both.
    statefile_fd = os.open(path, os.O_RDWR | os.O_CREAT)

    try:
        utils.LockFile(statefile_fd)
    except errors.LockError as err:
        logging.error("Can't acquire lock on state file %s: %s", path, err)
        # Nobody else knows about this descriptor; close it so that giving
        # up does not leak it
        os.close(statefile_fd)
        return None

    return os.fdopen(statefile_fd, "w+")
68
69
class WatcherState(object):
    """Interface to a state file recording restart attempts.

    The state is a dictionary with two sections: "instance" maps instance
    names to a record holding the number of restart attempts and the time of
    the latest attempt, "node" maps node names to a record holding a boot ID.

    """

    def __init__(self, statefile):
        """Read and parse the content of an already opened state file.

        Problems while reading or parsing are not fatal: a warning is logged
        and an empty state is used instead. The "instance" and "node"
        sections are created if they are missing.

        @type statefile: file
        @param statefile: State file object

        """
        self.statefile = statefile

        try:
            state_data = self.statefile.read()
            if state_data:
                loaded = serializer.Load(state_data)
                # Anything but a dictionary cannot hold the sections set up
                # below, so it is handled like any other invalid content
                if not isinstance(loaded, dict):
                    raise TypeError("Expected a dictionary, got %s" %
                                    type(loaded).__name__)
            else:
                loaded = {}
            self._data = loaded
        except Exception as msg:
            # Deliberately broad: a damaged state file only means starting
            # over with an empty state
            self._data = {}
            logging.warning(("Invalid state file. Using defaults."
                             " Error message: %s"), msg)

        self._data.setdefault("instance", {})
        self._data.setdefault("node", {})

        # Serialized snapshot, compared in Save() to detect unchanged data
        self._orig_data = serializer.Dump(self._data)

    def Save(self, filename):
        """Save state to file.

        If the serialized state equals the one computed when the object was
        created, only the timestamps of the file are updated. Otherwise the
        state is written using L{utils.WriteFile} and the descriptor it
        returns becomes the new state file object; the file is neither
        unlocked nor closed here.

        @type filename: string
        @param filename: Path to state file

        """
        assert self.statefile

        serialized_form = serializer.Dump(self._data)
        if self._orig_data == serialized_form:
            logging.debug("Data didn't change, just touching status file")
            os.utime(filename, None)
            return

        # NOTE(review): prewrite=utils.LockFile presumably locks the new
        # file before the data is written, so that the lock is never given
        # up -- confirm against utils.WriteFile
        fd = utils.WriteFile(filename,
                             data=serialized_form,
                             prewrite=utils.LockFile, close=False)
        self.statefile = os.fdopen(fd, "w+")

    def Unlock(self):
        """Close the state file and forget about it.

        NOTE(review): closing the file is presumably what releases the lock
        taken when it was opened -- confirm against utils.LockFile.

        """
        assert self.statefile

        self.statefile.close()
        self.statefile = None

    def GetNodeBootID(self, name):
        """Returns the last boot ID of a node or None.

        @type name: string
        @param name: the name of the node to look up

        """
        ndata = self._data["node"]

        if name not in ndata:
            return None

        record = ndata[name]
        if KEY_BOOT_ID not in record:
            return None

        return record[KEY_BOOT_ID]

    def SetNodeBootID(self, name, bootid):
        """Sets the boot ID of a node.

        @type name: string
        @param name: the name of the node
        @param bootid: the boot ID to store, must not be empty

        """
        assert bootid

        ndata = self._data["node"]

        ndata.setdefault(name, {})[KEY_BOOT_ID] = bootid

    def NumberOfRestartAttempts(self, instance_name):
        """Returns number of previous restart attempts.

        @type instance_name: string
        @param instance_name: the name of the instance to look up
        @return: the recorded number of attempts, 0 if there is no record
            for the instance or the record holds no counter

        """
        record = self._data["instance"].get(instance_name)
        if record is None:
            return 0

        # A record without counter is read as "no attempts", the same way
        # RecordRestartAttempt treats it
        return record.get(KEY_RESTART_COUNT, 0)

    def MaintainInstanceList(self, instances):
        """Perform maintenance on the recorded instances.

        Records of instances not in C{instances} are removed, as are records
        whose latest restart attempt is older than L{RETRY_EXPIRATION}
        seconds.

        @type instances: list of string
        @param instances: the list of currently existing instances

        """
        idict = self._data["instance"]

        # First, forget instances which do not exist anymore
        obsolete_instances = set(idict).difference(instances)
        for inst in obsolete_instances:
            logging.debug("Forgetting obsolete instance %s", inst)
            idict.pop(inst, None)

        # Second, drop records which are too old. A record without
        # timestamp could never age and is therefore expired right away
        # (default 0) instead of raising KeyError.
        earliest = time.time() - RETRY_EXPIRATION
        expired_instances = [
            name for name, record in idict.items()
            if record.get(KEY_RESTART_WHEN, 0) < earliest
        ]
        for inst in expired_instances:
            logging.debug("Expiring record for instance %s", inst)
            idict.pop(inst, None)

    def RecordRestartAttempt(self, instance_name):
        """Record a restart attempt.

        Stores the current time as time of the latest attempt and increases
        the attempt counter, creating the record if necessary.

        @type instance_name: string
        @param instance_name: the name of the instance being restarted

        """
        idata = self._data["instance"]

        inst = idata.setdefault(instance_name, {})
        inst[KEY_RESTART_WHEN] = time.time()
        inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1

    def RemoveInstance(self, instance_name):
        """Update state to reflect that a machine is running.

        This method removes the record for a named instance (only instances
        which are down are tracked). Unknown names are ignored.

        @type instance_name: string
        @param instance_name: the name of the instance to remove from books

        """
        idata = self._data["instance"]

        idata.pop(instance_name, None)
214