Package ganeti :: Package watcher :: Module state
[hide private]
[frames | no frames]

Source Code for Module ganeti.watcher.state

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21   
 22  """Module keeping state for Ganeti watcher. 
 23   
 24  """ 
 25   
 26  import os 
 27  import time 
 28  import logging 
 29   
 30  from ganeti import utils 
 31  from ganeti import serializer 
 32  from ganeti import errors 
 33   
 34   
 35  # Delete any record that is older than 8 hours; this value is based on 
 36  # the fact that the current retry counter is 5, and watcher runs every 
 37  # 5 minutes, so it takes around half an hour to exceed the retry 
 38  # counter, so 8 hours (16*1/2h) seems like a reasonable reset time 
 39  RETRY_EXPIRATION = 8 * 3600 
 40   
 41  KEY_RESTART_COUNT = "restart_count" 
 42  KEY_RESTART_WHEN = "restart_when" 
 43  KEY_BOOT_ID = "bootid" 
 44   
 45   
def OpenStateFile(path):
  """Opens the state file and acquires a lock on it.

  @type path: string
  @param path: Path to state file
  @rtype: file or None
  @return: File object wrapping the locked state file, or None if the lock
      could not be acquired (the failure is logged)

  """
  # The two-step dance below is necessary to allow both opening existing
  # file read/write and creating if not existing. Vanilla open will truncate
  # an existing file -or- allow creating if not existing.
  statefile_fd = os.open(path, os.O_RDWR | os.O_CREAT)

  # Try to acquire lock on state file. If this fails, another watcher instance
  # might already be running or another program is temporarily blocking the
  # watcher from running.
  try:
    utils.LockFile(statefile_fd)
  except errors.LockError as err:
    logging.error("Can't acquire lock on state file %s: %s", path, err)
    # Nobody else will ever see this descriptor, so release it here instead
    # of leaking it to the caller's process
    os.close(statefile_fd)
    return None

  return os.fdopen(statefile_fd, "w+")
68 69
class WatcherState(object):
  """Interface to a state file recording restart attempts.

  The state is kept as a dictionary with two sections: C{"instance"} maps
  instance names to a record holding the number of restart attempts and the
  time of the last attempt, C{"node"} maps node names to a record holding
  the last seen boot ID.

  """
  def __init__(self, statefile):
    """Read and parse the state file.

    Unreadable, unparsable or structurally invalid contents are not fatal;
    they are logged and the state starts out empty.

    @type statefile: file
    @param statefile: State file object

    """
    self.statefile = statefile

    try:
      state_data = self.statefile.read()
      if not state_data:
        self._data = {}
      else:
        self._data = serializer.Load(state_data)
    except Exception as msg: # pylint: disable=W0703
      # Ignore errors while loading the file and treat it as empty
      self._data = {}
      logging.warning(("Invalid state file. Using defaults."
                       " Error message: %s"), msg)

    # Well-formed serialized data is not necessarily a dictionary (it could
    # be a list or a plain value); such content is as unusable as a parse
    # error and is handled the same way
    if not isinstance(self._data, dict):
      logging.warning(("Invalid state file. Using defaults."
                       " Expected a dictionary, got %s"),
                      type(self._data).__name__)
      self._data = {}

    # All accessors index into these two sections, hence they must exist
    # and be dictionaries
    for section in ("instance", "node"):
      if not isinstance(self._data.get(section), dict):
        self._data[section] = {}

    # Snapshot used by Save to detect whether anything changed
    self._orig_data = serializer.Dump(self._data)

  def Save(self, filename):
    """Save state to file.

    If the data is unchanged since it was loaded, only the timestamps of
    C{filename} are updated. Otherwise the serialized data is written using
    C{utils.WriteFile} and the descriptor it returns becomes the new state
    file object. The file is not closed here, see L{Close}.

    @type filename: string
    @param filename: Path to state file

    """
    assert self.statefile

    serialized_form = serializer.Dump(self._data)
    if self._orig_data == serialized_form:
      logging.debug("Data didn't change, just touching status file")
      os.utime(filename, None)
      return

    # We need to make sure the file is locked before renaming it, otherwise
    # starting ganeti-watcher again at the same time will create a conflict.
    fd = utils.WriteFile(filename,
                         data=serialized_form,
                         prewrite=utils.LockFile, close=False)
    self.statefile = os.fdopen(fd, "w+")

  def Close(self):
    """Unlock configuration file and close it.

    """
    assert self.statefile

    # Files are automatically unlocked when closing them
    self.statefile.close()
    self.statefile = None

  def GetNodeBootID(self, name):
    """Returns the last boot ID of a node or None.

    @type name: string
    @param name: the name of the node to look up

    """
    ndata = self._data["node"]

    if name in ndata and KEY_BOOT_ID in ndata[name]:
      return ndata[name][KEY_BOOT_ID]
    return None

  def SetNodeBootID(self, name, bootid):
    """Sets the boot ID of a node.

    @type name: string
    @param name: the name of the node
    @param bootid: the boot ID to record; must not be empty

    """
    assert bootid

    ndata = self._data["node"]

    ndata.setdefault(name, {})[KEY_BOOT_ID] = bootid

  def NumberOfRestartAttempts(self, instance_name):
    """Returns number of previous restart attempts.

    @type instance_name: string
    @param instance_name: the name of the instance to look up
    @rtype: int
    @return: the recorded attempt count; 0 if the instance has no record or
        its record carries no counter

    """
    idata = self._data["instance"]

    # Mirror RecordRestartAttempt, which also treats a missing counter as 0
    return idata.get(instance_name, {}).get(KEY_RESTART_COUNT, 0)

  def MaintainInstanceList(self, instances):
    """Perform maintenance on the recorded instances.

    Drops the records of instances that no longer exist and of instances
    whose last restart attempt is older than C{RETRY_EXPIRATION} seconds.

    @type instances: list of string
    @param instances: the list of currently existing instances

    """
    idict = self._data["instance"]

    # First, delete obsolete instances
    obsolete_instances = set(idict).difference(instances)
    for inst in obsolete_instances:
      logging.debug("Forgetting obsolete instance %s", inst)
      idict.pop(inst, None)

    # Second, delete expired records; a record without a timestamp can never
    # age out on its own, so it is treated as expired as well
    earliest = time.time() - RETRY_EXPIRATION
    expired_instances = [i for i in idict
                         if idict[i].get(KEY_RESTART_WHEN, 0) < earliest]
    for inst in expired_instances:
      logging.debug("Expiring record for instance %s", inst)
      idict.pop(inst, None)

  def RecordRestartAttempt(self, instance_name):
    """Record a restart attempt.

    Stores the current time as the time of the last attempt and increments
    the attempt counter, creating the record if necessary.

    @type instance_name: string
    @param instance_name: the name of the instance being restarted

    """
    idata = self._data["instance"]

    inst = idata.setdefault(instance_name, {})
    inst[KEY_RESTART_WHEN] = time.time()
    inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1

  def RemoveInstance(self, instance_name):
    """Update state to reflect that a machine is running.

    This method removes the record for a named instance (as we only
    track down instances). Removing an unknown instance is a no-op.

    @type instance_name: string
    @param instance_name: the name of the instance to remove from books

    """
    idata = self._data["instance"]

    idata.pop(instance_name, None)
214