Package ganeti :: Package watcher :: Module state
[hide private]
[frames | no frames]

Source Code for Module ganeti.watcher.state

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30   
 31  """Module keeping state for Ganeti watcher. 
 32   
 33  """ 
 34   
 35  import os 
 36  import time 
 37  import logging 
 38   
 39  from ganeti import utils 
 40  from ganeti import serializer 
 41  from ganeti import errors 
 42   
 43   
 44  # Delete any record that is older than 8 hours; this value is based on 
 45  # the fact that the current retry counter is 5, and watcher runs every 
 46  # 5 minutes, so it takes around half an hour to exceed the retry 
 47  # counter, so 8 hours (16*1/2h) seems like a reasonable reset time 
     # Expressed in seconds: MaintainInstanceList subtracts it from time.time()
     # to find the oldest restart timestamp that is still kept.
 48  RETRY_EXPIRATION = 8 * 3600 
 49   
     # Keys of the records stored in the state data: the restart counter and
     # the time of the last restart attempt live in per-instance records, the
     # boot ID lives in per-node records.
 50  KEY_RESTART_COUNT = "restart_count" 
 51  KEY_RESTART_WHEN = "restart_when" 
 52  KEY_BOOT_ID = "bootid" 
 53   
 54   
def OpenStateFile(path):
  """Opens the state file and acquires a lock on it.

  @type path: string
  @param path: Path to state file
  @rtype: file or None
  @return: File object opened for reading and writing on the locked state
    file, or None if the lock could not be acquired

  """
  # The two-step dance below is necessary to allow both opening existing
  # file read/write and creating if not existing. Vanilla open will truncate
  # an existing file -or- allow creating if not existing.
  statefile_fd = os.open(path, os.O_RDWR | os.O_CREAT)

  # Try to acquire lock on state file. If this fails, another watcher instance
  # might already be running or another program is temporarily blocking the
  # watcher from running.
  try:
    utils.LockFile(statefile_fd)
  except errors.LockError as err:
    logging.error("Can't acquire lock on state file %s: %s", path, err)
    # Nobody else knows about this descriptor, so it has to be released here;
    # otherwise it would stay open for the rest of the process lifetime.
    os.close(statefile_fd)
    return None

  return os.fdopen(statefile_fd, "w+")
77 78
class WatcherState(object):
  """Interface to a state file recording restart attempts.

  The state is kept in C{self._data}, a dictionary with two sections:
  C{"instance"} maps instance names to records holding the restart counter
  and the time of the last restart attempt, and C{"node"} maps node names to
  records holding the last recorded boot ID.

  """
  def __init__(self, statefile):
    """Open, lock, read and parse the file.

    Unreadable, unparsable or structurally invalid content is not fatal: a
    warning is logged and the state starts out empty.

    @type statefile: file
    @param statefile: State file object

    """
    self.statefile = statefile

    try:
      state_data = self.statefile.read()
      if not state_data:
        self._data = {}
      else:
        self._data = serializer.Load(state_data)
    except Exception as msg: # pylint: disable=W0703
      # Ignore errors while loading the file and treat it as empty
      self._data = {}
      logging.warning(("Invalid state file. Using defaults."
                       " Error message: %s"), msg)

    # A file which parses fine but doesn't contain a dictionary (e.g. a list
    # or a plain string) is just as unusable as a file which fails to parse
    if not isinstance(self._data, dict):
      self._data = {}
      logging.warning(("Invalid state file. Using defaults."
                       " Error message: %s"),
                      "top-level value is not a dictionary")

    # All other methods rely on both sections being dictionaries
    for section in ("instance", "node"):
      if not isinstance(self._data.get(section), dict):
        self._data[section] = {}

    # Serialized snapshot used by Save to detect whether anything changed
    self._orig_data = serializer.Dump(self._data)

  def Save(self, filename):
    """Save state to file.

    If the data is unchanged since it was loaded, only the timestamps of
    C{filename} are updated. Otherwise the serialized data is written through
    L{utils.WriteFile} and the resulting descriptor replaces
    C{self.statefile}; the file stays open until L{Close} is called.

    @type filename: string
    @param filename: Path to state file

    """
    assert self.statefile

    serialized_form = serializer.Dump(self._data)
    if self._orig_data == serialized_form:
      logging.debug("Data didn't change, just touching status file")
      os.utime(filename, None)
      return

    # We need to make sure the file is locked before renaming it, otherwise
    # starting ganeti-watcher again at the same time will create a conflict.
    fd = utils.WriteFile(filename,
                         data=serialized_form,
                         prewrite=utils.LockFile, close=False)
    self.statefile = os.fdopen(fd, "w+")

  def Close(self):
    """Unlock configuration file and close it.

    """
    assert self.statefile

    # Files are automatically unlocked when closing them
    self.statefile.close()
    self.statefile = None

  def GetNodeBootID(self, name):
    """Returns the last boot ID of a node or None.

    @type name: string
    @param name: the name of the node to look up

    """
    return self._data["node"].get(name, {}).get(KEY_BOOT_ID)

  def SetNodeBootID(self, name, bootid):
    """Sets the boot ID of a node.

    @type name: string
    @param name: the name of the node
    @param bootid: the boot ID to record; must evaluate to true

    """
    assert bootid

    node_record = self._data["node"].setdefault(name, {})
    node_record[KEY_BOOT_ID] = bootid

  def NumberOfRestartAttempts(self, instance_name):
    """Returns number of previous restart attempts.

    @type instance_name: string
    @param instance_name: the name of the instance to look up
    @return: the recorded restart counter; 0 if the instance has no record or
      its record carries no counter

    """
    record = self._data["instance"].get(instance_name)
    if record is None:
      return 0

    # Records come from a file on disk, hence don't rely on the counter
    # actually being there
    return record.get(KEY_RESTART_COUNT, 0)

  def MaintainInstanceList(self, instances):
    """Perform maintenance on the recorded instances.

    Drops the records of instances which are not listed in C{instances} and
    of instances whose last restart attempt is older than
    L{RETRY_EXPIRATION} seconds.

    @type instances: list of string
    @param instances: the list of currently existing instances

    """
    idict = self._data["instance"]

    # First, delete obsolete instances
    for inst in set(idict).difference(instances):
      logging.debug("Forgetting obsolete instance %s", inst)
      idict.pop(inst, None)

    # Second, delete expired records; a record without a timestamp can't be
    # aged and is therefore treated as expired. The names are collected up
    # front as the dictionary must not change while being iterated over.
    earliest = time.time() - RETRY_EXPIRATION
    expired_instances = [name for (name, record) in idict.items()
                         if record.get(KEY_RESTART_WHEN, 0) < earliest]
    for inst in expired_instances:
      logging.debug("Expiring record for instance %s", inst)
      idict.pop(inst, None)

  def RecordRestartAttempt(self, instance_name):
    """Record a restart attempt.

    Stores the current time as the time of the last attempt and increments
    the restart counter, creating the record if necessary.

    @type instance_name: string
    @param instance_name: the name of the instance being restarted

    """
    inst = self._data["instance"].setdefault(instance_name, {})
    inst[KEY_RESTART_WHEN] = time.time()
    inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1

  def RemoveInstance(self, instance_name):
    """Update state to reflect that a machine is running.

    This method removes the record for a named instance (as we only
    track down instances). Removing an unknown instance is a no-op.

    @type instance_name: string
    @param instance_name: the name of the instance to remove from books

    """
    self._data["instance"].pop(instance_name, None)
223