Package ganeti :: Package watcher :: Module nodemaint
[hide private]
[frames | no frames]

Source Code for Module ganeti.watcher.nodemaint

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21   
 22  """Module doing node maintenance for Ganeti watcher. 
 23   
 24  """ 
 25   
 26  import logging 
 27   
 28  from ganeti import bdev 
 29  from ganeti import constants 
 30  from ganeti import errors 
 31  from ganeti import hypervisor 
 32  from ganeti import netutils 
 33  from ganeti import ssconf 
 34  from ganeti import utils 
 35  from ganeti import confd 
 36   
 37  import ganeti.confd.client # pylint: disable=W0611 
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  """
  def __init__(self):
    """Set up the confd client together with its reply callbacks.

    The store callback keeps the replies, the filter callback wraps it, and
    the confd client is created on top of the filter callback.

    """
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: the "maintain node health" setting as read through ssconf, or
        C{False} if reading it raised a configuration error

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    Hypervisors whose instance listing fails are logged and skipped, so the
    result may be partial.

    @rtype: list
    @return: C{(instance_name, hypervisor_name)} tuples, one per instance
        reported by each hypervisor named in ssconf

    """
    hyp_list = ssconf.SimpleStore().GetHypervisorList()
    results = []
    for hv_name in hyp_list:
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances()
        results.extend([(iname, hv_name) for iname in ilist])
      # Best effort: one broken hypervisor must not hide the others, but
      # SystemExit/KeyboardInterrupt are deliberately left to propagate
      except Exception: # pylint: disable=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @rtype: list
    @return: the keys of the mapping returned by C{bdev.DRBD8.GetUsedDevs}

    """
    return list(bdev.DRBD8.GetUsedDevs().keys())

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    For the offline role, all running instances and then all used DRBD
    minors are shut down; for any other role nothing is done.

    @param role: node role, compared against
        C{constants.CONFD_NODE_ROLE_OFFLINE}

    """
    if role == constants.CONFD_NODE_ROLE_OFFLINE:
      inst_running = cls.GetRunningInstances()
      cls.ShutdownInstances(inst_running)
      drbd_running = cls.GetUsedDRBDs()
      cls.ShutdownDRBD(drbd_running)
    else:
      logging.debug("Not doing anything for role %s", role)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @param inst_running: C{(instance_name, hypervisor_name)} pairs, as
        returned by L{GetRunningInstances}

    """
    names_running = set(entry[0] for entry in inst_running)
    if names_running:
      logging.info("Following instances should not be running,"
                   " shutting them down: %s", utils.CommaJoin(names_running))
      # this dictionary will collapse duplicate instance names (only
      # xen pvm/hvm) into a single key, which is fine
      i2h = dict(inst_running)
      for name in names_running:
        hv_name = i2h[name]
        hv = hypervisor.GetHypervisor(hv_name)
        hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: DRBD minors to shut down, as returned by
        L{GetUsedDRBDs}

    """
    if drbd_running:
      logging.info("Following DRBD minors should not be active,"
                   " shutting them down: %s", utils.CommaJoin(drbd_running))
      for minor in drbd_running:
        # pylint: disable=W0212
        # using the private method as is, pending enhancements to the DRBD
        # interface
        bdev.DRBD8._ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Asks confd for the role of this node (looked up by the system host
    name), waits for the replies and, if they arrived in time and are
    consistent, hands the answer to L{DoMaintenance}. In every other case a
    warning is logged and nothing is done.

    """
    if not constants.ENABLE_CONFD:
      logging.warning("Confd use not enabled, cannot do maintenance")
      return

    my_name = netutils.Hostname.GetSysName()
    req = \
      confd.client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
                                      query=my_name)
    # "async" is a reserved word from Python 3.7 on; passing it through a
    # dict hands the callee the very same keyword argument while keeping
    # this module parseable by newer interpreters
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)
153